diff --git a/java/pom.xml b/java/pom.xml index cd1eece..0b2172e 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -1,269 +1,274 @@ 4.0.0 org.softwareheritage.graph swh-graph ${git.closest.tag.name} swh-graph https://forge.softwareheritage.org/source/swh-graph/ UTF-8 11 ch.qos.logback logback-classic 1.2.3 org.junit.jupiter junit-jupiter-api 5.7.0 test org.junit.jupiter junit-jupiter-engine 5.7.0 test org.hamcrest hamcrest 2.1 test io.javalin javalin 3.0.0 org.slf4j slf4j-simple 1.7.26 com.fasterxml.jackson.core jackson-databind 2.9.8 it.unimi.dsi webgraph-big - 3.6.5 + 3.6.6 it.unimi.dsi fastutil 8.4.4 it.unimi.dsi dsiutils - 2.6.16 + 2.6.17 + + + it.unimi.dsi + sux4j + 5.2.3 it.unimi.dsi law 2.7.1 org.apache.hadoop hadoop-common org.umlgraph umlgraph org.eclipse.jetty.aggregate jetty-all it.unimi.di mg4j it.unimi.di mg4j-big com.martiansoftware jsap 2.1 net.sf.py4j py4j 0.10.8.1 commons-codec commons-codec 1.11 maven-clean-plugin 3.1.0 maven-resources-plugin 3.0.2 maven-compiler-plugin 3.8.0 11 11 -verbose -Xlint:all maven-surefire-plugin 2.22.2 maven-failsafe-plugin 2.22.2 maven-jar-plugin 3.0.2 maven-install-plugin 2.5.2 maven-deploy-plugin 2.8.2 maven-site-plugin 3.7.1 maven-project-info-reports-plugin 3.0.0 maven-assembly-plugin 3.3.0 org.softwareheritage.graph.server.App jar-with-dependencies false make-assembly package single com.diffplug.spotless spotless-maven-plugin 2.4.1 *.md .gitignore true 4 4.16.0 .coding-style.xml pl.project13.maven git-commit-id-plugin 3.0.1 get-the-git-infos revision initialize true true true true v* git.closest.tag.name ^v true org.apache.maven.plugins maven-javadoc-plugin 3.1.1 diff --git a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java index 6f9c00e..27019fa 100644 --- a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java +++ b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java @@ -1,307 +1,307 @@ package org.softwareheritage.graph.maps; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph; import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph; import it.unimi.dsi.fastutil.BigArrays; import it.unimi.dsi.fastutil.Size64; import it.unimi.dsi.fastutil.bytes.ByteArrays; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.io.FastBufferedInputStream; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.fastutil.objects.Object2LongFunction; import it.unimi.dsi.io.OutputBitStream; import it.unimi.dsi.logging.ProgressLogger; import it.unimi.dsi.big.webgraph.BVGraph; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.NodeIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.softwareheritage.graph.labels.DirEntry; import org.softwareheritage.graph.labels.SwhLabel; import java.io.*; import java.lang.reflect.Array; import java.nio.charset.StandardCharsets; import java.util.*; import java.util.concurrent.TimeUnit; public class LabelMapBuilder { final static String SORT_BUFFER_SIZE = "40%"; final static Logger logger = LoggerFactory.getLogger(LabelMapBuilder.class); String graphPath; String debugPath; String tmpDir; ImmutableGraph graph; - Object2LongFunction swhIdMph; + Object2LongFunction swhIdMph; long[][] orderMap; - Object2LongFunction filenameMph; + Object2LongFunction filenameMph; long numFilenames; int totalLabelWidth; public LabelMapBuilder(String graphPath, String debugPath, String tmpDir) { this.graphPath = graphPath; this.debugPath = debugPath; this.tmpDir = tmpDir; } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(LabelMapBuilder.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("debugPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'd', "debug-path", "Store the intermediate representation here for debug"), new FlaggedOption("tmpDir", JSAP.STRING_PARSER, "tmp", JSAP.NOT_REQUIRED, 't', "tmp", "Temporary directory path"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) throws IOException { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String tmpDir = config.getString("tmpDir"); String debugPath = config.getString("debugPath"); LabelMapBuilder builder = new LabelMapBuilder(graphPath, debugPath, tmpDir); builder.computeLabelMap(); } @SuppressWarnings("unchecked") // Suppress warning for Object2LongFunction cast - static Object2LongFunction loadMPH(String mphBasename) throws IOException { - Object2LongFunction mphMap = null; + static Object2LongFunction loadMPH(String mphBasename) throws IOException { + Object2LongFunction mphMap = null; try { - mphMap = (Object2LongFunction) BinIO.loadObject(mphBasename + ".mph"); + mphMap = (Object2LongFunction) BinIO.loadObject(mphBasename + ".mph"); } catch (ClassNotFoundException e) { logger.error("unknown class object in .mph file: " + e); System.exit(2); } return mphMap; } - static long getMPHSize(Object2LongFunction mph) { + static long getMPHSize(Object2LongFunction mph) { return (mph instanceof Size64) ? ((Size64) mph).size64() : mph.size(); } void computeLabelMap() throws IOException { /* * Pass the intermediate representation to sort(1) so that we see the labels in the order they will * appear in the label file. */ logger.info("Loading graph and MPH functions..."); loadGraph(); logger.info("Hashing the input labels..."); ProcessBuilder processBuilder = new ProcessBuilder(); processBuilder.command("sort", "-k1,1n", "-k2,2n", // Numerical sort "--numeric-sort", "--buffer-size", SORT_BUFFER_SIZE, "--temporary-directory", tmpDir); Process sort = processBuilder.start(); BufferedOutputStream sort_stdin = new BufferedOutputStream(sort.getOutputStream()); BufferedInputStream sort_stdout = new BufferedInputStream(sort.getInputStream()); final FastBufferedInputStream fbis = new FastBufferedInputStream(System.in); hashLabelStream(fbis, sort_stdin); sort_stdin.close(); logger.info("Writing label map to file..."); writeLabels(sort_stdout); logger.info("Done"); } void loadGraph() throws IOException { graph = BVGraph.loadMapped(graphPath); swhIdMph = loadMPH(graphPath); orderMap = LongBigArrays.newBigArray(getMPHSize(swhIdMph)); BinIO.loadLongs(graphPath + ".order", orderMap); filenameMph = loadMPH(graphPath + "-labels"); numFilenames = getMPHSize(filenameMph); totalLabelWidth = DirEntry.labelWidth(numFilenames); } void hashLabelStream(FastBufferedInputStream input, BufferedOutputStream output) throws IOException { // Compute intermediate representation and write it on : // "