diff --git a/java/README.md b/java/README.md index 623e98e..8d7edf7 100644 --- a/java/README.md +++ b/java/README.md @@ -1,51 +1,51 @@ Graph service - Java backend ============================ Server side Java RPC API. Build ----- ```bash $ mvn compile assembly:single ``` Start RPC API ------------- ```bash $ java -cp target/swh-graph-*.jar \ org.softwareheritage.graph.server.App \ ``` Default port is 5009 (use the `--port` option to change port number). If you need timings metadata send back to the client in addition to the result, use the `--timings` flag. Tests ----- Unit tests rely on test data that are already available in the Git repository (under `src/swh/graph/tests/dataset/`). You generally only need to run them using Maven: ```bash $ mvn test ``` In case you want to regenerate the test data: ```bash # Graph compression $ cd src/swh/graph/tests/dataset $ ./generate_graph.sh $ cd ../../../.. $ mvn compile assembly:single # Dump mapping files $ java -cp target/swh-graph-*.jar \ - org.softwareheritage.graph.maps.NodeMapBuilder \ + org.softwareheritage.graph.compress.NodeMapBuilder \ src/swh/graph/tests/dataset/example.nodes.csv.gz \ src/swh/graph/tests/dataset/output/example ``` diff --git a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java b/java/src/main/java/org/softwareheritage/graph/compress/LabelMapBuilder.java similarity index 99% rename from java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java rename to java/src/main/java/org/softwareheritage/graph/compress/LabelMapBuilder.java index 0ace4bd..8b824e1 100644 --- a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java +++ b/java/src/main/java/org/softwareheritage/graph/compress/LabelMapBuilder.java @@ -1,527 +1,528 @@ -package org.softwareheritage.graph.maps; +package org.softwareheritage.graph.compress; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph; import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph; import it.unimi.dsi.fastutil.Size64; import it.unimi.dsi.fastutil.bytes.ByteArrays; import it.unimi.dsi.fastutil.io.FastBufferedInputStream; import it.unimi.dsi.fastutil.objects.Object2LongFunction; import it.unimi.dsi.io.OutputBitStream; import it.unimi.dsi.logging.ProgressLogger; import it.unimi.dsi.big.webgraph.BVGraph; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.NodeIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.softwareheritage.graph.labels.DirEntry; import org.softwareheritage.graph.labels.SwhLabel; +import org.softwareheritage.graph.maps.NodeIdMap; import java.io.*; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.*; import java.util.concurrent.TimeUnit; public class LabelMapBuilder { final static String SORT_BUFFER_SIZE = "40%"; final static Logger logger = LoggerFactory.getLogger(LabelMapBuilder.class); String graphPath; String outputGraphPath; String debugPath; String tmpDir; ImmutableGraph graph; long numNodes; long numArcs; NodeIdMap nodeIdMap; Object2LongFunction filenameMph; long numFilenames; int totalLabelWidth; public LabelMapBuilder(String graphPath, String debugPath, String outputGraphPath, String tmpDir) throws IOException { this.graphPath = graphPath; if (outputGraphPath == null) { this.outputGraphPath = graphPath; } else { this.outputGraphPath = outputGraphPath; } this.debugPath = debugPath; this.tmpDir = tmpDir; // Load the graph in offline mode to retrieve the number of nodes/edges, // then immediately destroy it. XXX: not even needed? // ImmutableGraph graphOffline = BVGraph.loadMapped(graphPath); graph = BVGraph.loadMapped(graphPath); numArcs = graph.numArcs(); numNodes = graph.numNodes(); nodeIdMap = new NodeIdMap(graphPath); filenameMph = NodeIdMap.loadMph(graphPath + "-labels.mph"); numFilenames = getMPHSize(filenameMph); totalLabelWidth = DirEntry.labelWidth(numFilenames); } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(LabelMapBuilder.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("debugPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'd', "debug-path", "Store the intermediate representation here for debug"), new FlaggedOption("outputGraphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o', "output-graph", "Basename of the output graph, same as --graph if not specified"), new FlaggedOption("tmpDir", JSAP.STRING_PARSER, "tmp", JSAP.NOT_REQUIRED, 't', "tmp", "Temporary directory path"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) throws IOException, InterruptedException { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String outputGraphPath = config.getString("outputGraphPath"); String tmpDir = config.getString("tmpDir"); String debugPath = config.getString("debugPath"); LabelMapBuilder builder = new LabelMapBuilder(graphPath, debugPath, outputGraphPath, tmpDir); logger.info("Loading graph and MPH functions..."); builder.computeLabelMap(); } static long getMPHSize(Object2LongFunction mph) { return (mph instanceof Size64) ? ((Size64) mph).size64() : mph.size(); } void computeLabelMap() throws IOException, InterruptedException { this.loadGraph(); // this.computeLabelMapSort(); this.computeLabelMapBsort(); } void computeLabelMapSort() throws IOException { // Pass the intermediate representation to sort(1) so that we see the labels in the order they will // appear in the label file. ProcessBuilder processBuilder = new ProcessBuilder(); processBuilder.command("sort", "-k1,1n", "-k2,2n", // Numerical sort "--numeric-sort", "--buffer-size", SORT_BUFFER_SIZE, "--temporary-directory", tmpDir); Process sort = processBuilder.start(); BufferedOutputStream sort_stdin = new BufferedOutputStream(sort.getOutputStream()); // BufferedInputStream sort_stdout = new BufferedInputStream(sort.getInputStream()); FastBufferedInputStream sort_stdout = new FastBufferedInputStream(sort.getInputStream()); final FastBufferedInputStream fbis = new FastBufferedInputStream(System.in); hashLabelStream(fbis, new EdgeLabelLineWriter() { @Override public void writeLine(long src, long dst, long filenameId, int permission) throws IOException { sort_stdin.write((src + "\t" + dst + "\t" + filenameId + "\t" + permission + "\n") .getBytes(StandardCharsets.US_ASCII)); } }); sort_stdin.close(); EdgeLabelLineIterator mapLines = new TextualEdgeLabelLineIterator(sort_stdout); writeLabels(mapLines); logger.info("Done"); } void computeLabelMapBsort() throws IOException, InterruptedException { // Pass the intermediate representation to bsort(1) so that we see the labels in the order they will // appear in the label file. String tmpFile = tmpDir + "/labelsToSort.bin"; final FastBufferedInputStream fbis = new FastBufferedInputStream(System.in); final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tmpFile))); // Number of bytes to represent a node. final int nodeBytes = (Long.SIZE - Long.numberOfLeadingZeros(graph.numNodes())) / 8 + 1; ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES); logger.info("Writing labels to a packed binary files (node bytes: {})", nodeBytes); hashLabelStream(fbis, new EdgeLabelLineWriter() { @Override public void writeLine(long src, long dst, long filenameId, int permission) throws IOException { buffer.putLong(0, src); out.write(buffer.array(), Long.BYTES - nodeBytes, nodeBytes); buffer.putLong(0, dst); out.write(buffer.array(), Long.BYTES - nodeBytes, nodeBytes); out.writeLong(filenameId); out.writeInt(permission); } }); ProcessBuilder processBuilder = new ProcessBuilder(); processBuilder.command("/home/seirl/bsort/src/bsort", "-v", "-r", String.valueOf(nodeBytes * 2 + Long.BYTES + Integer.BYTES), "-k", String.valueOf(nodeBytes * 2), tmpFile); Process sort = processBuilder.start(); sort.waitFor(); final DataInputStream sortedLabels = new DataInputStream(new BufferedInputStream(new FileInputStream(tmpFile))); BinaryEdgeLabelLineIterator mapLines = new BinaryEdgeLabelLineIterator(sortedLabels, nodeBytes); writeLabels(mapLines); logger.info("Done"); } void loadGraph() throws IOException { } void hashLabelStream(FastBufferedInputStream input, EdgeLabelLineWriter writer) throws IOException { // Compute intermediate representation and write it on : // "