diff --git a/java/pom.xml b/java/pom.xml
index cd1eece..0b2172e 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -1,269 +1,274 @@
4.0.0
org.softwareheritage.graph
swh-graph
${git.closest.tag.name}
swh-graph
https://forge.softwareheritage.org/source/swh-graph/
UTF-8
11
ch.qos.logback
logback-classic
1.2.3
org.junit.jupiter
junit-jupiter-api
5.7.0
test
org.junit.jupiter
junit-jupiter-engine
5.7.0
test
org.hamcrest
hamcrest
2.1
test
io.javalin
javalin
3.0.0
org.slf4j
slf4j-simple
1.7.26
com.fasterxml.jackson.core
jackson-databind
2.9.8
it.unimi.dsi
webgraph-big
- 3.6.5
+ 3.6.6
it.unimi.dsi
fastutil
8.4.4
it.unimi.dsi
dsiutils
- 2.6.16
+ 2.6.17
+
+
+ it.unimi.dsi
+ sux4j
+ 5.2.3
it.unimi.dsi
law
2.7.1
org.apache.hadoop
hadoop-common
org.umlgraph
umlgraph
org.eclipse.jetty.aggregate
jetty-all
it.unimi.di
mg4j
it.unimi.di
mg4j-big
com.martiansoftware
jsap
2.1
net.sf.py4j
py4j
0.10.8.1
commons-codec
commons-codec
1.11
maven-clean-plugin
3.1.0
maven-resources-plugin
3.0.2
maven-compiler-plugin
3.8.0
11
11
-verbose
-Xlint:all
maven-surefire-plugin
2.22.2
maven-failsafe-plugin
2.22.2
maven-jar-plugin
3.0.2
maven-install-plugin
2.5.2
maven-deploy-plugin
2.8.2
maven-site-plugin
3.7.1
maven-project-info-reports-plugin
3.0.0
maven-assembly-plugin
3.3.0
org.softwareheritage.graph.server.App
jar-with-dependencies
false
make-assembly
package
single
com.diffplug.spotless
spotless-maven-plugin
2.4.1
*.md
.gitignore
true
4
4.16.0
.coding-style.xml
pl.project13.maven
git-commit-id-plugin
3.0.1
get-the-git-infos
revision
initialize
true
true
true
true
v*
git.closest.tag.name
^v
true
org.apache.maven.plugins
maven-javadoc-plugin
3.1.1
diff --git a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
index 6f9c00e..27019fa 100644
--- a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
+++ b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
@@ -1,307 +1,307 @@
package org.softwareheritage.graph.maps;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph;
import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph;
import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.bytes.ByteArrays;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.io.OutputBitStream;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.big.webgraph.BVGraph;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.NodeIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.softwareheritage.graph.labels.DirEntry;
import org.softwareheritage.graph.labels.SwhLabel;
import java.io.*;
import java.lang.reflect.Array;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.TimeUnit;
public class LabelMapBuilder {
// Value handed to sort(1) through its --buffer-size option in computeLabelMap().
final static String SORT_BUFFER_SIZE = "40%";
// Class-wide SLF4J logger.
final static Logger logger = LoggerFactory.getLogger(LabelMapBuilder.class);
// Basename of the compressed graph; the ".mph", ".order" and "-labels.mph" files are derived from it.
String graphPath;
// Optional location where the intermediate representation is stored for debugging.
String debugPath;
// Directory handed to sort(1) through its --temporary-directory option.
String tmpDir;
// Graph loaded by loadGraph() via BVGraph.loadMapped(graphPath).
ImmutableGraph graph;
// Function loaded by loadGraph() from graphPath + ".mph".
- Object2LongFunction swhIdMph;
+ Object2LongFunction swhIdMph;
// Big array of longs filled by loadGraph() from graphPath + ".order"; sized with getMPHSize(swhIdMph).
long[][] orderMap;
// Function loaded by loadGraph() from graphPath + "-labels.mph".
- Object2LongFunction filenameMph;
+ Object2LongFunction filenameMph;
// Size of filenameMph as reported by getMPHSize().
long numFilenames;
// Result of DirEntry.labelWidth(numFilenames), computed in loadGraph().
int totalLabelWidth;
/**
 * Creates a builder; nothing is loaded until {@link #computeLabelMap()} is called.
 *
 * @param graphPath basename of the compressed graph
 * @param debugPath where to store the intermediate representation for debugging
 * @param tmpDir temporary directory path
 */
public LabelMapBuilder(String graphPath, String debugPath, String tmpDir) {
    // Plain field initialisation; the three assignments are independent of each other.
    this.tmpDir = tmpDir;
    this.debugPath = debugPath;
    this.graphPath = graphPath;
}
/**
 * Parses the command line of this tool.
 *
 * <p>
 * If the parser printed a message (for instance a usage error), the JVM is terminated with exit
 * status 1.
 *
 * @param args raw command-line arguments
 * @return the parsed configuration, never {@code null}
 * @throws IllegalStateException if the option definitions themselves are rejected by JSAP
 */
private static JSAPResult parse_args(String[] args) {
    try {
        SimpleJSAP jsap = new SimpleJSAP(LabelMapBuilder.class.getName(), "",
                new Parameter[]{
                        new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
                                "graph", "Basename of the compressed graph"),
                        new FlaggedOption("debugPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'd',
                                "debug-path", "Store the intermediate representation here for debug"),
                        new FlaggedOption("tmpDir", JSAP.STRING_PARSER, "tmp", JSAP.NOT_REQUIRED, 't', "tmp",
                                "Temporary directory path"),});

        JSAPResult config = jsap.parse(args);
        if (jsap.messagePrinted()) {
            System.exit(1);
        }
        return config;
    } catch (JSAPException e) {
        // Previously the exception was printed and null was returned, which only deferred the
        // failure to an unrelated NullPointerException in the caller. Fail here, keeping the cause.
        throw new IllegalStateException("Could not set up the command-line parser", e);
    }
}
/**
 * Command-line entry point: parses the arguments, then builds the label map.
 *
 * @param args raw command-line arguments
 * @throws IOException propagated from {@link #computeLabelMap()}
 */
public static void main(String[] args) throws IOException {
    final JSAPResult parsed = parse_args(args);

    final String graphBasename = parsed.getString("graphPath");
    final String temporaryDir = parsed.getString("tmpDir");
    final String debugLocation = parsed.getString("debugPath");

    new LabelMapBuilder(graphBasename, debugLocation, temporaryDir).computeLabelMap();
}
/**
 * Loads the serialized function stored in the file {@code mphBasename + ".mph"}.
 *
 * <p>
 * The object is read with {@code BinIO.loadObject} and cast to {@code Object2LongFunction}. If the
 * file refers to a class that cannot be found, the error is logged and the JVM is terminated with
 * exit status 2.
 *
 * @param mphBasename path of the file to load, without its ".mph" extension
 * @return the deserialized function
 * @throws IOException declared by this method; {@code BinIO.loadObject} is the only I/O call in it
 */
@SuppressWarnings("unchecked") // Suppress warning for Object2LongFunction cast
- static Object2LongFunction loadMPH(String mphBasename) throws IOException {
- Object2LongFunction mphMap = null;
+ static Object2LongFunction loadMPH(String mphBasename) throws IOException {
+ Object2LongFunction mphMap = null;
try {
- mphMap = (Object2LongFunction) BinIO.loadObject(mphBasename + ".mph");
+ mphMap = (Object2LongFunction) BinIO.loadObject(mphBasename + ".mph");
} catch (ClassNotFoundException e) {
logger.error("unknown class object in .mph file: " + e);
// Unrecoverable: abort the whole program.
System.exit(2);
}
return mphMap;
}
/**
 * Returns the size of the given function as a {@code long}.
 *
 * @param mph the function to measure
 * @return {@code size64()} when {@code mph} implements {@code Size64}, otherwise {@code size()}
 */
- static long getMPHSize(Object2LongFunction mph) {
+ static long getMPHSize(Object2LongFunction mph) {
// Prefer the 64-bit size when the implementation offers one.
return (mph instanceof Size64) ? ((Size64) mph).size64() : mph.size();
}
/**
 * Builds the label map: loads the graph and MPH functions, pipes the hashed labels read from
 * standard input through an external sort(1) process, and writes the labels from the sorted
 * output.
 *
 * @throws IOException if an I/O error occurs, if sort(1) exits with a non-zero status, or if the
 *             thread is interrupted while waiting for sort(1) to terminate
 */
void computeLabelMap() throws IOException {
    logger.info("Loading graph and MPH functions...");
    loadGraph();

    logger.info("Hashing the input labels...");
    /*
     * Pass the intermediate representation to sort(1) so that we see the labels in the order they will
     * appear in the label file.
     */
    ProcessBuilder processBuilder = new ProcessBuilder();
    processBuilder.command("sort", "-k1,1n", "-k2,2n", // Numerical sort
            "--numeric-sort", "--buffer-size", SORT_BUFFER_SIZE, "--temporary-directory", tmpDir);
    // Nobody reads the child's stderr pipe; inherit it so that diagnostics are visible and a full
    // pipe can never stall the child.
    processBuilder.redirectError(ProcessBuilder.Redirect.INHERIT);
    Process sort = processBuilder.start();

    try {
        // Closing sort's stdin is what signals end of input and lets it start emitting output.
        try (BufferedOutputStream sort_stdin = new BufferedOutputStream(sort.getOutputStream())) {
            final FastBufferedInputStream fbis = new FastBufferedInputStream(System.in);
            hashLabelStream(fbis, sort_stdin);
        }

        logger.info("Writing label map to file...");
        try (BufferedInputStream sort_stdout = new BufferedInputStream(sort.getInputStream())) {
            writeLabels(sort_stdout);
        }

        // A failed sort would otherwise silently yield a truncated or empty label map.
        int exitCode = sort.waitFor();
        if (exitCode != 0) {
            throw new IOException("sort exited with status " + exitCode);
        }
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new IOException("Interrupted while waiting for sort to terminate", e);
    } finally {
        // No effect when the process has already terminated; avoids leaking it on error paths.
        sort.destroy();
    }

    logger.info("Done");
}
/**
 * Loads everything the builder needs from files derived from {@code graphPath}: the graph itself,
 * the function stored in {@code graphPath + ".mph"}, the big array stored in
 * {@code graphPath + ".order"}, and the function stored in {@code graphPath + "-labels.mph"}. Also
 * derives the number of filenames and the label width from the latter.
 *
 * @throws IOException if one of the underlying loads fails
 */
void loadGraph() throws IOException {
    this.graph = BVGraph.loadMapped(this.graphPath);

    this.swhIdMph = loadMPH(this.graphPath);
    // The order array is allocated with one slot per entry of swhIdMph before being filled.
    final long swhIdMphSize = getMPHSize(this.swhIdMph);
    this.orderMap = LongBigArrays.newBigArray(swhIdMphSize);
    BinIO.loadLongs(this.graphPath + ".order", this.orderMap);

    final String labelsBasename = this.graphPath + "-labels";
    this.filenameMph = loadMPH(labelsBasename);
    this.numFilenames = getMPHSize(this.filenameMph);
    this.totalLabelWidth = DirEntry.labelWidth(this.numFilenames);
}
void hashLabelStream(FastBufferedInputStream input, BufferedOutputStream output) throws IOException {
// Compute intermediate representation and write it on :
// "