package org.softwareheritage.graph;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Collection;
import java.util.HashMap;
import java.util.zip.GZIPInputStream;

import it.unimi.dsi.big.webgraph.BVGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.LineIterator;
import it.unimi.dsi.lang.MutableString;

/**
 * A compressed graph loaded with {@link BVGraph}, together with a two-way
 * mapping between SWH hex hashes (strings) and graph node ids (longs).
 *
 * <p>Three companion files are read next to the graph base path:
 * {@code <path>.mph}, {@code <path>.nodes.csv.gz} and {@code <path>.order}.
 */
public class Dataset {
    /** Symbolic names of the datasets known to the application. */
    public enum Name {
        DIR_TO_DIR,
        DIR_TO_FILE,
        DIR_TO_REV,
        ORIGIN_TO_SNAPSHOT,
        RELEASE_TO_OBJ,
        REV_TO_DIR,
        REV_TO_REV,
        SNAPSHOT_TO_OBJ
    }

    BVGraph graph;
    String path;
    /** SWH hex hash to node id (BFS ordering). */
    HashMap<String, Long> hashToNode;
    /** Node id (BFS ordering) to SWH hex hash. */
    HashMap<Long, String> nodeToHash;

    /**
     * Loads the graph stored at {@code datasetPath} and builds the hash/node mappings.
     *
     * <p>Loading is best-effort: any exception is caught and reported as a warning on
     * standard output instead of being propagated. In that case the fields that were
     * not yet assigned stay {@code null}, so later calls on this instance may fail.
     *
     * @param datasetPath base path of the graph files (without extension)
     */
    public Dataset(String datasetPath) {
        try {
            this.graph = BVGraph.load(datasetPath);
            this.path = datasetPath;
            setupNodesMapping();
        } catch (Exception e) {
            System.out.println("[WARNING] Could not load dataset " + datasetPath + ": " + e);
        }
    }

    /**
     * Builds {@link #hashToNode} and {@link #nodeToHash} from the companion files of
     * {@link #path}.
     *
     * @throws IOException if one of the companion files cannot be read
     * @throws ClassNotFoundException if the serialized {@code .mph} object cannot be resolved
     * @throws IllegalArgumentException if the number of longs read from {@code .order}
     *     differs from the size of the {@code .mph} function
     */
    void setupNodesMapping() throws IOException, ClassNotFoundException {
        this.hashToNode = new HashMap<>();
        this.nodeToHash = new HashMap<>();

        // First mapping: SWH hexhash (strings) <=> WebGraph MPH (longs)
        HashMap<Long, String> mphToHash = new HashMap<>();
        // The build compiles with -Xlint:all; the cast of the deserialized object is
        // inherently unchecked, so the warning is suppressed on this declaration only.
        @SuppressWarnings("unchecked")
        Object2LongFunction<String> mphMap =
            (Object2LongFunction<String>) BinIO.loadObject(path + ".mph");

        // try-with-resources: the file handle and the gzip stream are released even
        // when reading fails part-way through.
        try (InputStream nodeFile = new FileInputStream(path + ".nodes.csv.gz");
             InputStream gzipped = new GZIPInputStream(nodeFile)) {
            Collection<MutableString> hashes =
                new LineIterator(
                    new FastBufferedReader(
                        new InputStreamReader(gzipped, "UTF-8"))).allLines();
            for (MutableString h : hashes) {
                String hash = h.toString();
                long mph = mphMap.getLong(hash);
                mphToHash.put(mph, hash);
            }
        }

        // Second mapping: WebGraph MPH (longs) <=> BFS ordering (longs)
        long n = mphMap.size();
        long[][] bfsMap = LongBigArrays.newBigArray(n);
        long loaded = BinIO.loadLongs(path + ".order", bfsMap);
        if (loaded != n) {
            throw new IllegalArgumentException(
                "Graph contains " + n + " nodes, but read " + loaded);
        }

        // Create final mapping: SWH hexhash (strings) <=> BFS ordering (longs)
        for (long id = 0; id < n; id++) {
            String hash = mphToHash.get(id);
            long node = LongBigArrays.get(bfsMap, id);

            hashToNode.put(hash, node);
            nodeToHash.put(node, hash);
        }
    }

    /** @return the base path this dataset was loaded from */
    public String getPath() {
        return path;
    }

    /**
     * Looks up the node id of a hash.
     *
     * @param hash SWH hex hash
     * @return the node id mapped to {@code hash}
     * @throws NullPointerException if {@code hash} has no mapping (the missing entry
     *     is {@code null} and cannot be unboxed to {@code long})
     */
    public long getNode(String hash) {
        return hashToNode.get(hash);
    }

    /**
     * Looks up the hash of a node id.
     *
     * @param node node id
     * @return the hash mapped to {@code node}, or {@code null} if there is none
     */
    public String getHash(long node) {
        return nodeToHash.get(node);
    }

    /** @return the number of nodes reported by the underlying graph */
    public long getNbNodes() {
        return graph.numNodes();
    }

    /** @return the number of arcs reported by the underlying graph */
    public long getNbEdges() {
        return graph.numArcs();
    }

    /**
     * @param node node id
     * @return a lazy iterator over the successors of {@code node}
     */
    public LazyLongIterator successors(long node) {
        return graph.successors(node);
    }

    /**
     * @param node node id
     * @return the outdegree of {@code node}
     */
    public long outdegree(long node) {
        return graph.outdegree(node);
    }
}