diff --git a/api/server/pom.xml b/api/server/pom.xml index 137e9bb..d68bb29 100644 --- a/api/server/pom.xml +++ b/api/server/pom.xml @@ -1,116 +1,121 @@ 4.0.0 org.softwareheritage.graph graph 1.0-ALPHA graph https://www.softwareheritage.org/ UTF-8 1.8 1.8 junit junit 4.11 test io.javalin javalin 2.8.0 org.slf4j slf4j-simple 1.7.26 com.fasterxml.jackson.core jackson-databind 2.9.8 it.unimi.dsi webgraph-big 3.5.0 + + it.unimi.dsi + fastutil + 8.2.2 + maven-clean-plugin 3.1.0 maven-resources-plugin 3.0.2 maven-compiler-plugin 3.8.0 maven-surefire-plugin 2.22.1 maven-jar-plugin 3.0.2 maven-install-plugin 2.5.2 maven-deploy-plugin 2.8.2 maven-site-plugin 3.7.1 maven-project-info-reports-plugin 3.0.0 maven-assembly-plugin org.softwareheritage.graph.App jar-with-dependencies make-assembly package single diff --git a/api/server/src/main/java/org/softwareheritage/graph/App.java b/api/server/src/main/java/org/softwareheritage/graph/App.java index ffad3ad..f93e671 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/App.java +++ b/api/server/src/main/java/org/softwareheritage/graph/App.java @@ -1,18 +1,23 @@ package org.softwareheritage.graph; +import java.nio.file.Path; +import java.nio.file.Paths; + import io.javalin.Javalin; -import org.softwareheritage.graph.GraphAPI; +import org.softwareheritage.graph.Dataset; +import org.softwareheritage.graph.Graph; public class App { public static void main(String[] args) { - GraphAPI graph = new GraphAPI("/path/to/graph"); + Path path = Paths.get(args[0]); /* The graph directory is taken from the first command-line argument. NOTE(review): args[0] is read without checking args.length, so running with no argument throws ArrayIndexOutOfBoundsException. */ + Graph graph = new Graph(path.toString()); /* NOTE(review): the Graph class added by this patch declares no nbNodes(), yet the unchanged /nb_nodes handler below still calls graph.nbNodes() - confirm this compiles. */ Javalin app = Javalin.create().start(5010); app.get("/nb_nodes", ctx -> { ctx.json(graph.nbNodes()); }); } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/Dataset.java b/api/server/src/main/java/org/softwareheritage/graph/Dataset.java new file mode 100644 index 0000000..dda4917 --- /dev/null +++ b/api/server/src/main/java/org/softwareheritage/graph/Dataset.java @@ -0,0 +1,115 @@ +package 
org.softwareheritage.graph; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Collection; +import java.util.HashMap; +import java.util.zip.GZIPInputStream; + +import it.unimi.dsi.big.webgraph.BVGraph; +import it.unimi.dsi.big.webgraph.LazyLongIterator; +import it.unimi.dsi.fastutil.io.BinIO; +import it.unimi.dsi.fastutil.longs.LongBigArrays; +import it.unimi.dsi.fastutil.objects.Object2LongFunction; +import it.unimi.dsi.io.FastBufferedReader; +import it.unimi.dsi.io.LineIterator; +import it.unimi.dsi.lang.MutableString; + +public class Dataset /* One on-disk dataset: a BVGraph loaded from a base path, together with the hash-to-node and node-to-hash lookup maps built by setupNodesMapping(). */ +{ + public enum Name { /* One constant per dataset; the Graph class in this patch lower-cases the constant name to form the dataset base name. */ + DIR_TO_DIR, + DIR_TO_FILE, + DIR_TO_REV, + ORIGIN_TO_SNAPSHOT, + RELEASE_TO_OBJ, + REV_TO_DIR, + REV_TO_REV, + SNAPSHOT_TO_OBJ + } + + BVGraph graph; + String path; + HashMap hashToNode; + HashMap nodeToHash; + + public Dataset(String datasetPath) /* Loads the graph at datasetPath, records the path, then builds the node mappings. NOTE(review): any Exception is only printed as a warning, so graph, path and the maps can be left null or partly filled and later calls on this object may fail. */ + { + try { + this.graph = BVGraph.load(datasetPath); + this.path = datasetPath; + setupNodesMapping(); + } catch (Exception e) { + System.out.println("[WARNING] Could not load dataset " + datasetPath + ": " + e); + } + } + + void setupNodesMapping() throws IOException, ClassNotFoundException /* Fills hashToNode and nodeToHash by combining the ".mph" function, the ".nodes.csv.gz" hash list and the ".order" array, all read from files named after path. */ + { + this.hashToNode = new HashMap(); + this.nodeToHash = new HashMap(); + + // First mapping: SWH hexhash (strings) <=> WebGraph MPH (longs) + HashMap mphToHash = new HashMap(); + Object2LongFunction mphMap = + (Object2LongFunction) BinIO.loadObject(path + ".mph"); + + InputStream nodeFile = new FileInputStream(path + ".nodes.csv.gz"); /* NOTE(review): nodeFile and the readers wrapping it are never closed in this method. */ + Collection hashes = + new LineIterator( + new FastBufferedReader( + new InputStreamReader( + new GZIPInputStream(nodeFile), "UTF-8"))).allLines(); + + for (MutableString h : hashes) + { + String hash = new String(h.toString()); + long mph = mphMap.getLong(hash); + mphToHash.put(mph, hash); + } + + // Second mapping: WebGraph MPH (longs) <=> BFS ordering (longs) + long n = mphMap.size(); + long[][] bfsMap = LongBigArrays.newBigArray(n); + 
long loaded = BinIO.loadLongs(path + ".order", bfsMap); + if (loaded != n) /* Guard: the ".order" file must supply exactly one entry per key of the ".mph" function. */ + throw new IllegalArgumentException("Graph contains " + n + " nodes, but read " + loaded); + + // Create final mapping: SWH hexhash (strings) <=> BFS ordering (longs) + for (long id = 0; id < n; id++) + { + String hash = mphToHash.get(id); + long node = LongBigArrays.get(bfsMap, id); + + hashToNode.put(hash, node); + nodeToHash.put(node, hash); + } + } + + public String getPath() + { + return path; + } + + public long getNode(String hash) /* Looks up hash in hashToNode. NOTE(review): presumably throws NullPointerException on unboxing when hash is not in the map - confirm. */ + { + return hashToNode.get(hash); + } + + public String getHash(long node) /* Looks up node in nodeToHash; a HashMap lookup yields null for an absent key. */ + { + return nodeToHash.get(node); + } + + public LazyLongIterator successors(long node) /* Delegates to the underlying BVGraph. */ + { + return graph.successors(node); + } + + public long outdegree(long node) /* Delegates to the underlying BVGraph. */ + { + return graph.outdegree(node); + } +} diff --git a/api/server/src/main/java/org/softwareheritage/graph/Graph.java b/api/server/src/main/java/org/softwareheritage/graph/Graph.java new file mode 100644 index 0000000..8c02922 --- /dev/null +++ b/api/server/src/main/java/org/softwareheritage/graph/Graph.java @@ -0,0 +1,33 @@ +package org.softwareheritage.graph; + +import java.util.EnumMap; + +import org.softwareheritage.graph.Dataset; + +public class Graph /* Holds one Dataset per Dataset.Name value, all created eagerly by the constructor. */ +{ + EnumMap graph; + String path; + + public Graph(String graphPath) /* Stores graphPath, appending "/" when it is missing, and loads every Dataset.Name through addDataset(). */ + { + this.graph = new EnumMap(Dataset.Name.class); + this.path = graphPath; + if (!path.endsWith("/")) + path += "/"; + + for (Dataset.Name dataset : Dataset.Name.values()) + addDataset(dataset); + } + + public void addDataset(Dataset.Name dataset) /* Creates the Dataset whose base path is path followed by the lower-cased constant name and stores it under that constant. NOTE(review): toLowerCase() without a Locale argument uses the default locale. */ + { + String datasetPath = path + dataset.name().toLowerCase(); + graph.put(dataset, new Dataset(datasetPath)); + } + + public Dataset getDataset(Dataset.Name dataset) /* Returns the stored Dataset for the given constant. */ + { + return graph.get(dataset); + } +} diff --git a/api/server/src/main/java/org/softwareheritage/graph/GraphAPI.java b/api/server/src/main/java/org/softwareheritage/graph/GraphAPI.java deleted file mode 100644 index c16ff19..0000000 --- 
a/api/server/src/main/java/org/softwareheritage/graph/GraphAPI.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.softwareheritage.graph; - -import java.io.IOException; - -import it.unimi.dsi.webgraph.BVGraph; - -public class GraphAPI -{ - String graphName; - BVGraph graph; - - public GraphAPI(String graphName) - { - this.graphName = graphName; - try { - this.graph = BVGraph.load(graphName); - } - catch (IOException e) { - throw new RuntimeException(e); - } - } - - public int nbNodes() - { - return graph.numNodes(); - } -}