diff --git a/api/server/src/main/java/org/softwareheritage/graph/Graph.java b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
index 49a9a11..cbbd03b 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/Graph.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
@@ -1,97 +1,46 @@
 package org.softwareheritage.graph;
 
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.zip.GZIPInputStream;
-
 import it.unimi.dsi.big.webgraph.BVGraph;
 import it.unimi.dsi.big.webgraph.LazyLongIterator;
-import it.unimi.dsi.fastutil.io.BinIO;
-import it.unimi.dsi.fastutil.longs.LongBigArrays;
-import it.unimi.dsi.fastutil.objects.Object2LongFunction;
-import it.unimi.dsi.io.FastBufferedReader;
-import it.unimi.dsi.io.LineIterator;
-import it.unimi.dsi.lang.MutableString;
+
+import org.softwareheritage.graph.NodeIdMap;
 
 public class Graph {
   BVGraph graph;
   String path;
-  HashMap hashToNode;
-  HashMap nodeToHash;
+  NodeIdMap nodeIdMap;
 
   public Graph(String graphPath) throws Exception {
     this.graph = BVGraph.load(graphPath);
     this.path = graphPath;
-    this.hashToNode = new HashMap();
-    this.nodeToHash = new HashMap();
-    setupNodesMapping();
-  }
-
-  private void setupNodesMapping() throws IOException, ClassNotFoundException {
-    // First mapping: SWH hexhash (strings) <=> WebGraph MPH (longs)
-    HashMap mphToHash = new HashMap();
-    @SuppressWarnings("unchecked")
-    Object2LongFunction mphMap =
-        (Object2LongFunction) BinIO.loadObject(path + ".mph");
-
-    InputStream nodeFile = new GZIPInputStream(new FileInputStream(path + ".nodes.csv.gz"));
-    Collection hashes =
-        new LineIterator(new FastBufferedReader(new InputStreamReader(nodeFile, "UTF-8")))
-            .allLines();
-
-    for (MutableString h : hashes) {
-      String hash = new String(h.toString());
-      long mph = mphMap.getLong(hash);
-      mphToHash.put(mph, hash);
-    }
-
-    // Second mapping: WebGraph MPH (longs) <=> BFS ordering (longs)
-    long n = mphMap.size();
-    long[][] bfsMap = LongBigArrays.newBigArray(n);
-    long loaded = BinIO.loadLongs(path + ".order", bfsMap);
-    if (loaded != n) {
-      throw new IllegalArgumentException("Graph contains " + n + " nodes, but read " + loaded);
-    }
-
-    // Create final mapping: SWH hexhash (strings) <=> BFS ordering (longs)
-    for (long id = 0; id < n; id++) {
-      String hash = mphToHash.get(id);
-      long node = LongBigArrays.get(bfsMap, id);
-
-      hashToNode.put(hash, node);
-      nodeToHash.put(node, hash);
-    }
+    this.nodeIdMap = new NodeIdMap(graphPath);
   }
 
   public String getPath() {
     return path;
   }
 
   public long getNode(String hash) {
-    return hashToNode.get(hash);
+    return nodeIdMap.getNode(hash);
   }
 
   public String getHash(long node) {
-    return nodeToHash.get(node);
+    return nodeIdMap.getHash(node);
   }
 
   public long getNbNodes() {
     return graph.numNodes();
   }
 
   public long getNbEdges() {
     return graph.numArcs();
   }
 
   public LazyLongIterator successors(long node) {
     return graph.successors(node);
   }
 
   public long outdegree(long node) {
     return graph.outdegree(node);
   }
 }
diff --git a/api/server/src/main/java/org/softwareheritage/graph/NodeIdMap.java b/api/server/src/main/java/org/softwareheritage/graph/NodeIdMap.java
new file mode 100644
index 0000000..65884e5
--- /dev/null
+++ b/api/server/src/main/java/org/softwareheritage/graph/NodeIdMap.java
@@ -0,0 +1,30 @@
+package org.softwareheritage.graph;
+
+// TODO: decide on how to do the disk-based node id map
+/**
+ * Placeholder for the hash/node-id mapping that {@code Graph} delegates to.
+ *
+ * <p>Nothing is backed by real data yet: lookups return fixed dummy values.
+ */
+public class NodeIdMap {
+  String graphPath;
+
+  /** Only records {@code graphPath}; nothing is loaded. */
+  public NodeIdMap(String graphPath) {
+    this.graphPath = graphPath;
+  }
+
+  /** Stub: ignores {@code hash} and always returns 42. */
+  public long getNode(String hash) {
+    return 42;
+  }
+
+  /** Stub: ignores {@code node} and always returns {@code null}. */
+  public String getHash(long node) {
+    return null;
+  }
+
+  /** Stub: currently a no-op. */
+  public void dump() {
+  }
+}