diff --git a/api/server/src/main/java/org/softwareheritage/graph/App.java b/api/server/src/main/java/org/softwareheritage/graph/App.java index 4b4b75b..5fe688e 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/App.java +++ b/api/server/src/main/java/org/softwareheritage/graph/App.java @@ -1,41 +1,38 @@ package org.softwareheritage.graph; import java.io.FileNotFoundException; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import io.javalin.Javalin; -import org.softwareheritage.graph.Dataset; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.algo.Stats; public class App { - public static void main(String[] args) + public static void main(String[] args) throws IOException, Exception { Path path = Paths.get(args[0]); Graph graph = new Graph(path.toString()); - Stats stats = new Stats(graph); Javalin app = Javalin.create().start(5010); - app.get("/stats/:dataset", ctx -> { + app.get("/stats/", ctx -> { try { - String dataset = ctx.pathParam("dataset").toUpperCase(); - ctx.json(stats.getStats(Dataset.Name.valueOf(dataset))); - } catch (IllegalArgumentException | IOException e) { + ctx.json(stats); + } catch (IllegalArgumentException e) { ctx.status(404); } catch (Exception e) { ctx.status(400); ctx.result(e.toString()); } }); app.error(404, ctx -> { ctx.result("Not found"); }); } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/Dataset.java b/api/server/src/main/java/org/softwareheritage/graph/Dataset.java deleted file mode 100644 index cdf0e14..0000000 --- a/api/server/src/main/java/org/softwareheritage/graph/Dataset.java +++ /dev/null @@ -1,126 +0,0 @@ -package org.softwareheritage.graph; - -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Collection; -import java.util.HashMap; -import java.util.zip.GZIPInputStream; - -import it.unimi.dsi.big.webgraph.BVGraph; -import it.unimi.dsi.big.webgraph.LazyLongIterator; -import it.unimi.dsi.fastutil.io.BinIO; -import it.unimi.dsi.fastutil.longs.LongBigArrays; -import it.unimi.dsi.fastutil.objects.Object2LongFunction; -import it.unimi.dsi.io.FastBufferedReader; -import it.unimi.dsi.io.LineIterator; -import it.unimi.dsi.lang.MutableString; - -public class Dataset -{ - public enum Name { - DIR_TO_DIR, - DIR_TO_FILE, - DIR_TO_REV, - ORIGIN_TO_SNAPSHOT, - RELEASE_TO_OBJ, - REV_TO_DIR, - REV_TO_REV, - SNAPSHOT_TO_OBJ - } - - BVGraph graph; - String path; - HashMap hashToNode; - HashMap nodeToHash; - - public Dataset(String datasetPath) - { - try { - this.graph = BVGraph.load(datasetPath); - this.path = datasetPath; - setupNodesMapping(); - } catch (Exception e) { - System.out.println("[WARNING] Could not load dataset " + datasetPath + ": " + e); - } - } - - void setupNodesMapping() throws IOException, ClassNotFoundException - { - this.hashToNode = new HashMap(); - this.nodeToHash = new HashMap(); - - // First mapping: SWH hexhash (strings) <=> WebGraph MPH (longs) - HashMap mphToHash = new HashMap(); - @SuppressWarnings("unchecked") - Object2LongFunction mphMap = - (Object2LongFunction) BinIO.loadObject(path + ".mph"); - - InputStream nodeFile = new FileInputStream(path + ".nodes.csv.gz"); - Collection hashes = - new LineIterator( - new FastBufferedReader( - new InputStreamReader( - new GZIPInputStream(nodeFile), "UTF-8"))).allLines(); - - for (MutableString h : hashes) - { - String hash = new String(h.toString()); - long mph = mphMap.getLong(hash); - mphToHash.put(mph, hash); - } - - // Second mapping: WebGraph MPH (longs) <=> BFS ordering (longs) - long n = mphMap.size(); - long[][] bfsMap = LongBigArrays.newBigArray(n); - long loaded = BinIO.loadLongs(path + ".order", bfsMap); - if (loaded != n) - throw new IllegalArgumentException("Graph contains " + n + " nodes, but read " + loaded); - - // Create final mapping: SWH hexhash (strings) <=> BFS ordering (longs) - for (long id = 0; id < n; id++) - { - String hash = mphToHash.get(id); - long node = LongBigArrays.get(bfsMap, id); - - hashToNode.put(hash, node); - nodeToHash.put(node, hash); - } - } - - public String getPath() - { - return path; - } - - public long getNode(String hash) - { - return hashToNode.get(hash); - } - - public String getHash(long node) - { - return nodeToHash.get(node); - } - - public long getNbNodes() - { - return graph.numNodes(); - } - - public long getNbEdges() - { - return graph.numArcs(); - } - - public LazyLongIterator successors(long node) - { - return graph.successors(node); - } - - public long outdegree(long node) - { - return graph.outdegree(node); - } -} diff --git a/api/server/src/main/java/org/softwareheritage/graph/Graph.java b/api/server/src/main/java/org/softwareheritage/graph/Graph.java index 8c02922..fdf2b47 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/Graph.java +++ b/api/server/src/main/java/org/softwareheritage/graph/Graph.java @@ -1,33 +1,110 @@ package org.softwareheritage.graph; -import java.util.EnumMap; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Collection; +import java.util.HashMap; +import java.util.zip.GZIPInputStream; -import org.softwareheritage.graph.Dataset; +import it.unimi.dsi.big.webgraph.BVGraph; +import it.unimi.dsi.big.webgraph.LazyLongIterator; +import it.unimi.dsi.fastutil.io.BinIO; +import it.unimi.dsi.fastutil.longs.LongBigArrays; +import it.unimi.dsi.fastutil.objects.Object2LongFunction; +import it.unimi.dsi.io.FastBufferedReader; +import it.unimi.dsi.io.LineIterator; +import it.unimi.dsi.lang.MutableString; public class Graph { - EnumMap graph; + BVGraph graph; String path; + HashMap hashToNode; + HashMap nodeToHash; - public Graph(String graphPath) + public Graph(String graphPath) throws Exception { - this.graph = new EnumMap(Dataset.Name.class); + this.graph = BVGraph.load(graphPath); this.path = graphPath; - if (!path.endsWith("/")) - path += "/"; + this.hashToNode = new HashMap(); + this.nodeToHash = new HashMap(); + setupNodesMapping(); + } + + private void setupNodesMapping() throws IOException, ClassNotFoundException + { + // First mapping: SWH hexhash (strings) <=> WebGraph MPH (longs) + HashMap mphToHash = new HashMap(); + @SuppressWarnings("unchecked") + Object2LongFunction mphMap = + (Object2LongFunction) BinIO.loadObject(path + ".mph"); + + InputStream nodeFile = new FileInputStream(path + ".nodes.csv.gz"); + Collection hashes = + new LineIterator( + new FastBufferedReader( + new InputStreamReader( + new GZIPInputStream(nodeFile), "UTF-8"))).allLines(); + + for (MutableString h : hashes) + { + String hash = new String(h.toString()); + long mph = mphMap.getLong(hash); + mphToHash.put(mph, hash); + } + + // Second mapping: WebGraph MPH (longs) <=> BFS ordering (longs) + long n = mphMap.size(); + long[][] bfsMap = LongBigArrays.newBigArray(n); + long loaded = BinIO.loadLongs(path + ".order", bfsMap); + if (loaded != n) + throw new IllegalArgumentException("Graph contains " + n + " nodes, but read " + loaded); + + // Create final mapping: SWH hexhash (strings) <=> BFS ordering (longs) + for (long id = 0; id < n; id++) + { + String hash = mphToHash.get(id); + long node = LongBigArrays.get(bfsMap, id); + + hashToNode.put(hash, node); + nodeToHash.put(node, hash); + } + } + + public String getPath() + { + return path; + } + + public long getNode(String hash) + { + return hashToNode.get(hash); + } - for (Dataset.Name dataset : Dataset.Name.values()) - addDataset(dataset); + public String getHash(long node) + { + return nodeToHash.get(node); + } + + public long getNbNodes() + { + return graph.numNodes(); + } + + public long getNbEdges() + { + return graph.numArcs(); } - public void addDataset(Dataset.Name dataset) + public LazyLongIterator successors(long node) { - String datasetPath = path + dataset.name().toLowerCase(); - graph.put(dataset, new Dataset(datasetPath)); + return graph.successors(node); } - public Dataset getDataset(Dataset.Name dataset) + public long outdegree(long node) { - return graph.get(dataset); + return graph.outdegree(node); } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java index cbbf994..59b0061 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java +++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java @@ -1,69 +1,55 @@ package org.softwareheritage.graph.algo; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.HashMap; import java.util.List; -import org.softwareheritage.graph.Dataset; import org.softwareheritage.graph.Graph; public class Stats { - class DatasetStat { - public long nbNodes; - public long nbEdges; - public double compressionRatio; - public double bitsPerNode; - public double bitsPerEdge; - public double avgLocality; - public long minIndegree; - public long maxIndegree; - public double avgIndegree; - public long minOutdegree; - public long maxOutdegree; - public double avgOutdegree; - } - - Graph graph; - - public Stats(Graph graph) + public long nbNodes; + public long nbEdges; + public double compressionRatio; + public double bitsPerNode; + public double bitsPerEdge; + public double avgLocality; + public long minIndegree; + public long maxIndegree; + public double avgIndegree; + public long minOutdegree; + public long maxOutdegree; + public double avgOutdegree; + + public Stats(Graph graph) throws IOException { - this.graph = graph; - } - - public DatasetStat getStats(Dataset.Name datasetName) throws IOException - { - Dataset dataset = graph.getDataset(datasetName); HashMap statsMap = new HashMap<>(); // Parse statistics from generated files - Path dotProperties = Paths.get(dataset.getPath() + ".properties"); - Path dotStats = Paths.get(dataset.getPath() + ".stats"); + Path dotProperties = Paths.get(graph.getPath() + ".properties"); + Path dotStats = Paths.get(graph.getPath() + ".stats"); List lines = Files.readAllLines(dotProperties); lines.addAll(Files.readAllLines(dotStats)); for (String line : lines) { String[] parts = line.split("="); if (parts.length == 2) statsMap.put(parts[0], parts[1]); } - DatasetStat stats = new DatasetStat(); - stats.nbNodes = Long.parseLong(statsMap.get("nodes")); - stats.nbEdges = Long.parseLong(statsMap.get("arcs")); - stats.compressionRatio = Double.parseDouble(statsMap.get("compratio")); - stats.bitsPerNode = Double.parseDouble(statsMap.get("bitspernode")); - stats.bitsPerEdge = Double.parseDouble(statsMap.get("bitsperlink")); - stats.avgLocality = Double.parseDouble(statsMap.get("avglocality")); - stats.minIndegree = Long.parseLong(statsMap.get("minindegree")); - stats.maxIndegree = Long.parseLong(statsMap.get("maxindegree")); - stats.avgIndegree = Double.parseDouble(statsMap.get("avgindegree")); - stats.minOutdegree = Long.parseLong(statsMap.get("minoutdegree")); - stats.maxOutdegree = Long.parseLong(statsMap.get("maxoutdegree")); - stats.avgOutdegree = Double.parseDouble(statsMap.get("avgoutdegree")); - - return stats; + this.nbNodes = Long.parseLong(statsMap.get("nodes")); + this.nbEdges = Long.parseLong(statsMap.get("arcs")); + this.compressionRatio = Double.parseDouble(statsMap.get("compratio")); + this.bitsPerNode = Double.parseDouble(statsMap.get("bitspernode")); + this.bitsPerEdge = Double.parseDouble(statsMap.get("bitsperlink")); + this.avgLocality = Double.parseDouble(statsMap.get("avglocality")); + this.minIndegree = Long.parseLong(statsMap.get("minindegree")); + this.maxIndegree = Long.parseLong(statsMap.get("maxindegree")); + this.avgIndegree = Double.parseDouble(statsMap.get("avgindegree")); + this.minOutdegree = Long.parseLong(statsMap.get("minoutdegree")); + this.maxOutdegree = Long.parseLong(statsMap.get("maxoutdegree")); + this.avgOutdegree = Double.parseDouble(statsMap.get("avgoutdegree")); } }