diff --git a/java/server/src/main/java/org/softwareheritage/graph/Graph.java b/java/server/src/main/java/org/softwareheritage/graph/Graph.java index 1b7be1d..faf513d 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Graph.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Graph.java @@ -1,195 +1,230 @@ package org.softwareheritage.graph; import java.io.IOException; import it.unimi.dsi.big.webgraph.BVGraph; +import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.backend.NodeIdMap; import org.softwareheritage.graph.backend.NodeTypesMap; /** * Main class storing the compressed graph and node id mappings. *

* The compressed graph is stored using the WebGraph * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent * identifiers (PID) while WebGraph uses integers internally. These two mappings (long id ↔ * PID) are used for the input (users refer to the graph using PID) and the output (convert back to * PID for users results). However, since graph traversal can be restricted depending on the node * type (see {@link AllowedEdges}), a long id → node type map is stored as well to avoid a full * PID lookup. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 * @see org.softwareheritage.graph.AllowedEdges * @see org.softwareheritage.graph.NodeIdMap; * @see org.softwareheritage.graph.NodeTypesMap; */ public class Graph { /** File extension for the SWH PID to long node id map */ public static final String PID_TO_NODE = ".pid2node.csv"; /** File extension for the long node id to SWH PID map */ public static final String NODE_TO_PID = ".node2pid.csv"; /** File extension for the long node id to node typ map */ public static final String NODE_TO_TYPE = ".node2type.map"; /** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */ BVGraph graph; /** Transposed compressed graph (used for backward traversals) */ BVGraph graphTransposed; /** Path and basename of the compressed graph */ String path; /** Mapping long id ↔ SWH PIDs */ NodeIdMap nodeIdMap; /** Mapping long id → node types */ NodeTypesMap nodeTypesMap; /** * Constructor. * * @param path path and basename of the compressed graph to load */ public Graph(String path) throws IOException { this.graph = BVGraph.load(path); this.graphTransposed = BVGraph.load(path + "-transposed"); this.path = path; this.nodeIdMap = new NodeIdMap(path, getNbNodes()); this.nodeTypesMap = new NodeTypesMap(path); } /** * Cleans up graph resources after use. */ public void cleanUp() throws IOException { nodeIdMap.close(); } /** * Returns the graph full path. * * @return graph full path */ public String getPath() { return path; } /** * Converts {@link SwhId} node to long. * * @param swhId node specified as a {@link SwhId} * @return internal long node id * @see org.softwareheritage.graph.SwhId */ public long getNodeId(SwhId swhId) { return nodeIdMap.getNodeId(swhId); } /** * Converts long id node to {@link SwhId}. * * @param nodeId node specified as a long id * @return external SWH PID * @see org.softwareheritage.graph.SwhId */ public SwhId getSwhId(long nodeId) { return nodeIdMap.getSwhId(nodeId); } /** * Returns node type. * * @param nodeId node specified as a long id * @return corresponding node type * @see org.softwareheritage.graph.Node.Type */ public Node.Type getNodeType(long nodeId) { return nodeTypesMap.getType(nodeId); } /** * Returns number of nodes in the graph. * * @return number of nodes in the graph */ public long getNbNodes() { return graph.numNodes(); } /** * Returns number of edges in the graph. * * @return number of edges in the graph */ public long getNbEdges() { return graph.numArcs(); } /** * Returns list of successors of a node. * * @param nodeId node specified as a long id * @return list of successors of the node, specified as a fastutil LongBigArrays */ public long[][] successors(long nodeId) { return graph.successorBigArray(nodeId); } + /** + * Returns lazy iterator of successors of a node. + * + * @param nodeId node specified as a long id + * @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator + */ + public LazyLongIterator lazySuccessors(long nodeId) { + return graph.successors(nodeId); + } + /** * Returns the outdegree of a node. * * @param nodeId node specified as a long id * @return outdegree of a node */ public long outdegree(long nodeId) { return graph.outdegree(nodeId); } /** * Returns list of predecessors of a node. * * @param nodeId node specified as a long id - * @return list of successors of the node, specified as a fastutil LongBigArrays */ public long[][] predecessors(long nodeId) { return graphTransposed.successorBigArray(nodeId); } + /** + * Returns lazy iterator of predecessors of a node. + * + * @param nodeId node specified as a long id + * @return lazy iterator of predecessors of the node, specified as a WebGraph LazyLongIterator + */ + public LazyLongIterator lazyPredecessors(long nodeId) { + return graphTransposed.successors(nodeId); + } + /** * Returns the indegree of a node. * * @param nodeId node specified as a long id * @return indegree of a node */ public long indegree(long nodeId) { return graphTransposed.outdegree(nodeId); } /** * Returns the degree of a node, depending on graph orientation. * * @param nodeId node specified as a long id * @param useTransposed boolean value to use transposed graph * @return degree of a node */ public long degree(long nodeId, boolean useTransposed) { return (useTransposed) ? indegree(nodeId) : outdegree(nodeId); } /** - * Returns the neighbors of a node, depending on graph orientation. + * Returns the neighbors of a node (as a list), depending on graph orientation. * * @param nodeId node specified as a long id * @param useTransposed boolean value to use transposed graph - * @return list of successors of the node, specified as a fastutil LongBigArrays */ public long[][] neighbors(long nodeId, boolean useTransposed) { return (useTransposed) ? predecessors(nodeId) : successors(nodeId); } + + /** + * Returns the neighbors of a node (as a lazy iterator), depending on graph orientation. + * + * @param nodeId node specified as a long id + * @param useTransposed boolean value to use transposed graph + * @return lazy iterator of neighbors of the node, specified as a WebGraph LazyLongIterator + */ + public LazyLongIterator lazyNeighbors(long nodeId, boolean useTransposed) { + return (useTransposed) ? lazyPredecessors(nodeId) : lazySuccessors(nodeId); + } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java new file mode 100644 index 0000000..a263cfb --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java @@ -0,0 +1,49 @@ +package org.softwareheritage.graph.benchmark; + +import java.io.IOException; +import java.util.ArrayList; + +import it.unimi.dsi.big.webgraph.LazyLongIterator; + +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.utils.Random; +import org.softwareheritage.graph.utils.Statistics; +import org.softwareheritage.graph.utils.Timing; + +/** + * Benchmark to time edge access time. + * + * @author Thibault Allançon + * @version 0.0.1 + * @since 0.0.1 + */ + +public class AccessEdge { + /** + * Main entrypoint. + * + * @param args command line arguments + */ + public static void main(String[] args) throws IOException { + String path = args[0]; + Graph graph = new Graph(path); + + final long seed = 42; + final long nbNodes = 1_000_000; + Random random = new Random(seed); + long[] nodeIds = random.generateNodeIds(graph, nbNodes); + + ArrayList timings = new ArrayList<>(); + for (long nodeId : nodeIds) { + long startTime = Timing.start(); + LazyLongIterator neighbors = graph.lazySuccessors(nodeId); + long firstNeighbor = neighbors.nextLong(); + double duration = (double) Timing.stop(startTime); + timings.add(duration); + } + + System.out.println("Used " + nbNodes + " random edges (results are in seconds):"); + Statistics stats = new Statistics(timings); + stats.printAll(); + } +} diff --git a/java/server/src/main/java/org/softwareheritage/graph/utils/Random.java b/java/server/src/main/java/org/softwareheritage/graph/utils/Random.java new file mode 100644 index 0000000..93fdf78 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/utils/Random.java @@ -0,0 +1,43 @@ +package org.softwareheritage.graph.utils; + +import org.softwareheritage.graph.Graph; + +/** + * Random related utility class. + * + * @author Thibault Allançon + * @version 0.0.1 + * @since 0.0.1 + */ + +public class Random { + /** Internal pseudorandom generator */ + java.util.Random random; + + /** + * Constructor. + */ + public Random() { + this.random = new java.util.Random(); + } + + /** + * Constructor. + * + * @param seed random generator seed + */ + public Random(long seed) { + this.random = new java.util.Random(seed); + } + + /** + * Generates random node ids. + * + * @param graph graph from which node ids will be picked + * @param nbNodes number of node ids to generate + * @return an array of random node ids + */ + public long[] generateNodeIds(Graph graph, long nbNodes) { + return random.longs(nbNodes, 0, graph.getNbNodes()).toArray(); + } +} diff --git a/java/server/src/main/java/org/softwareheritage/graph/utils/Statistics.java b/java/server/src/main/java/org/softwareheritage/graph/utils/Statistics.java new file mode 100644 index 0000000..ff9db25 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/utils/Statistics.java @@ -0,0 +1,89 @@ +package org.softwareheritage.graph.utils; + +import java.util.ArrayList; + +/** + * Compute various statistics on a list of values. + * + * @author Thibault Allançon + * @version 0.0.1 + * @since 0.0.1 + */ + +public class Statistics { + /** Input values */ + ArrayList values; + + /** + * Constructor. + * + * @param values input values + */ + public Statistics(ArrayList values) { + this.values = values; + } + + /** + * Returns the minimum value. + * + * @return minimum value + */ + public double getMin() { + double min = Double.POSITIVE_INFINITY; + for (double v : values) { + min = Math.min(min, v); + } + return min; + } + + /** + * Returns the maximum value. + * + * @return maximum value + */ + public double getMax() { + double max = Double.NEGATIVE_INFINITY; + for (double v : values) { + max = Math.max(max, v); + } + return max; + } + + /** + * Computes the average. + * + * @return average value + */ + public double getAverage() { + double sum = 0; + for (double v : values) { + sum += v; + } + return sum / (double) values.size(); + } + + /** + * Computes the standard deviation. + * + * @return standard deviation value + */ + public double getStandardDeviation() { + double average = getAverage(); + double variance = 0; + for (double v : values) { + variance += (v - average) * (v - average); + } + variance /= (double) values.size(); + return Math.sqrt(variance); + } + + /** + * Computes and prints all statistical values. + */ + public void printAll() { + System.out.println("min value: " + getMin()); + System.out.println("max value: " + getMax()); + System.out.println("average: " + getAverage()); + System.out.println("standard deviation: " + getStandardDeviation()); + } +}