diff --git a/java/server/src/main/java/org/softwareheritage/graph/Graph.java b/java/server/src/main/java/org/softwareheritage/graph/Graph.java --- a/java/server/src/main/java/org/softwareheritage/graph/Graph.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Graph.java @@ -3,6 +3,7 @@ import java.io.IOException; import it.unimi.dsi.big.webgraph.BVGraph; +import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SwhId; @@ -129,14 +130,14 @@ } /** - * Returns list of successors of a node. + * Returns lazy iterator of successors of a node. * * @param nodeId node specified as a long id - * @return list of successors of the node, specified as a fastutil LongBigArrays + * @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator */ - public long[][] successors(long nodeId) { - return graph.successorBigArray(nodeId); + public LazyLongIterator successors(long nodeId) { + return graph.successors(nodeId); } /** @@ -150,14 +151,14 @@ } /** - * Returns list of predecessors of a node. + * Returns lazy iterator of predecessors of a node. * * @param nodeId node specified as a long id - * @return list of successors of the node, specified as a fastutil LongBigArrays + * @return lazy iterator of predecessors of the node, specified as a WebGraph LazyLongIterator */ - public long[][] predecessors(long nodeId) { - return graphTransposed.successorBigArray(nodeId); + public LazyLongIterator predecessors(long nodeId) { + return graphTransposed.successors(nodeId); } /** @@ -182,14 +183,14 @@ } /** - * Returns the neighbors of a node, depending on graph orientation. + * Returns the neighbors of a node (as a lazy iterator), depending on graph orientation. * * @param nodeId node specified as a long id * @param useTransposed boolean value to use transposed graph - * @return list of successors of the node, specified as a fastutil LongBigArrays + * @return lazy iterator of neighbors of the node, specified as a WebGraph LazyLongIterator */ - public long[][] neighbors(long nodeId, boolean useTransposed) { + public LazyLongIterator neighbors(long nodeId, boolean useTransposed) { return (useTransposed) ? predecessors(nodeId) : successors(nodeId); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java --- a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java @@ -2,7 +2,7 @@ import java.util.Iterator; -import it.unimi.dsi.fastutil.longs.LongBigArrays; +import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Graph; @@ -57,33 +57,24 @@ */ public class NeighborsIterator implements Iterator { - long nextNeighborIdx; - long nbNeighbors; - // LongBigArrays to support 64-bit indexing. - long[][] neighbors; + LazyLongIterator neighbors; + long nextNeighborId; public NeighborsIterator() { - this.nextNeighborIdx = -1; - this.nbNeighbors = graph.degree(srcNodeId, useTransposed); this.neighbors = graph.neighbors(srcNodeId, useTransposed); + this.nextNeighborId = -1; } public boolean hasNext() { // Case 1: no edge restriction, bypass type checks and skip to next neighbor if (edges.restrictedTo == null) { - if (nextNeighborIdx + 1 < nbNeighbors) { - nextNeighborIdx++; - return true; - } else { - return false; - } + nextNeighborId = neighbors.nextLong(); + return (nextNeighborId != -1); } // Case 2: edge restriction, look ahead for next neighbor - for (long lookAheadIdx = nextNeighborIdx + 1; lookAheadIdx < nbNeighbors; lookAheadIdx++) { - long nextNodeId = LongBigArrays.get(neighbors, lookAheadIdx); - if (edges.isAllowed(srcNodeId, nextNodeId)) { - nextNeighborIdx = lookAheadIdx; + while ((nextNeighborId = neighbors.nextLong()) != -1) { + if (edges.isAllowed(srcNodeId, nextNeighborId)) { return true; } } @@ -91,8 +82,7 @@ } public Long next() { - long nextNodeId = LongBigArrays.get(neighbors, nextNeighborIdx); - return nextNodeId; + return nextNeighborId; } } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java new file mode 100644 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java @@ -0,0 +1,49 @@ +package org.softwareheritage.graph.benchmark; + +import java.io.IOException; +import java.util.ArrayList; + +import it.unimi.dsi.big.webgraph.LazyLongIterator; + +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.utils.Random; +import org.softwareheritage.graph.utils.Statistics; +import org.softwareheritage.graph.utils.Timing; + +/** + * Benchmark to time edge access time. + * + * @author Thibault Allançon + * @version 0.0.1 + * @since 0.0.1 + */ + +public class AccessEdge { + /** + * Main entrypoint. + * + * @param args command line arguments + */ + public static void main(String[] args) throws IOException { + String path = args[0]; + Graph graph = new Graph(path); + + final long seed = 42; + final long nbNodes = 1_000_000; + Random random = new Random(seed); + long[] nodeIds = random.generateNodeIds(graph, nbNodes); + + ArrayList timings = new ArrayList<>(); + for (long nodeId : nodeIds) { + long startTime = Timing.start(); + LazyLongIterator neighbors = graph.successors(nodeId); + long firstNeighbor = neighbors.nextLong(); + double duration = (double) Timing.stop(startTime); + timings.add(duration); + } + + System.out.println("Used " + nbNodes + " random edges (results are in seconds):"); + Statistics stats = new Statistics(timings); + stats.printAll(); + } +} diff --git a/java/server/src/main/java/org/softwareheritage/graph/utils/Random.java b/java/server/src/main/java/org/softwareheritage/graph/utils/Random.java new file mode 100644 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/utils/Random.java @@ -0,0 +1,43 @@ +package org.softwareheritage.graph.utils; + +import org.softwareheritage.graph.Graph; + +/** + * Random related utility class. + * + * @author Thibault Allançon + * @version 0.0.1 + * @since 0.0.1 + */ + +public class Random { + /** Internal pseudorandom generator */ + java.util.Random random; + + /** + * Constructor. + */ + public Random() { + this.random = new java.util.Random(); + } + + /** + * Constructor. + * + * @param seed random generator seed + */ + public Random(long seed) { + this.random = new java.util.Random(seed); + } + + /** + * Generates random node ids. + * + * @param graph graph from which node ids will be picked + * @param nbNodes number of node ids to generate + * @return an array of random node ids + */ + public long[] generateNodeIds(Graph graph, long nbNodes) { + return random.longs(nbNodes, 0, graph.getNbNodes()).toArray(); + } +} diff --git a/java/server/src/main/java/org/softwareheritage/graph/utils/Statistics.java b/java/server/src/main/java/org/softwareheritage/graph/utils/Statistics.java new file mode 100644 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/utils/Statistics.java @@ -0,0 +1,89 @@ +package org.softwareheritage.graph.utils; + +import java.util.ArrayList; + +/** + * Compute various statistics on a list of values. + * + * @author Thibault Allançon + * @version 0.0.1 + * @since 0.0.1 + */ + +public class Statistics { + /** Input values */ + ArrayList values; + + /** + * Constructor. + * + * @param values input values + */ + public Statistics(ArrayList values) { + this.values = values; + } + + /** + * Returns the minimum value. + * + * @return minimum value + */ + public double getMin() { + double min = Double.POSITIVE_INFINITY; + for (double v : values) { + min = Math.min(min, v); + } + return min; + } + + /** + * Returns the maximum value. + * + * @return maximum value + */ + public double getMax() { + double max = Double.NEGATIVE_INFINITY; + for (double v : values) { + max = Math.max(max, v); + } + return max; + } + + /** + * Computes the average. + * + * @return average value + */ + public double getAverage() { + double sum = 0; + for (double v : values) { + sum += v; + } + return sum / (double) values.size(); + } + + /** + * Computes the standard deviation. + * + * @return standard deviation value + */ + public double getStandardDeviation() { + double average = getAverage(); + double variance = 0; + for (double v : values) { + variance += (v - average) * (v - average); + } + variance /= (double) values.size(); + return Math.sqrt(variance); + } + + /** + * Computes and prints all statistical values. + */ + public void printAll() { + System.out.println("min value: " + getMin()); + System.out.println("max value: " + getMax()); + System.out.println("average: " + getAverage()); + System.out.println("standard deviation: " + getStandardDeviation()); + } +}