diff --git a/docs/api.rst b/docs/api.rst --- a/docs/api.rst +++ b/docs/api.rst @@ -30,15 +30,19 @@ nodes, or from directories nodes. - ``"*:rel"`` node types allowing all edges to releases. -Timings -~~~~~~~ +Metadata +-------- -When configured to do so (see the server's README), the server can provide -timings metadata in addition to the result: +Extra metadata are given in addition to the result: -- ``traversal``: time in seconds to do the actual graph traversal. -- ``pid2node``: time in seconds to convert input PID to node id. -- ``node2pid``: time in seconds to convert output node ids to PIDs. +- ``timings``: only when configured to do so (see the server's README): + + - ``traversal``: time in seconds to do the actual graph traversal. + - ``pid2node``: time in seconds to convert input PID to node id. + - ``node2pid``: time in seconds to convert output node ids to PIDs. + +- ``nb_edges_accessed``: number of edges accessed during the traversal + operation. Leaves ------ @@ -75,7 +79,8 @@ "traversal": 0.002942681, "pid2node": 0.000178051, "node2pid": 0.000956569 - } + }, + "nb_edges_accessed": 12 } } @@ -113,7 +118,8 @@ "traversal": 0.002942681, "pid2node": 0.000178051, "node2pid": 0.000956569 - } + }, + "nb_edges_accessed": 12 } } @@ -159,7 +165,8 @@ "traversal": 0.002942681, "pid2node": 0.000178051, "node2pid": 0.000956569 - } + }, + "nb_edges_accessed": 12 } } @@ -202,7 +209,8 @@ "traversal": 0.002942681, "pid2node": 0.000178051, "node2pid": 0.000956569 - } + }, + "nb_edges_accessed": 12 } } @@ -231,7 +239,8 @@ "traversal": 0.002942681, "pid2node": 0.000178051, "node2pid": 0.000956569 - } + }, + "nb_edges_accessed": 12 } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java --- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java @@ -42,9 +42,12 @@ public class Meta { /** Operations timings */ public Timings timings; + /** Number of edges accessed during traversal */ + public long nbEdgesAccessed; public Meta() { this.timings = new Timings(); + this.nbEdgesAccessed = 0; } /** @@ -142,6 +145,7 @@ startTime = Timing.start(); ArrayList nodeIds = traversal.leaves(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); + output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); @@ -169,6 +173,7 @@ startTime = Timing.start(); ArrayList nodeIds = traversal.neighbors(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); + output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); @@ -218,6 +223,8 @@ } } + output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); + startTime = Timing.start(); output.result = convertNodesToSwhPath(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); @@ -244,6 +251,7 @@ startTime = Timing.start(); ArrayList nodeIds = traversal.visitNodes(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); + output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); @@ -272,6 +280,7 @@ startTime = Timing.start(); ArrayList> paths = traversal.visitPaths(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); + output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); startTime = Timing.start(); output.result = convertPathsToSwhPIDs(paths); diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java --- a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java +++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java @@ -40,6 +40,8 @@ LongArrayBitVector visited; /** LongBigArray storing parent node id for each nodes during a traversal */ long[][] nodeParent; + /** Number of edges accessed during traversal */ + long nbEdgesAccessed; /** * Constructor. @@ -61,6 +63,16 @@ long nbNodes = graph.getNbNodes(); this.visited = LongArrayBitVector.ofLength(nbNodes); this.nodeParent = LongBigArrays.newBigArray(nbNodes); + this.nbEdgesAccessed = 0; + } + + /** + * Returns number of accessed edges during traversal. + * + * @return number of edges accessed in last traversal + */ + public long getnbEdgesAccessed() { + return nbEdgesAccessed; } /** @@ -73,6 +85,7 @@ ArrayList nodeIds = new ArrayList(); Stack stack = new Stack(); this.visited.fill(false); + this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.set(srcNodeId); @@ -83,6 +96,7 @@ long neighborsCnt = 0; for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { neighborsCnt++; + nbEdgesAccessed++; if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); @@ -105,8 +119,10 @@ */ public ArrayList neighbors(long srcNodeId) { ArrayList nodeIds = new ArrayList(); + this.nbEdgesAccessed = 0; for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) { nodeIds.add(neighborNodeId); + nbEdgesAccessed++; } return nodeIds; } @@ -121,6 +137,7 @@ ArrayList nodeIds = new ArrayList(); Stack stack = new Stack(); this.visited.fill(false); + this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.set(srcNodeId); @@ -130,6 +147,7 @@ nodeIds.add(currentNodeId); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { + nbEdgesAccessed++; if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); @@ -149,6 +167,7 @@ public ArrayList> visitPaths(long srcNodeId) { ArrayList> paths = new ArrayList<>(); Stack currentPath = new Stack(); + this.nbEdgesAccessed = 0; visitPathsInternal(srcNodeId, paths, currentPath); return paths; } @@ -168,6 +187,7 @@ for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { visitPathsInternal(neighborNodeId, paths, currentPath); visitedNeighbors++; + this.nbEdgesAccessed++; } if (visitedNeighbors == 0) { @@ -216,6 +236,7 @@ private long walkInternalDfs(long srcNodeId, T dst) { Stack stack = new Stack(); this.visited.fill(false); + this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.set(srcNodeId); @@ -227,6 +248,7 @@ } for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { + nbEdgesAccessed++; if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); @@ -248,6 +270,7 @@ private long walkInternalBfs(long srcNodeId, T dst) { Queue queue = new LinkedList(); this.visited.fill(false); + this.nbEdgesAccessed = 0; queue.add(srcNodeId); visited.set(srcNodeId); @@ -259,6 +282,7 @@ } for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { + nbEdgesAccessed++; if (!visited.getBoolean(neighborNodeId)) { queue.add(neighborNodeId); visited.set(neighborNodeId); diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java new file mode 100644 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java @@ -0,0 +1,47 @@ +package org.softwareheritage.graph.benchmark; + +import java.io.IOException; + +import org.softwareheritage.graph.Endpoint; +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.Node; +import org.softwareheritage.graph.benchmark.Common; +import org.softwareheritage.graph.utils.Random; + +/** + * Benchmark Software Heritage browsing use-cases scenarios: use cases. + * + * @author Thibault Allançon + * @version 0.0.1 + * @since 0.0.1 + */ + +public class Browsing { + /** + * Main entrypoint. + * + * @param args command line arguments + */ + public static void main(String[] args) throws IOException { + String path = args[0]; + Graph graph = new Graph(path); + + final long seed = 42; + final int nbNodes = 100_000; + Random random = new Random(seed); + long[] dirNodeIds = random.generateNodeIdsOfType(graph, nbNodes, Node.Type.DIR); + long[] revNodeIds = random.generateNodeIdsOfType(graph, nbNodes, Node.Type.REV); + + Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir"); + Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev"); + + System.out.println("Used " + nbNodes + " random nodes (results are in seconds):"); + System.out.println("\n'ls' use-case"); + Common.timeEndpoint(graph, dirNodeIds, dirEndpoint::neighbors); + System.out.println("\n'ls -R' use-case"); + Common.timeEndpoint(graph, dirNodeIds, dirEndpoint::visitPaths); + System.out.println("\n'git log' use-case"); + Common.timeEndpoint(graph, revNodeIds, revEndpoint::visitNodes); + } +} diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java new file mode 100644 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java @@ -0,0 +1,51 @@ +package org.softwareheritage.graph.benchmark; + +import java.util.ArrayList; +import java.util.function.Function; + +import org.softwareheritage.graph.Endpoint; +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhPID; +import org.softwareheritage.graph.utils.Statistics; + +/** + * Benchmark common utility functions. + * + * @author Thibault Allançon + * @version 0.0.1 + * @since 0.0.1 + */ + +public class Common { + /** + * Times a specific endpoint and prints aggregated statistics. + * + * @param graph compressed graph used in the benchmark + * @param nodeIds node ids to use as starting point for the endpoint traversal + * @param operation endpoint function to benchmark + */ + public static void timeEndpoint( + Graph graph, long[] nodeIds, Function operation) { + ArrayList timings = new ArrayList<>(); + ArrayList timingsNormalized = new ArrayList<>(); + + for (long nodeId : nodeIds) { + SwhPID swhPID = graph.getSwhPID(nodeId); + + Endpoint.Output output = operation.apply(swhPID); + + timings.add(output.meta.timings.traversal); + if (output.meta.nbEdgesAccessed != 0) { + timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed); + } + } + + System.out.println("timings:"); + Statistics stats = new Statistics(timings); + stats.printAll(); + + System.out.println("timings normalized:"); + Statistics statsNormalized = new Statistics(timingsNormalized); + statsNormalized.printAll(); + } +}