diff --git a/docs/api.rst b/docs/api.rst --- a/docs/api.rst +++ b/docs/api.rst @@ -37,6 +37,8 @@ timings metadata in addition to the result: - ``traversal``: time in seconds to do the actual graph traversal. +- ``traversal_normalized``: time in seconds to do the traversal relative to the + output size. - ``pid2node``: time in seconds to convert input PID to node id. - ``node2pid``: time in seconds to convert output node ids to PIDs. @@ -73,6 +75,7 @@ "meta": { "timings": { "traversal": 0.002942681, + "traversal_normalized": 0.000906440, "pid2node": 0.000178051, "node2pid": 0.000956569 } @@ -111,6 +114,7 @@ "meta": { "timings": { "traversal": 0.002942681, + "traversal_normalized": 0.000906440, "pid2node": 0.000178051, "node2pid": 0.000956569 } @@ -157,6 +161,7 @@ "meta": { "timings": { "traversal": 0.002942681, + "traversal_normalized": 0.000906440, "pid2node": 0.000178051, "node2pid": 0.000956569 } @@ -200,6 +205,7 @@ "meta": { "timings": { "traversal": 0.002942681, + "traversal_normalized": 0.000906440, "pid2node": 0.000178051, "node2pid": 0.000956569 } @@ -229,6 +235,7 @@ "meta": { "timings" : { "traversal": 0.002942681, + "traversal_normalized": 0.000906440, "pid2node": 0.000178051, "node2pid": 0.000956569 } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java --- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java @@ -53,6 +53,8 @@ public class Timings { /** Time in seconds to do the traversal */ public double traversal; + /** Time in seconds to do the traversal relative to the size of the visit output */ + public double traversalNormalized; /** Time in seconds to convert input SWH PID to node id */ public double pid2node; /** Time in seconds to convert output node ids to SWH PIDs */ @@ -142,6 +144,7 @@ startTime = Timing.start(); ArrayList<Long> nodeIds = traversal.leaves(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); + output.meta.timings.traversalNormalized = output.meta.timings.traversal / nodeIds.size(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); @@ -169,6 +172,7 @@ startTime = Timing.start(); ArrayList<Long> nodeIds = traversal.neighbors(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); + output.meta.timings.traversalNormalized = output.meta.timings.traversal / nodeIds.size(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); @@ -218,6 +222,8 @@ } } + output.meta.timings.traversalNormalized = output.meta.timings.traversal / nodeIds.size(); + startTime = Timing.start(); output.result = convertNodesToSwhPath(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); @@ -244,6 +250,7 @@ startTime = Timing.start(); ArrayList<Long> nodeIds = traversal.visitNodes(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); + output.meta.timings.traversalNormalized = output.meta.timings.traversal / nodeIds.size(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); @@ -272,6 +279,7 @@ startTime = Timing.start(); ArrayList<ArrayList<Long>> paths = traversal.visitPaths(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); + output.meta.timings.traversalNormalized = output.meta.timings.traversal / paths.size(); startTime = Timing.start(); output.result = convertPathsToSwhPIDs(paths); diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java new file mode 100644 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java @@ -0,0 +1,47 @@ +package org.softwareheritage.graph.benchmark; + +import java.io.IOException; + +import org.softwareheritage.graph.Endpoint; +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.Node; +import org.softwareheritage.graph.benchmark.Common; +import org.softwareheritage.graph.utils.Random; + +/** + * Benchmark Software Heritage browsing use-cases scenarios: <a + * href="https://docs.softwareheritage.org/devel/swh-graph/use-cases.html">use cases</a>. + * + * @author Thibault Allançon + * @version 0.0.1 + * @since 0.0.1 + */ + +public class Browsing { + /** + * Main entrypoint. + * + * @param args command line arguments + */ + public static void main(String[] args) throws IOException { + String path = args[0]; + Graph graph = new Graph(path); + + final long seed = 42; + final int nbNodes = 100_000; + Random random = new Random(seed); + long[] dirNodeIds = random.generateNodeIdsOfType(graph, nbNodes, Node.Type.DIR); + long[] revNodeIds = random.generateNodeIdsOfType(graph, nbNodes, Node.Type.REV); + + Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir"); + Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev"); + + System.out.println("Used " + nbNodes + " random nodes (results are in seconds):"); + System.out.println("\n'ls' use-case"); + Common.timeEndpoint(graph, dirNodeIds, dirEndpoint::neighbors); + System.out.println("\n'ls -R' use-case"); + Common.timeEndpoint(graph, dirNodeIds, dirEndpoint::visitPaths); + System.out.println("\n'git log' use-case"); + Common.timeEndpoint(graph, revNodeIds, revEndpoint::visitNodes); + } +} diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java new file mode 100644 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java @@ -0,0 +1,49 @@ +package org.softwareheritage.graph.benchmark; + +import java.util.ArrayList; +import java.util.function.Function; + +import org.softwareheritage.graph.Endpoint; +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhPID; +import org.softwareheritage.graph.utils.Statistics; + +/** + * Benchmark common utility functions. + * + * @author Thibault Allançon + * @version 0.0.1 + * @since 0.0.1 + */ + +public class Common { + /** + * Times a specific endpoint and prints aggregated statistics. + * + * @param graph compressed graph used in the benchmark + * @param nodeIds node ids to use as starting point for the endpoint traversal + * @param operation endpoint function to benchmark + */ + public static void timeEndpoint( + Graph graph, long[] nodeIds, Function<SwhPID, Endpoint.Output> operation) { + ArrayList<Double> timings = new ArrayList<>(); + ArrayList<Double> timingsNormalized = new ArrayList<>(); + + for (long nodeId : nodeIds) { + SwhPID swhPID = graph.getSwhPID(nodeId); + + Endpoint.Output output = operation.apply(swhPID); + + timings.add(output.meta.timings.traversal); + timingsNormalized.add(output.meta.timings.traversalNormalized); + } + + System.out.println("timings:"); + Statistics stats = new Statistics(timings); + stats.printAll(); + + System.out.println("timings normalized:"); + Statistics statsNormalized = new Statistics(timingsNormalized); + statsNormalized.printAll(); + } +}