Page MenuHomeSoftware Heritage

D1846.id6210.diff
No OneTemporary

D1846.id6210.diff

diff --git a/docs/api.rst b/docs/api.rst
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -30,15 +30,19 @@
nodes, or from directories nodes.
- ``"*:rel"`` node types allowing all edges to releases.
-Timings
-~~~~~~~
+Metadata
+--------
-When configured to do so (see the server's README), the server can provide
-timings metadata in addition to the result:
+Extra metadata are given in addition to the result:
-- ``traversal``: time in seconds to do the actual graph traversal.
-- ``pid2node``: time in seconds to convert input PID to node id.
-- ``node2pid``: time in seconds to convert output node ids to PIDs.
+- ``timings``: only when configured to do so (see the server's README):
+
+ - ``traversal``: time in seconds to do the actual graph traversal.
+ - ``pid2node``: time in seconds to convert input PID to node id.
+ - ``node2pid``: time in seconds to convert output node ids to PIDs.
+
+- ``nb_edges_accessed``: number of edges accessed during the traversal
+ operation.
Leaves
------
@@ -75,7 +79,8 @@
"traversal": 0.002942681,
"pid2node": 0.000178051,
"node2pid": 0.000956569
- }
+ },
+ "nb_edges_accessed": 12
}
}
@@ -113,7 +118,8 @@
"traversal": 0.002942681,
"pid2node": 0.000178051,
"node2pid": 0.000956569
- }
+ },
+ "nb_edges_accessed": 12
}
}
@@ -159,7 +165,8 @@
"traversal": 0.002942681,
"pid2node": 0.000178051,
"node2pid": 0.000956569
- }
+ },
+ "nb_edges_accessed": 12
}
}
@@ -202,7 +209,8 @@
"traversal": 0.002942681,
"pid2node": 0.000178051,
"node2pid": 0.000956569
- }
+ },
+ "nb_edges_accessed": 12
}
}
@@ -231,7 +239,8 @@
"traversal": 0.002942681,
"pid2node": 0.000178051,
"node2pid": 0.000956569
- }
+ },
+ "nb_edges_accessed": 12
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
--- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
@@ -6,7 +6,7 @@
import org.softwareheritage.graph.SwhPID;
import org.softwareheritage.graph.SwhPath;
import org.softwareheritage.graph.algo.Traversal;
-import org.softwareheritage.graph.utils.Timing;
+import org.softwareheritage.graph.benchmark.utils.Timing;
/**
* REST API endpoints wrapper functions.
@@ -42,9 +42,12 @@
public class Meta {
/** Operations timings */
public Timings timings;
+ /** Number of edges accessed during traversal */
+ public long nbEdgesAccessed;
public Meta() {
this.timings = new Timings();
+ this.nbEdgesAccessed = 0;
}
/**
@@ -142,6 +145,7 @@
startTime = Timing.start();
ArrayList<Long> nodeIds = traversal.leaves(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
+ output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed();
startTime = Timing.start();
output.result = convertNodesToSwhPIDs(nodeIds);
@@ -169,6 +173,7 @@
startTime = Timing.start();
ArrayList<Long> nodeIds = traversal.neighbors(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
+ output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed();
startTime = Timing.start();
output.result = convertNodesToSwhPIDs(nodeIds);
@@ -218,6 +223,8 @@
}
}
+ output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed();
+
startTime = Timing.start();
output.result = convertNodesToSwhPath(nodeIds);
output.meta.timings.node2pid = Timing.stop(startTime);
@@ -244,6 +251,7 @@
startTime = Timing.start();
ArrayList<Long> nodeIds = traversal.visitNodes(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
+ output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed();
startTime = Timing.start();
output.result = convertNodesToSwhPIDs(nodeIds);
@@ -272,6 +280,7 @@
startTime = Timing.start();
ArrayList<ArrayList<Long>> paths = traversal.visitPaths(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
+ output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed();
startTime = Timing.start();
output.result = convertPathsToSwhPIDs(paths);
diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
--- a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
@@ -40,6 +40,8 @@
LongArrayBitVector visited;
/** LongBigArray storing parent node id for each nodes during a traversal */
long[][] nodeParent;
+ /** Number of edges accessed during traversal */
+ long nbEdgesAccessed;
/**
* Constructor.
@@ -61,6 +63,16 @@
long nbNodes = graph.getNbNodes();
this.visited = LongArrayBitVector.ofLength(nbNodes);
this.nodeParent = LongBigArrays.newBigArray(nbNodes);
+ this.nbEdgesAccessed = 0;
+ }
+
+ /**
+ * Returns number of accessed edges during traversal.
+ *
+ * @return number of edges accessed in last traversal
+ */
+ public long getnbEdgesAccessed() {
+ return nbEdgesAccessed;
}
/**
@@ -73,6 +85,7 @@
ArrayList<Long> nodeIds = new ArrayList<Long>();
Stack<Long> stack = new Stack<Long>();
this.visited.fill(false);
+ this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.set(srcNodeId);
@@ -83,6 +96,7 @@
long neighborsCnt = 0;
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
neighborsCnt++;
+ nbEdgesAccessed++;
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
@@ -105,8 +119,10 @@
*/
public ArrayList<Long> neighbors(long srcNodeId) {
ArrayList<Long> nodeIds = new ArrayList<Long>();
+ this.nbEdgesAccessed = 0;
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) {
nodeIds.add(neighborNodeId);
+ nbEdgesAccessed++;
}
return nodeIds;
}
@@ -121,6 +137,7 @@
ArrayList<Long> nodeIds = new ArrayList<Long>();
Stack<Long> stack = new Stack<Long>();
this.visited.fill(false);
+ this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.set(srcNodeId);
@@ -130,6 +147,7 @@
nodeIds.add(currentNodeId);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
+ nbEdgesAccessed++;
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
@@ -149,6 +167,7 @@
public ArrayList<ArrayList<Long>> visitPaths(long srcNodeId) {
ArrayList<ArrayList<Long>> paths = new ArrayList<>();
Stack<Long> currentPath = new Stack<Long>();
+ this.nbEdgesAccessed = 0;
visitPathsInternal(srcNodeId, paths, currentPath);
return paths;
}
@@ -168,6 +187,7 @@
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
visitPathsInternal(neighborNodeId, paths, currentPath);
visitedNeighbors++;
+ this.nbEdgesAccessed++;
}
if (visitedNeighbors == 0) {
@@ -216,6 +236,7 @@
private <T> long walkInternalDfs(long srcNodeId, T dst) {
Stack<Long> stack = new Stack<Long>();
this.visited.fill(false);
+ this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.set(srcNodeId);
@@ -227,6 +248,7 @@
}
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
+ nbEdgesAccessed++;
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
@@ -248,6 +270,7 @@
private <T> long walkInternalBfs(long srcNodeId, T dst) {
Queue<Long> queue = new LinkedList<Long>();
this.visited.fill(false);
+ this.nbEdgesAccessed = 0;
queue.add(srcNodeId);
visited.set(srcNodeId);
@@ -259,6 +282,7 @@
}
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
+ nbEdgesAccessed++;
if (!visited.getBoolean(neighborNodeId)) {
queue.add(neighborNodeId);
visited.set(neighborNodeId);
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
@@ -6,9 +6,9 @@
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import org.softwareheritage.graph.Graph;
-import org.softwareheritage.graph.utils.Random;
-import org.softwareheritage.graph.utils.Statistics;
-import org.softwareheritage.graph.utils.Timing;
+import org.softwareheritage.graph.benchmark.utils.Random;
+import org.softwareheritage.graph.benchmark.utils.Statistics;
+import org.softwareheritage.graph.benchmark.utils.Timing;
/**
* Benchmark to time edge access time.
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
new file mode 100644
--- /dev/null
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
@@ -0,0 +1,47 @@
+package org.softwareheritage.graph.benchmark;
+
+import java.io.IOException;
+
+import org.softwareheritage.graph.Endpoint;
+import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.Node;
+import org.softwareheritage.graph.benchmark.Common;
+import org.softwareheritage.graph.benchmark.utils.Random;
+
+/**
+ * Benchmark Software Heritage browsing use-cases scenarios: <a
+ * href="https://docs.softwareheritage.org/devel/swh-graph/use-cases.html">use cases</a>.
+ *
+ * @author Thibault Allançon
+ * @version 0.0.1
+ * @since 0.0.1
+ */
+
+public class Browsing {
+ /**
+ * Main entrypoint.
+ *
+ * @param args command line arguments
+ */
+ public static void main(String[] args) throws IOException {
+ String path = args[0];
+ Graph graph = new Graph(path);
+
+ final long seed = 42;
+ final int nbNodes = 100_000;
+ Random random = new Random(seed);
+ long[] dirNodeIds = random.generateNodeIdsOfType(graph, nbNodes, Node.Type.DIR);
+ long[] revNodeIds = random.generateNodeIdsOfType(graph, nbNodes, Node.Type.REV);
+
+ Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir");
+ Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev");
+
+ System.out.println("Used " + nbNodes + " random nodes (results are in seconds):");
+ System.out.println("\n'ls' use-case");
+ Common.timeEndpoint(graph, dirNodeIds, dirEndpoint::neighbors);
+ System.out.println("\n'ls -R' use-case");
+ Common.timeEndpoint(graph, dirNodeIds, dirEndpoint::visitPaths);
+ System.out.println("\n'git log' use-case");
+ Common.timeEndpoint(graph, revNodeIds, revEndpoint::visitNodes);
+ }
+}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java
new file mode 100644
--- /dev/null
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java
@@ -0,0 +1,51 @@
+package org.softwareheritage.graph.benchmark;
+
+import java.util.ArrayList;
+import java.util.function.Function;
+
+import org.softwareheritage.graph.Endpoint;
+import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhPID;
+import org.softwareheritage.graph.benchmark.utils.Statistics;
+
+/**
+ * Benchmark common utility functions.
+ *
+ * @author Thibault Allançon
+ * @version 0.0.1
+ * @since 0.0.1
+ */
+
+public class Common {
+ /**
+ * Times a specific endpoint and prints aggregated statistics.
+ *
+ * @param graph compressed graph used in the benchmark
+ * @param nodeIds node ids to use as starting point for the endpoint traversal
+ * @param operation endpoint function to benchmark
+ */
+ public static void timeEndpoint(
+ Graph graph, long[] nodeIds, Function<SwhPID, Endpoint.Output> operation) {
+ ArrayList<Double> timings = new ArrayList<>();
+ ArrayList<Double> timingsNormalized = new ArrayList<>();
+
+ for (long nodeId : nodeIds) {
+ SwhPID swhPID = graph.getSwhPID(nodeId);
+
+ Endpoint.Output output = operation.apply(swhPID);
+
+ timings.add(output.meta.timings.traversal);
+ if (output.meta.nbEdgesAccessed != 0) {
+ timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed);
+ }
+ }
+
+ System.out.println("timings:");
+ Statistics stats = new Statistics(timings);
+ stats.printAll();
+
+ System.out.println("timings normalized:");
+ Statistics statsNormalized = new Statistics(timingsNormalized);
+ statsNormalized.printAll();
+ }
+}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/utils/Random.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java
rename from java/server/src/main/java/org/softwareheritage/graph/utils/Random.java
rename to java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java
--- a/java/server/src/main/java/org/softwareheritage/graph/utils/Random.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java
@@ -1,4 +1,4 @@
-package org.softwareheritage.graph.utils;
+package org.softwareheritage.graph.benchmark.utils;
import java.util.PrimitiveIterator;
diff --git a/java/server/src/main/java/org/softwareheritage/graph/utils/Statistics.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java
rename from java/server/src/main/java/org/softwareheritage/graph/utils/Statistics.java
rename to java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java
--- a/java/server/src/main/java/org/softwareheritage/graph/utils/Statistics.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java
@@ -1,4 +1,4 @@
-package org.softwareheritage.graph.utils;
+package org.softwareheritage.graph.benchmark.utils;
import java.util.ArrayList;
diff --git a/java/server/src/main/java/org/softwareheritage/graph/utils/Timing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java
rename from java/server/src/main/java/org/softwareheritage/graph/utils/Timing.java
rename to java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java
--- a/java/server/src/main/java/org/softwareheritage/graph/utils/Timing.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java
@@ -1,4 +1,4 @@
-package org.softwareheritage.graph.utils;
+package org.softwareheritage.graph.benchmark.utils;
/**
* Time measurement utility class.

File Metadata

Mime Type
text/plain
Expires
Sun, Aug 17, 8:01 PM (5 d, 21 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221857

Event Timeline