diff --git a/docs/api.rst b/docs/api.rst index 3ed7802..4ddbb44 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,273 +1,282 @@ Graph traversal API =================== Terminology ----------- This API uses the following notions: - **Node**: a node in the `Software Heritage graph `_, represented by a `persistent identifier `_ (abbreviated as *SWH PID*, or simply *PID*). - **Node type**: the 3-letter specifier from the node PID (``cnt``, ``dir``, ``rel``, ``rev``, ``snp``), or ``*`` for all node types. - **Edge type**: a comma-separated list of ``src:dst`` strings where ``src`` and ``dst`` are node types, or ``*`` for all edge types. Examples ~~~~~~~~ - ``swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2`` the PID of a node of type content containing the full text of the GPL3 license. - ``swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35`` the PID of a node of type revision corresponding to the commit in Linux that merged the 'x86/urgent' branch on 31 December 2017. - ``"dir:dir,dir:cnt"`` node types allowing edges from directories to directories nodes, or directories to contents nodes. - ``"rev:rev,dir:*"`` node types allowing edges from revisions to revisions nodes, or from directories nodes. - ``"*:rel"`` node types allowing all edges to releases. -Timings -~~~~~~~ +Metadata +-------- -When configured to do so (see the server's README), the server can provide -timings metadata in addition to the result: +Extra metadata are given in addition to the result: -- ``traversal``: time in seconds to do the actual graph traversal. -- ``pid2node``: time in seconds to convert input PID to node id. -- ``node2pid``: time in seconds to convert output node ids to PIDs. +- ``timings``: only when configured to do so (see the server's README): + + - ``traversal``: time in seconds to do the actual graph traversal. + - ``pid2node``: time in seconds to convert input PID to node id. + - ``node2pid``: time in seconds to convert output node ids to PIDs. + +- ``nb_edges_accessed``: number of edges accessed during the traversal + operation. Leaves ------ .. http:get:: /graph/leaves/:src Performs a graph traversal and returns the leaves of the subgraph rooted at the specified source node. :param string src: source node specified as a SWH PID :query string edges: edges types the traversal can follow; default to ``"*"`` :query string direction: direction in which graph edges will be followed; can be either ``forward`` or ``backward``, default to ``forward`` :statuscode 200: success :statuscode 400: invalid query string provided :statuscode 404: starting node cannot be found .. sourcecode:: http HTTP/1.1 200 OK Content-Type: application/json { "result": [ "swh:1:cnt:669ac7c32292798644b21dbb5a0dc657125f444d", "swh:1:cnt:da4cb28febe66172a9fdf1a235525ae6c00cde1d", ... ], "meta": { "timings": { "traversal": 0.002942681, "pid2node": 0.000178051, "node2pid": 0.000956569 - } + }, + "nb_edges_accessed": 12 } } Neighbors --------- .. http:get:: /graph/neighbors/:src Returns node direct neighbors (linked with exactly one edge) in the graph. :param string src: source node specified as a SWH PID :query string edges: edges types allowed to be listed as neighbors; default to ``"*"`` :query string direction: direction in which graph edges will be followed; can be either ``forward`` or ``backward``, default to ``forward`` :statuscode 200: success :statuscode 400: invalid query string provided :statuscode 404: starting node cannot be found .. sourcecode:: http HTTP/1.1 200 OK Content-Type: application/json { "result": [ "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", ... ], "meta": { "timings": { "traversal": 0.002942681, "pid2node": 0.000178051, "node2pid": 0.000956569 - } + }, + "nb_edges_accessed": 12 } } Walk ---- .. http:get:: /graph/walk/:src/:dst Performs a graph traversal and returns the first found path from source to destination (final destination node included). :param string src: starting node specified as a SWH PID :param string dst: destination node, either as a node PID or a node type. The traversal will stop at the first node encountered matching the desired destination. :query string edges: edges types the traversal can follow; default to ``"*"`` :query string traversal: traversal algorithm; can be either ``dfs`` or ``bfs``, default to ``dfs`` :query string direction: direction in which graph edges will be followed; can be either ``forward`` or ``backward``, default to ``forward`` :statuscode 200: success :statuscode 400: invalid query string provided :statuscode 404: starting node cannot be found .. sourcecode:: http HTTP/1.1 200 OK Content-Type: application/json { "result": [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", "swh:1:rev:cea92e843e40452c08ba313abc39f59efbb4c29c", "swh:1:rev:8d517bdfb57154b8a11d7f1682ecc0f79abf8e02", ... ], "meta": { "timings": { "traversal": 0.002942681, "pid2node": 0.000178051, "node2pid": 0.000956569 - } + }, + "nb_edges_accessed": 12 } } Visit ----- .. http:get:: /graph/visit/nodes/:src .. http:get:: /graph/visit/paths/:src Performs a graph traversal and returns explored nodes or paths (in the order of the traversal). :param string src: starting node specified as a SWH PID :query string edges: edges types the traversal can follow; default to ``"*"`` :query string direction: direction in which graph edges will be followed; can be either ``forward`` or ``backward``, default to ``forward`` :statuscode 200: success :statuscode 400: invalid query string provided :statuscode 404: starting node cannot be found .. sourcecode:: http GET /graph/visit/nodes/ HTTP/1.1 200 OK Content-Type: application/json { "result": [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", ... "swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961", ... ], "meta": { "timings": { "traversal": 0.002942681, "pid2node": 0.000178051, "node2pid": 0.000956569 - } + }, + "nb_edges_accessed": 12 } } .. sourcecode:: http GET /graph/visit/paths/ HTTP/1.1 200 OK Content-Type: application/json { "result": [ [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", ... ], [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961", ... ], ... ], "meta": { "timings" : { "traversal": 0.002942681, "pid2node": 0.000178051, "node2pid": 0.000956569 - } + }, + "nb_edges_accessed": 12 } } Stats ----- .. http:get:: /graph/stats Returns statistics on the compressed graph. :statuscode 200: success .. sourcecode:: http HTTP/1.1 200 OK Content-Type: application/json { "counts": { "nodes": 16222788, "edges": 9907464 }, "ratios": { "compression": 0.367, "bits_per_node": 5.846, "bits_per_edge": 9.573, "avg_locality": 270.369 }, "indegree": { "min": 0, "max": 12382, "avg": 0.6107127825377487 }, "outdegree": { "min": 0, "max": 1, "avg": 0.6107127825377487 } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java index 3dd7fa4..fcbce3a 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java @@ -1,282 +1,291 @@ package org.softwareheritage.graph; import java.util.ArrayList; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.SwhPath; import org.softwareheritage.graph.algo.Traversal; import org.softwareheritage.graph.utils.Timing; /** * REST API endpoints wrapper functions. *

* Graph operations are segmented between high-level class (this one) and the low-level class * ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing * all the input/output node ids conversions and logging timings. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 * @see org.softwareheritage.graph.algo.Traversal */ public class Endpoint { /** * Wrapper class to return both the endpoint result and metadata (such as timings). */ public class Output { /** The result content itself */ public T result; /** Various metadata about the result */ public Meta meta; public Output() { this.result = null; this.meta = new Meta(); } /** * Endpoint result metadata. */ public class Meta { /** Operations timings */ public Timings timings; + /** Number of edges accessed during traversal */ + public long nbEdgesAccessed; public Meta() { this.timings = new Timings(); + this.nbEdgesAccessed = 0; } /** * Wrapper class for JSON output format. */ public class Timings { /** Time in seconds to do the traversal */ public double traversal; /** Time in seconds to convert input SWH PID to node id */ public double pid2node; /** Time in seconds to convert output node ids to SWH PIDs */ public double node2pid; } } } /** Graph where traversal endpoint is performed */ Graph graph; /** Internal traversal API */ Traversal traversal; /** * Constructor. * * @param graph the graph used for traversal endpoint * @param direction a string (either "forward" or "backward") specifying edge orientation * @param edgesFmt a formatted string describing allowed edges */ public Endpoint(Graph graph, String direction, String edgesFmt) { this.graph = graph; this.traversal = new Traversal(graph, direction, edgesFmt); } /** * Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs. * * @param nodeIds the list of long node ids * @return a list of corresponding SWH PIDs */ private ArrayList convertNodesToSwhPIDs(ArrayList nodeIds) { ArrayList swhPIDs = new ArrayList<>(); for (long nodeId : nodeIds) { swhPIDs.add(graph.getSwhPID(nodeId)); } return swhPIDs; } /** * Converts a list of (internal) long node ids to the corresponding {@link SwhPath}. * * @param nodeIds the list of long node ids * @return the corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private SwhPath convertNodesToSwhPath(ArrayList nodeIds) { SwhPath path = new SwhPath(); for (long nodeId : nodeIds) { path.add(graph.getSwhPID(nodeId)); } return path; } /** * Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s. * * @param pathsNodeId the list of paths with long node ids * @return a list of corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private ArrayList convertPathsToSwhPIDs(ArrayList> pathsNodeId) { ArrayList paths = new ArrayList<>(); for (ArrayList path : pathsNodeId) { paths.add(convertNodesToSwhPath(path)); } return paths; } /** * Leaves endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#leaves(long) */ public Output leaves(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.leaves(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); + output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * Neighbors endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#neighbors(long) */ public Output neighbors(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.neighbors(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); + output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * Walk endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @param dstFmt destination formatted string as described in the API * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs") * @return the resulting {@link SwhPath} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.SwhPath * @see org.softwareheritage.graph.algo.Traversal#walk */ public Output walk(SwhPID src, String dstFmt, String algorithm) { Output output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); ArrayList nodeIds = new ArrayList(); // Destination is either a SWH PID or a node type try { SwhPID dstSwhPID = new SwhPID(dstFmt); long dstNodeId = graph.getNodeId(dstSwhPID); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstNodeId, algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored1) { try { Node.Type dstType = Node.Type.fromStr(dstFmt); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstType, algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored2) { } } + output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); + startTime = Timing.start(); output.result = convertNodesToSwhPath(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * VisitNodes endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#visitNodes(long) */ public Output visitNodes(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.visitNodes(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); + output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * VisitPaths endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPath} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.SwhPath * @see org.softwareheritage.graph.algo.Traversal#visitPaths(long) */ public Output visitPaths(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList> paths = traversal.visitPaths(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); + output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); startTime = Timing.start(); output.result = convertPathsToSwhPIDs(paths); output.meta.timings.node2pid = Timing.stop(startTime); return output; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java index 65f5fb3..503e068 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java +++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java @@ -1,310 +1,334 @@ package org.softwareheritage.graph.algo; import java.util.ArrayList; import java.util.Collections; import java.util.LinkedList; import java.util.Queue; import java.util.Stack; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.longs.LongBigArrays; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Neighbors; import org.softwareheritage.graph.Node; /** * Traversal algorithms on the compressed graph. *

* Internal implementation of the traversal API endpoints. These methods only input/output internal * long ids, which are converted in the {@link Endpoint} higher-level class to Software Heritage * PID. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 * @see org.softwareheritage.graph.Endpoint */ public class Traversal { /** Graph used in the traversal */ Graph graph; /** Boolean to specify the use of the transposed graph */ boolean useTransposed; /** Graph edge restriction */ AllowedEdges edges; /** Bit array storing if we have visited a node */ LongArrayBitVector visited; /** LongBigArray storing parent node id for each nodes during a traversal */ long[][] nodeParent; + /** Number of edges accessed during traversal */ + long nbEdgesAccessed; /** * Constructor. * * @param graph graph used in the traversal * @param direction a string (either "forward" or "backward") specifying edge orientation * @param edgesFmt a formatted string describing allowed edges */ public Traversal(Graph graph, String direction, String edgesFmt) { if (!direction.matches("forward|backward")) { throw new IllegalArgumentException("Unknown traversal direction: " + direction); } this.graph = graph; this.useTransposed = (direction.equals("backward")); this.edges = new AllowedEdges(graph, edgesFmt); long nbNodes = graph.getNbNodes(); this.visited = LongArrayBitVector.ofLength(nbNodes); this.nodeParent = LongBigArrays.newBigArray(nbNodes); + this.nbEdgesAccessed = 0; + } + + /** + * Returns number of accessed edges during traversal. + * + * @return number of edges accessed in last traversal + */ + public long getnbEdgesAccessed() { + return nbEdgesAccessed; } /** * Returns the leaves of a subgraph rooted at the specified source node. * * @param srcNodeId source node * @return list of node ids corresponding to the leaves */ public ArrayList leaves(long srcNodeId) { ArrayList nodeIds = new ArrayList(); Stack stack = new Stack(); this.visited.fill(false); + this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.set(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); long neighborsCnt = 0; for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { neighborsCnt++; + nbEdgesAccessed++; if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); } } if (neighborsCnt == 0) { nodeIds.add(currentNodeId); } } return nodeIds; } /** * Returns node direct neighbors (linked with exactly one edge). * * @param srcNodeId source node * @return list of node ids corresponding to the neighbors */ public ArrayList neighbors(long srcNodeId) { ArrayList nodeIds = new ArrayList(); + this.nbEdgesAccessed = 0; for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) { nodeIds.add(neighborNodeId); + nbEdgesAccessed++; } return nodeIds; } /** * Performs a graph traversal and returns explored nodes. * * @param srcNodeId source node * @return list of explored node ids */ public ArrayList visitNodes(long srcNodeId) { ArrayList nodeIds = new ArrayList(); Stack stack = new Stack(); this.visited.fill(false); + this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.set(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); nodeIds.add(currentNodeId); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { + nbEdgesAccessed++; if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); } } } return nodeIds; } /** * Performs a graph traversal and returns explored paths. * * @param srcNodeId source node * @return list of explored paths (represented as a list of node ids) */ public ArrayList> visitPaths(long srcNodeId) { ArrayList> paths = new ArrayList<>(); Stack currentPath = new Stack(); + this.nbEdgesAccessed = 0; visitPathsInternal(srcNodeId, paths, currentPath); return paths; } /** * Internal recursive function of {@link #visitPaths}. * * @param currentNodeId current node * @param paths list of currently stored paths * @param currentPath current path as node ids */ private void visitPathsInternal( long currentNodeId, ArrayList> paths, Stack currentPath) { currentPath.push(currentNodeId); long visitedNeighbors = 0; for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { visitPathsInternal(neighborNodeId, paths, currentPath); visitedNeighbors++; + this.nbEdgesAccessed++; } if (visitedNeighbors == 0) { ArrayList path = new ArrayList(); for (long nodeId : currentPath) { path.add(nodeId); } paths.add(path); } currentPath.pop(); } /** * Performs a graph traversal and returns the first found path from source to destination. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return found path as a list of node ids */ public ArrayList walk(long srcNodeId, T dst, String algorithm) { long dstNodeId = -1; if (algorithm.equals("dfs")) { dstNodeId = walkInternalDfs(srcNodeId, dst); } else if (algorithm.equals("bfs")) { dstNodeId = walkInternalBfs(srcNodeId, dst); } else { throw new IllegalArgumentException("Unknown traversal algorithm: " + algorithm); } if (dstNodeId == -1) { throw new IllegalArgumentException("Unable to find destination point: " + dst); } ArrayList nodeIds = backtracking(srcNodeId, dstNodeId); return nodeIds; } /** * Internal DFS function of {@link #walk}. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return final destination node or -1 if no path found */ private long walkInternalDfs(long srcNodeId, T dst) { Stack stack = new Stack(); this.visited.fill(false); + this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.set(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); if (isDstNode(currentNodeId, dst)) { return currentNodeId; } for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { + nbEdgesAccessed++; if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); LongBigArrays.set(nodeParent, neighborNodeId, currentNodeId); } } } return -1; } /** * Internal BFS function of {@link #walk}. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return final destination node or -1 if no path found */ private long walkInternalBfs(long srcNodeId, T dst) { Queue queue = new LinkedList(); this.visited.fill(false); + this.nbEdgesAccessed = 0; queue.add(srcNodeId); visited.set(srcNodeId); while (!queue.isEmpty()) { long currentNodeId = queue.poll(); if (isDstNode(currentNodeId, dst)) { return currentNodeId; } for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { + nbEdgesAccessed++; if (!visited.getBoolean(neighborNodeId)) { queue.add(neighborNodeId); visited.set(neighborNodeId); LongBigArrays.set(nodeParent, neighborNodeId, currentNodeId); } } } return -1; } /** * Internal function of {@link #walk} to check if a node corresponds to the destination. * * @param nodeId current node * @param dst destination (either a node or a node type) * @return true if the node is a destination, or false otherwise */ private boolean isDstNode(long nodeId, T dst) { if (dst instanceof Long) { long dstNodeId = (Long) dst; return nodeId == dstNodeId; } else if (dst instanceof Node.Type) { Node.Type dstType = (Node.Type) dst; return graph.getNodeType(nodeId) == dstType; } else { return false; } } /** * Internal backtracking function of {@link #walk}. * * @param srcNodeId source node * @param dstNodeId destination node * @return the found path, as a list of node ids */ private ArrayList backtracking(long srcNodeId, long dstNodeId) { ArrayList path = new ArrayList(); long currentNodeId = dstNodeId; while (currentNodeId != srcNodeId) { path.add(currentNodeId); currentNodeId = LongBigArrays.get(nodeParent, currentNodeId); } path.add(srcNodeId); Collections.reverse(path); return path; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java new file mode 100644 index 0000000..205ba52 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java @@ -0,0 +1,47 @@ +package org.softwareheritage.graph.benchmark; + +import java.io.IOException; + +import org.softwareheritage.graph.Endpoint; +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.Node; +import org.softwareheritage.graph.benchmark.Common; +import org.softwareheritage.graph.utils.Random; + +/** + * Benchmark Software Heritage browsing use-cases scenarios: use cases. + * + * @author Thibault Allançon + * @version 0.0.1 + * @since 0.0.1 + */ + +public class Browsing { + /** + * Main entrypoint. + * + * @param args command line arguments + */ + public static void main(String[] args) throws IOException { + String path = args[0]; + Graph graph = new Graph(path); + + final long seed = 42; + final int nbNodes = 100_000; + Random random = new Random(seed); + long[] dirNodeIds = random.generateNodeIdsOfType(graph, nbNodes, Node.Type.DIR); + long[] revNodeIds = random.generateNodeIdsOfType(graph, nbNodes, Node.Type.REV); + + Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir"); + Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev"); + + System.out.println("Used " + nbNodes + " random nodes (results are in seconds):"); + System.out.println("\n'ls' use-case"); + Common.timeEndpoint(graph, dirNodeIds, dirEndpoint::neighbors); + System.out.println("\n'ls -R' use-case"); + Common.timeEndpoint(graph, dirNodeIds, dirEndpoint::visitPaths); + System.out.println("\n'git log' use-case"); + Common.timeEndpoint(graph, revNodeIds, revEndpoint::visitNodes); + } +} diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java new file mode 100644 index 0000000..f6b1c0b --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java @@ -0,0 +1,51 @@ +package org.softwareheritage.graph.benchmark; + +import java.util.ArrayList; +import java.util.function.Function; + +import org.softwareheritage.graph.Endpoint; +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhPID; +import org.softwareheritage.graph.utils.Statistics; + +/** + * Benchmark common utility functions. + * + * @author Thibault Allançon + * @version 0.0.1 + * @since 0.0.1 + */ + +public class Common { + /** + * Times a specific endpoint and prints aggregated statistics. + * + * @param graph compressed graph used in the benchmark + * @param nodeIds node ids to use as starting point for the endpoint traversal + * @param operation endpoint function to benchmark + */ + public static void timeEndpoint( + Graph graph, long[] nodeIds, Function operation) { + ArrayList timings = new ArrayList<>(); + ArrayList timingsNormalized = new ArrayList<>(); + + for (long nodeId : nodeIds) { + SwhPID swhPID = graph.getSwhPID(nodeId); + + Endpoint.Output output = operation.apply(swhPID); + + timings.add(output.meta.timings.traversal); + if (output.meta.nbEdgesAccessed != 0) { + timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed); + } + } + + System.out.println("timings:"); + Statistics stats = new Statistics(timings); + stats.printAll(); + + System.out.println("timings normalized:"); + Statistics statsNormalized = new Statistics(timingsNormalized); + statsNormalized.printAll(); + } +}