diff --git a/docs/api.rst b/docs/api.rst --- a/docs/api.rst +++ b/docs/api.rst @@ -227,10 +227,11 @@ ----- .. http:get:: /graph/visit/nodes/:src +.. http:get:: /graph/visit/edges/:src .. http:get:: /graph/visit/paths/:src - Performs a graph traversal and returns explored nodes or paths (in the order - of the traversal). + Performs a graph traversal and returns explored nodes, edges or paths (in + the order of the traversal). :param string src: starting node specified as a SWHID @@ -272,7 +273,32 @@ .. sourcecode:: http - GET /graph/visit/nodes/swh:1:dir:644dd466d8ad527ea3a609bfd588a3244e6dafcb HTTP/1.1 + GET /graph/visit/edges/swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc HTTP/1.1 + + Content-Type: text/plain + Transfer-Encoding: chunked + + .. sourcecode:: http + + HTTP/1.1 200 OK + + swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc swh:1:rev:61f92a7db95f5a6d1fcb94d2b897ed3797584d7b + swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc swh:1:rev:00e81c89c29ff3e58745fdaf7abb68daa1389e85 + swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc swh:1:rev:7596fdc31c9aa00aed281ccb026a74cabf2383bb + swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc swh:1:rev:ec7a2341ac3d9d8b571bbdfb90a089d4e54dea56 + swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc swh:1:rev:1c5b5eac61eda2454034a43eb124ab490885ef3a + swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc swh:1:rev:4dfa88ca55e04e8afe05e8543ddddee32dde7236 + swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc swh:1:rev:d56ae79e43ff1b37534370911c8a78ec7f38d437 + swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc swh:1:rev:19ba5d6203a040a39ecc4a77b165d3f097c1e662 + swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc swh:1:rev:9c56102eefea23c95405533e1de23da4b873ecc4 + swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc swh:1:rev:3f54e816b46c2e179cd164e17fea93b3013a9db4 + ... + + **Example:** + + .. sourcecode:: http + + GET /graph/visit/paths/swh:1:dir:644dd466d8ad527ea3a609bfd588a3244e6dafcb HTTP/1.1 Content-Type: application/x-ndjson Transfer-Encoding: chunked diff --git a/java/src/main/java/org/softwareheritage/graph/Entry.java b/java/src/main/java/org/softwareheritage/graph/Entry.java --- a/java/src/main/java/org/softwareheritage/graph/Entry.java +++ b/java/src/main/java/org/softwareheritage/graph/Entry.java @@ -86,6 +86,11 @@ } } + public void writeEdge(long srcId, long dstId) { + writeNode(srcId); + writeNode(dstId); + } + public void writePath(ArrayList path) { for (Long nodeId : path) { writeNode(nodeId); @@ -131,6 +136,13 @@ close(); } + public void visit_edges(String direction, String edgesFmt, long srcNodeId) { + open(); + Traversal t = new Traversal(this.graph, direction, edgesFmt); + t.visitNodesVisitor(srcNodeId, null, this::writeEdge); + close(); + } + public void visit_paths(String direction, String edgesFmt, long srcNodeId) { open(); diff --git a/java/src/main/java/org/softwareheritage/graph/algo/EdgeIdConsumer.java b/java/src/main/java/org/softwareheritage/graph/algo/EdgeIdConsumer.java new file mode 100644 --- /dev/null +++ b/java/src/main/java/org/softwareheritage/graph/algo/EdgeIdConsumer.java @@ -0,0 +1,9 @@ +package org.softwareheritage.graph.algo; + +public interface EdgeIdConsumer { + + /** Callback for incrementally receiving edge identifiers during a graph + * visit. + */ + void accept(long srcId, long dstId); +} diff --git a/java/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/src/main/java/org/softwareheritage/graph/algo/Traversal.java --- a/java/src/main/java/org/softwareheritage/graph/algo/Traversal.java +++ b/java/src/main/java/org/softwareheritage/graph/algo/Traversal.java @@ -149,7 +149,7 @@ * Push version of {@link visitNodes}: will fire passed callback on each * visited node. */ - public void visitNodesVisitor(long srcNodeId, NodeIdConsumer cb) { + public void visitNodesVisitor(long srcNodeId, NodeIdConsumer nodeCb, EdgeIdConsumer edgeCb) { Stack stack = new Stack(); this.nbEdgesAccessed = 0; @@ -158,10 +158,15 @@ while (!stack.isEmpty()) { long currentNodeId = stack.pop(); - cb.accept(currentNodeId); + if (nodeCb != null) { + nodeCb.accept(currentNodeId); + } nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { + if (edgeCb != null) { + edgeCb.accept(currentNodeId, neighborNodeId); + } if (!visited.contains(neighborNodeId)) { stack.push(neighborNodeId); visited.add(neighborNodeId); @@ -170,6 +175,11 @@ } } + /** One-argument version to handle callbacks properly */ + public void visitNodesVisitor(long srcNodeId, NodeIdConsumer cb) { + visitNodesVisitor(srcNodeId, cb, null); + } + /** * Performs a graph traversal and returns explored nodes. * diff --git a/swh/graph/backend.py b/swh/graph/backend.py --- a/swh/graph/backend.py +++ b/swh/graph/backend.py @@ -92,6 +92,17 @@ async for node_id in it: # TODO return 404 if path is empty yield node_id + async def visit_edges(self, direction, edges_fmt, src): + it = self.stream_proxy.visit_edges(direction, edges_fmt, src) + # convert stream a, b, c, d -> (a, b), (c, d) + prevNode = None + async for node in it: + if prevNode is not None: + yield (prevNode, node) + prevNode = None + else: + prevNode = node + async def visit_paths(self, direction, edges_fmt, src): path = [] async for node in self.stream_proxy.visit_paths(direction, edges_fmt, src): diff --git a/swh/graph/client.py b/swh/graph/client.py --- a/swh/graph/client.py +++ b/swh/graph/client.py @@ -51,6 +51,13 @@ params={"edges": edges, "direction": direction}, ) + def visit_edges(self, src, edges="*", direction="forward"): + for edge in self.get_lines( + "visit/edges/{}".format(src), + params={"edges": edges, "direction": direction}, + ): + yield tuple(edge.split()) + def visit_paths(self, src, edges="*", direction="forward"): def decode_path_wrapper(it): for e in it: diff --git a/swh/graph/config.py b/swh/graph/config.py --- a/swh/graph/config.py +++ b/swh/graph/config.py @@ -107,5 +107,4 @@ logback=conf["logback"] ) - print(conf) return conf diff --git a/swh/graph/graph.py b/swh/graph/graph.py --- a/swh/graph/graph.py +++ b/swh/graph/graph.py @@ -86,6 +86,12 @@ def visit_nodes(self, *args, **kwargs): yield from self.simple_traversal("visit_nodes", *args, **kwargs) + def visit_edges(self, direction="forward", edges="*"): + for src, dst in call_async_gen( + self.graph.backend.visit_edges, direction, edges, self.id + ): + yield (self.graph[src], self.graph[dst]) + def visit_paths(self, direction="forward", edges="*"): for path in call_async_gen( self.graph.backend.visit_paths, direction, edges, self.id diff --git a/swh/graph/pid.py b/swh/graph/pid.py --- a/swh/graph/pid.py +++ b/swh/graph/pid.py @@ -12,7 +12,7 @@ from mmap import MAP_SHARED, PROT_READ, PROT_WRITE from typing import BinaryIO, Iterator, Tuple -from swh.model.identifiers import PersistentId, parse_persistent_identifier +from swh.model.identifiers import SWHID, parse_swhid PID_BIN_FMT = "BB20s" # 2 unsigned chars + 20 bytes @@ -56,7 +56,7 @@ bytes: byte sequence representation of pid """ - pid = parse_persistent_identifier(pid_str) + pid = parse_swhid(pid_str) return struct.pack( PID_BIN_FMT, pid.scheme_version, @@ -78,7 +78,7 @@ """ (version, type, bin_digest) = struct.unpack(PID_BIN_FMT, bytes) - pid = PersistentId(object_type=PidType(type).name, object_id=bin_digest) + pid = SWHID(object_type=PidType(type).name, object_id=bin_digest) return str(pid) diff --git a/swh/graph/server/app.py b/swh/graph/server/app.py --- a/swh/graph/server/app.py +++ b/swh/graph/server/app.py @@ -202,6 +202,23 @@ return response +async def visit_edges(request): + backend = request.app["backend"] + + src = request.match_info["src"] + edges = get_edges(request) + direction = get_direction(request) + + src_node = node_of_pid(src, backend) + it = backend.visit_edges(direction, edges, src_node) + async with stream_response(request) as response: + async for (res_src, res_dst) in it: + res_src_pid = pid_of_node(res_src, backend) + res_dst_pid = pid_of_node(res_dst, backend) + await response.write("{} {}\n".format(res_src_pid, res_dst_pid).encode()) + return response + + def get_count_handler(ttype): async def count(request): loop = asyncio.get_event_loop() @@ -233,6 +250,7 @@ app.router.add_get( "/graph/visit/nodes/{src}", get_simple_traversal_handler("visit_nodes") ) + app.router.add_get("/graph/visit/edges/{src}", visit_edges) app.router.add_get("/graph/visit/paths/{src}", visit_paths) # temporarily disabled in wait of a proper fix for T1969 diff --git a/swh/graph/tests/test_api_client.py b/swh/graph/tests/test_api_client.py --- a/swh/graph/tests/test_api_client.py +++ b/swh/graph/tests/test_api_client.py @@ -77,6 +77,81 @@ assert set(actual) == set(expected) +def test_visit_edges(graph_client): + actual = list( + graph_client.visit_edges( + "swh:1:rel:0000000000000000000000000000000000000010", + edges="rel:rev,rev:rev,rev:dir", + ) + ) + expected = [ + ( + "swh:1:rel:0000000000000000000000000000000000000010", + "swh:1:rev:0000000000000000000000000000000000000009", + ), + ( + "swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:rev:0000000000000000000000000000000000000003", + ), + ( + "swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:dir:0000000000000000000000000000000000000008", + ), + ( + "swh:1:rev:0000000000000000000000000000000000000003", + "swh:1:dir:0000000000000000000000000000000000000002", + ), + ] + assert set(actual) == set(expected) + + +def test_visit_edges_diamond_pattern(graph_client): + actual = list( + graph_client.visit_edges( + "swh:1:rev:0000000000000000000000000000000000000009", edges="*", + ) + ) + expected = [ + ( + "swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:rev:0000000000000000000000000000000000000003", + ), + ( + "swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:dir:0000000000000000000000000000000000000008", + ), + ( + "swh:1:rev:0000000000000000000000000000000000000003", + "swh:1:dir:0000000000000000000000000000000000000002", + ), + ( + "swh:1:dir:0000000000000000000000000000000000000002", + "swh:1:cnt:0000000000000000000000000000000000000001", + ), + ( + "swh:1:dir:0000000000000000000000000000000000000008", + "swh:1:cnt:0000000000000000000000000000000000000001", + ), + ( + "swh:1:dir:0000000000000000000000000000000000000008", + "swh:1:cnt:0000000000000000000000000000000000000007", + ), + ( + "swh:1:dir:0000000000000000000000000000000000000008", + "swh:1:dir:0000000000000000000000000000000000000006", + ), + ( + "swh:1:dir:0000000000000000000000000000000000000006", + "swh:1:cnt:0000000000000000000000000000000000000004", + ), + ( + "swh:1:dir:0000000000000000000000000000000000000006", + "swh:1:cnt:0000000000000000000000000000000000000005", + ), + ] + assert set(actual) == set(expected) + + def test_visit_paths(graph_client): actual = list( graph_client.visit_paths( diff --git a/swh/graph/tests/test_graph.py b/swh/graph/tests/test_graph.py --- a/swh/graph/tests/test_graph.py +++ b/swh/graph/tests/test_graph.py @@ -65,6 +65,34 @@ assert set(actual) == set(expected) +def test_visit_edges(graph): + actual = list( + graph["swh:1:rel:0000000000000000000000000000000000000010"].visit_edges( + edges="rel:rev,rev:rev,rev:dir" + ) + ) + actual = [(src.pid, dst.pid) for src, dst in actual] + expected = [ + ( + "swh:1:rel:0000000000000000000000000000000000000010", + "swh:1:rev:0000000000000000000000000000000000000009", + ), + ( + "swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:rev:0000000000000000000000000000000000000003", + ), + ( + "swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:dir:0000000000000000000000000000000000000008", + ), + ( + "swh:1:rev:0000000000000000000000000000000000000003", + "swh:1:dir:0000000000000000000000000000000000000002", + ), + ] + assert set(actual) == set(expected) + + def test_visit_paths(graph): actual = list( graph["swh:1:snp:0000000000000000000000000000000000000020"].visit_paths(