diff --git a/java/src/main/java/org/softwareheritage/graph/Entry.java b/java/src/main/java/org/softwareheritage/graph/Entry.java --- a/java/src/main/java/org/softwareheritage/graph/Entry.java +++ b/java/src/main/java/org/softwareheritage/graph/Entry.java @@ -1,15 +1,12 @@ package org.softwareheritage.graph; -import java.io.DataOutputStream; -import java.io.FileOutputStream; -import java.io.IOException; +import java.io.*; import java.util.ArrayList; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; public class Entry { - private final long PATH_SEPARATOR_ID = -1; private Graph graph; public void load_graph(String graphBasename) throws IOException { @@ -33,6 +30,10 @@ } } + public void check_swhid(String src) { + graph.getNodeId(new SWHID(src)); + } + private int count_visitor(NodeCountVisitor f, long srcNodeId) { int[] count = {0}; f.accept(srcNodeId, (node) -> { @@ -41,23 +42,26 @@ return count[0]; } - public int count_leaves(String direction, String edgesFmt, long srcNodeId) { - Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt); + public int count_leaves(String direction, String edgesFmt, String src) { + long srcNodeId = graph.getNodeId(new SWHID(src)); + Traversal t = new Traversal(graph.copy(), direction, edgesFmt); return count_visitor(t::leavesVisitor, srcNodeId); } - public int count_neighbors(String direction, String edgesFmt, long srcNodeId) { - Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt); + public int count_neighbors(String direction, String edgesFmt, String src) { + long srcNodeId = graph.getNodeId(new SWHID(src)); + Traversal t = new Traversal(graph.copy(), direction, edgesFmt); return count_visitor(t::neighborsVisitor, srcNodeId); } - public int count_visit_nodes(String direction, String edgesFmt, long srcNodeId) { - Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt); + public int count_visit_nodes(String direction, String edgesFmt, String src) { + long srcNodeId = graph.getNodeId(new SWHID(src)); + Traversal t = new Traversal(graph.copy(), direction, edgesFmt); return count_visitor(t::visitNodesVisitor, srcNodeId); } public QueryHandler get_handler(String clientFIFO) { - return new QueryHandler(this.graph.copy(), clientFIFO); + return new QueryHandler(graph.copy(), clientFIFO); } private interface NodeCountVisitor { @@ -66,7 +70,7 @@ public class QueryHandler { Graph graph; - DataOutputStream out; + BufferedWriter out; String clientFIFO; public QueryHandler(Graph graph, String clientFIFO) { @@ -75,30 +79,26 @@ this.out = null; } - public void writeNode(long nodeId) { + public void writeNode(SWHID swhid) { try { - out.writeLong(nodeId); + out.write(swhid.toString() + "\n"); } catch (IOException e) { throw new RuntimeException("Cannot write response to client: " + e); } } - public void writeEdge(long srcId, long dstId) { - writeNode(srcId); - writeNode(dstId); - } - - public void writePath(ArrayList path) { - for (Long nodeId : path) { - writeNode(nodeId); + public void writeEdge(SWHID src, SWHID dst) { + try { + out.write(src.toString() + " " + dst.toString() + "\n"); + } catch (IOException e) { + throw new RuntimeException("Cannot write response to client: " + e); } - writeNode(PATH_SEPARATOR_ID); } public void open() { try { FileOutputStream file = new FileOutputStream(this.clientFIFO); - this.out = new DataOutputStream(file); + this.out = new BufferedWriter(new OutputStreamWriter(file)); } catch (IOException e) { throw new RuntimeException("Cannot open client FIFO: " + e); } @@ -112,83 +112,80 @@ } } - public void leaves(String direction, String edgesFmt, long srcNodeId, long maxEdges, String returnTypes) { + public void leaves(String direction, String edgesFmt, String src, long maxEdges, String returnTypes) { + long srcNodeId = graph.getNodeId(new SWHID(src)); open(); - Traversal t = new Traversal(this.graph, direction, edgesFmt, maxEdges, returnTypes); + Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges, returnTypes); for (Long nodeId : t.leaves(srcNodeId)) { - writeNode(nodeId); + writeNode(graph.getSWHID(nodeId)); } close(); } - public void neighbors(String direction, String edgesFmt, long srcNodeId, long maxEdges, String returnTypes) { + public void neighbors(String direction, String edgesFmt, String src, long maxEdges, String returnTypes) { + long srcNodeId = graph.getNodeId(new SWHID(src)); open(); - Traversal t = new Traversal(this.graph, direction, edgesFmt, maxEdges, returnTypes); + Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges, returnTypes); for (Long nodeId : t.neighbors(srcNodeId)) { - writeNode(nodeId); + writeNode(graph.getSWHID(nodeId)); } close(); } - public void visit_nodes(String direction, String edgesFmt, long srcNodeId, long maxEdges, String returnTypes) { + public void visit_nodes(String direction, String edgesFmt, String src, long maxEdges, String returnTypes) { + long srcNodeId = graph.getNodeId(new SWHID(src)); open(); - Traversal t = new Traversal(this.graph, direction, edgesFmt, maxEdges, returnTypes); + Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges, returnTypes); for (Long nodeId : t.visitNodes(srcNodeId)) { - writeNode(nodeId); + writeNode(graph.getSWHID(nodeId)); } close(); } - public void visit_edges(String direction, String edgesFmt, long srcNodeId, long maxEdges) { + public void visit_edges(String direction, String edgesFmt, String src, long maxEdges, String returnTypes) { + long srcNodeId = graph.getNodeId(new SWHID(src)); open(); - Traversal t = new Traversal(this.graph, direction, edgesFmt, maxEdges); - t.visitNodesVisitor(srcNodeId, null, this::writeEdge); + Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges); + t.visitNodesVisitor(srcNodeId, null, (srcId, dstId) -> { + writeEdge(graph.getSWHID(srcId), graph.getSWHID(dstId)); + }); close(); } - public void visit_paths(String direction, String edgesFmt, long srcNodeId, long maxEdges) { + public void walk(String direction, String edgesFmt, String algorithm, String src, String dst) { + long srcNodeId = graph.getNodeId(new SWHID(src)); open(); - Traversal t = new Traversal(this.graph, direction, edgesFmt, maxEdges); - t.visitPathsVisitor(srcNodeId, this::writePath); - close(); - } - - public void walk(String direction, String edgesFmt, String algorithm, long srcNodeId, long dstNodeId) { - open(); - Traversal t = new Traversal(this.graph, direction, edgesFmt); - for (Long nodeId : t.walk(srcNodeId, dstNodeId, algorithm)) { - writeNode(nodeId); + ArrayList res; + if (dst.matches("ori|snp|rel|rev|dir|cnt")) { + Node.Type dstType = Node.Type.fromStr(dst); + Traversal t = new Traversal(graph, direction, edgesFmt); + res = t.walk(srcNodeId, dstType, algorithm); + } else { + long dstNodeId = graph.getNodeId(new SWHID(dst)); + Traversal t = new Traversal(graph, direction, edgesFmt); + res = t.walk(srcNodeId, dstNodeId, algorithm); } - close(); - } - - public void walk_type(String direction, String edgesFmt, String algorithm, long srcNodeId, String dst) { - open(); - Node.Type dstType = Node.Type.fromStr(dst); - Traversal t = new Traversal(this.graph, direction, edgesFmt); - for (Long nodeId : t.walk(srcNodeId, dstType, algorithm)) { - writeNode(nodeId); + for (Long nodeId : res) { + writeNode(graph.getSWHID(nodeId)); } close(); } - public void random_walk(String direction, String edgesFmt, int retries, long srcNodeId, long dstNodeId, - String returnTypes) { + public void random_walk(String direction, String edgesFmt, int retries, String src, String dst) { + long srcNodeId = graph.getNodeId(new SWHID(src)); open(); - Traversal t = new Traversal(this.graph, direction, edgesFmt, 0, returnTypes); - for (Long nodeId : t.randomWalk(srcNodeId, dstNodeId, retries)) { - writeNode(nodeId); + ArrayList res; + if (dst.matches("ori|snp|rel|rev|dir|cnt")) { + Node.Type dstType = Node.Type.fromStr(dst); + Traversal t = new Traversal(graph, direction, edgesFmt); + res = t.randomWalk(srcNodeId, dstType, retries); + } else { + long dstNodeId = graph.getNodeId(new SWHID(dst)); + Traversal t = new Traversal(graph, direction, edgesFmt); + res = t.randomWalk(srcNodeId, dstNodeId, retries); } - close(); - } - - public void random_walk_type(String direction, String edgesFmt, int retries, long srcNodeId, String dst, - String returnTypes) { - open(); - Node.Type dstType = Node.Type.fromStr(dst); - Traversal t = new Traversal(this.graph, direction, edgesFmt, 0, returnTypes); - for (Long nodeId : t.randomWalk(srcNodeId, dstType, retries)) { - writeNode(nodeId); + for (Long nodeId : res) { + writeNode(graph.getSWHID(nodeId)); } close(); } diff --git a/swh/graph/backend.py b/swh/graph/backend.py --- a/swh/graph/backend.py +++ b/swh/graph/backend.py @@ -7,22 +7,17 @@ import contextlib import io import os -import struct +import re import subprocess import sys import tempfile from py4j.java_gateway import JavaGateway +from py4j.protocol import Py4JJavaError from swh.graph.config import check_config -from swh.graph.swhid import NodeToSwhidMap, SwhidToNodeMap -from swh.model.swhids import EXTENDED_SWHID_TYPES -BUF_SIZE = 64 * 1024 -BIN_FMT = ">q" # 64 bit integer, big endian -PATH_SEPARATOR_ID = -1 -NODE2SWHID_EXT = "node2swhid.bin" -SWHID2NODE_EXT = "swhid2node.bin" +BUF_LINES = 1024 def _get_pipe_stderr(): @@ -53,8 +48,6 @@ ) self.entry = self.gateway.jvm.org.softwareheritage.graph.Entry() self.entry.load_graph(self.graph_path) - self.node2swhid = NodeToSwhidMap(self.graph_path + "." + NODE2SWHID_EXT) - self.swhid2node = SwhidToNodeMap(self.graph_path + "." + SWHID2NODE_EXT) self.stream_proxy = JavaStreamProxy(self.entry) def stop_gateway(self): @@ -70,59 +63,26 @@ def stats(self): return self.entry.stats() + def check_swhid(self, swhid): + try: + self.entry.check_swhid(swhid) + except Py4JJavaError as e: + m = re.search(r"malformed SWHID: (\w+)", str(e)) + if m: + raise ValueError(f"malformed SWHID: {m[1]}") + m = re.search(r"Unknown SWHID: (\w+)", str(e)) + if m: + raise NameError(f"Unknown SWHID: {m[1]}") + raise + def count(self, ttype, direction, edges_fmt, src): method = getattr(self.entry, "count_" + ttype) return method(direction, edges_fmt, src) - async def simple_traversal( - self, ttype, direction, edges_fmt, src, max_edges, return_types - ): - assert ttype in ("leaves", "neighbors", "visit_nodes") + async def traversal(self, ttype, *args): method = getattr(self.stream_proxy, ttype) - async for node_id in method(direction, edges_fmt, src, max_edges, return_types): - yield node_id - - async def walk(self, direction, edges_fmt, algo, src, dst): - if dst in EXTENDED_SWHID_TYPES: - it = self.stream_proxy.walk_type(direction, edges_fmt, algo, src, dst) - else: - it = self.stream_proxy.walk(direction, edges_fmt, algo, src, dst) - async for node_id in it: - yield node_id - - async def random_walk(self, direction, edges_fmt, retries, src, dst, return_types): - if dst in EXTENDED_SWHID_TYPES: - it = self.stream_proxy.random_walk_type( - direction, edges_fmt, retries, src, dst, return_types - ) - else: - it = self.stream_proxy.random_walk( - direction, edges_fmt, retries, src, dst, return_types - ) - async for node_id in it: # TODO return 404 if path is empty - yield node_id - - async def visit_edges(self, direction, edges_fmt, src, max_edges): - it = self.stream_proxy.visit_edges(direction, edges_fmt, src, max_edges) - # convert stream a, b, c, d -> (a, b), (c, d) - prevNode = None - async for node in it: - if prevNode is not None: - yield (prevNode, node) - prevNode = None - else: - prevNode = node - - async def visit_paths(self, direction, edges_fmt, src, max_edges): - path = [] - async for node in self.stream_proxy.visit_paths( - direction, edges_fmt, src, max_edges - ): - if node == PATH_SEPARATOR_ID: - yield path - path = [] - else: - path.append(node) + async for line in method(*args): + yield line.decode().rstrip("\n") class JavaStreamProxy: @@ -152,12 +112,22 @@ # on the Java side, we await it with a timeout in case there is an # exception that prevents the write-side open(). with (await asyncio.wait_for(open_thread, timeout=2)) as f: + + def read_n_lines(f, n): + buf = [] + for _ in range(n): + try: + buf.append(next(f)) + except StopIteration: + break + return buf + while True: - data = await loop.run_in_executor(None, f.read, BUF_SIZE) - if not data: + lines = await loop.run_in_executor(None, read_n_lines, f, BUF_LINES) + if not lines: break - for data in struct.iter_unpack(BIN_FMT, data): - yield data[0] + for line in lines: + yield line class _HandlerWrapper: def __init__(self, handler): diff --git a/swh/graph/graph.py b/swh/graph/graph.py deleted file mode 100644 --- a/swh/graph/graph.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (C) 2019 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import asyncio -import contextlib -import functools - -from swh.graph.backend import Backend -from swh.graph.dot import KIND_TO_SHAPE, dot_to_svg, graph_dot - -BASE_URL = "https://archive.softwareheritage.org/browse" -KIND_TO_URL_FRAGMENT = { - "ori": "/origin/{}", - "snp": "/snapshot/{}", - "rel": "/release/{}", - "rev": "/revision/{}", - "dir": "/directory/{}", - "cnt": "/content/sha1_git:{}/", -} - - -def call_async_gen(generator, *args, **kwargs): - loop = asyncio.get_event_loop() - it = generator(*args, **kwargs).__aiter__() - while True: - try: - res = loop.run_until_complete(it.__anext__()) - yield res - except StopAsyncIteration: - break - - -class Neighbors: - """Neighbor iterator with custom O(1) length method""" - - def __init__(self, graph, iterator, length_func): - self.graph = graph - self.iterator = iterator - self.length_func = length_func - - def __iter__(self): - return self - - def __next__(self): - succ = self.iterator.nextLong() - if succ == -1: - raise StopIteration - return GraphNode(self.graph, succ) - - def __len__(self): - return self.length_func() - - -class GraphNode: - """Node in the SWH graph""" - - def __init__(self, graph, node_id): - self.graph = graph - self.id = node_id - - def children(self): - return Neighbors( - self.graph, - self.graph.java_graph.successors(self.id), - lambda: self.graph.java_graph.outdegree(self.id), - ) - - def parents(self): - return Neighbors( - self.graph, - self.graph.java_graph.predecessors(self.id), - lambda: self.graph.java_graph.indegree(self.id), - ) - - def simple_traversal( - self, ttype, direction="forward", edges="*", max_edges=0, return_types="*" - ): - for node in call_async_gen( - self.graph.backend.simple_traversal, - ttype, - direction, - edges, - self.id, - max_edges, - return_types, - ): - yield self.graph[node] - - def leaves(self, *args, **kwargs): - yield from self.simple_traversal("leaves", *args, **kwargs) - - def visit_nodes(self, *args, **kwargs): - yield from self.simple_traversal("visit_nodes", *args, **kwargs) - - def visit_edges(self, direction="forward", edges="*", max_edges=0): - for src, dst in call_async_gen( - self.graph.backend.visit_edges, direction, edges, self.id, max_edges - ): - yield (self.graph[src], self.graph[dst]) - - def visit_paths(self, direction="forward", edges="*", max_edges=0): - for path in call_async_gen( - self.graph.backend.visit_paths, direction, edges, self.id, max_edges - ): - yield [self.graph[node] for node in path] - - def walk(self, dst, direction="forward", edges="*", traversal="dfs"): - for node in call_async_gen( - self.graph.backend.walk, direction, edges, traversal, self.id, dst - ): - yield self.graph[node] - - def _count(self, ttype, direction="forward", edges="*"): - return self.graph.backend.count(ttype, direction, edges, self.id) - - count_leaves = functools.partialmethod(_count, ttype="leaves") - count_neighbors = functools.partialmethod(_count, ttype="neighbors") - count_visit_nodes = functools.partialmethod(_count, ttype="visit_nodes") - - @property - def swhid(self): - return self.graph.node2swhid[self.id] - - @property - def kind(self): - return self.swhid.split(":")[2] - - def __str__(self): - return self.swhid - - def __repr__(self): - return "<{}>".format(self.swhid) - - def dot_fragment(self): - swh, version, kind, hash = self.swhid.split(":") - label = "{}:{}..{}".format(kind, hash[0:2], hash[-2:]) - url = BASE_URL + KIND_TO_URL_FRAGMENT[kind].format(hash) - shape = KIND_TO_SHAPE[kind] - return '{} [label="{}", href="{}", target="_blank", shape="{}"];'.format( - self.id, label, url, shape - ) - - def _repr_svg_(self): - nodes = [self, *list(self.children()), *list(self.parents())] - dot = graph_dot(nodes) - svg = dot_to_svg(dot) - return svg - - -class Graph: - def __init__(self, backend, node2swhid, swhid2node): - self.backend = backend - self.java_graph = backend.entry.get_graph() - self.node2swhid = node2swhid - self.swhid2node = swhid2node - - def stats(self): - return self.backend.stats() - - @property - def path(self): - return self.java_graph.getPath() - - def __len__(self): - return self.java_graph.numNodes() - - def __getitem__(self, node_id): - if isinstance(node_id, int): - self.node2swhid[node_id] # check existence - return GraphNode(self, node_id) - elif isinstance(node_id, str): - node_id = self.swhid2node[node_id] - return GraphNode(self, node_id) - - def __iter__(self): - for swhid, pos in self.backend.swhid2node: - yield self[swhid] - - def iter_prefix(self, prefix): - for swhid, pos in self.backend.swhid2node.iter_prefix(prefix): - yield self[swhid] - - def iter_type(self, swhid_type): - for swhid, pos in self.backend.swhid2node.iter_type(swhid_type): - yield self[swhid] - - -@contextlib.contextmanager -def load(graph_path): - with Backend(graph_path) as backend: - yield Graph(backend, backend.node2swhid, backend.swhid2node) diff --git a/swh/graph/server/app.py b/swh/graph/server/app.py --- a/swh/graph/server/app.py +++ b/swh/graph/server/app.py @@ -10,7 +10,6 @@ import asyncio from collections import deque -import json import os from typing import Optional @@ -19,7 +18,6 @@ from swh.core.api.asynchronous import RPCServerApp from swh.core.config import read as config_read from swh.graph.backend import Backend -from swh.model.exceptions import ValidationError from swh.model.swhids import EXTENDED_SWHID_TYPES try: @@ -74,26 +72,6 @@ super().__init__(*args, **kwargs) self.backend = self.request.app["backend"] - def node_of_swhid(self, swhid): - """Lookup a SWHID in a swhid2node map, failing in an HTTP-nice way if - needed.""" - try: - return self.backend.swhid2node[swhid] - except KeyError: - raise aiohttp.web.HTTPNotFound(text=f"SWHID not found: {swhid}") - except ValidationError: - raise aiohttp.web.HTTPBadRequest(text=f"malformed SWHID: {swhid}") - - def swhid_of_node(self, node): - """Lookup a node in a node2swhid map, failing in an HTTP-nice way if - needed.""" - try: - return self.backend.node2swhid[node] - except KeyError: - raise aiohttp.web.HTTPInternalServerError( - text=f"reverse lookup failed for node id: {node}" - ) - def get_direction(self): """Validate HTTP query parameter `direction`""" s = self.request.query.get("direction", "forward") @@ -156,6 +134,13 @@ except ValueError: raise aiohttp.web.HTTPBadRequest(text=f"invalid max_edges value: {s}") + def check_swhid(self, swhid): + """Validate that the given SWHID exists in the graph""" + try: + self.backend.check_swhid(swhid) + except (NameError, ValueError) as e: + raise aiohttp.web.HTTPBadRequest(text=str(e)) + class StreamingGraphView(GraphView): """Base class for views streaming their response line by line.""" @@ -218,25 +203,23 @@ simple_traversal_type: Optional[str] = None async def prepare_response(self): - src = self.request.match_info["src"] - self.src_node = self.node_of_swhid(src) - + self.src = self.request.match_info["src"] self.edges = self.get_edges() self.direction = self.get_direction() self.max_edges = self.get_max_edges() self.return_types = self.get_return_types() + self.check_swhid(self.src) async def stream_response(self): - async for res_node in self.backend.simple_traversal( + async for res_line in self.backend.traversal( self.simple_traversal_type, self.direction, self.edges, - self.src_node, + self.src, self.max_edges, self.return_types, ): - res_swhid = self.swhid_of_node(res_node) - await self.stream_line(res_swhid) + await self.stream_line(res_line) class LeavesView(SimpleTraversalView): @@ -251,41 +234,41 @@ simple_traversal_type = "visit_nodes" +class VisitEdgesView(SimpleTraversalView): + simple_traversal_type = "visit_edges" + + class WalkView(StreamingGraphView): async def prepare_response(self): - src = self.request.match_info["src"] - dst = self.request.match_info["dst"] - self.src_node = self.node_of_swhid(src) - if dst not in EXTENDED_SWHID_TYPES: - self.dst_thing = self.node_of_swhid(dst) - else: - self.dst_thing = dst + self.src = self.request.match_info["src"] + self.dst = self.request.match_info["dst"] self.edges = self.get_edges() self.direction = self.get_direction() self.algo = self.get_traversal() self.limit = self.get_limit() - self.return_types = self.get_return_types() + + self.check_swhid(self.src) + if self.dst not in EXTENDED_SWHID_TYPES: + self.check_swhid(self.dst) async def get_walk_iterator(self): - return self.backend.walk( - self.direction, self.edges, self.algo, self.src_node, self.dst_thing + return self.backend.traversal( + "walk", self.direction, self.edges, self.algo, self.src, self.dst ) async def stream_response(self): it = self.get_walk_iterator() if self.limit < 0: queue = deque(maxlen=-self.limit) - async for res_node in it: - res_swhid = self.swhid_of_node(res_node) + async for res_swhid in it: queue.append(res_swhid) while queue: await self.stream_line(queue.popleft()) else: count = 0 - async for res_node in it: + async for res_swhid in it: if self.limit == 0 or count < self.limit: - res_swhid = self.swhid_of_node(res_node) await self.stream_line(res_swhid) count += 1 else: @@ -294,38 +277,14 @@ class RandomWalkView(WalkView): def get_walk_iterator(self): - return self.backend.random_walk( + return self.backend.traversal( + "random_walk", self.direction, self.edges, RANDOM_RETRIES, - self.src_node, - self.dst_thing, - self.return_types, - ) - - -class VisitEdgesView(SimpleTraversalView): - async def stream_response(self): - it = self.backend.visit_edges( - self.direction, self.edges, self.src_node, self.max_edges - ) - async for (res_src, res_dst) in it: - res_src_swhid = self.swhid_of_node(res_src) - res_dst_swhid = self.swhid_of_node(res_dst) - await self.stream_line("{} {}".format(res_src_swhid, res_dst_swhid)) - - -class VisitPathsView(SimpleTraversalView): - content_type = "application/x-ndjson" - - async def stream_response(self): - it = self.backend.visit_paths( - self.direction, self.edges, self.src_node, self.max_edges + self.src, + self.dst, ) - async for res_path in it: - res_path_swhid = [self.swhid_of_node(n) for n in res_path] - line = json.dumps(res_path_swhid) - await self.stream_line(line) class CountView(GraphView): @@ -334,8 +293,8 @@ count_type: Optional[str] = None async def get(self): - src = self.request.match_info["src"] - self.src_node = self.node_of_swhid(src) + self.src = self.request.match_info["src"] + self.check_swhid(self.src) self.edges = self.get_edges() self.direction = self.get_direction() @@ -347,7 +306,7 @@ self.count_type, self.direction, self.edges, - self.src_node, + self.src, ) return aiohttp.web.Response(body=str(cnt), content_type="application/json") @@ -379,7 +338,6 @@ aiohttp.web.view("/graph/neighbors/{src}", NeighborsView), aiohttp.web.view("/graph/visit/nodes/{src}", VisitNodesView), aiohttp.web.view("/graph/visit/edges/{src}", VisitEdgesView), - aiohttp.web.view("/graph/visit/paths/{src}", VisitPathsView), # temporarily disabled in wait of a proper fix for T1969 # aiohttp.web.view("/graph/walk/{src}/{dst}", WalkView) aiohttp.web.view("/graph/randomwalk/{src}/{dst}", RandomWalkView), diff --git a/swh/graph/tests/conftest.py b/swh/graph/tests/conftest.py --- a/swh/graph/tests/conftest.py +++ b/swh/graph/tests/conftest.py @@ -57,12 +57,3 @@ with open(SWH_GRAPH_TESTS_ROOT / "dataset/example.edges.csv") as fd: edges = list(csv.reader(fd, delimiter=" ")) yield NaiveClient(nodes=nodes, edges=edges) - - -@pytest.fixture(scope="module") -def graph(): - # Lazy import to allow debian packaging - from swh.graph.graph import load as graph_load - - with graph_load(str(TEST_GRAPH_PATH)) as g: - yield g diff --git a/swh/graph/tests/test_api_client.py b/swh/graph/tests/test_api_client.py --- a/swh/graph/tests/test_api_client.py +++ b/swh/graph/tests/test_api_client.py @@ -218,33 +218,6 @@ assert set(actual) == set(expected) -def test_visit_paths(graph_client): - actual = list( - graph_client.visit_paths( - "swh:1:snp:0000000000000000000000000000000000000020", edges="snp:*,rev:*" - ) - ) - actual = [tuple(path) for path in actual] - expected = [ - ( - "swh:1:snp:0000000000000000000000000000000000000020", - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:rev:0000000000000000000000000000000000000003", - "swh:1:dir:0000000000000000000000000000000000000002", - ), - ( - "swh:1:snp:0000000000000000000000000000000000000020", - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:dir:0000000000000000000000000000000000000008", - ), - ( - "swh:1:snp:0000000000000000000000000000000000000020", - "swh:1:rel:0000000000000000000000000000000000000010", - ), - ] - assert set(actual) == set(expected) - - @pytest.mark.skip(reason="currently disabled due to T1969") def test_walk(graph_client): args = ("swh:1:dir:0000000000000000000000000000000000000016", "rel") @@ -279,7 +252,7 @@ assert set(actual) == set(expected) -def test_random_walk(graph_client): +def test_random_walk_dst_is_type(graph_client): """as the walk is random, we test a visit from a cnt node to the only origin in the dataset, and only check the final node of the path (i.e., the origin) @@ -308,6 +281,37 @@ assert len(actual) == 3 +def test_random_walk_dst_is_node(graph_client): + """Same as test_random_walk_dst_is_type, but we target the specific origin + node instead of a type + """ + args = ( + "swh:1:cnt:0000000000000000000000000000000000000001", + "swh:1:ori:0000000000000000000000000000000000000021", + ) + kwargs = {"direction": "backward"} + expected_root = "swh:1:ori:0000000000000000000000000000000000000021" + + actual = list(graph_client.random_walk(*args, **kwargs)) + assert len(actual) > 1 # no origin directly links to a content + assert actual[0] == args[0] + assert actual[-1] == expected_root + + kwargs2 = kwargs.copy() + kwargs2["limit"] = -1 + actual = list(graph_client.random_walk(*args, **kwargs2)) + assert actual == [expected_root] + + kwargs2["limit"] = -2 + actual = list(graph_client.random_walk(*args, **kwargs2)) + assert len(actual) == 2 + assert actual[-1] == expected_root + + kwargs2["limit"] = 3 + actual = list(graph_client.random_walk(*args, **kwargs2)) + assert len(actual) == 3 + + def test_count(graph_client): actual = graph_client.count_leaves( "swh:1:ori:0000000000000000000000000000000000000021" diff --git a/swh/graph/tests/test_graph.py b/swh/graph/tests/test_graph.py deleted file mode 100644 --- a/swh/graph/tests/test_graph.py +++ /dev/null @@ -1,166 +0,0 @@ -import pytest - - -def test_graph(graph): - assert len(graph) == 21 - - obj = "swh:1:dir:0000000000000000000000000000000000000008" - node = graph[obj] - - assert str(node) == obj - assert len(node.children()) == 3 - assert len(node.parents()) == 2 - - actual = {p.swhid for p in node.children()} - expected = { - "swh:1:cnt:0000000000000000000000000000000000000001", - "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000007", - } - assert expected == actual - - actual = {p.swhid for p in node.parents()} - expected = { - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:dir:0000000000000000000000000000000000000012", - } - assert expected == actual - - -def test_invalid_swhid(graph): - with pytest.raises(IndexError): - graph[1337] - - with pytest.raises(IndexError): - graph[len(graph) + 1] - - with pytest.raises(KeyError): - graph["swh:1:dir:0000000000000000000000000000000420000012"] - - -def test_leaves(graph): - actual = list(graph["swh:1:ori:0000000000000000000000000000000000000021"].leaves()) - actual = [p.swhid for p in actual] - expected = [ - "swh:1:cnt:0000000000000000000000000000000000000001", - "swh:1:cnt:0000000000000000000000000000000000000004", - "swh:1:cnt:0000000000000000000000000000000000000005", - "swh:1:cnt:0000000000000000000000000000000000000007", - ] - assert set(actual) == set(expected) - - -def test_visit_nodes(graph): - actual = list( - graph["swh:1:rel:0000000000000000000000000000000000000010"].visit_nodes( - edges="rel:rev,rev:rev" - ) - ) - actual = [p.swhid for p in actual] - expected = [ - "swh:1:rel:0000000000000000000000000000000000000010", - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:rev:0000000000000000000000000000000000000003", - ] - assert set(actual) == set(expected) - - -def test_visit_edges(graph): - actual = list( - graph["swh:1:rel:0000000000000000000000000000000000000010"].visit_edges( - edges="rel:rev,rev:rev,rev:dir" - ) - ) - actual = [(src.swhid, dst.swhid) for src, dst in actual] - expected = [ - ( - "swh:1:rel:0000000000000000000000000000000000000010", - "swh:1:rev:0000000000000000000000000000000000000009", - ), - ( - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:rev:0000000000000000000000000000000000000003", - ), - ( - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:dir:0000000000000000000000000000000000000008", - ), - ( - "swh:1:rev:0000000000000000000000000000000000000003", - "swh:1:dir:0000000000000000000000000000000000000002", - ), - ] - assert set(actual) == set(expected) - - -def test_visit_paths(graph): - actual = list( - graph["swh:1:snp:0000000000000000000000000000000000000020"].visit_paths( - edges="snp:*,rev:*" - ) - ) - actual = [tuple(n.swhid for n in path) for path in actual] - expected = [ - ( - "swh:1:snp:0000000000000000000000000000000000000020", - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:rev:0000000000000000000000000000000000000003", - "swh:1:dir:0000000000000000000000000000000000000002", - ), - ( - "swh:1:snp:0000000000000000000000000000000000000020", - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:dir:0000000000000000000000000000000000000008", - ), - ( - "swh:1:snp:0000000000000000000000000000000000000020", - "swh:1:rel:0000000000000000000000000000000000000010", - ), - ] - assert set(actual) == set(expected) - - -def test_walk(graph): - actual = list( - graph["swh:1:dir:0000000000000000000000000000000000000016"].walk( - "rel", edges="dir:dir,dir:rev,rev:*", direction="backward", traversal="bfs" - ) - ) - actual = [p.swhid for p in actual] - expected = [ - "swh:1:dir:0000000000000000000000000000000000000016", - "swh:1:dir:0000000000000000000000000000000000000017", - "swh:1:rev:0000000000000000000000000000000000000018", - "swh:1:rel:0000000000000000000000000000000000000019", - ] - assert set(actual) == set(expected) - - -def test_count(graph): - assert ( - graph["swh:1:ori:0000000000000000000000000000000000000021"].count_leaves() == 4 - ) - assert ( - graph["swh:1:rel:0000000000000000000000000000000000000010"].count_visit_nodes( - edges="rel:rev,rev:rev" - ) - == 3 - ) - assert ( - graph["swh:1:rev:0000000000000000000000000000000000000009"].count_neighbors( - direction="backward" - ) - == 3 - ) - - -def test_iter_type(graph): - rev_list = list(graph.iter_type("rev")) - actual = [n.swhid for n in rev_list] - expected = [ - "swh:1:rev:0000000000000000000000000000000000000003", - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:rev:0000000000000000000000000000000000000013", - "swh:1:rev:0000000000000000000000000000000000000018", - ] - assert expected == actual