class RemoteGraphClient(RPCClient):
    """Client to the Software Heritage Graph."""

    def __init__(self, url, timeout=None):
        super().__init__(api_exception=GraphAPIError, url=url, timeout=timeout)

    def raw_verb_lines(self, verb, endpoint, **kwargs):
        """Send a streaming request and yield the response body line by line.

        Each line is decoded to ``str`` and stripped of leading newlines.
        The status is checked with :meth:`raise_for_status` before any line
        is yielded.
        """
        response = self.raw_verb(verb, endpoint, stream=True, **kwargs)
        self.raise_for_status(response)
        for line in response.iter_lines():
            yield line.decode().lstrip("\n")

    def get_lines(self, endpoint, **kwargs):
        """Same as :meth:`raw_verb_lines`, using the ``get`` verb."""
        yield from self.raw_verb_lines("get", endpoint, **kwargs)

    def raise_for_status(self, response) -> None:
        """Raise :class:`GraphArgumentException` on any 4xx status.

        The exception carries the response body as its message and the
        response object itself as ``response``. Other statuses are handed
        over to the parent class' ``raise_for_status``.
        """
        if response.status_code // 100 == 4:
            # errors="replace": a body containing non-ASCII bytes must not
            # turn the argument error into an unrelated UnicodeDecodeError.
            raise GraphArgumentException(
                response.content.decode("ascii", errors="replace"),
                response=response,
            )
        super().raise_for_status(response)

    # Web API endpoints

    def stats(self):
        """Return the result of a GET on the ``stats`` endpoint."""
        return self.get("stats")

    def leaves(
        self, src, edges="*", direction="forward", max_edges=0, return_types="*"
    ):
        """Return an iterator over the lines of ``leaves/<src>``."""
        return self.get_lines(
            "leaves/{}".format(src),
            params={
                "edges": edges,
                "direction": direction,
                "max_edges": max_edges,
                "return_types": return_types,
            },
        )

    def neighbors(
        self, src, edges="*", direction="forward", max_edges=0, return_types="*"
    ):
        """Return an iterator over the lines of ``neighbors/<src>``."""
        return self.get_lines(
            "neighbors/{}".format(src),
            params={
                "edges": edges,
                "direction": direction,
                "max_edges": max_edges,
                "return_types": return_types,
            },
        )

    def visit_nodes(
        self, src, edges="*", direction="forward", max_edges=0, return_types="*"
    ):
        """Return an iterator over the lines of ``visit/nodes/<src>``."""
        return self.get_lines(
            "visit/nodes/{}".format(src),
            params={
                "edges": edges,
                "direction": direction,
                "max_edges": max_edges,
                "return_types": return_types,
            },
        )

    def visit_edges(self, src, edges="*", direction="forward", max_edges=0):
        """Yield one tuple per line of ``visit/edges/<src>``.

        Each line is split on whitespace to build the tuple.
        """
        for edge in self.get_lines(
            "visit/edges/{}".format(src),
            params={"edges": edges, "direction": direction, "max_edges": max_edges},
        ):
            yield tuple(edge.split())

    def visit_paths(self, src, edges="*", direction="forward", max_edges=0):
        """Return an iterator over the JSON-decoded lines of
        ``visit/paths/<src>``."""

        def decode_path_wrapper(it):
            for e in it:
                yield json.loads(e)

        return decode_path_wrapper(
            self.get_lines(
                "visit/paths/{}".format(src),
                params={"edges": edges, "direction": direction, "max_edges": max_edges},
            )
        )

    def walk(
        self, src, dst, edges="*", traversal="dfs", direction="forward", limit=None
    ):
        """Return an iterator over the lines of ``walk/<src>/<dst>``."""
        endpoint = "walk/{}/{}"
        return self.get_lines(
            endpoint.format(src, dst),
            params={
                "edges": edges,
                "traversal": traversal,
                "direction": direction,
                "limit": limit,
            },
        )

    def random_walk(
        self, src, dst, edges="*", direction="forward", limit=None, return_types="*"
    ):
        """Return an iterator over the lines of ``randomwalk/<src>/<dst>``."""
        endpoint = "randomwalk/{}/{}"
        return self.get_lines(
            endpoint.format(src, dst),
            params={
                "edges": edges,
                "direction": direction,
                "limit": limit,
                "return_types": return_types,
            },
        )

    def count_leaves(self, src, edges="*", direction="forward"):
        """Return the result of a GET on ``leaves/count/<src>``."""
        return self.get(
            "leaves/count/{}".format(src),
            params={"edges": edges, "direction": direction},
        )

    def count_neighbors(self, src, edges="*", direction="forward"):
        """Return the result of a GET on ``neighbors/count/<src>``."""
        return self.get(
            "neighbors/count/{}".format(src),
            params={"edges": edges, "direction": direction},
        )

    def count_visit_nodes(self, src, edges="*", direction="forward"):
        """Return the result of a GET on ``visit/nodes/count/<src>``."""
        return self.get(
            "visit/nodes/count/{}".format(src),
            params={"edges": edges, "direction": direction},
        )
_NODE_TYPES = "ori|snp|rel|rev|dir|cnt"
NODES_RE = re.compile(fr"(\*|{_NODE_TYPES})")
EDGES_RE = re.compile(fr"(\*|{_NODE_TYPES}):(\*|{_NODE_TYPES})")


T = TypeVar("T", bound=Callable)


def check_arguments(f: T) -> T:
    """Decorator for generic argument checking for methods of NaiveClient.

    Before calling ``f``, the wrapper:

    * passes ``src`` (when given and non-empty) to ``self._check_swhid``;
    * checks that ``edges`` is either ``"*"`` or a comma-separated list of
      ``<type>:<type>`` items, where each type is ``*`` or a known node type;
    * checks that ``return_types`` is a comma-separated list of items, each
      being ``*`` or a known node type.

    Raises:
        TypeError: if the call does not match the signature of ``f``.
        GraphArgumentException: if ``edges`` or ``return_types`` is malformed.
    """
    signature = inspect.signature(f)

    @functools.wraps(f)
    def newf(*args, **kwargs):
        __tracebackhide__ = True  # for pytest
        try:
            bound_args = signature.bind(*args, **kwargs)
        except TypeError as e:
            # rethrow the exception from here so pytest doesn't flood the terminal
            # with signature.bind's call stack.
            raise TypeError(*e.args) from None
        self = bound_args.arguments["self"]

        src = bound_args.arguments.get("src")
        if src:
            self._check_swhid(src)

        edges = bound_args.arguments.get("edges")
        if edges and edges != "*":
            # Every item must match in full: a prefix match would accept
            # "dir:dir,bogus" or "dir:directory", which are later split on
            # "," and ":" by the traversal code.
            for item in edges.split(","):
                if not EDGES_RE.fullmatch(item):
                    raise GraphArgumentException(f"invalid edge restriction: {edges}")

        return_types = bound_args.arguments.get("return_types")
        if return_types:
            # Same as above: the value is split on "," when filtering nodes.
            for item in return_types.split(","):
                if not NODES_RE.fullmatch(item):
                    raise GraphArgumentException(
                        f"invalid return_types restriction: {return_types}"
                    )

        return f(*args, **kwargs)

    return newf  # type: ignore
def stats(self) -> Dict:
    """Return node/edge counts and degree statistics of the graph.

    The ``ratios`` values are fixed placeholders. ``indegree`` and
    ``outdegree`` are computed from the backward and forward adjacency
    lists respectively; ``avg`` is always a float. For a graph without
    nodes, all degree statistics are zero.
    """
    graph = self.graph
    # Compute each degree list once instead of once per statistic.
    in_degrees = [len(sources) for sources in graph.backward_edges.values()]
    out_degrees = [len(targets) for targets in graph.forward_edges.values()]

    def summarize(degrees):
        if not degrees:
            # min(), max() and statistics.mean() all raise on empty input.
            return {"min": 0, "max": 0, "avg": 0.0}
        return {
            "min": min(degrees),
            "max": max(degrees),
            # statistics.mean() returns an int when the mean of ints is
            # integral; force a float so the type does not depend on data.
            "avg": float(statistics.mean(degrees)),
        }

    return {
        "counts": {"nodes": len(graph.nodes), "edges": sum(out_degrees)},
        "ratios": {
            "compression": 1.0,
            "bits_per_edge": 100.0,
            "bits_per_node": 100.0,
            "avg_locality": 0.0,
        },
        "indegree": summarize(in_degrees),
        "outdegree": summarize(out_degrees),
    }
@check_arguments
def random_walk(
    self,
    src: str,
    dst: str,
    edges: str = "*",
    direction: str = "forward",
    limit: Optional[int] = None,
    return_types: str = "*",
) -> Iterator[str]:
    """Yield the nodes of a walk from ``src`` to ``dst``.

    This is not random: it delegates to :meth:`walk` with a ``"dfs"``
    traversal, forwarding ``limit`` unchanged.

    ``return_types`` is accepted for signature parity with
    ``RemoteGraphClient.random_walk``. With the default ``"*"`` the output is
    that of :meth:`walk`; otherwise only nodes of the listed types are kept.
    """
    # NOTE(review): the filter is applied after ``limit`` has truncated the
    # path; confirm this matches the order used by the real backend.
    yield from filter_node_types(
        return_types, self.walk(src, dst, edges, "dfs", direction, limit)
    )
class Graph:
    """Directed graph stored as forward and backward adjacency lists."""

    def __init__(self, nodes: List[str], edges: List[Tuple[str, str]]):
        """Build the adjacency lists.

        Every node gets an (initially empty) entry in both directions, then
        each ``(src, dst)`` edge is recorded in both. Both endpoints of every
        edge must be listed in ``nodes``, otherwise ``KeyError`` is raised.
        """
        self.nodes = nodes
        self.forward_edges: Dict[str, List[str]] = {}
        self.backward_edges: Dict[str, List[str]] = {}
        for node in nodes:
            self.forward_edges[node] = []
            self.backward_edges[node] = []
        for (src, dst) in edges:
            self.forward_edges[src].append(dst)
            self.backward_edges[dst].append(src)

    def get_filtered_neighbors(
        self, src: str, edges_fmt: str, direction: str,
    ) -> Set[str]:
        """Return the neighbors of ``src`` allowed by ``edges_fmt``.

        ``direction`` selects successors (``"forward"``) or predecessors
        (``"backward"``); any other value raises ``GraphArgumentException``.
        ``edges_fmt`` is ``"*"`` or a comma-separated list of ``<src>:<dst>``
        items; a neighbor is kept if at least one item accepts both the type
        of ``src`` and the type of the neighbor (``*`` accepts any type).
        A ``src`` unknown to the graph has no neighbors.
        """
        if direction == "forward":
            edges = self.forward_edges
        elif direction == "backward":
            edges = self.backward_edges
        else:
            raise GraphArgumentException(f"invalid direction: {direction}")

        neighbors = edges.get(src, [])

        if edges_fmt == "*":
            return set(neighbors)

        filtered_neighbors: Set[str] = set()
        for edges_fmt_item in edges_fmt.split(","):
            (src_fmt, dst_fmt) = edges_fmt_item.split(":")
            if src_fmt != "*" and not src.startswith(f"swh:1:{src_fmt}:"):
                continue
            if dst_fmt == "*":
                filtered_neighbors.update(neighbors)
            else:
                prefix = f"swh:1:{dst_fmt}:"
                filtered_neighbors.update(
                    n for n in neighbors if n.startswith(prefix)
                )
        return filtered_neighbors

    def get_subgraph(self, src: str, edges_fmt: str, direction: str) -> Set[str]:
        """Return every node reachable from ``src`` (``src`` included) by
        following only the edges allowed by ``edges_fmt`` and ``direction``."""
        seen = set()
        to_visit = {src}
        while to_visit:
            node = to_visit.pop()
            seen.add(node)
            neighbors = self.get_filtered_neighbors(node, edges_fmt, direction)
            to_visit.update(neighbors - seen)

        return seen

    def iter_paths_dfs(
        self, direction: str, edges_fmt: str, src: str
    ) -> Iterator[Tuple[str, ...]]:
        """Yield, in depth-first order, the path from ``src`` to each node
        popped by the traversal (``src`` itself first, as a 1-tuple)."""
        for (path, node) in DfsSubgraphIterator(self, direction, edges_fmt, src):
            yield path + (node,)

    def iter_edges_dfs(
        self, direction: str, edges_fmt: str, src: str
    ) -> Iterator[Tuple[str, str]]:
        """Yield, in depth-first order, the ``(parent, node)`` edge through
        which each node is reached; the starting node yields nothing."""
        for (path, node) in DfsSubgraphIterator(self, direction, edges_fmt, src):
            if len(path) > 0:
                yield (path[-1], node)


class SubgraphIterator(Iterator[Tuple[Tuple[str, ...], str]]):
    """Base class of traversals yielding ``(path_to_parent, node)`` pairs.

    Subclasses provide the pending-work container through ``more_work``,
    ``pop`` and ``push``. A node is expanded (its neighbors pushed) only the
    first time it is popped, but it is yielded every time it is popped.
    """

    def __init__(self, graph: Graph, direction: str, edges_fmt: str, src: str):
        self.graph = graph
        self.direction = direction
        self.edges_fmt = edges_fmt
        self.seen: Set[str] = set()
        self.src = src

    def more_work(self) -> bool:
        raise NotImplementedError()

    def pop(self) -> Tuple[Tuple[str, ...], str]:
        raise NotImplementedError()

    def push(self, new_path: Tuple[str, ...], neighbor: str) -> None:
        raise NotImplementedError()

    def __next__(self) -> Tuple[Tuple[str, ...], str]:
        # Stores (path, next_node)
        if not self.more_work():
            raise StopIteration()

        (path, node) = self.pop()

        new_path = path + (node,)

        if node not in self.seen:
            neighbors = self.graph.get_filtered_neighbors(
                node, self.edges_fmt, self.direction
            )

            # Neighbors come as a set, whose iteration order changes between
            # interpreter runs (string hash randomization). Sort them so the
            # traversal is reproducible; push in descending order because
            # to_visit is a stack and the smallest neighbor must end on top.
            for neighbor in sorted(neighbors, reverse=True):
                self.push(new_path, neighbor)

        self.seen.add(node)
        return (path, node)


class DfsSubgraphIterator(SubgraphIterator):
    """Depth-first traversal: pending work is kept on a LIFO stack."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.to_visit: List[Tuple[Tuple[str, ...], str]] = [((), self.src)]

    def more_work(self) -> bool:
        return bool(self.to_visit)

    def pop(self) -> Tuple[Tuple[str, ...], str]:
        return self.to_visit.pop()

    def push(self, new_path: Tuple[str, ...], neighbor: str) -> None:
        self.to_visit.append((new_path, neighbor))
-@pytest.fixture(scope="module") -def graph_client(): - queue = multiprocessing.Queue() - server = GraphServerProcess(queue) - server.start() - res = queue.get() - if isinstance(res, Exception): - raise res - yield RemoteGraphClient(str(res)) - server.terminate() +@pytest.fixture(scope="module", params=["remote", "naive"]) +def graph_client(request): + if request.param == "remote": + queue = multiprocessing.Queue() + server = GraphServerProcess(queue) + server.start() + res = queue.get() + if isinstance(res, Exception): + raise res + yield RemoteGraphClient(str(res)) + server.terminate() + else: + with open(SWH_GRAPH_TESTS_ROOT / "dataset/example.nodes.csv") as fd: + nodes = [node for (node,) in csv.reader(fd, delimiter=" ")] + with open(SWH_GRAPH_TESTS_ROOT / "dataset/example.edges.csv") as fd: + edges = list(csv.reader(fd, delimiter=" ")) + yield NaiveClient(nodes=nodes, edges=edges) @pytest.fixture(scope="module") def graph(): with graph_load(str(TEST_GRAPH_PATH)) as g: yield g diff --git a/swh/graph/tests/test_api_client.py b/swh/graph/tests/test_api_client.py index b3df805..90f9a0a 100644 --- a/swh/graph/tests/test_api_client.py +++ b/swh/graph/tests/test_api_client.py @@ -1,371 +1,375 @@ import pytest from pytest import raises from swh.core.api import RemoteException from swh.graph.client import GraphArgumentException def test_stats(graph_client): stats = graph_client.stats() assert set(stats.keys()) == {"counts", "ratios", "indegree", "outdegree"} assert set(stats["counts"].keys()) == {"nodes", "edges"} assert set(stats["ratios"].keys()) == { "compression", "bits_per_node", "bits_per_edge", "avg_locality", } assert set(stats["indegree"].keys()) == {"min", "max", "avg"} assert set(stats["outdegree"].keys()) == {"min", "max", "avg"} assert stats["counts"]["nodes"] == 21 assert stats["counts"]["edges"] == 23 assert isinstance(stats["ratios"]["compression"], float) assert isinstance(stats["ratios"]["bits_per_node"], float) assert 
isinstance(stats["ratios"]["bits_per_edge"], float) assert isinstance(stats["ratios"]["avg_locality"], float) assert stats["indegree"]["min"] == 0 assert stats["indegree"]["max"] == 3 assert isinstance(stats["indegree"]["avg"], float) assert stats["outdegree"]["min"] == 0 assert stats["outdegree"]["max"] == 3 assert isinstance(stats["outdegree"]["avg"], float) def test_leaves(graph_client): actual = list( graph_client.leaves("swh:1:ori:0000000000000000000000000000000000000021") ) expected = [ "swh:1:cnt:0000000000000000000000000000000000000001", "swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:cnt:0000000000000000000000000000000000000005", "swh:1:cnt:0000000000000000000000000000000000000007", ] assert set(actual) == set(expected) def test_neighbors(graph_client): actual = list( graph_client.neighbors( "swh:1:rev:0000000000000000000000000000000000000009", direction="backward" ) ) expected = [ "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000013", ] assert set(actual) == set(expected) def test_visit_nodes(graph_client): actual = list( graph_client.visit_nodes( "swh:1:rel:0000000000000000000000000000000000000010", edges="rel:rev,rev:rev", ) ) expected = [ "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", ] assert set(actual) == set(expected) def test_visit_nodes_filtered(graph_client): actual = list( graph_client.visit_nodes( "swh:1:rel:0000000000000000000000000000000000000010", return_types="dir", ) ) expected = [ "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", ] assert set(actual) == set(expected) def test_visit_nodes_filtered_star(graph_client): actual = list( graph_client.visit_nodes( 
"swh:1:rel:0000000000000000000000000000000000000010", return_types="*", ) ) expected = [ "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:cnt:0000000000000000000000000000000000000001", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:cnt:0000000000000000000000000000000000000005", ] assert set(actual) == set(expected) def test_visit_edges(graph_client): actual = list( graph_client.visit_edges( "swh:1:rel:0000000000000000000000000000000000000010", edges="rel:rev,rev:rev,rev:dir", ) ) expected = [ ( "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", ), ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", ), ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", ), ( "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", ), ] assert set(actual) == set(expected) def test_visit_edges_limited(graph_client): actual = list( graph_client.visit_edges( "swh:1:rel:0000000000000000000000000000000000000010", max_edges=4, edges="rel:rev,rev:rev,rev:dir", ) ) expected = [ ( "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", ), ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", ), ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", ), ( "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", ), ] # As 
there are four valid answers (up to reordering), we cannot check for # equality. Instead, we check the client returned all edges but one. assert set(actual).issubset(set(expected)) assert len(actual) == 3 def test_visit_edges_diamond_pattern(graph_client): actual = list( graph_client.visit_edges( "swh:1:rev:0000000000000000000000000000000000000009", edges="*", ) ) expected = [ ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", ), ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", ), ( "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", ), ( "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:cnt:0000000000000000000000000000000000000001", ), ( "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001", ), ( "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007", ), ( "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", ), ( "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004", ), ( "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005", ), ] assert set(actual) == set(expected) def test_visit_paths(graph_client): actual = list( graph_client.visit_paths( "swh:1:snp:0000000000000000000000000000000000000020", edges="snp:*,rev:*" ) ) actual = [tuple(path) for path in actual] expected = [ ( "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", ), ( "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", 
"swh:1:dir:0000000000000000000000000000000000000008", ), ( "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", ), ] assert set(actual) == set(expected) @pytest.mark.skip(reason="currently disabled due to T1969") def test_walk(graph_client): args = ("swh:1:dir:0000000000000000000000000000000000000016", "rel") kwargs = { "edges": "dir:dir,dir:rev,rev:*", "direction": "backward", "traversal": "bfs", } actual = list(graph_client.walk(*args, **kwargs)) expected = [ "swh:1:dir:0000000000000000000000000000000000000016", "swh:1:dir:0000000000000000000000000000000000000017", "swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rel:0000000000000000000000000000000000000019", ] assert set(actual) == set(expected) kwargs2 = kwargs.copy() kwargs2["limit"] = -1 actual = list(graph_client.walk(*args, **kwargs2)) expected = ["swh:1:rel:0000000000000000000000000000000000000019"] assert set(actual) == set(expected) kwargs2 = kwargs.copy() kwargs2["limit"] = 2 actual = list(graph_client.walk(*args, **kwargs2)) expected = [ "swh:1:dir:0000000000000000000000000000000000000016", "swh:1:dir:0000000000000000000000000000000000000017", ] assert set(actual) == set(expected) def test_random_walk(graph_client): """as the walk is random, we test a visit from a cnt node to the only origin in the dataset, and only check the final node of the path (i.e., the origin) """ args = ("swh:1:cnt:0000000000000000000000000000000000000001", "ori") kwargs = {"direction": "backward"} expected_root = "swh:1:ori:0000000000000000000000000000000000000021" actual = list(graph_client.random_walk(*args, **kwargs)) assert len(actual) > 1 # no origin directly links to a content assert actual[0] == args[0] assert actual[-1] == expected_root kwargs2 = kwargs.copy() kwargs2["limit"] = -1 actual = list(graph_client.random_walk(*args, **kwargs2)) assert actual == [expected_root] kwargs2["limit"] = -2 actual = list(graph_client.random_walk(*args, **kwargs2)) 
def test_param_validation(graph_client):
    """Check that invalid arguments raise GraphArgumentException.

    When the exception carries an HTTP response, its status code is checked
    too. The presence test must be ``is not None``: HTTP response objects
    from ``requests`` are falsy for 4xx statuses, so a plain truthiness test
    would skip the status assertions in exactly the cases they are meant for.
    (NOTE(review): assumes the remote client's responses are
    ``requests.Response`` objects -- confirm against ``swh.core.api``.)
    """
    with raises(GraphArgumentException) as exc_info:  # SWHID not found
        list(graph_client.leaves("swh:1:ori:fff0000000000000000000000000000000000021"))
    if exc_info.value.response is not None:
        assert exc_info.value.response.status_code == 404

    with raises(GraphArgumentException) as exc_info:  # malformed SWHID
        list(
            graph_client.neighbors("swh:1:ori:fff000000zzzzzz0000000000000000000000021")
        )
    if exc_info.value.response is not None:
        assert exc_info.value.response.status_code == 400

    with raises(GraphArgumentException) as exc_info:  # malformed edge specification
        list(
            graph_client.visit_nodes(
                "swh:1:dir:0000000000000000000000000000000000000016",
                edges="dir:notanodetype,dir:rev,rev:*",
                direction="backward",
            )
        )
    if exc_info.value.response is not None:
        assert exc_info.value.response.status_code == 400

    with raises(GraphArgumentException) as exc_info:  # malformed direction
        list(
            graph_client.visit_nodes(
                "swh:1:dir:0000000000000000000000000000000000000016",
                edges="dir:dir,dir:rev,rev:*",
                direction="notadirection",
            )
        )
    if exc_info.value.response is not None:
        assert exc_info.value.response.status_code == 400
test_param_validation_walk(graph_client): """test validation of walk-specific parameters only""" with raises(RemoteException) as exc_info: # malformed traversal order list( graph_client.walk( "swh:1:dir:0000000000000000000000000000000000000016", "rel", edges="dir:dir,dir:rev,rev:*", direction="backward", traversal="notatraversalorder", ) ) assert exc_info.value.response.status_code == 400