Page MenuHomeSoftware Heritage

D5665.id.diff
No OneTemporary

D5665.id.diff

diff --git a/swh/graph/client.py b/swh/graph/client.py
--- a/swh/graph/client.py
+++ b/swh/graph/client.py
@@ -19,7 +19,7 @@
class GraphArgumentException(Exception):
- def __init__(self, *args, response):
+ def __init__(self, *args, response=None):
super().__init__(*args)
self.response = response
diff --git a/swh/graph/naive_client.py b/swh/graph/naive_client.py
new file mode 100644
--- /dev/null
+++ b/swh/graph/naive_client.py
@@ -0,0 +1,369 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import functools
+import inspect
+import re
+import statistics
+from typing import (
+ Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
+ Optional,
+ Set,
+ Tuple,
+ TypeVar,
+)
+
+from swh.model.identifiers import ExtendedSWHID, ValidationError
+
+from .client import GraphArgumentException
+
+_NODE_TYPES = "ori|snp|rel|rev|dir|cnt"
+NODES_RE = re.compile(fr"(\*|{_NODE_TYPES})")
+EDGES_RE = re.compile(fr"(\*|{_NODE_TYPES}):(\*|{_NODE_TYPES})")
+
+
+T = TypeVar("T", bound=Callable)
+
+
+def check_arguments(f: T) -> T:
+ """Decorator for generic argument checking for methods of NaiveClient.
+ Checks ``src`` is a valid and known SWHID, and ``edges`` has the right format."""
+ signature = inspect.signature(f)
+
+ @functools.wraps(f)
+ def newf(*args, **kwargs):
+ __tracebackhide__ = True # for pytest
+ try:
+ bound_args = signature.bind(*args, **kwargs)
+ except TypeError as e:
+ # rethrow the exception from here so pytest doesn't flood the terminal
+ # with signature.bind's call stack.
+ raise TypeError(*e.args) from None
+ self = bound_args.arguments["self"]
+
+ src = bound_args.arguments.get("src")
+ if src:
+ self._check_swhid(src)
+
+ edges = bound_args.arguments.get("edges")
+ if edges:
+ if edges != "*" and not EDGES_RE.match(edges):
+ raise GraphArgumentException(f"invalid edge restriction: {edges}")
+
+ return_types = bound_args.arguments.get("return_types")
+ if return_types:
+ if not NODES_RE.match(return_types):
+ raise GraphArgumentException(
+ f"invalid return_types restriction: {return_types}"
+ )
+
+ return f(*args, **kwargs)
+
+ return newf # type: ignore
+
+
+def filter_node_types(node_types: str, nodes: Iterable[str]) -> Iterator[str]:
+ if node_types == "*":
+ yield from nodes
+ else:
+ prefixes = tuple(f"swh:1:{type_}:" for type_ in node_types.split(","))
+ for node in nodes:
+ if node.startswith(prefixes):
+ yield node
+
+
+class NaiveClient:
+ """An alternative implementation of :class:`swh.graph.backend.Backend`,
+ written in pure-python and meant for simulating it in other components' test
+ cases.
+
+ It is NOT meant to be efficient in any way; only to be a very simple
+ implementation that provides the same behavior."""
+
+ def __init__(self, *, nodes: List[str], edges: List[Tuple[str, str]]):
+ self.graph = Graph(nodes, edges)
+
+ def _check_swhid(self, swhid):
+ try:
+ ExtendedSWHID.from_string(swhid)
+ except ValidationError as e:
+ raise GraphArgumentException(*e.args) from None
+ if swhid not in self.graph.nodes:
+ raise GraphArgumentException(f"SWHID not found: {swhid}")
+
+ def stats(self) -> Dict:
+ return {
+ "counts": {
+ "nodes": len(self.graph.nodes),
+ "edges": sum(map(len, self.graph.forward_edges.values())),
+ },
+ "ratios": {
+ "compression": 1.0,
+ "bits_per_edge": 100.0,
+ "bits_per_node": 100.0,
+ "avg_locality": 0.0,
+ },
+ "indegree": {
+ "min": min(map(len, self.graph.backward_edges.values())),
+ "max": max(map(len, self.graph.backward_edges.values())),
+ "avg": statistics.mean(map(len, self.graph.backward_edges.values())),
+ },
+ "outdegree": {
+ "min": min(map(len, self.graph.forward_edges.values())),
+ "max": max(map(len, self.graph.forward_edges.values())),
+ "avg": statistics.mean(map(len, self.graph.forward_edges.values())),
+ },
+ }
+
+ @check_arguments
+ def leaves(
+ self,
+ src: str,
+ edges: str = "*",
+ direction: str = "forward",
+ max_edges: int = 0,
+ return_types: str = "*",
+ ) -> Iterator[str]:
+ # TODO: max_edges
+ yield from filter_node_types(
+ return_types,
+ [
+ node
+ for node in self.graph.get_subgraph(src, edges, direction)
+ if not self.graph.get_filtered_neighbors(node, edges, direction)
+ ],
+ )
+
+ @check_arguments
+ def neighbors(
+ self,
+ src: str,
+ edges: str = "*",
+ direction: str = "forward",
+ max_edges: int = 0,
+ return_types: str = "*",
+ ) -> Iterator[str]:
+ # TODO: max_edges
+ yield from filter_node_types(
+ return_types, self.graph.get_filtered_neighbors(src, edges, direction)
+ )
+
+ @check_arguments
+ def visit_nodes(
+ self,
+ src: str,
+ edges: str = "*",
+ direction: str = "forward",
+ max_edges: int = 0,
+ return_types: str = "*",
+ ) -> Iterator[str]:
+ # TODO: max_edges
+ yield from filter_node_types(
+ return_types, self.graph.get_subgraph(src, edges, direction)
+ )
+
+ @check_arguments
+ def visit_edges(
+ self, src: str, edges: str = "*", direction: str = "forward", max_edges: int = 0
+ ) -> Iterator[Tuple[str, str]]:
+ if max_edges == 0:
+ max_edges = None # type: ignore
+ else:
+ max_edges -= 1
+ yield from list(self.graph.iter_edges_dfs(direction, edges, src))[:max_edges]
+
+ @check_arguments
+ def visit_paths(
+ self, src: str, edges: str = "*", direction: str = "forward", max_edges: int = 0
+ ) -> Iterator[List[str]]:
+ # TODO: max_edges
+ for path in self.graph.iter_paths_dfs(direction, edges, src):
+ if path[-1] in self.leaves(src, edges, direction):
+ yield list(path)
+
+ @check_arguments
+ def walk(
+ self,
+ src: str,
+ dst: str,
+ edges: str = "*",
+ traversal: str = "dfs",
+ direction: str = "forward",
+ limit: Optional[int] = None,
+ ) -> Iterator[str]:
+ # TODO: implement algo="bfs"
+ # TODO: limit
+ match_path: Callable[[str], bool]
+ if ":" in dst:
+ match_path = dst.__eq__
+ self._check_swhid(dst)
+ else:
+ match_path = lambda node: node.startswith(f"swh:1:{dst}:") # noqa
+ for path in self.graph.iter_paths_dfs(direction, edges, src):
+ if match_path(path[-1]):
+ if not limit:
+ # 0 or None
+ yield from path
+ elif limit > 0:
+ yield from path[0:limit]
+ else:
+ yield from path[limit:]
+
+ @check_arguments
+ def random_walk(
+ self,
+ src: str,
+ dst: str,
+ edges: str = "*",
+ direction: str = "forward",
+ limit: Optional[int] = None,
+ ):
+ # TODO: limit
+ yield from self.walk(src, dst, edges, "dfs", direction, limit)
+
+ @check_arguments
+ def count_leaves(
+ self, src: str, edges: str = "*", direction: str = "forward"
+ ) -> int:
+ return len(list(self.leaves(src, edges, direction)))
+
+ @check_arguments
+ def count_neighbors(
+ self, src: str, edges: str = "*", direction: str = "forward"
+ ) -> int:
+ return len(self.graph.get_filtered_neighbors(src, edges, direction))
+
+ @check_arguments
+ def count_visit_nodes(
+ self, src: str, edges: str = "*", direction: str = "forward"
+ ) -> int:
+ return len(self.graph.get_subgraph(src, edges, direction))
+
+
+class Graph:
+ def __init__(self, nodes: List[str], edges: List[Tuple[str, str]]):
+ self.nodes = nodes
+ self.forward_edges: Dict[str, List[str]] = {}
+ self.backward_edges: Dict[str, List[str]] = {}
+ for node in nodes:
+ self.forward_edges[node] = []
+ self.backward_edges[node] = []
+ for (src, dst) in edges:
+ self.forward_edges[src].append(dst)
+ self.backward_edges[dst].append(src)
+
+ def get_filtered_neighbors(
+ self, src: str, edges_fmt: str, direction: str,
+ ) -> Set[str]:
+ if direction == "forward":
+ edges = self.forward_edges
+ elif direction == "backward":
+ edges = self.backward_edges
+ else:
+ raise GraphArgumentException(f"invalid direction: {direction}")
+
+ neighbors = edges.get(src, [])
+
+ if edges_fmt == "*":
+ return set(neighbors)
+ else:
+ filtered_neighbors: Set[str] = set()
+ for edges_fmt_item in edges_fmt.split(","):
+ (src_fmt, dst_fmt) = edges_fmt_item.split(":")
+ if src_fmt != "*" and not src.startswith(f"swh:1:{src_fmt}:"):
+ continue
+ if dst_fmt == "*":
+ filtered_neighbors.update(neighbors)
+ else:
+ prefix = f"swh:1:{dst_fmt}:"
+ filtered_neighbors.update(
+ n for n in neighbors if n.startswith(prefix)
+ )
+ return filtered_neighbors
+
+ def get_subgraph(self, src: str, edges_fmt: str, direction: str) -> Set[str]:
+ seen = set()
+ to_visit = {src}
+ while to_visit:
+ node = to_visit.pop()
+ seen.add(node)
+ neighbors = set(self.get_filtered_neighbors(node, edges_fmt, direction))
+ new_nodes = neighbors - seen
+ to_visit.update(new_nodes)
+
+ return seen
+
+ def iter_paths_dfs(
+ self, direction: str, edges_fmt: str, src: str
+ ) -> Iterator[Tuple[str, ...]]:
+ for (path, node) in DfsSubgraphIterator(self, direction, edges_fmt, src):
+ yield path + (node,)
+
+ def iter_edges_dfs(
+ self, direction: str, edges_fmt: str, src: str
+ ) -> Iterator[Tuple[str, str]]:
+ for (path, node) in DfsSubgraphIterator(self, direction, edges_fmt, src):
+ if len(path) > 0:
+ yield (path[-1], node)
+
+
+class SubgraphIterator(Iterator[Tuple[Tuple[str, ...], str]]):
+ def __init__(self, graph: Graph, direction: str, edges_fmt: str, src: str):
+ self.graph = graph
+ self.direction = direction
+ self.edges_fmt = edges_fmt
+ self.seen: Set[str] = set()
+ self.src = src
+
+ def more_work(self) -> bool:
+ raise NotImplementedError()
+
+ def pop(self) -> Tuple[Tuple[str, ...], str]:
+ raise NotImplementedError()
+
+ def push(self, new_path: Tuple[str, ...], neighbor: str) -> None:
+ raise NotImplementedError()
+
+ def __next__(self) -> Tuple[Tuple[str, ...], str]:
+ # Stores (path, next_node)
+ if not self.more_work():
+ raise StopIteration()
+
+ (path, node) = self.pop()
+
+ new_path = path + (node,)
+
+ if node not in self.seen:
+ neighbors = self.graph.get_filtered_neighbors(
+ node, self.edges_fmt, self.direction
+ )
+
+ # We want to visit the first neighbor first, and to_visit is a stack;
+ # so we need to reversed() the list of neighbors to get it on top
+ # of the stack.
+ for neighbor in reversed(list(neighbors)):
+ self.push(new_path, neighbor)
+
+ self.seen.add(node)
+ return (path, node)
+
+
+class DfsSubgraphIterator(SubgraphIterator):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.to_visit: List[Tuple[Tuple[str, ...], str]] = [((), self.src)]
+
+ def more_work(self) -> bool:
+ return bool(self.to_visit)
+
+ def pop(self) -> Tuple[Tuple[str, ...], str]:
+ return self.to_visit.pop()
+
+ def push(self, new_path: Tuple[str, ...], neighbor: str) -> None:
+ self.to_visit.append((new_path, neighbor))
diff --git a/swh/graph/tests/conftest.py b/swh/graph/tests/conftest.py
--- a/swh/graph/tests/conftest.py
+++ b/swh/graph/tests/conftest.py
@@ -1,3 +1,9 @@
+# Copyright (C) 2019-2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import csv
import multiprocessing
from pathlib import Path
@@ -7,6 +13,7 @@
from swh.graph.backend import Backend
from swh.graph.client import RemoteGraphClient
from swh.graph.graph import load as graph_load
+from swh.graph.naive_client import NaiveClient
from swh.graph.server.app import make_app
SWH_GRAPH_TESTS_ROOT = Path(__file__).parents[0]
@@ -33,16 +40,23 @@
self.q.put(e)
-@pytest.fixture(scope="module")
-def graph_client():
- queue = multiprocessing.Queue()
- server = GraphServerProcess(queue)
- server.start()
- res = queue.get()
- if isinstance(res, Exception):
- raise res
- yield RemoteGraphClient(str(res))
- server.terminate()
+@pytest.fixture(scope="module", params=["remote", "naive"])
+def graph_client(request):
+ if request.param == "remote":
+ queue = multiprocessing.Queue()
+ server = GraphServerProcess(queue)
+ server.start()
+ res = queue.get()
+ if isinstance(res, Exception):
+ raise res
+ yield RemoteGraphClient(str(res))
+ server.terminate()
+ else:
+ with open(SWH_GRAPH_TESTS_ROOT / "dataset/example.nodes.csv") as fd:
+ nodes = [node for (node,) in csv.reader(fd, delimiter=" ")]
+ with open(SWH_GRAPH_TESTS_ROOT / "dataset/example.edges.csv") as fd:
+ edges = list(csv.reader(fd, delimiter=" "))
+ yield NaiveClient(nodes=nodes, edges=edges)
@pytest.fixture(scope="module")
diff --git a/swh/graph/tests/test_api_client.py b/swh/graph/tests/test_api_client.py
--- a/swh/graph/tests/test_api_client.py
+++ b/swh/graph/tests/test_api_client.py
@@ -326,13 +326,15 @@
def test_param_validation(graph_client):
with raises(GraphArgumentException) as exc_info: # SWHID not found
list(graph_client.leaves("swh:1:ori:fff0000000000000000000000000000000000021"))
- assert exc_info.value.response.status_code == 404
+ if exc_info.value.response:
+ assert exc_info.value.response.status_code == 404
with raises(GraphArgumentException) as exc_info: # malformed SWHID
list(
graph_client.neighbors("swh:1:ori:fff000000zzzzzz0000000000000000000000021")
)
- assert exc_info.value.response.status_code == 400
+ if exc_info.value.response:
+ assert exc_info.value.response.status_code == 400
with raises(GraphArgumentException) as exc_info: # malformed edge specificaiton
list(
@@ -342,7 +344,8 @@
direction="backward",
)
)
- assert exc_info.value.response.status_code == 400
+ if exc_info.value.response:
+ assert exc_info.value.response.status_code == 400
with raises(GraphArgumentException) as exc_info: # malformed direction
list(
@@ -352,7 +355,8 @@
direction="notadirection",
)
)
- assert exc_info.value.response.status_code == 400
+ if exc_info.value.response:
+ assert exc_info.value.response.status_code == 400
@pytest.mark.skip(reason="currently disabled due to T1969")

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 30, 11:24 AM (1 w, 20 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225523

Event Timeline