diff --git a/requirements-swh.txt b/requirements-swh.txt index f0d623f..987b552 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,2 +1,2 @@ -swh.core[http] >= 0.0.63 +swh.core[http] >= 0.0.79 swh.model diff --git a/swh/graph/client.py b/swh/graph/client.py index 439ca82..af622ab 100644 --- a/swh/graph/client.py +++ b/swh/graph/client.py @@ -1,118 +1,119 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from swh.core.api import RPCClient class GraphAPIError(Exception): """Graph API Error""" def __str__(self): return ('An unexpected error occurred in the Graph backend: {}' .format(self.args)) class RemoteGraphClient(RPCClient): """Client to the Software Heritage Graph.""" def __init__(self, url, timeout=None): super().__init__( api_exception=GraphAPIError, url=url, timeout=timeout) def raw_verb_lines(self, verb, endpoint, **kwargs): response = self.raw_verb(verb, endpoint, stream=True, **kwargs) + self.check_status(response) for line in response.iter_lines(): yield line.decode().lstrip('\n') def get_lines(self, endpoint, **kwargs): yield from self.raw_verb_lines('get', endpoint, **kwargs) # Web API endpoints def stats(self): return self.get('stats') def leaves(self, src, edges="*", direction="forward"): return self.get_lines( 'leaves/{}'.format(src), params={ 'edges': edges, 'direction': direction }) def neighbors(self, src, edges="*", direction="forward"): return self.get_lines( 'neighbors/{}'.format(src), params={ 'edges': edges, 'direction': direction }) def visit_nodes(self, src, edges="*", direction="forward"): return self.get_lines( 'visit/nodes/{}'.format(src), params={ 'edges': edges, 'direction': direction }) def visit_paths(self, src, edges="*", direction="forward"): def decode_path_wrapper(it): for e in it: yield json.loads(e) return 
decode_path_wrapper( self.get_lines( 'visit/paths/{}'.format(src), params={ 'edges': edges, 'direction': direction })) def walk(self, src, dst, edges="*", traversal="dfs", direction="forward", last=False): endpoint = 'walk/last/{}/{}' if last else 'walk/{}/{}' return self.get_lines( endpoint.format(src, dst), params={ 'edges': edges, 'traversal': traversal, 'direction': direction }) def random_walk(self, src, dst, edges="*", direction="forward", last=False): endpoint = 'randomwalk/last/{}/{}' if last else 'randomwalk/{}/{}' return self.get_lines( endpoint.format(src, dst), params={ 'edges': edges, 'direction': direction }) def count_leaves(self, src, edges="*", direction="forward"): return self.get( 'leaves/count/{}'.format(src), params={ 'edges': edges, 'direction': direction }) def count_neighbors(self, src, edges="*", direction="forward"): return self.get( 'neighbors/count/{}'.format(src), params={ 'edges': edges, 'direction': direction }) def count_visit_nodes(self, src, edges="*", direction="forward"): return self.get( 'visit/nodes/count/{}'.format(src), params={ 'edges': edges, 'direction': direction }) diff --git a/swh/graph/tests/test_api_client.py b/swh/graph/tests/test_api_client.py index f0596bd..1378747 100644 --- a/swh/graph/tests/test_api_client.py +++ b/swh/graph/tests/test_api_client.py @@ -1,155 +1,190 @@ import pytest +from pytest import raises + +from swh.core.api import RemoteException + + def test_stats(graph_client): stats = graph_client.stats() assert set(stats.keys()) == {'counts', 'ratios', 'indegree', 'outdegree'} assert set(stats['counts'].keys()) == {'nodes', 'edges'} assert set(stats['ratios'].keys()) == {'compression', 'bits_per_node', 'bits_per_edge', 'avg_locality'} assert set(stats['indegree'].keys()) == {'min', 'max', 'avg'} assert set(stats['outdegree'].keys()) == {'min', 'max', 'avg'} assert stats['counts']['nodes'] == 21 assert stats['counts']['edges'] == 23 assert isinstance(stats['ratios']['compression'], float) assert 
isinstance(stats['ratios']['bits_per_node'], float) assert isinstance(stats['ratios']['bits_per_edge'], float) assert isinstance(stats['ratios']['avg_locality'], float) assert stats['indegree']['min'] == 0 assert stats['indegree']['max'] == 3 assert isinstance(stats['indegree']['avg'], float) assert stats['outdegree']['min'] == 0 assert stats['outdegree']['max'] == 3 assert isinstance(stats['outdegree']['avg'], float) def test_leaves(graph_client): actual = list(graph_client.leaves( 'swh:1:ori:0000000000000000000000000000000000000021' )) expected = [ 'swh:1:cnt:0000000000000000000000000000000000000001', 'swh:1:cnt:0000000000000000000000000000000000000004', 'swh:1:cnt:0000000000000000000000000000000000000005', 'swh:1:cnt:0000000000000000000000000000000000000007' ] assert set(actual) == set(expected) def test_neighbors(graph_client): actual = list(graph_client.neighbors( 'swh:1:rev:0000000000000000000000000000000000000009', direction='backward' )) expected = [ 'swh:1:snp:0000000000000000000000000000000000000020', 'swh:1:rel:0000000000000000000000000000000000000010', 'swh:1:rev:0000000000000000000000000000000000000013' ] assert set(actual) == set(expected) def test_visit_nodes(graph_client): actual = list(graph_client.visit_nodes( 'swh:1:rel:0000000000000000000000000000000000000010', edges='rel:rev,rev:rev' )) expected = [ 'swh:1:rel:0000000000000000000000000000000000000010', 'swh:1:rev:0000000000000000000000000000000000000009', 'swh:1:rev:0000000000000000000000000000000000000003' ] assert set(actual) == set(expected) def test_visit_paths(graph_client): actual = list(graph_client.visit_paths( 'swh:1:snp:0000000000000000000000000000000000000020', edges='snp:*,rev:*')) actual = [tuple(path) for path in actual] expected = [ ( 'swh:1:snp:0000000000000000000000000000000000000020', 'swh:1:rev:0000000000000000000000000000000000000009', 'swh:1:rev:0000000000000000000000000000000000000003', 'swh:1:dir:0000000000000000000000000000000000000002' ), ( 
'swh:1:snp:0000000000000000000000000000000000000020', 'swh:1:rev:0000000000000000000000000000000000000009', 'swh:1:dir:0000000000000000000000000000000000000008' ), ( 'swh:1:snp:0000000000000000000000000000000000000020', 'swh:1:rel:0000000000000000000000000000000000000010' ) ] assert set(actual) == set(expected) @pytest.mark.skip(reason='currently disabled due to T1969') def test_walk(graph_client): args = ('swh:1:dir:0000000000000000000000000000000000000016', 'rel') kwargs = { 'edges': 'dir:dir,dir:rev,rev:*', 'direction': 'backward', 'traversal': 'bfs', } actual = list(graph_client.walk(*args, **kwargs)) expected = [ 'swh:1:dir:0000000000000000000000000000000000000016', 'swh:1:dir:0000000000000000000000000000000000000017', 'swh:1:rev:0000000000000000000000000000000000000018', 'swh:1:rel:0000000000000000000000000000000000000019' ] assert set(actual) == set(expected) kwargs2 = kwargs.copy() kwargs2['last'] = True actual = list(graph_client.walk(*args, **kwargs2)) expected = [ 'swh:1:rel:0000000000000000000000000000000000000019' ] assert set(actual) == set(expected) def test_random_walk(graph_client): """as the walk is random, we test a visit from a cnt node to the only origin in the dataset, and only check the final node of the path (i.e., the origin) """ args = ('swh:1:cnt:0000000000000000000000000000000000000001', 'ori') kwargs = {'direction': 'backward'} expected_root = 'swh:1:ori:0000000000000000000000000000000000000021' actual = list(graph_client.random_walk(*args, **kwargs)) assert len(actual) > 1 # no origin directly links to a content assert actual[0] == args[0] assert actual[-1] == expected_root kwargs2 = kwargs.copy() kwargs2['last'] = True actual = list(graph_client.random_walk(*args, **kwargs2)) assert actual == [expected_root] def test_count(graph_client): actual = graph_client.count_leaves( 'swh:1:ori:0000000000000000000000000000000000000021' ) assert actual == 4 actual = graph_client.count_visit_nodes( 
'swh:1:rel:0000000000000000000000000000000000000010', edges='rel:rev,rev:rev' ) assert actual == 3 actual = graph_client.count_neighbors( 'swh:1:rev:0000000000000000000000000000000000000009', direction='backward' ) assert actual == 3 + + +def test_param_validation(graph_client): + with raises(RemoteException): # PID not found + list(graph_client.leaves( + 'swh:1:ori:fff0000000000000000000000000000000000021')) + with raises(RemoteException): # malformed PID + list(graph_client.neighbors( + 'swh:1:ori:fff000000zzzzzz0000000000000000000000021')) + with raises(RemoteException): # malformed edge specification + list(graph_client.walk( + 'swh:1:dir:0000000000000000000000000000000000000016', 'rel', + edges='dir:notanodetype,dir:rev,rev:*', + direction='backward', + traversal='bfs', + )) + with raises(RemoteException): # malformed direction + list(graph_client.walk( + 'swh:1:dir:0000000000000000000000000000000000000016', 'rel', + edges='dir:dir,dir:rev,rev:*', + direction='notadirection', + traversal='bfs', + )) + with raises(RemoteException): # malformed traversal order + list(graph_client.walk( + 'swh:1:dir:0000000000000000000000000000000000000016', 'rel', + edges='dir:dir,dir:rev,rev:*', + direction='backward', + traversal='notatraversalorder', + ))