diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py --- a/swh/search/elasticsearch.py +++ b/swh/search/elasticsearch.py @@ -107,25 +107,25 @@ def origin_search( self, *, url_pattern: str = None, metadata_pattern: str = None, - cursor: str = None, count: int = 50 + scroll_token: str = None, count: int = 50 ) -> Dict[str, object]: """Searches for origins matching the `url_pattern`. Args: url_pattern (str): Part of thr URL to search for - cursor (str): `cursor` is opaque value used for pagination. + scroll_token (str): `scroll_token` is an opaque value used for + pagination. count (int): number of results to return. Returns: a dictionary with keys: - * `cursor`: + * `scroll_token`: opaque value used for fetching more results. `None` if there are no more result. * `results`: list of dictionaries with key: * `url`: URL of a matching origin """ - # TODO: find a better name for "cursor" query_clauses = [] if url_pattern: @@ -171,11 +171,11 @@ {'_id': 'asc'}, ] } - if cursor: + if scroll_token: # TODO: use ElasticSearch's scroll API? - cursor = msgpack.loads(base64.b64decode(cursor)) + scroll_token = msgpack.loads(base64.b64decode(scroll_token)) body['search_after'] = \ - [cursor[b'score'], cursor[b'id'].decode('ascii')] + [scroll_token[b'score'], scroll_token[b'id'].decode('ascii')] res = self._backend.search( index='origin', @@ -187,16 +187,17 @@ if len(hits) == count: last_hit = hits[-1] - next_cursor = { + next_scroll_token = { b'score': last_hit['_score'], b'id': last_hit['_id'], } - next_cursor = base64.b64encode(msgpack.dumps(next_cursor)) + next_scroll_token = base64.b64encode(msgpack.dumps( + next_scroll_token)) else: - next_cursor = None + next_scroll_token = None return { - 'cursor': next_cursor, + 'scroll_token': next_scroll_token, 'results': [ { # TODO: also add 'id'? diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py --- a/swh/search/in_memory.py +++ b/swh/search/in_memory.py @@ -61,7 +61,7 @@ def origin_search( self, *, url_pattern: str = None, metadata_pattern: str = None, - cursor: str = None, count: int = 50 + scroll_token: str = None, count: int = 50 ) -> Dict[str, object]: matches = (self._origins[id_] for id_ in self._origin_ids) @@ -91,9 +91,9 @@ 'At least one of url_pattern and metadata_pattern ' 'must be provided.') - if cursor: - cursor = msgpack.loads(base64.b64decode(cursor)) - start_at_index = cursor[b'start_at_index'] + if scroll_token: + scroll_token = msgpack.loads(base64.b64decode(scroll_token)) + start_at_index = scroll_token[b'start_at_index'] else: start_at_index = 0 @@ -101,15 +101,16 @@ matches, start_at_index, start_at_index+count)) if len(hits) == count: - next_cursor = { + next_scroll_token = { b'start_at_index': start_at_index+count, } - next_cursor = base64.b64encode(msgpack.dumps(next_cursor)) + next_scroll_token = base64.b64encode(msgpack.dumps( + next_scroll_token)) else: - next_cursor = None + next_scroll_token = None return { - 'cursor': next_cursor, + 'scroll_token': next_scroll_token, 'results': [ {'url': hit['url']} for hit in hits diff --git a/swh/search/tests/test_cli.py b/swh/search/tests/test_cli.py --- a/swh/search/tests/test_cli.py +++ b/swh/search/tests/test_cli.py @@ -82,5 +82,5 @@ assert result.output == expected_output results = self.search.origin_search(url_pattern='foobar') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://foobar.baz'}]} diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py --- a/swh/search/tests/test_search.py +++ b/swh/search/tests/test_search.py @@ -17,20 +17,20 @@ ]) results = self.search.origin_search(url_pattern='foobar') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://foobar.baz'}]} results = self.search.origin_search(url_pattern='barb') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://barbaz.qux'}]} # 'bar' is part of 'foobar', but is not the beginning of it results = self.search.origin_search(url_pattern='bar') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://barbaz.qux'}]} results = self.search.origin_search(url_pattern='barbaz') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://barbaz.qux'}]} def test_origin_url_unique_word_prefix_multiple_results(self): @@ -41,14 +41,14 @@ ]) results = self.search.origin_search(url_pattern='qu') - assert results['cursor'] is None + assert results['scroll_token'] is None results = [res['url'] for res in results['results']] expected_results = ['http://qux.quux', 'http://barbaz.qux'] assert sorted(results) == sorted(expected_results) results = self.search.origin_search(url_pattern='qux') - assert results['cursor'] is None + assert results['scroll_token'] is None results = [res['url'] for res in results['results']] expected_results = ['http://barbaz.qux', 'http://qux.quux'] @@ -77,16 +77,16 @@ ]) results = self.search.origin_search(metadata_pattern='foo') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://origin2'}]} # ES returns both results, because blahblah results = self.search.origin_search(metadata_pattern='foo bar') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://origin2'}, {'url': 'http://origin3'}]} results = self.search.origin_search(metadata_pattern='bar baz') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://origin3'}, {'url': 'http://origin2'}]} def test_origin_intrinsic_metadata_nested(self): @@ -112,15 +112,15 @@ ]) results = self.search.origin_search(metadata_pattern='foo') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://origin2'}]} results = self.search.origin_search(metadata_pattern='foo bar') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://origin2'}, {'url': 'http://origin3'}]} results = self.search.origin_search(metadata_pattern='bar baz') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://origin3'}, {'url': 'http://origin2'}]} # TODO: add more tests with more codemeta terms diff --git a/swh/search/utils.py b/swh/search/utils.py --- a/swh/search/utils.py +++ b/swh/search/utils.py @@ -5,12 +5,12 @@ def stream_results(f, *args, **kwargs): - if 'cursor' in kwargs: - raise TypeError('stream_results has no argument "cursor".') - cursor = None + if 'scroll_token' in kwargs: + raise TypeError('stream_results has no argument "scroll_token".') + scroll_token = None while True: - results = f(*args, cursor=cursor, **kwargs) + results = f(*args, scroll_token=scroll_token, **kwargs) yield from results['results'] - cursor = results['cursor'] - if cursor is None: + scroll_token = results['scroll_token'] + if scroll_token is None: break