Page MenuHomeSoftware Heritage

D2181.diff
No OneTemporary

D2181.diff

diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -107,25 +107,25 @@
def origin_search(
self, *,
url_pattern: str = None, metadata_pattern: str = None,
- cursor: str = None, count: int = 50
+ scroll_token: str = None, count: int = 50
) -> Dict[str, object]:
"""Searches for origins matching the `url_pattern`.
Args:
url_pattern (str): Part of the URL to search for
- cursor (str): `cursor` is opaque value used for pagination.
+ scroll_token (str): `scroll_token` is an opaque value used for
+ pagination.
count (int): number of results to return.
Returns:
a dictionary with keys:
- * `cursor`:
+ * `scroll_token`:
opaque value used for fetching more results. `None` if there
are no more results.
* `results`:
list of dictionaries with key:
* `url`: URL of a matching origin
"""
- # TODO: find a better name for "cursor"
query_clauses = []
if url_pattern:
@@ -171,11 +171,11 @@
{'_id': 'asc'},
]
}
- if cursor:
+ if scroll_token:
# TODO: use ElasticSearch's scroll API?
- cursor = msgpack.loads(base64.b64decode(cursor))
+ scroll_token = msgpack.loads(base64.b64decode(scroll_token))
body['search_after'] = \
- [cursor[b'score'], cursor[b'id'].decode('ascii')]
+ [scroll_token[b'score'], scroll_token[b'id'].decode('ascii')]
res = self._backend.search(
index='origin',
@@ -187,16 +187,17 @@
if len(hits) == count:
last_hit = hits[-1]
- next_cursor = {
+ next_scroll_token = {
b'score': last_hit['_score'],
b'id': last_hit['_id'],
}
- next_cursor = base64.b64encode(msgpack.dumps(next_cursor))
+ next_scroll_token = base64.b64encode(msgpack.dumps(
+ next_scroll_token))
else:
- next_cursor = None
+ next_scroll_token = None
return {
- 'cursor': next_cursor,
+ 'scroll_token': next_scroll_token,
'results': [
{
# TODO: also add 'id'?
diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py
--- a/swh/search/in_memory.py
+++ b/swh/search/in_memory.py
@@ -61,7 +61,7 @@
def origin_search(
self, *,
url_pattern: str = None, metadata_pattern: str = None,
- cursor: str = None, count: int = 50
+ scroll_token: str = None, count: int = 50
) -> Dict[str, object]:
matches = (self._origins[id_] for id_ in self._origin_ids)
@@ -91,9 +91,9 @@
'At least one of url_pattern and metadata_pattern '
'must be provided.')
- if cursor:
- cursor = msgpack.loads(base64.b64decode(cursor))
- start_at_index = cursor[b'start_at_index']
+ if scroll_token:
+ scroll_token = msgpack.loads(base64.b64decode(scroll_token))
+ start_at_index = scroll_token[b'start_at_index']
else:
start_at_index = 0
@@ -101,15 +101,16 @@
matches, start_at_index, start_at_index+count))
if len(hits) == count:
- next_cursor = {
+ next_scroll_token = {
b'start_at_index': start_at_index+count,
}
- next_cursor = base64.b64encode(msgpack.dumps(next_cursor))
+ next_scroll_token = base64.b64encode(msgpack.dumps(
+ next_scroll_token))
else:
- next_cursor = None
+ next_scroll_token = None
return {
- 'cursor': next_cursor,
+ 'scroll_token': next_scroll_token,
'results': [
{'url': hit['url']}
for hit in hits
diff --git a/swh/search/tests/test_cli.py b/swh/search/tests/test_cli.py
--- a/swh/search/tests/test_cli.py
+++ b/swh/search/tests/test_cli.py
@@ -82,5 +82,5 @@
assert result.output == expected_output
results = self.search.origin_search(url_pattern='foobar')
- assert results == {'cursor': None, 'results': [
+ assert results == {'scroll_token': None, 'results': [
{'url': 'http://foobar.baz'}]}
diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
--- a/swh/search/tests/test_search.py
+++ b/swh/search/tests/test_search.py
@@ -17,20 +17,20 @@
])
results = self.search.origin_search(url_pattern='foobar')
- assert results == {'cursor': None, 'results': [
+ assert results == {'scroll_token': None, 'results': [
{'url': 'http://foobar.baz'}]}
results = self.search.origin_search(url_pattern='barb')
- assert results == {'cursor': None, 'results': [
+ assert results == {'scroll_token': None, 'results': [
{'url': 'http://barbaz.qux'}]}
# 'bar' is part of 'foobar', but is not the beginning of it
results = self.search.origin_search(url_pattern='bar')
- assert results == {'cursor': None, 'results': [
+ assert results == {'scroll_token': None, 'results': [
{'url': 'http://barbaz.qux'}]}
results = self.search.origin_search(url_pattern='barbaz')
- assert results == {'cursor': None, 'results': [
+ assert results == {'scroll_token': None, 'results': [
{'url': 'http://barbaz.qux'}]}
def test_origin_url_unique_word_prefix_multiple_results(self):
@@ -41,14 +41,14 @@
])
results = self.search.origin_search(url_pattern='qu')
- assert results['cursor'] is None
+ assert results['scroll_token'] is None
results = [res['url'] for res in results['results']]
expected_results = ['http://qux.quux', 'http://barbaz.qux']
assert sorted(results) == sorted(expected_results)
results = self.search.origin_search(url_pattern='qux')
- assert results['cursor'] is None
+ assert results['scroll_token'] is None
results = [res['url'] for res in results['results']]
expected_results = ['http://barbaz.qux', 'http://qux.quux']
@@ -77,16 +77,16 @@
])
results = self.search.origin_search(metadata_pattern='foo')
- assert results == {'cursor': None, 'results': [
+ assert results == {'scroll_token': None, 'results': [
{'url': 'http://origin2'}]}
# ES returns both results, because blahblah
results = self.search.origin_search(metadata_pattern='foo bar')
- assert results == {'cursor': None, 'results': [
+ assert results == {'scroll_token': None, 'results': [
{'url': 'http://origin2'}, {'url': 'http://origin3'}]}
results = self.search.origin_search(metadata_pattern='bar baz')
- assert results == {'cursor': None, 'results': [
+ assert results == {'scroll_token': None, 'results': [
{'url': 'http://origin3'}, {'url': 'http://origin2'}]}
def test_origin_intrinsic_metadata_nested(self):
@@ -112,15 +112,15 @@
])
results = self.search.origin_search(metadata_pattern='foo')
- assert results == {'cursor': None, 'results': [
+ assert results == {'scroll_token': None, 'results': [
{'url': 'http://origin2'}]}
results = self.search.origin_search(metadata_pattern='foo bar')
- assert results == {'cursor': None, 'results': [
+ assert results == {'scroll_token': None, 'results': [
{'url': 'http://origin2'}, {'url': 'http://origin3'}]}
results = self.search.origin_search(metadata_pattern='bar baz')
- assert results == {'cursor': None, 'results': [
+ assert results == {'scroll_token': None, 'results': [
{'url': 'http://origin3'}, {'url': 'http://origin2'}]}
# TODO: add more tests with more codemeta terms
diff --git a/swh/search/utils.py b/swh/search/utils.py
--- a/swh/search/utils.py
+++ b/swh/search/utils.py
@@ -5,12 +5,12 @@
def stream_results(f, *args, **kwargs):
- if 'cursor' in kwargs:
- raise TypeError('stream_results has no argument "cursor".')
- cursor = None
+ if 'scroll_token' in kwargs:
+ raise TypeError('stream_results has no argument "scroll_token".')
+ scroll_token = None
while True:
- results = f(*args, cursor=cursor, **kwargs)
+ results = f(*args, scroll_token=scroll_token, **kwargs)
yield from results['results']
- cursor = results['cursor']
- if cursor is None:
+ scroll_token = results['scroll_token']
+ if scroll_token is None:
break

File Metadata

Mime Type
text/plain
Expires
Mon, Nov 18, 6:29 PM (17 h, 9 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218399

Event Timeline