D2181.diff
No OneTemporary
Actions

Size

8 KB

Subscribers

None

D2181.diff
View Options

	diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
	--- a/swh/search/elasticsearch.py
	+++ b/swh/search/elasticsearch.py
	@@ -107,25 +107,25 @@
	def origin_search(
	self, *,
	url_pattern: str = None, metadata_pattern: str = None,
	- cursor: str = None, count: int = 50
	+ scroll_token: str = None, count: int = 50
	) -> Dict[str, object]:
	"""Searches for origins matching the `url_pattern`.

	Args:
	url_pattern (str): Part of the URL to search for
	- cursor (str): `cursor` is opaque value used for pagination.
	+ scroll_token (str): `scroll_token` is an opaque value used for
	+ pagination.
	count (int): number of results to return.

	Returns:
	a dictionary with keys:
	- * `cursor`:
	+ * `scroll_token`:
	opaque value used for fetching more results. `None` if there
	are no more results.
	* `results`:
	list of dictionaries with key:
	* `url`: URL of a matching origin
	"""
	- # TODO: find a better name for "cursor"
	query_clauses = []

	if url_pattern:
	@@ -171,11 +171,11 @@
	{'_id': 'asc'},
	]
	}
	- if cursor:
	+ if scroll_token:
	# TODO: use ElasticSearch's scroll API?
	- cursor = msgpack.loads(base64.b64decode(cursor))
	+ scroll_token = msgpack.loads(base64.b64decode(scroll_token))
	body['search_after'] = \
	- [cursor[b'score'], cursor[b'id'].decode('ascii')]
	+ [scroll_token[b'score'], scroll_token[b'id'].decode('ascii')]

	res = self._backend.search(
	index='origin',
	@@ -187,16 +187,17 @@

	if len(hits) == count:
	last_hit = hits[-1]
	- next_cursor = {
	+ next_scroll_token = {
	b'score': last_hit['_score'],
	b'id': last_hit['_id'],
	}
	- next_cursor = base64.b64encode(msgpack.dumps(next_cursor))
	+ next_scroll_token = base64.b64encode(msgpack.dumps(
	+ next_scroll_token))
	else:
	- next_cursor = None
	+ next_scroll_token = None

	return {
	- 'cursor': next_cursor,
	+ 'scroll_token': next_scroll_token,
	'results': [
	{
	# TODO: also add 'id'?
	diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py
	--- a/swh/search/in_memory.py
	+++ b/swh/search/in_memory.py
	@@ -61,7 +61,7 @@
	def origin_search(
	self, *,
	url_pattern: str = None, metadata_pattern: str = None,
	- cursor: str = None, count: int = 50
	+ scroll_token: str = None, count: int = 50
	) -> Dict[str, object]:
	matches = (self._origins[id_] for id_ in self._origin_ids)

	@@ -91,9 +91,9 @@
	'At least one of url_pattern and metadata_pattern '
	'must be provided.')

	- if cursor:
	- cursor = msgpack.loads(base64.b64decode(cursor))
	- start_at_index = cursor[b'start_at_index']
	+ if scroll_token:
	+ scroll_token = msgpack.loads(base64.b64decode(scroll_token))
	+ start_at_index = scroll_token[b'start_at_index']
	else:
	start_at_index = 0

	@@ -101,15 +101,16 @@
	matches, start_at_index, start_at_index+count))

	if len(hits) == count:
	- next_cursor = {
	+ next_scroll_token = {
	b'start_at_index': start_at_index+count,
	}
	- next_cursor = base64.b64encode(msgpack.dumps(next_cursor))
	+ next_scroll_token = base64.b64encode(msgpack.dumps(
	+ next_scroll_token))
	else:
	- next_cursor = None
	+ next_scroll_token = None

	return {
	- 'cursor': next_cursor,
	+ 'scroll_token': next_scroll_token,
	'results': [
	{'url': hit['url']}
	for hit in hits
	diff --git a/swh/search/tests/test_cli.py b/swh/search/tests/test_cli.py
	--- a/swh/search/tests/test_cli.py
	+++ b/swh/search/tests/test_cli.py
	@@ -82,5 +82,5 @@
	assert result.output == expected_output

	results = self.search.origin_search(url_pattern='foobar')
	- assert results == {'cursor': None, 'results': [
	+ assert results == {'scroll_token': None, 'results': [
	{'url': 'http://foobar.baz'}]}
	diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
	--- a/swh/search/tests/test_search.py
	+++ b/swh/search/tests/test_search.py
	@@ -17,20 +17,20 @@
	])

	results = self.search.origin_search(url_pattern='foobar')
	- assert results == {'cursor': None, 'results': [
	+ assert results == {'scroll_token': None, 'results': [
	{'url': 'http://foobar.baz'}]}

	results = self.search.origin_search(url_pattern='barb')
	- assert results == {'cursor': None, 'results': [
	+ assert results == {'scroll_token': None, 'results': [
	{'url': 'http://barbaz.qux'}]}

	# 'bar' is part of 'foobar', but is not the beginning of it
	results = self.search.origin_search(url_pattern='bar')
	- assert results == {'cursor': None, 'results': [
	+ assert results == {'scroll_token': None, 'results': [
	{'url': 'http://barbaz.qux'}]}

	results = self.search.origin_search(url_pattern='barbaz')
	- assert results == {'cursor': None, 'results': [
	+ assert results == {'scroll_token': None, 'results': [
	{'url': 'http://barbaz.qux'}]}

	def test_origin_url_unique_word_prefix_multiple_results(self):
	@@ -41,14 +41,14 @@
	])

	results = self.search.origin_search(url_pattern='qu')
	- assert results['cursor'] is None
	+ assert results['scroll_token'] is None

	results = [res['url'] for res in results['results']]
	expected_results = ['http://qux.quux', 'http://barbaz.qux']
	assert sorted(results) == sorted(expected_results)

	results = self.search.origin_search(url_pattern='qux')
	- assert results['cursor'] is None
	+ assert results['scroll_token'] is None

	results = [res['url'] for res in results['results']]
	expected_results = ['http://barbaz.qux', 'http://qux.quux']
	@@ -77,16 +77,16 @@
	])

	results = self.search.origin_search(metadata_pattern='foo')
	- assert results == {'cursor': None, 'results': [
	+ assert results == {'scroll_token': None, 'results': [
	{'url': 'http://origin2'}]}

	# ES returns both results, because blahblah
	results = self.search.origin_search(metadata_pattern='foo bar')
	- assert results == {'cursor': None, 'results': [
	+ assert results == {'scroll_token': None, 'results': [
	{'url': 'http://origin2'}, {'url': 'http://origin3'}]}

	results = self.search.origin_search(metadata_pattern='bar baz')
	- assert results == {'cursor': None, 'results': [
	+ assert results == {'scroll_token': None, 'results': [
	{'url': 'http://origin3'}, {'url': 'http://origin2'}]}

	def test_origin_intrinsic_metadata_nested(self):
	@@ -112,15 +112,15 @@
	])

	results = self.search.origin_search(metadata_pattern='foo')
	- assert results == {'cursor': None, 'results': [
	+ assert results == {'scroll_token': None, 'results': [
	{'url': 'http://origin2'}]}

	results = self.search.origin_search(metadata_pattern='foo bar')
	- assert results == {'cursor': None, 'results': [
	+ assert results == {'scroll_token': None, 'results': [
	{'url': 'http://origin2'}, {'url': 'http://origin3'}]}

	results = self.search.origin_search(metadata_pattern='bar baz')
	- assert results == {'cursor': None, 'results': [
	+ assert results == {'scroll_token': None, 'results': [
	{'url': 'http://origin3'}, {'url': 'http://origin2'}]}

	# TODO: add more tests with more codemeta terms
	diff --git a/swh/search/utils.py b/swh/search/utils.py
	--- a/swh/search/utils.py
	+++ b/swh/search/utils.py
	@@ -5,12 +5,12 @@


	def stream_results(f, *args, **kwargs):
	- if 'cursor' in kwargs:
	- raise TypeError('stream_results has no argument "cursor".')
	- cursor = None
	+ if 'scroll_token' in kwargs:
	+ raise TypeError('stream_results has no argument "scroll_token".')
	+ scroll_token = None
	while True:
	- results = f(*args, cursor=cursor, **kwargs)
	+ results = f(*args, scroll_token=scroll_token, **kwargs)
	yield from results['results']
	- cursor = results['cursor']
	- if cursor is None:
	+ scroll_token = results['scroll_token']
	+ if scroll_token is None:
	break

File Metadata

Mime Type: text/plain
Expires: Nov 18 2024, 6:29 PM (68 w, 1 d ago)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 3218399

D2181.diffNo OneTemporaryActions

D2181.diffView Options

File Metadata

Event Timeline

D2181.diff
No OneTemporary
Actions

D2181.diff
View Options