Differential D2181 Diff 7461 swh/search/in_memory.py

Changeset View

Standalone View

swh/search/in_memory.py

Show First 20 Lines • Show All 55 Lines • ▼ Show 20 Lines	def origin_update(self, documents: Iterable[dict]) -> None:
self._origins[id_].update(document)		self._origins[id_].update(document)
if id_ not in self._origin_ids:		if id_ not in self._origin_ids:
self._origin_ids.append(id_)		self._origin_ids.append(id_)

@remote_api_endpoint('origin/search')		@remote_api_endpoint('origin/search')
def origin_search(		def origin_search(
self, *,		self, *,
url_pattern: str = None, metadata_pattern: str = None,		url_pattern: str = None, metadata_pattern: str = None,
cursor: str = None, count: int = 50		scroll_token: str = None, count: int = 50
) -> Dict[str, object]:		) -> Dict[str, object]:
matches = (self._origins[id_] for id_ in self._origin_ids)		matches = (self._origins[id_] for id_ in self._origin_ids)

if url_pattern:		if url_pattern:
tokens = set(self._url_splitter.split(url_pattern))		tokens = set(self._url_splitter.split(url_pattern))

def predicate(match):		def predicate(match):
missing_tokens = tokens - match['_url_tokens']		missing_tokens = tokens - match['_url_tokens']
Show All 13 Lines	def origin_search(
raise NotImplementedError(		raise NotImplementedError(
'Metadata search is not implemented in the in-memory backend.')		'Metadata search is not implemented in the in-memory backend.')

if not url_pattern and not metadata_pattern:		if not url_pattern and not metadata_pattern:
raise ValueError(		raise ValueError(
'At least one of url_pattern and metadata_pattern '		'At least one of url_pattern and metadata_pattern '
'must be provided.')		'must be provided.')

if cursor:		if scroll_token:
cursor = msgpack.loads(base64.b64decode(cursor))		scroll_token = msgpack.loads(base64.b64decode(scroll_token))
start_at_index = cursor[b'start_at_index']		start_at_index = scroll_token[b'start_at_index']
else:		else:
start_at_index = 0		start_at_index = 0

hits = list(itertools.islice(		hits = list(itertools.islice(
matches, start_at_index, start_at_index+count))		matches, start_at_index, start_at_index+count))

if len(hits) == count:		if len(hits) == count:
next_cursor = {		next_scroll_token = {
b'start_at_index': start_at_index+count,		b'start_at_index': start_at_index+count,
}		}
next_cursor = base64.b64encode(msgpack.dumps(next_cursor))		next_scroll_token = base64.b64encode(msgpack.dumps(
		next_scroll_token))
else:		else:
next_cursor = None		next_scroll_token = None

return {		return {
'cursor': next_cursor,		'scroll_token': next_scroll_token,
'results': [		'results': [
{'url': hit['url']}		{'url': hit['url']}
for hit in hits		for hit in hits
]		]
}		}