Changeset View
Changeset View
Standalone View
Standalone View
swh/search/in_memory.py
Show First 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | def origin_update(self, documents: Iterable[dict]) -> None: | ||||
self._origins[id_].update(document) | self._origins[id_].update(document) | ||||
if id_ not in self._origin_ids: | if id_ not in self._origin_ids: | ||||
self._origin_ids.append(id_) | self._origin_ids.append(id_) | ||||
@remote_api_endpoint('origin/search') | @remote_api_endpoint('origin/search') | ||||
def origin_search( | def origin_search( | ||||
self, *, | self, *, | ||||
url_pattern: str = None, metadata_pattern: str = None, | url_pattern: str = None, metadata_pattern: str = None, | ||||
cursor: str = None, count: int = 50 | scroll_token: str = None, count: int = 50 | ||||
) -> Dict[str, object]: | ) -> Dict[str, object]: | ||||
matches = (self._origins[id_] for id_ in self._origin_ids) | matches = (self._origins[id_] for id_ in self._origin_ids) | ||||
if url_pattern: | if url_pattern: | ||||
tokens = set(self._url_splitter.split(url_pattern)) | tokens = set(self._url_splitter.split(url_pattern)) | ||||
def predicate(match): | def predicate(match): | ||||
missing_tokens = tokens - match['_url_tokens'] | missing_tokens = tokens - match['_url_tokens'] | ||||
Show All 13 Lines | def origin_search( | ||||
raise NotImplementedError( | raise NotImplementedError( | ||||
'Metadata search is not implemented in the in-memory backend.') | 'Metadata search is not implemented in the in-memory backend.') | ||||
if not url_pattern and not metadata_pattern: | if not url_pattern and not metadata_pattern: | ||||
raise ValueError( | raise ValueError( | ||||
'At least one of url_pattern and metadata_pattern ' | 'At least one of url_pattern and metadata_pattern ' | ||||
'must be provided.') | 'must be provided.') | ||||
if cursor: | if scroll_token: | ||||
cursor = msgpack.loads(base64.b64decode(cursor)) | scroll_token = msgpack.loads(base64.b64decode(scroll_token)) | ||||
start_at_index = cursor[b'start_at_index'] | start_at_index = scroll_token[b'start_at_index'] | ||||
else: | else: | ||||
start_at_index = 0 | start_at_index = 0 | ||||
hits = list(itertools.islice( | hits = list(itertools.islice( | ||||
matches, start_at_index, start_at_index+count)) | matches, start_at_index, start_at_index+count)) | ||||
if len(hits) == count: | if len(hits) == count: | ||||
next_cursor = { | next_scroll_token = { | ||||
b'start_at_index': start_at_index+count, | b'start_at_index': start_at_index+count, | ||||
} | } | ||||
next_cursor = base64.b64encode(msgpack.dumps(next_cursor)) | next_scroll_token = base64.b64encode(msgpack.dumps( | ||||
next_scroll_token)) | |||||
else: | else: | ||||
next_cursor = None | next_scroll_token = None | ||||
return { | return { | ||||
'cursor': next_cursor, | 'scroll_token': next_scroll_token, | ||||
'results': [ | 'results': [ | ||||
{'url': hit['url']} | {'url': hit['url']} | ||||
for hit in hits | for hit in hits | ||||
] | ] | ||||
} | } |