Changeset View
Changeset View
Standalone View
Standalone View
swh/search/in_memory.py
Show First 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | class InMemorySearch: | ||||
def origin_update(self, documents: Iterable[Dict]) -> None:
    """Merge each given document into the stored origin entries.

    Every document is shallow-copied before being touched, so the
    caller's dicts are not given new keys. The storage key for a document
    is whatever ``origin_identifier`` returns for it.

    Args:
        documents: origin documents to index. ``url`` and ``visit_types``
            are the only keys given special treatment; all other keys are
            copied onto the stored entry unchanged.
    """
    for incoming in documents:
        entry = incoming.copy()
        key = origin_identifier(entry)

        if "url" in entry:
            # Keep the URL's tokens as a set next to the document.
            tokens = self._url_splitter.split(entry["url"])
            entry["_url_tokens"] = set(tokens)

        if "visit_types" in entry:
            merged_types = set(entry["visit_types"])
            entry["visit_types"] = merged_types
            # Visit types accumulate: fold the ones already recorded into
            # the new set so the update below does not drop them.
            # NOTE(review): presumably self._origins creates missing
            # entries on lookup (defaultdict-like) -- confirm in __init__.
            known = self._origins[key]
            if "visit_types" in known:
                merged_types.update(known["visit_types"])

        self._origins[key].update(entry)

        # Record each identifier only once, in first-seen order.
        if key not in self._origin_ids:
            self._origin_ids.append(key)
def origin_search( | def origin_search( | ||||
self, | self, | ||||
*, | *, | ||||
▲ Show 20 Lines • Show All 75 Lines • Show Last 20 Lines |