NOTE(review): line breaks and indentation in this patch were reconstructed from a
whitespace-collapsed copy. The first hunk is an indentation-only change; the nesting
shown for its -/+ lines (and the indentation of all context lines) is inferred, so
confirm it against the repository before applying.

diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py
--- a/swh/search/in_memory.py
+++ b/swh/search/in_memory.py
@@ -61,8 +61,8 @@
                 document["_url_tokens"] = set(self._url_splitter.split(document["url"]))
             if "visit_types" in document:
                 document["visit_types"] = set(document["visit_types"])
-            if "visit_types" in self._origins[id_]:
-                document["visit_types"].update(self._origins[id_]["visit_types"])
+                if "visit_types" in self._origins[id_]:
+                    document["visit_types"].update(self._origins[id_]["visit_types"])
             self._origins[id_].update(document)
 
             if id_ not in self._origin_ids:
diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
--- a/swh/search/tests/test_search.py
+++ b/swh/search/tests/test_search.py
@@ -184,6 +184,24 @@
             ]
         )
 
+    def test_origin_update_with_no_visit_types(self):
+        """
+        Update an origin with visit types first then with no visit types,
+        check origin can still be searched with visit types afterwards.
+        """
+        origin_url = "http://foobar.baz"
+        self.search.origin_update([{"url": origin_url, "visit_types": ["git"]}])
+        self.search.flush()
+
+        self.search.origin_update([{"url": origin_url}])
+        self.search.flush()
+
+        actual_page = self.search.origin_search(url_pattern="http", visit_types=["git"])
+        assert actual_page.next_page_token is None
+        results = [r["url"] for r in actual_page.results]
+        expected_results = [origin_url]
+        assert results == expected_results
+
     def test_origin_intrinsic_metadata_description(self):
         origin1_nothin = {"url": "http://origin1"}
         origin2_foobar = {"url": "http://origin2"}