# Patch summary (leading text before the first "diff --git" header).
#
# swh/search/journal_client.py
#   * "origin_visit_status" messages are now dispatched to
#     process_origin_visit_statuses() instead of process_origin_visits().
#   * process_origin_visit_statuses() builds one
#     {"url": <origin>, "has_visits": True} entry per visit status whose
#     "status" field equals "full", and calls search.origin_update() only
#     when that list is non-empty.
#
# swh/search/tests/test_journal_client.py
#   * Adds test_journal_client_origin_visit_status_from_journal: a "full"
#     status must produce exactly one origin_update() call with the expected
#     payload; a "partial" status must produce no origin_update() call.
#
# NOTE(review): the line breaks of this patch appear to have been collapsed
# and the hunk line counts no longer match the visible content, so it
# presumably will not apply as-is -- confirm against the original revision.
diff --git a/swh/search/journal_client.py b/swh/search/journal_client.py --- a/swh/search/journal_client.py +++ b/swh/search/journal_client.py @@ -25,7 +25,7 @@ process_origin_visits(messages["origin_visit"], search) if "origin_visit_status" in messages: - process_origin_visits(messages["origin_visit_status"], search) + process_origin_visit_statuses(messages["origin_visit_status"], search) if "origin_intrinsic_metadata" in messages: process_origin_intrinsic_metadata(messages["origin_intrinsic_metadata"], search) @@ -58,12 +58,14 @@ def process_origin_visit_statuses(visit_statuses, search): logging.debug("processing origin visit statuses %r", visit_statuses) - search.origin_update( - [ - {"url": (visit_status["origin"]), "has_visits": True,} - for visit_status in visit_statuses - ] - ) + full_visit_status = [ + {"url": (visit_status["origin"]), "has_visits": True,} + for visit_status in visit_statuses + if visit_status["status"] == "full" + ] + + if full_visit_status: + search.origin_update(full_visit_status) def process_origin_intrinsic_metadata(origin_metadata, search): diff --git a/swh/search/tests/test_journal_client.py b/swh/search/tests/test_journal_client.py --- a/swh/search/tests/test_journal_client.py +++ b/swh/search/tests/test_journal_client.py @@ -38,6 +38,31 @@ ) +def test_journal_client_origin_visit_status_from_journal(): + search_mock = MagicMock() + + worker_fn = functools.partial(process_journal_objects, search=search_mock,) + + worker_fn( + { + "origin_visit_status": [ + {"origin": "http://foobar.baz", "status": "full"} # full visits ok + ] + } + ) + search_mock.origin_update.assert_called_once_with( + [{"url": "http://foobar.baz", "has_visits": True},] + ) + + search_mock.reset_mock() + + # non-full visits are filtered out + worker_fn( + {"origin_visit_status": [{"origin": "http://foobar.baz", "status": "partial"}]} + ) + search_mock.origin_update.assert_not_called() + + def test_journal_client_origin_metadata_from_journal(): search_mock = 
MagicMock()