Handle 'origin_visit' journal messages in the search journal client.

The client now accepts the 'origin_visit' message type and, for every visit
received, calls search.origin_update() with the visit's origin URL and
has_visits=True. The MAX_ORIGINS_PER_TASK constant is removed from the module.
A unit test covers the new message type.

diff --git a/swh/search/journal_client.py b/swh/search/journal_client.py
--- a/swh/search/journal_client.py
+++ b/swh/search/journal_client.py
@@ -6,9 +6,9 @@
 import logging
 
 
-MAX_ORIGINS_PER_TASK = 100
-
-EXPECTED_MESSAGE_TYPES = {'origin', 'origin_intrinsic_metadata'}
+EXPECTED_MESSAGE_TYPES = {
+    'origin', 'origin_visit', 'origin_intrinsic_metadata',
+}
 
 
 def process_journal_objects(messages, *, search):
@@ -19,6 +19,9 @@
     if 'origin' in messages:
         process_origins(messages['origin'], search)
 
+    if 'origin_visit' in messages:
+        process_origin_visits(messages['origin_visit'], search)
+
     if 'origin_intrinsic_metadata' in messages:
         process_origin_intrinsic_metadata(
             messages['origin_intrinsic_metadata'], search)
@@ -30,6 +33,18 @@
     search.origin_update(origins)
 
 
+def process_origin_visits(visits, search):
+    logging.debug('processing origin visits %r', visits)
+
+    search.origin_update([
+        {
+            'url': visit['origin']['url'],
+            'has_visits': True
+        }
+        for visit in visits
+    ])
+
+
 def process_origin_intrinsic_metadata(origin_metadata, search):
     logging.debug('processing origin intrinsic_metadata %r', origin_metadata)
 
diff --git a/swh/search/tests/test_journal_client.py b/swh/search/tests/test_journal_client.py
--- a/swh/search/tests/test_journal_client.py
+++ b/swh/search/tests/test_journal_client.py
@@ -37,6 +37,23 @@
             {'url': 'http://barbaz.qux'},
         ])
 
+    def test_origin_visit_from_journal(self):
+        search_mock = MagicMock()
+
+        worker_fn = functools.partial(
+            process_journal_objects,
+            search=search_mock,
+        )
+
+        worker_fn({'origin_visit': [
+            {
+                'origin': {'url': 'http://foobar.baz'},
+            }
+        ]})
+        search_mock.origin_update.assert_called_once_with([
+            {'url': 'http://foobar.baz', 'has_visits': True},
+        ])
+
     def test_origin_metadata_from_journal(self):
         search_mock = MagicMock()
 