diff --git a/swh/search/cli.py b/swh/search/cli.py
--- a/swh/search/cli.py
+++ b/swh/search/cli.py
@@ -55,7 +55,8 @@
     to run relevant indexers (currently, only origin) on these new objects."""
 
     client = get_journal_client(
-        ctx, object_types=['origin'], max_messages=max_messages)
+        ctx, object_types=['origin', 'origin_visit'],
+        max_messages=max_messages)
     search = get_search(**ctx.obj['config']['search'])
 
     worker_fn = functools.partial(
diff --git a/swh/search/tests/test_cli.py b/swh/search/tests/test_cli.py
--- a/swh/search/tests/test_cli.py
+++ b/swh/search/tests/test_cli.py
@@ -32,6 +32,14 @@
 '''
 
 
+class MockedKafkaConsumerWithTopics(MockedKafkaConsumer):
+    def list_topics(self, timeout=None):
+        return {
+            'swh.journal.objects.origin',
+            'swh.journal.objects.origin_visit',
+        }
+
+
 def invoke(catch_exceptions, args, config='', *, elasticsearch_host):
     runner = CliRunner()
     with tempfile.NamedTemporaryFile('a', suffix='.yml') as config_fd:
@@ -59,7 +67,7 @@
         message.topic.return_value = topic
         message.value.return_value = value
 
-        mock_consumer = MockedKafkaConsumer([message])
+        mock_consumer = MockedKafkaConsumerWithTopics([message])
 
         with patch('swh.journal.client.Consumer',
                    return_value=mock_consumer):
@@ -80,3 +88,42 @@
         results = self.search.origin_search(url_pattern='foobar')
         assert results == {'scroll_token': None, 'results': [
             {'url': 'http://foobar.baz'}]}
+
+        results = self.search.origin_search(url_pattern='foobar',
+                                            with_visit=True)
+        assert results == {'scroll_token': None, 'results': []}
+
+    def test__journal_client__origin_visit(self):
+        """Tests that an origin_visit message makes its origin searchable
+        with with_visit=True."""
+        topic = 'swh.journal.objects.origin_visit'
+        value = value_to_kafka({
+            'origin': 'http://foobar.baz',
+        })
+        message = MagicMock()
+        message.error.return_value = None
+        message.topic.return_value = topic
+        message.value.return_value = value
+
+        mock_consumer = MockedKafkaConsumerWithTopics([message])
+
+        with patch('swh.journal.client.Consumer',
+                   return_value=mock_consumer):
+            result = invoke(False, [
+                'journal-client', 'objects',
+                '--max-messages', '1',
+            ], JOURNAL_OBJECTS_CONFIG,
+                elasticsearch_host=self._elasticsearch_host)
+
+        # Check the output
+        expected_output = (
+            'Processed 1 messages.\n'
+            'Done.\n'
+        )
+        assert result.exit_code == 0, result.output
+        assert result.output == expected_output
+
+        results = self.search.origin_search(url_pattern='foobar',
+                                            with_visit=True)
+        assert results == {'scroll_token': None, 'results': [
+            {'url': 'http://foobar.baz'}]}