diff --git a/swh/indexer/journal_client.py b/swh/indexer/journal_client.py --- a/swh/indexer/journal_client.py +++ b/swh/indexer/journal_client.py @@ -26,10 +26,16 @@ visits = [visit for visit in visits if visit['status'] == 'full'] visit_batches = grouper(visits, MAX_ORIGINS_PER_TASK) for visit_batch in visit_batches: + visit_urls = [] + for visit in visit_batch: + if isinstance(visit['origin'], str): + visit_urls.append(visit['origin']) + else: + visit_urls.append(visit['origin']['url']) task_dicts.append(create_task_dict( task_names['origin_metadata'], 'oneshot', - [visit['origin']['url'] for visit in visit_batch], + visit_urls, policy_update='update-dups', )) diff --git a/swh/indexer/tests/test_journal_client.py b/swh/indexer/tests/test_journal_client.py --- a/swh/indexer/tests/test_journal_client.py +++ b/swh/indexer/tests/test_journal_client.py @@ -11,6 +11,35 @@ class JournalClientTest(unittest.TestCase): def testOneOriginVisit(self): + mock_scheduler = Mock() + messages = { + 'origin_visit': [ + { + 'status': 'full', + 'origin': 'file:///dev/zero', + }, + ] + } + process_journal_objects( + messages, scheduler=mock_scheduler, + task_names={'origin_metadata': 'task-name'}) + self.assertTrue(mock_scheduler.create_tasks.called) + call_args = mock_scheduler.create_tasks.call_args + (args, kwargs) = call_args + self.assertEqual(kwargs, {}) + del args[0][0]['next_run'] + self.assertEqual(args, ([ + { + 'arguments': { + 'kwargs': {'policy_update': 'update-dups'}, + 'args': (['file:///dev/zero'],) + }, + 'policy': 'oneshot', + 'type': 'task-name' + }, + ],)) + + def testOriginVisitLegacy(self): mock_scheduler = Mock() messages = { 'origin_visit': [ @@ -47,15 +76,11 @@ 'origin_visit': [ { 'status': 'full', - 'origin': { - 'url': 'file:///dev/zero', - } + 'origin': 'file:///dev/zero', }, { 'status': 'full', - 'origin': { - 'url': 'file:///tmp/foobar', - } + 'origin': 'file:///tmp/foobar', }, ] } @@ -85,21 +110,15 @@ 'origin_visit': [ { 'status': 'full', - 'origin': { - 'url': 'file:///dev/zero', - } + 'origin': 'file:///dev/zero', }, { 'status': 'full', - 'origin': { - 'url': 'file:///tmp/foobar', - } + 'origin': 'file:///tmp/foobar', }, { 'status': 'full', - 'origin': { - 'url': 'file:///tmp/spamegg', - } + 'origin': 'file:///tmp/spamegg', }, ] }