diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py
--- a/swh/indexer/indexer.py
+++ b/swh/indexer/indexer.py
@@ -484,8 +484,8 @@
     class.

     """
-    def run(self, ids, policy_update,
-            parse_ids=False, next_step=None, **kwargs):
+    def run(self, ids, policy_update='update-dups', parse_ids=True,
+            next_step=None, **kwargs):
         """Given a list of origin ids:

         - retrieve origins from storage
@@ -496,20 +496,18 @@

             ids ([Union[int, Tuple[str, bytes]]]): list of origin ids or
                 (type, url) tuples.
             policy_update (str): either 'update-dups' or 'ignore-dups' to
-                respectively update duplicates or ignore
-                them
-            parse_ids (bool: If `True`, will try to convert `ids`
-                from a human input to the valid type.
+                update duplicates (the default) or ignore
+                them, respectively
             next_step (dict): a dict in the form expected by
                 `scheduler.backend.SchedulerBackend.create_tasks`
                 without `next_run`, plus a `result_name` key.
+            parse_ids (bool): whether to parse `ids` (default: `True`)
             **kwargs: passed to the `index` method

         """
         if parse_ids:
-            ids = [
-                o.split('+', 1) if ':' in o else int(o)  # type+url or id
-                for o in ids]
+            ids = [o.split('+', 1) if ':' in o else int(o)  # type+url or id
+                   for o in ids]

         results = []
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -294,6 +294,7 @@
         # run() with the usual order, ie. origin ids first.
         return super().run(ids=list(origin_head_map),
                            policy_update=policy_update,
+                           parse_ids=False,
                            revisions_metadata=revisions_metadata,
                            origin_head_map=origin_head_map)

diff --git a/swh/indexer/origin_head.py b/swh/indexer/origin_head.py
--- a/swh/indexer/origin_head.py
+++ b/swh/indexer/origin_head.py
@@ -213,7 +213,7 @@
               multiple=True)
 def main(origins):
     rev_metadata_indexer = OriginHeadIndexer()
-    rev_metadata_indexer.run(origins, 'update-dups', parse_ids=True)
+    rev_metadata_indexer.run(origins)


 if __name__ == '__main__':
diff --git a/swh/indexer/tests/test_origin_head.py b/swh/indexer/tests/test_origin_head.py
--- a/swh/indexer/tests/test_origin_head.py
+++ b/swh/indexer/tests/test_origin_head.py
@@ -42,8 +42,7 @@
     def test_git(self):
         indexer = OriginHeadTestIndexer()
         indexer.run(
-            ['git+https://github.com/SoftwareHeritage/swh-storage'],
-            'update-dups', parse_ids=True)
+            ['git+https://github.com/SoftwareHeritage/swh-storage'])
         self.assertEqual(indexer.results, [{
             'revision_id': b'8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{'
                            b'\xd7}\xac\xefrm',
@@ -52,8 +51,7 @@
     def test_ftp(self):
         indexer = OriginHeadTestIndexer()
         indexer.run(
-            ['ftp+rsync://ftp.gnu.org/gnu/3dldf'],
-            'update-dups', parse_ids=True)
+            ['ftp+rsync://ftp.gnu.org/gnu/3dldf'])
         self.assertEqual(indexer.results, [{
             'revision_id': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee'
                            b'\xcc\x1a\xb4`\x8c\x8by',
@@ -63,8 +61,7 @@
         indexer = OriginHeadTestIndexer()
         indexer.run(
             ['deposit+https://forge.softwareheritage.org/source/'
-             'jesuisgpl/'],
-            'update-dups', parse_ids=True)
+             'jesuisgpl/'])
         self.assertEqual(indexer.results, [{
             'revision_id': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{'
                            b'\xa6\xe9\x99\xb1\x9e]q\xeb',
@@ -73,8 +70,7 @@
     def test_pypi(self):
         indexer = OriginHeadTestIndexer()
         indexer.run(
-            ['pypi+https://pypi.org/project/limnoria/'],
-            'update-dups', parse_ids=True)
+            ['pypi+https://pypi.org/project/limnoria/'])
         self.assertEqual(indexer.results, [{
             'revision_id': b'\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k'
                            b'A\x10\x9d\xc5\xfa2\xf8t',
@@ -83,8 +79,7 @@
     def test_svn(self):
         indexer = OriginHeadTestIndexer()
         indexer.run(
-            ['svn+http://0-512-md.googlecode.com/svn/'],
-            'update-dups', parse_ids=True)
+            ['svn+http://0-512-md.googlecode.com/svn/'])
         self.assertEqual(indexer.results, [{
             'revision_id': b'\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8'
                            b'\xc9\xad#.\x1bw=\x18',
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -86,10 +86,7 @@

     def test_pipeline(self):
         indexer = OriginHeadTestIndexer()
         indexer.scheduler = self.scheduler
-        indexer.run(
-            ["git+https://github.com/librariesio/yarn-parser"],
-            policy_update='update-dups',
-            parse_ids=True)
+        indexer.run(["git+https://github.com/librariesio/yarn-parser"])
         self.run_ready_tasks()  # Run the first task
         time.sleep(0.1)  # Give it time to complete and schedule the 2nd one
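
For context, a minimal sketch of the call convention this patch enables (not part of the patch itself; it assumes only the new defaults introduced above, policy_update='update-dups' and parse_ids=True):

    from swh.indexer.origin_head import OriginHeadIndexer

    indexer = OriginHeadIndexer()

    # Before this change, callers had to spell out both arguments:
    #   indexer.run(origins, 'update-dups', parse_ids=True)
    # After it, human-readable 'type+url' strings can be passed directly,
    # since parse_ids=True splits them into (type, url) tuples by default:
    indexer.run(['git+https://github.com/SoftwareHeritage/swh-storage'])

    # Callers forwarding already-parsed origin ids, such as the
    # super().run() call in metadata.py above, now pass parse_ids=False
    # explicitly to skip that conversion.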