diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py --- a/swh/indexer/metadata.py +++ b/swh/indexer/metadata.py @@ -3,8 +3,6 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import click -import logging from copy import deepcopy from swh.indexer.indexer import ContentIndexer, RevisionIndexer, OriginIndexer @@ -29,10 +27,6 @@ - store result in content_metadata table """ - # Note: This used when the content metadata indexer is used alone - # (not the case for example in the case of the RevisionMetadataIndexer) - CONFIG_BASE_FILENAME = 'indexer/content_metadata' - def filter(self, ids): """Filter out known sha1s and return only missing ones. """ @@ -104,8 +98,6 @@ - store the results for revision """ - CONFIG_BASE_FILENAME = 'indexer/revision_metadata' - ADDITIONAL_CONFIG = { 'tools': ('dict', { 'name': 'swh-metadata-detector', @@ -259,18 +251,14 @@ class OriginMetadataIndexer(OriginIndexer): - CONFIG_BASE_FILENAME = 'indexer/origin_intrinsic_metadata' - - ADDITIONAL_CONFIG = { - 'tools': ('list', []) - } + ADDITIONAL_CONFIG = RevisionMetadataIndexer.ADDITIONAL_CONFIG USE_TOOLS = False - def __init__(self): - super().__init__() - self.origin_head_indexer = OriginHeadIndexer() - self.revision_metadata_indexer = RevisionMetadataIndexer() + def __init__(self, config, **kwargs): + super().__init__(config=config, **kwargs) + self.origin_head_indexer = OriginHeadIndexer(config=config) + self.revision_metadata_indexer = RevisionMetadataIndexer(config=config) def index_list(self, origins): head_rev_ids = [] @@ -320,17 +308,3 @@ self.idx_storage.origin_intrinsic_metadata_add( orig_metadata, conflict_update=conflict_update) - - -@click.command() -@click.option('--revs', '-i', - help='Default sha1_git to lookup', multiple=True) -def main(revs): - _git_sha1s = list(map(hashutil.hash_to_bytes, revs)) - rev_metadata_indexer = RevisionMetadataIndexer() - rev_metadata_indexer.run(_git_sha1s, 'update-dups') - - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - main() diff --git a/swh/indexer/origin_head.py b/swh/indexer/origin_head.py --- a/swh/indexer/origin_head.py +++ b/swh/indexer/origin_head.py @@ -18,15 +18,7 @@ In git, this is usually the commit pointed to by the 'master' branch.""" - ADDITIONAL_CONFIG = { - 'tools': ('dict', { - 'name': 'origin-metadata', - 'version': '0.0.1', - 'configuration': {}, - }), - } - - CONFIG_BASE_FILENAME = 'indexer/origin_head' + USE_TOOLS = False def persist_index_computations(self, results, policy_update): """Do nothing. The indexer's results are not persistent, they diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py --- a/swh/indexer/tests/test_origin_metadata.py +++ b/swh/indexer/tests/test_origin_metadata.py @@ -3,49 +3,18 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import pytest - -from unittest.mock import patch - from swh.model.hashutil import hash_to_bytes from swh.indexer.metadata import OriginMetadataIndexer -from .utils import BASE_TEST_CONFIG, YARN_PARSER_METADATA +from .utils import YARN_PARSER_METADATA from .test_metadata import REVISION_METADATA_CONFIG -ORIGIN_HEAD_CONFIG = { - **BASE_TEST_CONFIG, - 'tools': { - 'name': 'origin-metadata', - 'version': '0.0.1', - 'configuration': {}, - }, - 'tasks': { - 'revision_metadata': 'revision_metadata', - 'origin_intrinsic_metadata': 'origin_intrinsic_metadata', - } -} - - -@pytest.fixture -def origin_metadata_indexer(): - prefix = 'swh.indexer.' - suffix = '.parse_config_file' - with patch(prefix + 'metadata.OriginMetadataIndexer' + suffix) as omi, \ - patch(prefix + 'origin_head.OriginHeadIndexer' + suffix) as ohi, \ - patch(prefix + 'metadata.RevisionMetadataIndexer' + suffix) as rmi: - omi.return_value = BASE_TEST_CONFIG - ohi.return_value = ORIGIN_HEAD_CONFIG - rmi.return_value = REVISION_METADATA_CONFIG - yield OriginMetadataIndexer() - - def test_origin_metadata_indexer( - idx_storage, storage, obj_storage, origin_metadata_indexer): + idx_storage, storage, obj_storage): - indexer = OriginMetadataIndexer() + indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.run(["git+https://github.com/librariesio/yarn-parser"]) origin = storage.origin_get({ @@ -78,8 +47,8 @@ def test_origin_metadata_indexer_duplicate_origin( - idx_storage, storage, obj_storage, origin_metadata_indexer): - indexer = OriginMetadataIndexer() + idx_storage, storage, obj_storage): + indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.storage = storage indexer.idx_storage = idx_storage indexer.run(["git+https://github.com/librariesio/yarn-parser"]) @@ -100,14 +69,14 @@ def test_origin_metadata_indexer_missing_head( - idx_storage, storage, obj_storage, origin_metadata_indexer): + idx_storage, storage, obj_storage): storage.origin_add([{ 'type': 'git', 'url': 'https://example.com' }]) - indexer = OriginMetadataIndexer() + indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.run(["git+https://example.com"]) origin = storage.origin_get({ @@ -120,14 +89,14 @@ def test_origin_metadata_indexer_partial_missing_head( - idx_storage, storage, obj_storage, origin_metadata_indexer): + idx_storage, storage, obj_storage): storage.origin_add([{ 'type': 'git', 'url': 'https://example.com' }]) - indexer = OriginMetadataIndexer() + indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.run(["git+https://example.com", "git+https://github.com/librariesio/yarn-parser"]) @@ -164,8 +133,8 @@ def test_origin_metadata_indexer_duplicate_revision( - idx_storage, storage, obj_storage, origin_metadata_indexer): - indexer = OriginMetadataIndexer() + idx_storage, storage, obj_storage): + indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.storage = storage indexer.idx_storage = idx_storage indexer.run(["git+https://github.com/librariesio/yarn-parser",