diff --git a/swh/indexer/tests/conftest.py b/swh/indexer/tests/conftest.py
--- a/swh/indexer/tests/conftest.py
+++ b/swh/indexer/tests/conftest.py
@@ -1,6 +1,15 @@
-import pytest
 from datetime import timedelta
+from unittest.mock import patch
+
+import pytest
+
+from swh.objstorage.objstorage_in_memory import InMemoryObjStorage
 from swh.scheduler.tests.conftest import *  # noqa
+from swh.storage.in_memory import Storage
+
+from swh.indexer.storage.in_memory import IndexerStorage
+
+from .utils import fill_storage, fill_obj_storage
 
 
 TASK_NAMES = ['revision_metadata', 'origin_intrinsic_metadata']
@@ -21,6 +30,39 @@
     return swh_scheduler
 
 
+@pytest.fixture
+def idx_storage():
+    """An instance of swh.indexer.storage.in_memory.IndexerStorage that
+    gets injected into all indexers classes."""
+    idx_storage = IndexerStorage()
+    with patch('swh.indexer.storage.in_memory.IndexerStorage') \
+            as idx_storage_mock:
+        idx_storage_mock.return_value = idx_storage
+        yield idx_storage
+
+
+@pytest.fixture
+def storage():
+    """An instance of swh.storage.in_memory.Storage that gets injected
+    into all indexers classes."""
+    storage = Storage()
+    fill_storage(storage)
+    with patch('swh.storage.in_memory.Storage') as storage_mock:
+        storage_mock.return_value = storage
+        yield storage
+
+
+@pytest.fixture
+def obj_storage():
+    """An instance of swh.objstorage.objstorage_in_memory.InMemoryObjStorage
+    that gets injected into all indexers classes."""
+    objstorage = InMemoryObjStorage()
+    fill_obj_storage(objstorage)
+    with patch.dict('swh.objstorage._STORAGE_CLASSES',
+                    {'memory': lambda: objstorage}):
+        yield objstorage
+
+
 @pytest.fixture(scope='session')
 def celery_includes():
     return [
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -17,7 +17,8 @@
 )
 
 from .utils import (
-    BASE_TEST_CONFIG, fill_obj_storage, fill_storage
+    BASE_TEST_CONFIG, fill_obj_storage, fill_storage,
+    YARN_PARSER_METADATA
 )
 
 
@@ -1053,24 +1054,7 @@
         metadata_indexer.idx_storage.content_metadata_add([{
             'indexer_configuration_id': tool['id'],
             'id': b'cde',
-            'translated_metadata': {
-                '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
-                'type': 'SoftwareSourceCode',
-                'issueTracker':
-                    'https://github.com/librariesio/yarn-parser/issues',
-                'version': '1.0.0',
-                'name': 'yarn-parser',
-                'author': ['Andrew Nesbitt'],
-                'url':
-                    'https://github.com/librariesio/yarn-parser#readme',
-                'processorRequirements': {'node': '7.5'},
-                'license': 'AGPL-3.0',
-                'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
-                'codeRepository':
-                    'git+https://github.com/librariesio/yarn-parser.git',
-                'description':
-                    'Tiny web service for parsing yarn.lock files',
-            }
+            'translated_metadata': YARN_PARSER_METADATA,
         }])
 
         sha1_gits = [
@@ -1084,22 +1068,7 @@
         expected_results = [{
             'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
             'tool': TRANSLATOR_TOOL,
-            'translated_metadata': {
-                '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
-                'url':
-                    'https://github.com/librariesio/yarn-parser#readme',
-                'codeRepository':
-                    'git+https://github.com/librariesio/yarn-parser.git',
-                'author': ['Andrew Nesbitt'],
-                'license': 'AGPL-3.0',
-                'version': '1.0.0',
-                'description':
-                    'Tiny web service for parsing yarn.lock files',
-                'issueTracker':
-                    'https://github.com/librariesio/yarn-parser/issues',
-                'name': 'yarn-parser',
-                'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
-            },
+            'translated_metadata': YARN_PARSER_METADATA,
             'mappings': ['npm'],
         }]
 
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -5,16 +5,13 @@
 
 import pytest
 
-from unittest import mock
+from unittest.mock import patch
 
-from swh.objstorage.objstorage_in_memory import InMemoryObjStorage
 from swh.model.hashutil import hash_to_bytes
-from swh.storage.in_memory import Storage
 
 from swh.indexer.metadata import OriginMetadataIndexer
-from swh.indexer.storage.in_memory import IndexerStorage
 
-from .utils import fill_storage, fill_obj_storage, BASE_TEST_CONFIG
+from .utils import BASE_TEST_CONFIG, YARN_PARSER_METADATA
 from .test_metadata import REVISION_METADATA_CONFIG
 
 
@@ -32,74 +29,39 @@
 }
 
 
-@pytest.mark.db
-@mock.patch('swh.indexer.metadata.RevisionMetadataIndexer.parse_config_file')
-@mock.patch('swh.indexer.origin_head.OriginHeadIndexer.parse_config_file')
-@mock.patch('swh.indexer.storage.in_memory.IndexerStorage')
-@mock.patch('swh.storage.in_memory.Storage')
-def test_full_origin_metadata_indexer(
-        storage_mock, idx_storage_mock, origin_head_parse_config,
-        revision_metadata_parse_config):
-    # Always returns the same instance of the idx storage, because
-    # this function is called by each of the three indexers.
-    objstorage = InMemoryObjStorage()
-    storage = Storage()
-    idx_storage = IndexerStorage()
-
-    origin_head_parse_config.return_value = ORIGIN_HEAD_CONFIG
-    revision_metadata_parse_config.return_value = REVISION_METADATA_CONFIG
-    storage_mock.return_value = storage
-    idx_storage_mock.return_value = idx_storage
-
-    fill_obj_storage(objstorage)
-    fill_storage(storage)
-
-    # TODO: find a better way to share the ContentMetadataIndexer use
-    # the same objstorage instance.
-    import swh.objstorage
-    old_inmem_objstorage = swh.objstorage._STORAGE_CLASSES['memory']
-    swh.objstorage._STORAGE_CLASSES['memory'] = lambda: objstorage
-    try:
-        indexer = OriginMetadataIndexer()
-        indexer.storage = storage
-        indexer.idx_storage = idx_storage
-        indexer.run(["git+https://github.com/librariesio/yarn-parser"])
-    finally:
-        swh.objstorage._STORAGE_CLASSES['memory'] = old_inmem_objstorage
+@pytest.fixture
+def origin_metadata_indexer():
+    prefix = 'swh.indexer.'
+    suffix = '.parse_config_file'
+    with patch(prefix + 'metadata.OriginMetadataIndexer' + suffix) as omi, \
+            patch(prefix + 'origin_head.OriginHeadIndexer' + suffix) as ohi, \
+            patch(prefix + 'metadata.RevisionMetadataIndexer' + suffix) as rmi:
+        omi.return_value = BASE_TEST_CONFIG
+        ohi.return_value = ORIGIN_HEAD_CONFIG
+        rmi.return_value = REVISION_METADATA_CONFIG
+        yield OriginMetadataIndexer()
+
+
+def test_origin_metadata_indexer(
+        idx_storage, storage, obj_storage, origin_metadata_indexer):
+
+    indexer = OriginMetadataIndexer()
+    indexer.run(["git+https://github.com/librariesio/yarn-parser"])
 
     origin = storage.origin_get({
         'type': 'git',
         'url': 'https://github.com/librariesio/yarn-parser'})
     rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
 
-    metadata = {
-        '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
-        'url':
-            'https://github.com/librariesio/yarn-parser#readme',
-        'codeRepository':
-            'git+git+https://github.com/librariesio/yarn-parser.git',
-        'author': [{
-            'type': 'Person',
-            'name': 'Andrew Nesbitt'
-        }],
-        'license': 'https://spdx.org/licenses/AGPL-3.0',
-        'version': '1.0.0',
-        'description':
-            'Tiny web service for parsing yarn.lock files',
-        'issueTracker':
-            'https://github.com/librariesio/yarn-parser/issues',
-        'name': 'yarn-parser',
-        'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
-    }
     rev_metadata = {
         'id': rev_id,
-        'translated_metadata': metadata,
+        'translated_metadata': YARN_PARSER_METADATA,
         'mappings': ['npm'],
     }
     origin_metadata = {
         'origin_id': origin['id'],
         'from_revision': rev_id,
-        'metadata': metadata,
+        'metadata': YARN_PARSER_METADATA,
         'mappings': ['npm'],
     }
diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py
--- a/swh/indexer/tests/utils.py
+++ b/swh/indexer/tests/utils.py
@@ -374,6 +374,27 @@
 }
 
 
+YARN_PARSER_METADATA = {
+    '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+    'url':
+        'https://github.com/librariesio/yarn-parser#readme',
+    'codeRepository':
+        'git+git+https://github.com/librariesio/yarn-parser.git',
+    'author': [{
+        'type': 'Person',
+        'name': 'Andrew Nesbitt'
+    }],
+    'license': 'https://spdx.org/licenses/AGPL-3.0',
+    'version': '1.0.0',
+    'description':
+        'Tiny web service for parsing yarn.lock files',
+    'issueTracker':
+        'https://github.com/librariesio/yarn-parser/issues',
+    'name': 'yarn-parser',
+    'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
+}
+
+
 def filter_dict(d, keys):
     'return a copy of the dict with keys deleted'
    if not isinstance(keys, (list, tuple)):