Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_origin_metadata.py
| # Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
| # See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
| # License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
| # See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
| import copy | import copy | ||||
| from unittest.mock import patch | from unittest.mock import patch | ||||
| import pytest | import pytest | ||||
| from swh.indexer.metadata import OriginMetadataIndexer | from swh.indexer.metadata import OriginMetadataIndexer | ||||
| from swh.indexer.storage.interface import IndexerStorageInterface | from swh.indexer.storage.interface import IndexerStorageInterface | ||||
| from swh.indexer.storage.model import ( | from swh.indexer.storage.model import ( | ||||
| DirectoryIntrinsicMetadataRow, | |||||
| OriginIntrinsicMetadataRow, | OriginIntrinsicMetadataRow, | ||||
| RevisionIntrinsicMetadataRow, | |||||
| ) | ) | ||||
| from swh.model.model import Origin | from swh.model.model import Origin | ||||
| from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
| from .test_metadata import TRANSLATOR_TOOL | from .test_metadata import TRANSLATOR_TOOL | ||||
| from .utils import REVISION, YARN_PARSER_METADATA | from .utils import DIRECTORY2, YARN_PARSER_METADATA | ||||
| @pytest.fixture | @pytest.fixture | ||||
| def swh_indexer_config(swh_indexer_config): | def swh_indexer_config(swh_indexer_config): | ||||
| """Override the default configuration to override the tools entry""" | """Override the default configuration to override the tools entry""" | ||||
| cfg = copy.deepcopy(swh_indexer_config) | cfg = copy.deepcopy(swh_indexer_config) | ||||
| cfg["tools"] = TRANSLATOR_TOOL | cfg["tools"] = TRANSLATOR_TOOL | ||||
| return cfg | return cfg | ||||
| def test_origin_metadata_indexer( | def test_origin_metadata_indexer( | ||||
| swh_indexer_config, | swh_indexer_config, | ||||
| idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
| storage: StorageInterface, | storage: StorageInterface, | ||||
| obj_storage, | obj_storage, | ||||
| ) -> None: | ) -> None: | ||||
| indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
| origin = "https://github.com/librariesio/yarn-parser" | origin = "https://github.com/librariesio/yarn-parser" | ||||
| indexer.run([origin]) | indexer.run([origin]) | ||||
| tool = swh_indexer_config["tools"] | tool = swh_indexer_config["tools"] | ||||
| rev_id = REVISION.id | dir_id = DIRECTORY2.id | ||||
| rev_metadata = RevisionIntrinsicMetadataRow( | dir_metadata = DirectoryIntrinsicMetadataRow( | ||||
| id=rev_id, | id=dir_id, | ||||
| tool=tool, | tool=tool, | ||||
| metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
| mappings=["npm"], | mappings=["npm"], | ||||
| ) | ) | ||||
| origin_metadata = OriginIntrinsicMetadataRow( | origin_metadata = OriginIntrinsicMetadataRow( | ||||
| id=origin, | id=origin, | ||||
| tool=tool, | tool=tool, | ||||
| from_revision=rev_id, | from_directory=dir_id, | ||||
| metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
| mappings=["npm"], | mappings=["npm"], | ||||
| ) | ) | ||||
| rev_results = list(idx_storage.revision_intrinsic_metadata_get([rev_id])) | dir_results = list(idx_storage.directory_intrinsic_metadata_get([dir_id])) | ||||
| for rev_result in rev_results: | for dir_result in dir_results: | ||||
| assert rev_result.tool | assert dir_result.tool | ||||
| del rev_result.tool["id"] | del dir_result.tool["id"] | ||||
| assert rev_results == [rev_metadata] | assert dir_results == [dir_metadata] | ||||
| orig_results = list(idx_storage.origin_intrinsic_metadata_get([origin])) | orig_results = list(idx_storage.origin_intrinsic_metadata_get([origin])) | ||||
| for orig_result in orig_results: | for orig_result in orig_results: | ||||
| assert orig_result.tool | assert orig_result.tool | ||||
| del orig_result.tool["id"] | del orig_result.tool["id"] | ||||
| assert orig_results == [origin_metadata] | assert orig_results == [origin_metadata] | ||||
| def test_origin_metadata_indexer_duplicate_origin( | def test_origin_metadata_indexer_duplicate_origin( | ||||
| swh_indexer_config, | swh_indexer_config, | ||||
| idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
| storage: StorageInterface, | storage: StorageInterface, | ||||
| obj_storage, | obj_storage, | ||||
| ) -> None: | ) -> None: | ||||
| indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
| indexer.storage = storage | indexer.storage = storage | ||||
| indexer.idx_storage = idx_storage | indexer.idx_storage = idx_storage | ||||
| indexer.run(["https://github.com/librariesio/yarn-parser"]) | indexer.run(["https://github.com/librariesio/yarn-parser"]) | ||||
| indexer.run(["https://github.com/librariesio/yarn-parser"] * 2) | indexer.run(["https://github.com/librariesio/yarn-parser"] * 2) | ||||
| origin = "https://github.com/librariesio/yarn-parser" | origin = "https://github.com/librariesio/yarn-parser" | ||||
| rev_id = REVISION.id | dir_id = DIRECTORY2.id | ||||
| rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) | dir_results = list(indexer.idx_storage.directory_intrinsic_metadata_get([dir_id])) | ||||
| assert len(rev_results) == 1 | assert len(dir_results) == 1 | ||||
| orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | ||||
| assert len(orig_results) == 1 | assert len(orig_results) == 1 | ||||
| def test_origin_metadata_indexer_missing_head( | def test_origin_metadata_indexer_missing_head( | ||||
| swh_indexer_config, | swh_indexer_config, | ||||
| idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
| Show All 19 Lines | |||||
| ) -> None: | ) -> None: | ||||
| origin1 = "https://example.com" | origin1 = "https://example.com" | ||||
| origin2 = "https://github.com/librariesio/yarn-parser" | origin2 = "https://github.com/librariesio/yarn-parser" | ||||
| storage.origin_add([Origin(url=origin1)]) | storage.origin_add([Origin(url=origin1)]) | ||||
| indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
| indexer.run([origin1, origin2]) | indexer.run([origin1, origin2]) | ||||
| rev_id = REVISION.id | dir_id = DIRECTORY2.id | ||||
| rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) | dir_results = list(indexer.idx_storage.directory_intrinsic_metadata_get([dir_id])) | ||||
| assert rev_results == [ | assert dir_results == [ | ||||
| RevisionIntrinsicMetadataRow( | DirectoryIntrinsicMetadataRow( | ||||
| id=rev_id, | id=dir_id, | ||||
| metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
| mappings=["npm"], | mappings=["npm"], | ||||
| tool=rev_results[0].tool, | tool=dir_results[0].tool, | ||||
| ) | ) | ||||
| ] | ] | ||||
| orig_results = list( | orig_results = list( | ||||
| indexer.idx_storage.origin_intrinsic_metadata_get([origin1, origin2]) | indexer.idx_storage.origin_intrinsic_metadata_get([origin1, origin2]) | ||||
| ) | ) | ||||
| for orig_result in orig_results: | for orig_result in orig_results: | ||||
| assert orig_results == [ | assert orig_results == [ | ||||
| OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
| id=origin2, | id=origin2, | ||||
| from_revision=rev_id, | from_directory=dir_id, | ||||
| metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
| mappings=["npm"], | mappings=["npm"], | ||||
| tool=orig_results[0].tool, | tool=orig_results[0].tool, | ||||
| ) | ) | ||||
| ] | ] | ||||
| def test_origin_metadata_indexer_duplicate_revision( | def test_origin_metadata_indexer_duplicate_directory( | ||||
| swh_indexer_config, | swh_indexer_config, | ||||
| idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
| storage: StorageInterface, | storage: StorageInterface, | ||||
| obj_storage, | obj_storage, | ||||
| ) -> None: | ) -> None: | ||||
| indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
| indexer.storage = storage | indexer.storage = storage | ||||
| indexer.idx_storage = idx_storage | indexer.idx_storage = idx_storage | ||||
| indexer.catch_exceptions = False | indexer.catch_exceptions = False | ||||
| origin1 = "https://github.com/librariesio/yarn-parser" | origin1 = "https://github.com/librariesio/yarn-parser" | ||||
| origin2 = "https://github.com/librariesio/yarn-parser.git" | origin2 = "https://github.com/librariesio/yarn-parser.git" | ||||
| indexer.run([origin1, origin2]) | indexer.run([origin1, origin2]) | ||||
| rev_id = REVISION.id | dir_id = DIRECTORY2.id | ||||
| rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) | dir_results = list(indexer.idx_storage.directory_intrinsic_metadata_get([dir_id])) | ||||
| assert len(rev_results) == 1 | assert len(dir_results) == 1 | ||||
| orig_results = list( | orig_results = list( | ||||
| indexer.idx_storage.origin_intrinsic_metadata_get([origin1, origin2]) | indexer.idx_storage.origin_intrinsic_metadata_get([origin1, origin2]) | ||||
| ) | ) | ||||
| assert len(orig_results) == 2 | assert len(orig_results) == 2 | ||||
| def test_origin_metadata_indexer_no_metadata_file( | def test_origin_metadata_indexer_no_metadata_file( | ||||
| swh_indexer_config, | swh_indexer_config, | ||||
| idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
| storage: StorageInterface, | storage: StorageInterface, | ||||
| obj_storage, | obj_storage, | ||||
| ) -> None: | ) -> None: | ||||
| indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
| origin = "https://github.com/librariesio/yarn-parser" | origin = "https://github.com/librariesio/yarn-parser" | ||||
| with patch("swh.indexer.metadata_dictionary.npm.NpmMapping.filename", b"foo.json"): | with patch("swh.indexer.metadata_dictionary.npm.NpmMapping.filename", b"foo.json"): | ||||
| indexer.run([origin]) | indexer.run([origin]) | ||||
| rev_id = REVISION.id | dir_id = DIRECTORY2.id | ||||
| rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) | dir_results = list(indexer.idx_storage.directory_intrinsic_metadata_get([dir_id])) | ||||
| assert rev_results == [] | assert dir_results == [] | ||||
| orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | ||||
| assert orig_results == [] | assert orig_results == [] | ||||
| def test_origin_metadata_indexer_no_metadata( | def test_origin_metadata_indexer_no_metadata( | ||||
| swh_indexer_config, | swh_indexer_config, | ||||
| idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
| storage: StorageInterface, | storage: StorageInterface, | ||||
| obj_storage, | obj_storage, | ||||
| ) -> None: | ) -> None: | ||||
| indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
| origin = "https://github.com/librariesio/yarn-parser" | origin = "https://github.com/librariesio/yarn-parser" | ||||
| with patch( | with patch( | ||||
| "swh.indexer.metadata.RevisionMetadataIndexer" | "swh.indexer.metadata.DirectoryMetadataIndexer" | ||||
| ".translate_revision_intrinsic_metadata", | ".translate_directory_intrinsic_metadata", | ||||
| return_value=(["npm"], {"@context": "foo"}), | return_value=(["npm"], {"@context": "foo"}), | ||||
| ): | ): | ||||
| indexer.run([origin]) | indexer.run([origin]) | ||||
| rev_id = REVISION.id | dir_id = DIRECTORY2.id | ||||
| rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) | dir_results = list(indexer.idx_storage.directory_intrinsic_metadata_get([dir_id])) | ||||
| assert rev_results == [] | assert dir_results == [] | ||||
| orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | ||||
| assert orig_results == [] | assert orig_results == [] | ||||
| def test_origin_metadata_indexer_error( | def test_origin_metadata_indexer_error( | ||||
| swh_indexer_config, | swh_indexer_config, | ||||
| idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
| storage: StorageInterface, | storage: StorageInterface, | ||||
| obj_storage, | obj_storage, | ||||
| ) -> None: | ) -> None: | ||||
| indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
| origin = "https://github.com/librariesio/yarn-parser" | origin = "https://github.com/librariesio/yarn-parser" | ||||
| with patch( | with patch( | ||||
| "swh.indexer.metadata.RevisionMetadataIndexer" | "swh.indexer.metadata.DirectoryMetadataIndexer" | ||||
| ".translate_revision_intrinsic_metadata", | ".translate_directory_intrinsic_metadata", | ||||
| return_value=None, | return_value=None, | ||||
| ): | ): | ||||
| indexer.run([origin]) | indexer.run([origin]) | ||||
| rev_id = REVISION.id | dir_id = DIRECTORY2.id | ||||
| rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) | dir_results = list(indexer.idx_storage.directory_intrinsic_metadata_get([dir_id])) | ||||
| assert rev_results == [] | assert dir_results == [] | ||||
| orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | ||||
| assert orig_results == [] | assert orig_results == [] | ||||
| def test_origin_metadata_indexer_unknown_origin( | def test_origin_metadata_indexer_unknown_origin( | ||||
| swh_indexer_config, | swh_indexer_config, | ||||
| idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
| storage: StorageInterface, | storage: StorageInterface, | ||||
| obj_storage, | obj_storage, | ||||
| ) -> None: | ) -> None: | ||||
| indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
| result = indexer.index_list([Origin("https://unknown.org/foo")]) | result = indexer.index_list([Origin("https://unknown.org/foo")]) | ||||
| assert not result | assert not result | ||||