Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_origin_metadata.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import copy | import copy | ||||
from unittest.mock import patch | from unittest.mock import patch | ||||
import pytest | import pytest | ||||
from swh.indexer.metadata import OriginMetadataIndexer | from swh.indexer.metadata import OriginMetadataIndexer | ||||
from swh.indexer.storage.interface import IndexerStorageInterface | from swh.indexer.storage.interface import IndexerStorageInterface | ||||
from swh.indexer.storage.model import ( | from swh.indexer.storage.model import ( | ||||
DirectoryIntrinsicMetadataRow, | |||||
OriginIntrinsicMetadataRow, | OriginIntrinsicMetadataRow, | ||||
RevisionIntrinsicMetadataRow, | |||||
) | ) | ||||
from swh.model.model import Origin | from swh.model.model import Origin | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
from .test_metadata import TRANSLATOR_TOOL | from .test_metadata import TRANSLATOR_TOOL | ||||
from .utils import REVISION, YARN_PARSER_METADATA | from .utils import DIRECTORY2, YARN_PARSER_METADATA | ||||
@pytest.fixture | @pytest.fixture | ||||
def swh_indexer_config(swh_indexer_config): | def swh_indexer_config(swh_indexer_config): | ||||
"""Override the default configuration to override the tools entry""" | """Override the default configuration to override the tools entry""" | ||||
cfg = copy.deepcopy(swh_indexer_config) | cfg = copy.deepcopy(swh_indexer_config) | ||||
cfg["tools"] = TRANSLATOR_TOOL | cfg["tools"] = TRANSLATOR_TOOL | ||||
return cfg | return cfg | ||||
def test_origin_metadata_indexer( | def test_origin_metadata_indexer_release( | ||||
swh_indexer_config, | |||||
idx_storage: IndexerStorageInterface, | |||||
storage: StorageInterface, | |||||
obj_storage, | |||||
) -> None: | |||||
indexer = OriginMetadataIndexer(config=swh_indexer_config) | |||||
origin = "https://npm.example.org/yarn-parser" | |||||
indexer.run([origin]) | |||||
tool = swh_indexer_config["tools"] | |||||
dir_id = DIRECTORY2.id | |||||
dir_metadata = DirectoryIntrinsicMetadataRow( | |||||
id=dir_id, | |||||
tool=tool, | |||||
metadata=YARN_PARSER_METADATA, | |||||
mappings=["npm"], | |||||
) | |||||
origin_metadata = OriginIntrinsicMetadataRow( | |||||
id=origin, | |||||
tool=tool, | |||||
from_directory=dir_id, | |||||
metadata=YARN_PARSER_METADATA, | |||||
mappings=["npm"], | |||||
) | |||||
dir_results = list(idx_storage.directory_intrinsic_metadata_get([dir_id])) | |||||
for dir_result in dir_results: | |||||
assert dir_result.tool | |||||
del dir_result.tool["id"] | |||||
assert dir_results == [dir_metadata] | |||||
orig_results = list(idx_storage.origin_intrinsic_metadata_get([origin])) | |||||
for orig_result in orig_results: | |||||
assert orig_result.tool | |||||
del orig_result.tool["id"] | |||||
assert orig_results == [origin_metadata] | |||||
def test_origin_metadata_indexer_revision( | |||||
swh_indexer_config, | swh_indexer_config, | ||||
idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
storage: StorageInterface, | storage: StorageInterface, | ||||
obj_storage, | obj_storage, | ||||
) -> None: | ) -> None: | ||||
indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
origin = "https://github.com/librariesio/yarn-parser" | origin = "https://github.com/librariesio/yarn-parser" | ||||
indexer.run([origin]) | indexer.run([origin]) | ||||
tool = swh_indexer_config["tools"] | tool = swh_indexer_config["tools"] | ||||
rev_id = REVISION.id | dir_id = DIRECTORY2.id | ||||
rev_metadata = RevisionIntrinsicMetadataRow( | dir_metadata = DirectoryIntrinsicMetadataRow( | ||||
id=rev_id, | id=dir_id, | ||||
tool=tool, | tool=tool, | ||||
metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
mappings=["npm"], | mappings=["npm"], | ||||
) | ) | ||||
origin_metadata = OriginIntrinsicMetadataRow( | origin_metadata = OriginIntrinsicMetadataRow( | ||||
id=origin, | id=origin, | ||||
tool=tool, | tool=tool, | ||||
from_revision=rev_id, | from_directory=dir_id, | ||||
metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
mappings=["npm"], | mappings=["npm"], | ||||
) | ) | ||||
rev_results = list(idx_storage.revision_intrinsic_metadata_get([rev_id])) | dir_results = list(idx_storage.directory_intrinsic_metadata_get([dir_id])) | ||||
for rev_result in rev_results: | for dir_result in dir_results: | ||||
assert rev_result.tool | assert dir_result.tool | ||||
del rev_result.tool["id"] | del dir_result.tool["id"] | ||||
assert rev_results == [rev_metadata] | assert dir_results == [dir_metadata] | ||||
orig_results = list(idx_storage.origin_intrinsic_metadata_get([origin])) | orig_results = list(idx_storage.origin_intrinsic_metadata_get([origin])) | ||||
for orig_result in orig_results: | for orig_result in orig_results: | ||||
assert orig_result.tool | assert orig_result.tool | ||||
del orig_result.tool["id"] | del orig_result.tool["id"] | ||||
assert orig_results == [origin_metadata] | assert orig_results == [origin_metadata] | ||||
def test_origin_metadata_indexer_duplicate_origin( | def test_origin_metadata_indexer_duplicate_origin( | ||||
swh_indexer_config, | swh_indexer_config, | ||||
idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
storage: StorageInterface, | storage: StorageInterface, | ||||
obj_storage, | obj_storage, | ||||
) -> None: | ) -> None: | ||||
indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
indexer.storage = storage | indexer.storage = storage | ||||
indexer.idx_storage = idx_storage | indexer.idx_storage = idx_storage | ||||
indexer.run(["https://github.com/librariesio/yarn-parser"]) | indexer.run(["https://github.com/librariesio/yarn-parser"]) | ||||
indexer.run(["https://github.com/librariesio/yarn-parser"] * 2) | indexer.run(["https://github.com/librariesio/yarn-parser"] * 2) | ||||
origin = "https://github.com/librariesio/yarn-parser" | origin = "https://github.com/librariesio/yarn-parser" | ||||
rev_id = REVISION.id | dir_id = DIRECTORY2.id | ||||
rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) | dir_results = list(indexer.idx_storage.directory_intrinsic_metadata_get([dir_id])) | ||||
assert len(rev_results) == 1 | assert len(dir_results) == 1 | ||||
orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | ||||
assert len(orig_results) == 1 | assert len(orig_results) == 1 | ||||
def test_origin_metadata_indexer_missing_head( | def test_origin_metadata_indexer_missing_head( | ||||
swh_indexer_config, | swh_indexer_config, | ||||
idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
Show All 19 Lines | |||||
) -> None: | ) -> None: | ||||
origin1 = "https://example.com" | origin1 = "https://example.com" | ||||
origin2 = "https://github.com/librariesio/yarn-parser" | origin2 = "https://github.com/librariesio/yarn-parser" | ||||
storage.origin_add([Origin(url=origin1)]) | storage.origin_add([Origin(url=origin1)]) | ||||
indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
indexer.run([origin1, origin2]) | indexer.run([origin1, origin2]) | ||||
rev_id = REVISION.id | dir_id = DIRECTORY2.id | ||||
rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) | dir_results = list(indexer.idx_storage.directory_intrinsic_metadata_get([dir_id])) | ||||
assert rev_results == [ | assert dir_results == [ | ||||
RevisionIntrinsicMetadataRow( | DirectoryIntrinsicMetadataRow( | ||||
id=rev_id, | id=dir_id, | ||||
metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
mappings=["npm"], | mappings=["npm"], | ||||
tool=rev_results[0].tool, | tool=dir_results[0].tool, | ||||
) | ) | ||||
] | ] | ||||
orig_results = list( | orig_results = list( | ||||
indexer.idx_storage.origin_intrinsic_metadata_get([origin1, origin2]) | indexer.idx_storage.origin_intrinsic_metadata_get([origin1, origin2]) | ||||
) | ) | ||||
for orig_result in orig_results: | for orig_result in orig_results: | ||||
assert orig_results == [ | assert orig_results == [ | ||||
OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
id=origin2, | id=origin2, | ||||
from_revision=rev_id, | from_directory=dir_id, | ||||
metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
mappings=["npm"], | mappings=["npm"], | ||||
tool=orig_results[0].tool, | tool=orig_results[0].tool, | ||||
) | ) | ||||
] | ] | ||||
def test_origin_metadata_indexer_duplicate_revision( | def test_origin_metadata_indexer_duplicate_directory( | ||||
swh_indexer_config, | swh_indexer_config, | ||||
idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
storage: StorageInterface, | storage: StorageInterface, | ||||
obj_storage, | obj_storage, | ||||
) -> None: | ) -> None: | ||||
indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
indexer.storage = storage | indexer.storage = storage | ||||
indexer.idx_storage = idx_storage | indexer.idx_storage = idx_storage | ||||
indexer.catch_exceptions = False | indexer.catch_exceptions = False | ||||
origin1 = "https://github.com/librariesio/yarn-parser" | origin1 = "https://github.com/librariesio/yarn-parser" | ||||
origin2 = "https://github.com/librariesio/yarn-parser.git" | origin2 = "https://github.com/librariesio/yarn-parser.git" | ||||
indexer.run([origin1, origin2]) | indexer.run([origin1, origin2]) | ||||
rev_id = REVISION.id | dir_id = DIRECTORY2.id | ||||
rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) | dir_results = list(indexer.idx_storage.directory_intrinsic_metadata_get([dir_id])) | ||||
assert len(rev_results) == 1 | assert len(dir_results) == 1 | ||||
orig_results = list( | orig_results = list( | ||||
indexer.idx_storage.origin_intrinsic_metadata_get([origin1, origin2]) | indexer.idx_storage.origin_intrinsic_metadata_get([origin1, origin2]) | ||||
) | ) | ||||
assert len(orig_results) == 2 | assert len(orig_results) == 2 | ||||
def test_origin_metadata_indexer_no_metadata_file( | def test_origin_metadata_indexer_no_metadata_file( | ||||
swh_indexer_config, | swh_indexer_config, | ||||
idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
storage: StorageInterface, | storage: StorageInterface, | ||||
obj_storage, | obj_storage, | ||||
) -> None: | ) -> None: | ||||
indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
origin = "https://github.com/librariesio/yarn-parser" | origin = "https://github.com/librariesio/yarn-parser" | ||||
with patch("swh.indexer.metadata_dictionary.npm.NpmMapping.filename", b"foo.json"): | with patch("swh.indexer.metadata_dictionary.npm.NpmMapping.filename", b"foo.json"): | ||||
indexer.run([origin]) | indexer.run([origin]) | ||||
rev_id = REVISION.id | dir_id = DIRECTORY2.id | ||||
rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) | dir_results = list(indexer.idx_storage.directory_intrinsic_metadata_get([dir_id])) | ||||
assert rev_results == [] | assert dir_results == [] | ||||
orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | ||||
assert orig_results == [] | assert orig_results == [] | ||||
def test_origin_metadata_indexer_no_metadata( | def test_origin_metadata_indexer_no_metadata( | ||||
swh_indexer_config, | swh_indexer_config, | ||||
idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
storage: StorageInterface, | storage: StorageInterface, | ||||
obj_storage, | obj_storage, | ||||
) -> None: | ) -> None: | ||||
indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
origin = "https://github.com/librariesio/yarn-parser" | origin = "https://github.com/librariesio/yarn-parser" | ||||
with patch( | with patch( | ||||
"swh.indexer.metadata.RevisionMetadataIndexer" | "swh.indexer.metadata.DirectoryMetadataIndexer" | ||||
".translate_revision_intrinsic_metadata", | ".translate_directory_intrinsic_metadata", | ||||
return_value=(["npm"], {"@context": "foo"}), | return_value=(["npm"], {"@context": "foo"}), | ||||
): | ): | ||||
indexer.run([origin]) | indexer.run([origin]) | ||||
rev_id = REVISION.id | dir_id = DIRECTORY2.id | ||||
rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) | dir_results = list(indexer.idx_storage.directory_intrinsic_metadata_get([dir_id])) | ||||
assert rev_results == [] | assert dir_results == [] | ||||
orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | ||||
assert orig_results == [] | assert orig_results == [] | ||||
def test_origin_metadata_indexer_error( | def test_origin_metadata_indexer_error( | ||||
swh_indexer_config, | swh_indexer_config, | ||||
idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
storage: StorageInterface, | storage: StorageInterface, | ||||
obj_storage, | obj_storage, | ||||
) -> None: | ) -> None: | ||||
indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
origin = "https://github.com/librariesio/yarn-parser" | origin = "https://github.com/librariesio/yarn-parser" | ||||
with patch( | with patch( | ||||
"swh.indexer.metadata.RevisionMetadataIndexer" | "swh.indexer.metadata.DirectoryMetadataIndexer" | ||||
".translate_revision_intrinsic_metadata", | ".translate_directory_intrinsic_metadata", | ||||
return_value=None, | return_value=None, | ||||
): | ): | ||||
indexer.run([origin]) | indexer.run([origin]) | ||||
rev_id = REVISION.id | dir_id = DIRECTORY2.id | ||||
rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) | dir_results = list(indexer.idx_storage.directory_intrinsic_metadata_get([dir_id])) | ||||
assert rev_results == [] | assert dir_results == [] | ||||
orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) | ||||
assert orig_results == [] | assert orig_results == [] | ||||
def test_origin_metadata_indexer_unknown_origin( | def test_origin_metadata_indexer_unknown_origin( | ||||
swh_indexer_config, | swh_indexer_config, | ||||
idx_storage: IndexerStorageInterface, | idx_storage: IndexerStorageInterface, | ||||
storage: StorageInterface, | storage: StorageInterface, | ||||
obj_storage, | obj_storage, | ||||
) -> None: | ) -> None: | ||||
indexer = OriginMetadataIndexer(config=swh_indexer_config) | indexer = OriginMetadataIndexer(config=swh_indexer_config) | ||||
result = indexer.index_list(["https://unknown.org/foo"]) | result = indexer.index_list([Origin("https://unknown.org/foo")]) | ||||
assert not result | assert not result |