Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_metadata.py
# Copyright (C) 2017-2022 The Software Heritage developers | # Copyright (C) 2017-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import logging | import logging | ||||
from hypothesis import HealthCheck, given, settings, strategies | from hypothesis import HealthCheck, given, settings, strategies | ||||
import pytest | import pytest | ||||
from swh.indexer.codemeta import CODEMETA_TERMS | from swh.indexer.codemeta import CODEMETA_TERMS | ||||
from swh.indexer.metadata import ContentMetadataIndexer, RevisionMetadataIndexer | from swh.indexer.metadata import ContentMetadataIndexer, DirectoryMetadataIndexer | ||||
from swh.indexer.metadata_detector import detect_metadata | from swh.indexer.metadata_detector import detect_metadata | ||||
from swh.indexer.metadata_dictionary import MAPPINGS | from swh.indexer.metadata_dictionary import MAPPINGS | ||||
from swh.indexer.metadata_dictionary.maven import MavenMapping | from swh.indexer.metadata_dictionary.maven import MavenMapping | ||||
from swh.indexer.metadata_dictionary.npm import NpmMapping | from swh.indexer.metadata_dictionary.npm import NpmMapping | ||||
from swh.indexer.metadata_dictionary.ruby import GemspecMapping | from swh.indexer.metadata_dictionary.ruby import GemspecMapping | ||||
from swh.indexer.storage.model import ContentMetadataRow, RevisionIntrinsicMetadataRow | from swh.indexer.storage.model import ContentMetadataRow, DirectoryIntrinsicMetadataRow | ||||
from swh.indexer.tests.utils import DIRECTORY2, REVISION | from swh.indexer.tests.utils import DIRECTORY2 | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import Directory, DirectoryEntry, Revision | from swh.model.model import Directory, DirectoryEntry | ||||
from .utils import ( | from .utils import ( | ||||
BASE_TEST_CONFIG, | BASE_TEST_CONFIG, | ||||
YARN_PARSER_METADATA, | YARN_PARSER_METADATA, | ||||
fill_obj_storage, | fill_obj_storage, | ||||
fill_storage, | fill_storage, | ||||
json_document_strategy, | json_document_strategy, | ||||
xml_document_strategy, | xml_document_strategy, | ||||
) | ) | ||||
TRANSLATOR_TOOL = { | TRANSLATOR_TOOL = { | ||||
"name": "swh-metadata-translator", | "name": "swh-metadata-translator", | ||||
"version": "0.0.2", | "version": "0.0.2", | ||||
"configuration": {"type": "local", "context": "NpmMapping"}, | "configuration": {"type": "local", "context": "NpmMapping"}, | ||||
} | } | ||||
class ContentMetadataTestIndexer(ContentMetadataIndexer): | class ContentMetadataTestIndexer(ContentMetadataIndexer): | ||||
"""Specific Metadata whose configuration is enough to satisfy the | """Specific Metadata whose configuration is enough to satisfy the | ||||
indexing tests. | indexing tests. | ||||
""" | """ | ||||
def parse_config_file(self, *args, **kwargs): | def parse_config_file(self, *args, **kwargs): | ||||
assert False, "should not be called; the rev indexer configures it." | assert False, "should not be called; the dir indexer configures it." | ||||
REVISION_METADATA_CONFIG = { | DIRECTORY_METADATA_CONFIG = { | ||||
**BASE_TEST_CONFIG, | **BASE_TEST_CONFIG, | ||||
"tools": TRANSLATOR_TOOL, | "tools": TRANSLATOR_TOOL, | ||||
} | } | ||||
class TestMetadata: | class TestMetadata: | ||||
""" | """ | ||||
Tests metadata_mock_tool tool for Metadata detection | Tests metadata_mock_tool tool for Metadata detection | ||||
▲ Show 20 Lines • Show All 1,091 Lines • ▼ Show 20 Lines | """ | ||||
) | ) | ||||
def test_gemspec_adversarial(self, doc): | def test_gemspec_adversarial(self, doc): | ||||
parts = [b"Gem::Specification.new do |s|\n"] | parts = [b"Gem::Specification.new do |s|\n"] | ||||
for (k, v) in doc.items(): | for (k, v) in doc.items(): | ||||
parts.append(" s.{} = {}\n".format(k, repr(v)).encode()) | parts.append(" s.{} = {}\n".format(k, repr(v)).encode()) | ||||
parts.append(b"end\n") | parts.append(b"end\n") | ||||
self.gemspec_mapping.translate(b"".join(parts)) | self.gemspec_mapping.translate(b"".join(parts)) | ||||
def test_revision_metadata_indexer(self): | def test_directory_metadata_indexer(self): | ||||
metadata_indexer = RevisionMetadataIndexer(config=REVISION_METADATA_CONFIG) | metadata_indexer = DirectoryMetadataIndexer(config=DIRECTORY_METADATA_CONFIG) | ||||
fill_obj_storage(metadata_indexer.objstorage) | fill_obj_storage(metadata_indexer.objstorage) | ||||
fill_storage(metadata_indexer.storage) | fill_storage(metadata_indexer.storage) | ||||
tool = metadata_indexer.idx_storage.indexer_configuration_get( | tool = metadata_indexer.idx_storage.indexer_configuration_get( | ||||
{f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()} | {f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()} | ||||
) | ) | ||||
assert tool is not None | assert tool is not None | ||||
rev = REVISION | dir_ = DIRECTORY2 | ||||
assert rev.directory == DIRECTORY2.id | |||||
metadata_indexer.idx_storage.content_metadata_add( | metadata_indexer.idx_storage.content_metadata_add( | ||||
[ | [ | ||||
ContentMetadataRow( | ContentMetadataRow( | ||||
id=DIRECTORY2.entries[0].target, | id=DIRECTORY2.entries[0].target, | ||||
indexer_configuration_id=tool["id"], | indexer_configuration_id=tool["id"], | ||||
metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
) | ) | ||||
] | ] | ||||
) | ) | ||||
metadata_indexer.run([rev.id]) | metadata_indexer.run([dir_.id]) | ||||
results = list( | results = list( | ||||
metadata_indexer.idx_storage.revision_intrinsic_metadata_get([REVISION.id]) | metadata_indexer.idx_storage.directory_intrinsic_metadata_get( | ||||
[DIRECTORY2.id] | |||||
) | |||||
) | ) | ||||
expected_results = [ | expected_results = [ | ||||
RevisionIntrinsicMetadataRow( | DirectoryIntrinsicMetadataRow( | ||||
id=rev.id, | id=dir_.id, | ||||
tool=TRANSLATOR_TOOL, | tool=TRANSLATOR_TOOL, | ||||
metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
mappings=["npm"], | mappings=["npm"], | ||||
) | ) | ||||
] | ] | ||||
for result in results: | for result in results: | ||||
del result.tool["id"] | del result.tool["id"] | ||||
# then | # then | ||||
assert results == expected_results | assert results == expected_results | ||||
def test_revision_metadata_indexer_single_root_dir(self): | def test_directory_metadata_indexer_single_root_dir(self): | ||||
metadata_indexer = RevisionMetadataIndexer(config=REVISION_METADATA_CONFIG) | metadata_indexer = DirectoryMetadataIndexer(config=DIRECTORY_METADATA_CONFIG) | ||||
fill_obj_storage(metadata_indexer.objstorage) | fill_obj_storage(metadata_indexer.objstorage) | ||||
fill_storage(metadata_indexer.storage) | fill_storage(metadata_indexer.storage) | ||||
# Add a parent directory, that is the only directory at the root | # Add a parent directory, that is the only directory at the root | ||||
# of the revision | # of the directory | ||||
rev = REVISION | dir_ = DIRECTORY2 | ||||
assert rev.directory == DIRECTORY2.id | |||||
directory = Directory( | new_dir = Directory( | ||||
entries=( | entries=( | ||||
DirectoryEntry( | DirectoryEntry( | ||||
name=b"foobar-1.0.0", | name=b"foobar-1.0.0", | ||||
type="dir", | type="dir", | ||||
target=rev.directory, | target=dir_.id, | ||||
perms=16384, | perms=16384, | ||||
), | ), | ||||
), | ), | ||||
) | ) | ||||
assert directory.id is not None | assert new_dir.id is not None | ||||
metadata_indexer.storage.directory_add([directory]) | metadata_indexer.storage.directory_add([new_dir]) | ||||
new_rev_dict = {**rev.to_dict(), "directory": directory.id} | |||||
new_rev_dict.pop("id") | |||||
new_rev = Revision.from_dict(new_rev_dict) | |||||
metadata_indexer.storage.revision_add([new_rev]) | |||||
tool = metadata_indexer.idx_storage.indexer_configuration_get( | tool = metadata_indexer.idx_storage.indexer_configuration_get( | ||||
{f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()} | {f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()} | ||||
) | ) | ||||
assert tool is not None | assert tool is not None | ||||
metadata_indexer.idx_storage.content_metadata_add( | metadata_indexer.idx_storage.content_metadata_add( | ||||
[ | [ | ||||
ContentMetadataRow( | ContentMetadataRow( | ||||
id=DIRECTORY2.entries[0].target, | id=DIRECTORY2.entries[0].target, | ||||
indexer_configuration_id=tool["id"], | indexer_configuration_id=tool["id"], | ||||
metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
) | ) | ||||
] | ] | ||||
) | ) | ||||
metadata_indexer.run([new_rev.id]) | metadata_indexer.run([new_dir.id]) | ||||
results = list( | results = list( | ||||
metadata_indexer.idx_storage.revision_intrinsic_metadata_get([new_rev.id]) | metadata_indexer.idx_storage.directory_intrinsic_metadata_get([new_dir.id]) | ||||
) | ) | ||||
expected_results = [ | expected_results = [ | ||||
RevisionIntrinsicMetadataRow( | DirectoryIntrinsicMetadataRow( | ||||
id=new_rev.id, | id=new_dir.id, | ||||
tool=TRANSLATOR_TOOL, | tool=TRANSLATOR_TOOL, | ||||
metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
mappings=["npm"], | mappings=["npm"], | ||||
) | ) | ||||
] | ] | ||||
for result in results: | for result in results: | ||||
del result.tool["id"] | del result.tool["id"] | ||||
# then | # then | ||||
assert results == expected_results | assert results == expected_results |