Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_metadata.py
# Copyright (C) 2017-2018 The Software Heritage developers | # Copyright (C) 2017-2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | |||||
import unittest | import unittest | ||||
from hypothesis import given, strategies | |||||
import xmltodict | |||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.indexer.codemeta import CODEMETA_KEYS | |||||
from swh.indexer.metadata_dictionary import ( | from swh.indexer.metadata_dictionary import ( | ||||
CROSSWALK_TABLE, MAPPINGS, merge_values) | CROSSWALK_TABLE, MAPPINGS, merge_values) | ||||
from swh.indexer.metadata_detector import ( | from swh.indexer.metadata_detector import ( | ||||
detect_metadata, extract_minimal_metadata_dict | detect_metadata, extract_minimal_metadata_dict | ||||
) | ) | ||||
from swh.indexer.metadata import ( | from swh.indexer.metadata import ( | ||||
ContentMetadataIndexer, RevisionMetadataIndexer | ContentMetadataIndexer, RevisionMetadataIndexer | ||||
) | ) | ||||
from .utils import ( | from .utils import ( | ||||
BASE_TEST_CONFIG, fill_obj_storage, fill_storage | BASE_TEST_CONFIG, fill_obj_storage, fill_storage, | ||||
json_document_strategy | |||||
) | ) | ||||
TRANSLATOR_TOOL = { | TRANSLATOR_TOOL = { | ||||
'name': 'swh-metadata-translator', | 'name': 'swh-metadata-translator', | ||||
'version': '0.0.2', | 'version': '0.0.2', | ||||
'configuration': { | 'configuration': { | ||||
'type': 'local', | 'type': 'local', | ||||
▲ Show 20 Lines • Show All 748 Lines • ▼ Show 20 Lines | def test_compute_metadata_maven_empty_nodes(self): | ||||
'type': 'SoftwareSourceCode', | 'type': 'SoftwareSourceCode', | ||||
'name': 'Maven Default Project', | 'name': 'Maven Default Project', | ||||
'identifier': 'com.mycompany.app', | 'identifier': 'com.mycompany.app', | ||||
'version': '1.2.3', | 'version': '1.2.3', | ||||
'codeRepository': | 'codeRepository': | ||||
'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app', | 'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app', | ||||
}) | }) | ||||
raw_content = b""" | |||||
<project> | |||||
<groupId></groupId> | |||||
<version>1.2.3</version> | |||||
</project>""" | |||||
result = self.maven_mapping.translate(raw_content) | |||||
self.assertEqual(result, { | |||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | |||||
'type': 'SoftwareSourceCode', | |||||
'version': '1.2.3', | |||||
}) | |||||
def test_compute_metadata_maven_invalid_licenses(self): | def test_compute_metadata_maven_invalid_licenses(self): | ||||
raw_content = b""" | raw_content = b""" | ||||
<project> | <project> | ||||
<name>Maven Default Project</name> | <name>Maven Default Project</name> | ||||
<modelVersion>4.0.0</modelVersion> | <modelVersion>4.0.0</modelVersion> | ||||
<groupId>com.mycompany.app</groupId> | <groupId>com.mycompany.app</groupId> | ||||
<artifactId>my-app</artifactId> | <artifactId>my-app</artifactId> | ||||
<version>1.2.3</version> | <version>1.2.3</version> | ||||
▲ Show 20 Lines • Show All 242 Lines • ▼ Show 20 Lines | """ | ||||
result = self.gemspec_mapping.translate(raw_content) | result = self.gemspec_mapping.translate(raw_content) | ||||
self.assertEqual(result, { | self.assertEqual(result, { | ||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | ||||
'type': 'SoftwareSourceCode', | 'type': 'SoftwareSourceCode', | ||||
'name': 'rb-system-with-aliases', | 'name': 'rb-system-with-aliases', | ||||
'description': 'execute system commands with aliases', | 'description': 'execute system commands with aliases', | ||||
}) | }) | ||||
@given(json_document_strategy(
    keys=list(MAPPINGS['NpmMapping'].mapping)))
def test_npm_adversarial(self, doc):
    """Feed generated JSON documents to the npm mapping.

    The documents are built from the keys of the npm mapping table.
    Nothing is asserted on the result: the test passes as long as
    ``translate`` does not raise.
    """
    # Serialize to JSON and hand the encoded bytes to the mapping.
    self.npm_mapping.translate(json.dumps(doc).encode())
@given(json_document_strategy(keys=CODEMETA_KEYS))
def test_codemeta_adversarial(self, doc):
    """Feed generated JSON documents to the CodeMeta mapping.

    The documents are built from ``CODEMETA_KEYS``. Nothing is asserted
    on the result: the test passes as long as ``translate`` does not
    raise.
    """
    # Serialize to JSON and hand the encoded bytes to the mapping.
    self.codemeta_mapping.translate(json.dumps(doc).encode())
@given(json_document_strategy(
    keys=list(MAPPINGS['MavenMapping'].mapping)))
def test_maven_adversarial(self, doc):
    """Feed generated XML documents to the Maven mapping.

    The generated document is wrapped in a ``project`` root element and
    serialized with ``xmltodict``. Nothing is asserted on the result:
    the test passes as long as ``translate`` does not raise.
    """
    pom = {'project': doc}
    self.maven_mapping.translate(xmltodict.unparse(pom, pretty=True))
@given(strategies.dictionaries(
    # keys: arbitrary single characters, plus every attribute name
    # listed in the Gemspec mapping table
    strategies.one_of(
        strategies.characters(),
        *map(strategies.just, MAPPINGS['GemspecMapping'].mapping)
    ),
    # values: a single character, or nested non-empty lists of them.
    # min_size is passed by keyword: recent Hypothesis releases reject
    # positional size arguments to lists().
    strategies.recursive(
        strategies.characters(),
        lambda children: strategies.lists(children, min_size=1)
    )
))
def test_gemspec_adversarial(self, doc):
    """Feed generated gemspec files to the Gemspec mapping.

    Each generated dictionary is rendered as a
    ``Gem::Specification.new`` block with one ``s.<key> = <value>``
    line per item. Nothing is asserted on the result: the test passes
    as long as ``translate`` does not raise.
    """
    parts = ['Gem::Specification.new do |s|\n']
    parts.extend(
        ' s.{} = {}\n'.format(k, repr(v)) for (k, v) in doc.items())
    parts.append('end\n')
    # This document is a gemspec, so it must go through the Gemspec
    # mapping (it was previously sent to the Maven mapping by mistake).
    # NOTE(review): the document is passed as str, while the other
    # tests in view pass bytes -- confirm translate() accepts str.
    self.gemspec_mapping.translate(''.join(parts))
def test_revision_metadata_indexer(self): | def test_revision_metadata_indexer(self): | ||||
metadata_indexer = RevisionMetadataIndexer( | metadata_indexer = RevisionMetadataIndexer( | ||||
config=REVISION_METADATA_CONFIG) | config=REVISION_METADATA_CONFIG) | ||||
fill_obj_storage(metadata_indexer.objstorage) | fill_obj_storage(metadata_indexer.objstorage) | ||||
fill_storage(metadata_indexer.storage) | fill_storage(metadata_indexer.storage) | ||||
tool = metadata_indexer.idx_storage.indexer_configuration_get( | tool = metadata_indexer.idx_storage.indexer_configuration_get( | ||||
{'tool_'+k: v for (k, v) in TRANSLATOR_TOOL.items()}) | {'tool_'+k: v for (k, v) in TRANSLATOR_TOOL.items()}) | ||||
▲ Show 20 Lines • Show All 60 Lines • Show Last 20 Lines |