Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_metadata.py
# Copyright (C) 2017-2018 The Software Heritage developers | # Copyright (C) 2017-2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import unittest | import unittest | ||||
from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS | from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS | ||||
from swh.indexer.metadata_detector import detect_metadata | from swh.indexer.metadata_detector import detect_metadata | ||||
from swh.indexer.metadata_detector import extract_minimal_metadata_dict | from swh.indexer.metadata_detector import extract_minimal_metadata_dict | ||||
from swh.indexer.metadata import ContentMetadataIndexer | from swh.indexer.metadata import ContentMetadataIndexer | ||||
from swh.indexer.metadata import RevisionMetadataIndexer | from swh.indexer.metadata import RevisionMetadataIndexer | ||||
from swh.indexer.tests.test_utils import MockObjStorage, MockStorage | |||||
from swh.indexer.tests.test_utils import MockIndexerStorage | |||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from .test_utils import BASE_TEST_CONFIG | from .test_utils import BASE_TEST_CONFIG, fill_obj_storage, fill_storage | ||||
TRANSLATOR_TOOL = { | |||||
'name': 'swh-metadata-translator', | |||||
'version': '0.0.2', | |||||
'configuration': { | |||||
'type': 'local', | |||||
'context': 'NpmMapping' | |||||
} | |||||
} | |||||
class ContentMetadataTestIndexer(ContentMetadataIndexer): | class ContentMetadataTestIndexer(ContentMetadataIndexer): | ||||
"""Specific Metadata whose configuration is enough to satisfy the | """Specific Metadata whose configuration is enough to satisfy the | ||||
indexing tests. | indexing tests. | ||||
""" | """ | ||||
def parse_config_file(self, *args, **kwargs): | def parse_config_file(self, *args, **kwargs): | ||||
assert False, 'should not be called; the rev indexer configures it.' | assert False, 'should not be called; the rev indexer configures it.' | ||||
def prepare(self): | def prepare(self): | ||||
super().prepare() | super().prepare() | ||||
self.objstorage = MockObjStorage() | |||||
self.idx_storage = MockIndexerStorage() | |||||
class RevisionMetadataTestIndexer(RevisionMetadataIndexer): | class RevisionMetadataTestIndexer(RevisionMetadataIndexer): | ||||
"""Specific indexer whose configuration is enough to satisfy the | """Specific indexer whose configuration is enough to satisfy the | ||||
indexing tests. | indexing tests. | ||||
""" | """ | ||||
ContentMetadataIndexer = ContentMetadataTestIndexer | ContentMetadataIndexer = ContentMetadataTestIndexer | ||||
def parse_config_file(self, *args, **kwargs): | def parse_config_file(self, *args, **kwargs): | ||||
return { | return { | ||||
**BASE_TEST_CONFIG, | **BASE_TEST_CONFIG, | ||||
'tools': { | 'tools': TRANSLATOR_TOOL, | ||||
'name': 'swh-metadata-detector', | |||||
'version': '0.0.2', | |||||
'configuration': { | |||||
'type': 'local', | |||||
'context': 'NpmMapping' | |||||
} | |||||
} | |||||
} | } | ||||
def prepare(self): | def prepare(self): | ||||
super().prepare() | super().prepare() | ||||
self.storage = MockStorage() | |||||
self.idx_storage = MockIndexerStorage() | |||||
self.objstorage = MockObjStorage() | |||||
self.tools = list(self.register_tools(self.config['tools'])) | self.tools = list(self.register_tools(self.config['tools'])) | ||||
class Metadata(unittest.TestCase): | class Metadata(unittest.TestCase): | ||||
""" | """ | ||||
Tests metadata_mock_tool tool for Metadata detection | Tests metadata_mock_tool tool for Metadata detection | ||||
""" | """ | ||||
def setUp(self): | def setUp(self): | ||||
""" | """ | ||||
shows the entire diff in the results | shows the entire diff in the results | ||||
""" | """ | ||||
self.maxDiff = None | self.maxDiff = None | ||||
self.content_tool = { | |||||
'name': 'swh-metadata-translator', | |||||
'version': '0.0.2', | |||||
'configuration': { | |||||
'type': 'local', | |||||
'context': 'NpmMapping' | |||||
} | |||||
} | |||||
MockIndexerStorage.added_data = [] | |||||
def test_crosstable(self): | def test_crosstable(self): | ||||
self.assertEqual(CROSSWALK_TABLE['NodeJS'], { | self.assertEqual(CROSSWALK_TABLE['NodeJS'], { | ||||
'repository': 'http://schema.org/codeRepository', | 'repository': 'http://schema.org/codeRepository', | ||||
'os': 'http://schema.org/operatingSystem', | 'os': 'http://schema.org/operatingSystem', | ||||
'cpu': 'http://schema.org/processorRequirements', | 'cpu': 'http://schema.org/processorRequirements', | ||||
'engines': | 'engines': | ||||
'http://schema.org/processorRequirements', | 'http://schema.org/processorRequirements', | ||||
▲ Show 20 Lines • Show All 108 Lines • ▼ Show 20 Lines | class Metadata(unittest.TestCase): | ||||
def test_index_content_metadata_npm(self): | def test_index_content_metadata_npm(self): | ||||
""" | """ | ||||
testing NPM with package.json | testing NPM with package.json | ||||
- one sha1 uses a file that can't be translated to metadata and | - one sha1 uses a file that can't be translated to metadata and | ||||
should return None in the translated metadata | should return None in the translated metadata | ||||
""" | """ | ||||
# given | # given | ||||
sha1s = ['26a9f72a7c87cc9205725cfd879f514ff4f3d8d5', | sha1s = [ | ||||
'd4c647f0fc257591cc9ba1722484229780d1c607', | hash_to_bytes('26a9f72a7c87cc9205725cfd879f514ff4f3d8d5'), | ||||
'02fb2c89e14f7fab46701478c83779c7beb7b069'] | hash_to_bytes('d4c647f0fc257591cc9ba1722484229780d1c607'), | ||||
hash_to_bytes('02fb2c89e14f7fab46701478c83779c7beb7b069'), | |||||
] | |||||
# this metadata indexer computes only metadata for package.json | # this metadata indexer computes only metadata for package.json | ||||
# in npm context with a hard mapping | # in npm context with a hard mapping | ||||
metadata_indexer = ContentMetadataTestIndexer( | metadata_indexer = ContentMetadataTestIndexer( | ||||
tool=self.content_tool, config=BASE_TEST_CONFIG.copy()) | tool=TRANSLATOR_TOOL, config=BASE_TEST_CONFIG.copy()) | ||||
fill_obj_storage(metadata_indexer.objstorage) | |||||
fill_storage(metadata_indexer.storage) | |||||
# when | # when | ||||
metadata_indexer.run(sha1s, policy_update='ignore-dups') | metadata_indexer.run(sha1s, policy_update='ignore-dups') | ||||
results = metadata_indexer.idx_storage.added_data | results = list(metadata_indexer.idx_storage.content_metadata_get( | ||||
sha1s)) | |||||
expected_results = [('content_metadata', False, [{ | expected_results = [{ | ||||
'translated_metadata': { | 'translated_metadata': { | ||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | ||||
'type': 'SoftwareSourceCode', | 'type': 'SoftwareSourceCode', | ||||
'schema:codeRepository': | 'schema:codeRepository': | ||||
'git+https://github.com/moranegg/metadata_test', | 'git+https://github.com/moranegg/metadata_test', | ||||
'description': 'Simple package.json test for indexer', | 'description': 'Simple package.json test for indexer', | ||||
'name': 'test_metadata', | 'name': 'test_metadata', | ||||
'version': '0.0.1' | 'version': '0.0.1' | ||||
}, | }, | ||||
'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5' | 'id': hash_to_bytes('26a9f72a7c87cc9205725cfd879f514ff4f3d8d5') | ||||
}, { | }, { | ||||
'translated_metadata': { | 'translated_metadata': { | ||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | ||||
'type': 'SoftwareSourceCode', | 'type': 'SoftwareSourceCode', | ||||
'codemeta:issueTracker': | 'codemeta:issueTracker': | ||||
'https://github.com/npm/npm/issues', | 'https://github.com/npm/npm/issues', | ||||
'schema:author': { | 'schema:author': { | ||||
'type': 'Person', | 'type': 'Person', | ||||
Show All 10 Lines | def test_index_content_metadata_npm(self): | ||||
'keywords': [ | 'keywords': [ | ||||
'install', | 'install', | ||||
'modules', | 'modules', | ||||
'package manager', | 'package manager', | ||||
'package.json' | 'package.json' | ||||
], | ], | ||||
'schema:url': 'https://docs.npmjs.com/' | 'schema:url': 'https://docs.npmjs.com/' | ||||
}, | }, | ||||
'id': 'd4c647f0fc257591cc9ba1722484229780d1c607' | 'id': hash_to_bytes('d4c647f0fc257591cc9ba1722484229780d1c607') | ||||
}, { | }, { | ||||
'translated_metadata': None, | 'translated_metadata': None, | ||||
'id': '02fb2c89e14f7fab46701478c83779c7beb7b069' | 'id': hash_to_bytes('02fb2c89e14f7fab46701478c83779c7beb7b069') | ||||
}])] | }] | ||||
for result in results: | for result in results: | ||||
metadata = result[2] | del result['tool'] | ||||
for item in metadata: | |||||
del item['indexer_configuration_id'] | |||||
# The assertion below returns False sometimes because of nested lists | # The assertion below returns False sometimes because of nested lists | ||||
self.assertEqual(expected_results, results) | self.assertEqual(expected_results, results) | ||||
def test_detect_metadata_package_json(self): | def test_detect_metadata_package_json(self): | ||||
# given | # given | ||||
df = [{ | df = [{ | ||||
'sha1_git': b'abc', | 'sha1_git': b'abc', | ||||
▲ Show 20 Lines • Show All 170 Lines • ▼ Show 20 Lines | def test_compute_metadata_maven(self): | ||||
'schema:identifier': 'com.mycompany.app', | 'schema:identifier': 'com.mycompany.app', | ||||
'version': '1.2.3', | 'version': '1.2.3', | ||||
'schema:codeRepository': | 'schema:codeRepository': | ||||
'http://repo1.maven.org/maven2/com/mycompany/app/my-app', | 'http://repo1.maven.org/maven2/com/mycompany/app/my-app', | ||||
}) | }) | ||||
def test_revision_metadata_indexer(self): | def test_revision_metadata_indexer(self): | ||||
metadata_indexer = RevisionMetadataTestIndexer() | metadata_indexer = RevisionMetadataTestIndexer() | ||||
fill_obj_storage(metadata_indexer.objstorage) | |||||
fill_storage(metadata_indexer.storage) | |||||
tool = metadata_indexer.idx_storage.indexer_configuration_get( | |||||
{'tool_'+k: v for (k, v) in TRANSLATOR_TOOL.items()}) | |||||
assert tool is not None | |||||
metadata_indexer.idx_storage.content_metadata_add([{ | |||||
'indexer_configuration_id': tool['id'], | |||||
'id': b'cde', | |||||
'translated_metadata': { | |||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | |||||
'type': 'SoftwareSourceCode', | |||||
'codemeta:issueTracker': | |||||
'https://github.com/librariesio/yarn-parser/issues', | |||||
'version': '1.0.0', | |||||
'name': 'yarn-parser', | |||||
'schema:author': 'Andrew Nesbitt', | |||||
'url': | |||||
'https://github.com/librariesio/yarn-parser#readme', | |||||
'processorRequirements': {'node': '7.5'}, | |||||
'license': 'AGPL-3.0', | |||||
'keywords': ['yarn', 'parse', 'lock', 'dependencies'], | |||||
'schema:codeRepository': | |||||
'git+https://github.com/librariesio/yarn-parser.git', | |||||
'description': | |||||
'Tiny web service for parsing yarn.lock files', | |||||
} | |||||
}]) | |||||
sha1_gits = [ | sha1_gits = [ | ||||
hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), | hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), | ||||
] | ] | ||||
metadata_indexer.run(sha1_gits, 'update-dups') | metadata_indexer.run(sha1_gits, 'update-dups') | ||||
results = metadata_indexer.idx_storage.added_data | results = list(metadata_indexer.idx_storage.revision_metadata_get( | ||||
sha1_gits)) | |||||
expected_results = [('revision_metadata', True, [{ | expected_results = [{ | ||||
'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), | 'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), | ||||
'tool': TRANSLATOR_TOOL, | |||||
'translated_metadata': { | 'translated_metadata': { | ||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | ||||
'url': | 'url': | ||||
'https://github.com/librariesio/yarn-parser#readme', | 'https://github.com/librariesio/yarn-parser#readme', | ||||
'schema:codeRepository': | 'schema:codeRepository': | ||||
'git+https://github.com/librariesio/yarn-parser.git', | 'git+https://github.com/librariesio/yarn-parser.git', | ||||
'schema:author': 'Andrew Nesbitt', | 'schema:author': 'Andrew Nesbitt', | ||||
'license': 'AGPL-3.0', | 'license': 'AGPL-3.0', | ||||
'version': '1.0.0', | 'version': '1.0.0', | ||||
'description': | 'description': | ||||
'Tiny web service for parsing yarn.lock files', | 'Tiny web service for parsing yarn.lock files', | ||||
'codemeta:issueTracker': | 'codemeta:issueTracker': | ||||
'https://github.com/librariesio/yarn-parser/issues', | 'https://github.com/librariesio/yarn-parser/issues', | ||||
'name': 'yarn-parser', | 'name': 'yarn-parser', | ||||
'keywords': ['yarn', 'parse', 'lock', 'dependencies'], | 'keywords': ['yarn', 'parse', 'lock', 'dependencies'], | ||||
}, | }, | ||||
}])] | }] | ||||
for result in results: | for result in results: | ||||
metadata = result[2] | del result['tool']['id'] | ||||
for item in metadata: | |||||
del item['indexer_configuration_id'] | |||||
# then | # then | ||||
self.assertEqual(expected_results, results) | self.assertEqual(expected_results, results) |