Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_metadata.py
# Copyright (C) 2017-2018 The Software Heritage developers | # Copyright (C) 2017-2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import unittest | import unittest | ||||
import logging | |||||
from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS | from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS | ||||
from swh.indexer.metadata_detector import detect_metadata | from swh.indexer.metadata_detector import detect_metadata | ||||
from swh.indexer.metadata_detector import extract_minimal_metadata_dict | from swh.indexer.metadata_detector import extract_minimal_metadata_dict | ||||
from swh.indexer.metadata import ContentMetadataIndexer | from swh.indexer.metadata import ContentMetadataIndexer | ||||
from swh.indexer.metadata import RevisionMetadataIndexer | from swh.indexer.metadata import RevisionMetadataIndexer | ||||
from swh.indexer.tests.test_utils import MockObjStorage, MockStorage | from swh.indexer.tests.test_utils import MockObjStorage, MockStorage | ||||
from swh.indexer.tests.test_utils import MockIndexerStorage | from swh.indexer.tests.test_utils import MockIndexerStorage | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from .test_utils import BASE_TEST_CONFIG | |||||
class ContentMetadataTestIndexer(ContentMetadataIndexer): | class ContentMetadataTestIndexer(ContentMetadataIndexer): | ||||
"""Specific Metadata whose configuration is enough to satisfy the | """Specific Metadata whose configuration is enough to satisfy the | ||||
indexing tests. | indexing tests. | ||||
""" | """ | ||||
def parse_config_file(self, *args, **kwargs): | |||||
assert False, 'should not be called; the rev indexer configures it.' | |||||
def prepare(self): | def prepare(self): | ||||
self.idx_storage = MockIndexerStorage() | super().prepare() | ||||
self.log = logging.getLogger('swh.indexer') | |||||
self.objstorage = MockObjStorage() | self.objstorage = MockObjStorage() | ||||
self.tools = self.register_tools(self.config['tools']) | self.idx_storage = MockIndexerStorage() | ||||
self.tool = self.tools[0] | |||||
self.results = [] | |||||
class RevisionMetadataTestIndexer(RevisionMetadataIndexer): | class RevisionMetadataTestIndexer(RevisionMetadataIndexer): | ||||
"""Specific indexer whose configuration is enough to satisfy the | """Specific indexer whose configuration is enough to satisfy the | ||||
indexing tests. | indexing tests. | ||||
""" | """ | ||||
ContentMetadataIndexer = ContentMetadataTestIndexer | ContentMetadataIndexer = ContentMetadataTestIndexer | ||||
def prepare(self): | def parse_config_file(self, *args, **kwargs): | ||||
self.config = { | return { | ||||
'storage': {}, | **BASE_TEST_CONFIG, | ||||
'objstorage': {}, | |||||
'indexer_storage': {}, | |||||
'tools': { | 'tools': { | ||||
'name': 'swh-metadata-detector', | 'name': 'swh-metadata-detector', | ||||
'version': '0.0.2', | 'version': '0.0.2', | ||||
'configuration': { | 'configuration': { | ||||
'type': 'local', | 'type': 'local', | ||||
'context': 'NpmMapping' | 'context': 'NpmMapping' | ||||
} | } | ||||
} | } | ||||
} | } | ||||
def prepare(self): | |||||
super().prepare() | |||||
self.storage = MockStorage() | self.storage = MockStorage() | ||||
self.idx_storage = MockIndexerStorage() | self.idx_storage = MockIndexerStorage() | ||||
self.log = logging.getLogger('swh.indexer') | |||||
self.objstorage = MockObjStorage() | self.objstorage = MockObjStorage() | ||||
self.tools = self.register_tools(self.config['tools']) | self.tools = list(self.register_tools(self.config['tools'])) | ||||
self.tool = self.tools[0] | |||||
class Metadata(unittest.TestCase): | class Metadata(unittest.TestCase): | ||||
""" | """ | ||||
Tests metadata_mock_tool tool for Metadata detection | Tests metadata_mock_tool tool for Metadata detection | ||||
""" | """ | ||||
def setUp(self): | def setUp(self): | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 134 Lines • ▼ Show 20 Lines | def test_index_content_metadata_npm(self): | ||||
""" | """ | ||||
# given | # given | ||||
sha1s = ['26a9f72a7c87cc9205725cfd879f514ff4f3d8d5', | sha1s = ['26a9f72a7c87cc9205725cfd879f514ff4f3d8d5', | ||||
'd4c647f0fc257591cc9ba1722484229780d1c607', | 'd4c647f0fc257591cc9ba1722484229780d1c607', | ||||
'02fb2c89e14f7fab46701478c83779c7beb7b069'] | '02fb2c89e14f7fab46701478c83779c7beb7b069'] | ||||
# this metadata indexer computes only metadata for package.json | # this metadata indexer computes only metadata for package.json | ||||
# in npm context with a hard mapping | # in npm context with a hard mapping | ||||
metadata_indexer = ContentMetadataTestIndexer( | metadata_indexer = ContentMetadataTestIndexer( | ||||
tool=self.content_tool, config={}) | tool=self.content_tool, config=BASE_TEST_CONFIG.copy()) | ||||
# when | # when | ||||
metadata_indexer.run(sha1s, policy_update='ignore-dups') | metadata_indexer.run(sha1s, policy_update='ignore-dups') | ||||
results = metadata_indexer.idx_storage.added_data | results = metadata_indexer.idx_storage.added_data | ||||
expected_results = [('content_metadata', False, [{ | expected_results = [('content_metadata', False, [{ | ||||
'indexer_configuration_id': 30, | |||||
'translated_metadata': { | 'translated_metadata': { | ||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | ||||
'type': 'SoftwareSourceCode', | 'type': 'SoftwareSourceCode', | ||||
'schema:codeRepository': | 'schema:codeRepository': | ||||
'git+https://github.com/moranegg/metadata_test', | 'git+https://github.com/moranegg/metadata_test', | ||||
'description': 'Simple package.json test for indexer', | 'description': 'Simple package.json test for indexer', | ||||
'name': 'test_metadata', | 'name': 'test_metadata', | ||||
'version': '0.0.1' | 'version': '0.0.1' | ||||
}, | }, | ||||
'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5' | 'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5' | ||||
}, { | }, { | ||||
'indexer_configuration_id': 30, | |||||
'translated_metadata': { | 'translated_metadata': { | ||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | ||||
'type': 'SoftwareSourceCode', | 'type': 'SoftwareSourceCode', | ||||
'codemeta:issueTracker': | 'codemeta:issueTracker': | ||||
'https://github.com/npm/npm/issues', | 'https://github.com/npm/npm/issues', | ||||
'schema:author': { | 'schema:author': { | ||||
'type': 'Person', | 'type': 'Person', | ||||
'name': 'Isaac Z. Schlueter', | 'name': 'Isaac Z. Schlueter', | ||||
Show All 11 Lines | def test_index_content_metadata_npm(self): | ||||
'modules', | 'modules', | ||||
'package manager', | 'package manager', | ||||
'package.json' | 'package.json' | ||||
], | ], | ||||
'schema:url': 'https://docs.npmjs.com/' | 'schema:url': 'https://docs.npmjs.com/' | ||||
}, | }, | ||||
'id': 'd4c647f0fc257591cc9ba1722484229780d1c607' | 'id': 'd4c647f0fc257591cc9ba1722484229780d1c607' | ||||
}, { | }, { | ||||
'indexer_configuration_id': 30, | |||||
'translated_metadata': None, | 'translated_metadata': None, | ||||
'id': '02fb2c89e14f7fab46701478c83779c7beb7b069' | 'id': '02fb2c89e14f7fab46701478c83779c7beb7b069' | ||||
}])] | }])] | ||||
for result in results: | |||||
metadata = result[2] | |||||
for item in metadata: | |||||
del item['indexer_configuration_id'] | |||||
# The assertion below returns False sometimes because of nested lists | # The assertion below returns False sometimes because of nested lists | ||||
self.assertEqual(expected_results, results) | self.assertEqual(expected_results, results) | ||||
def test_detect_metadata_package_json(self): | def test_detect_metadata_package_json(self): | ||||
# given | # given | ||||
df = [{ | df = [{ | ||||
'sha1_git': b'abc', | 'sha1_git': b'abc', | ||||
'name': b'index.js', | 'name': b'index.js', | ||||
▲ Show 20 Lines • Show All 195 Lines • ▼ Show 20 Lines | def test_revision_metadata_indexer(self): | ||||
'version': '1.0.0', | 'version': '1.0.0', | ||||
'description': | 'description': | ||||
'Tiny web service for parsing yarn.lock files', | 'Tiny web service for parsing yarn.lock files', | ||||
'codemeta:issueTracker': | 'codemeta:issueTracker': | ||||
'https://github.com/librariesio/yarn-parser/issues', | 'https://github.com/librariesio/yarn-parser/issues', | ||||
'name': 'yarn-parser', | 'name': 'yarn-parser', | ||||
'keywords': ['yarn', 'parse', 'lock', 'dependencies'], | 'keywords': ['yarn', 'parse', 'lock', 'dependencies'], | ||||
}, | }, | ||||
'indexer_configuration_id': 7 | |||||
}])] | }])] | ||||
for result in results: | |||||
metadata = result[2] | |||||
for item in metadata: | |||||
del item['indexer_configuration_id'] | |||||
# then | # then | ||||
self.assertEqual(expected_results, results) | self.assertEqual(expected_results, results) |