Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_origin_metadata.py
- This file was added.
# Copyright (C) 2018 The Software Heritage developers | |||||
# See the AUTHORS file at the top-level directory of this distribution | |||||
# License: GNU General Public License version 3, or any later version | |||||
# See top-level LICENSE file for more information | |||||
import logging | |||||
import unittest | |||||
from celery import task | |||||
from swh.indexer.metadata import OriginMetadataIndexer | |||||
from swh.indexer.tests.test_utils import MockObjStorage, MockStorage | |||||
from swh.indexer.tests.test_utils import MockIndexerStorage | |||||
from swh.indexer.tests.test_origin_head import TestOriginHeadIndexer | |||||
from swh.indexer.tests.test_metadata import TestRevisionMetadataIndexer | |||||
from . import start_worker_thread | |||||
class TestOriginMetadataIndexer(OriginMetadataIndexer):
    """OriginMetadataIndexer whose collaborators are replaced by mocks."""

    def prepare(self):
        """Install a hard-coded configuration and mock storages.

        The 'storage' entry of the configuration describes a remote
        storage, but the storage, indexer storage and object storage
        attributes are all set to Mock* instances.  The single tool
        description is registered through ``register_tools`` and the
        first registered tool is kept as ``self.tool``.
        """
        storage_settings = {
            'cls': 'remote',
            'args': {
                'url': 'http://localhost:9999',
            },
        }
        # NOTE(review, moranegg): it is not settled which tool should be
        # used for the origin_intrinsic_metadata.
        tool_settings = {
            'name': 'origin-metadata',
            'version': '0.0.1',
            'configuration': {},
        }
        self.config = {
            'storage': storage_settings,
            'tools': tool_settings,
        }

        self.storage = MockStorage()
        self.idx_storage = MockIndexerStorage()
        self.log = logging.getLogger('swh.indexer')
        self.objstorage = MockObjStorage()
        self.destination_task = None

        registered_tools = self.register_tools(self.config['tools'])
        self.tools = registered_tools
        self.tool = registered_tools[0]
        self.results = []
@task
def test_revision_metadata_task(*args, **kwargs):
    """Run a TestRevisionMetadataIndexer and return its results.

    Every positional and keyword argument is forwarded unchanged to the
    indexer's ``run`` method.
    """
    revision_indexer = TestRevisionMetadataIndexer()
    revision_indexer.run(*args, **kwargs)
    return revision_indexer.results
@task
def test_origin_intrinsic_metadata_task(*args, **kwargs):
    """Run a TestOriginMetadataIndexer and return its results.

    Every positional and keyword argument is forwarded unchanged to the
    indexer's ``run`` method.
    """
    origin_indexer = TestOriginMetadataIndexer()
    origin_indexer.run(*args, **kwargs)
    return origin_indexer.results
class TestOriginHeadIndexer(TestOriginHeadIndexer):
    """Imported TestOriginHeadIndexer bound to this module's test tasks.

    The class deliberately reuses the name of its imported base class;
    only the two task attributes below are overridden.
    """

    origin_intrinsic_metadata_task = test_origin_intrinsic_metadata_task
    revision_metadata_task = test_revision_metadata_task
class TestOriginMetadata(unittest.TestCase):
    """Checks the data written to the mock indexer storage by the pipeline."""

    def setUp(self):
        """Disable diff truncation and reset the data recorded by the mock."""
        self.maxDiff = None
        MockIndexerStorage.added_data = []

    def test_pipeline(self):
        """Run the origin head indexer and compare the recorded additions."""
        indexer = TestOriginHeadIndexer()
        origin_urls = ["git+https://github.com/moranegg/metadata_test"]
        with start_worker_thread():
            promise = indexer.run(
                origin_urls,
                policy_update='update-dups',
                parse_ids=True)
            promise.get()

        translated_metadata = {
            'identifier': None,
            'maintainer': None,
            'url': [
                'https://github.com/librariesio/yarn-parser#readme'
            ],
            'codeRepository': [{
                'type': 'git',
                'url': 'git+https://github.com/librariesio/yarn-parser.git'
            }],
            'author': ['Andrew Nesbitt'],
            'license': ['AGPL-3.0'],
            'version': ['1.0.0'],
            'description': [
                'Tiny web service for parsing yarn.lock files'
            ],
            'relatedLink': None,
            'developmentStatus': None,
            'operatingSystem': None,
            'issueTracker': [{
                'url': 'https://github.com/librariesio/yarn-parser/issues'
            }],
            'softwareRequirements': [{
                'express': '^4.14.0',
                'yarn': '^0.21.0',
                'body-parser': '^1.15.2'
            }],
            'name': ['yarn-parser'],
            'keywords': [['yarn', 'parse', 'lock', 'dependencies']],
            'email': None
        }
        # NOTE(review, moranegg): it is important to specify what type of
        # object this metadata describes; if this is the expected result of
        # an origin_intrinsic_metadata, some information is missing.
        metadata = {
            'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
            'translated_metadata': translated_metadata,
            'indexer_configuration_id': 7
        }

        # NOTE(review, moranegg): both entries currently carry the same
        # result, but they should not.
        storage_endpoints = ('origin_intrinsic_metadata', 'revision_metadata')
        expected_results = [
            (endpoint, True, [metadata]) for endpoint in storage_endpoints
        ]

        results = list(indexer.idx_storage.added_data)
        self.assertCountEqual(expected_results, results)
I'm not sure what tool should be used for the origin_intrinsic_metadata.
The tool used for the revision is a file detector and a metadata translator to CodeMeta.
In the future we might want to use a different vocabulary, a different minimal set, and maybe add files to the file detection procedure,
so a revision can be indexed with different tools and have different results.
That's why we said it is important to have the exact revision_metadata entry identified in the origin_intrinsic_metadata table.
I'm not sure whether this should be done with the tuple <revision_id, revision_tool_id> or by creating a revision_metadata object_id, because at the moment there is no identifier on a revision_metadata entry.