Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata.py
# Copyright (C) 2017 The Software Heritage developers | # Copyright (C) 2017 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import click | import click | ||||
import logging | import logging | ||||
from swh.indexer.indexer import ContentIndexer, RevisionIndexer, OriginIndexer | from swh.indexer.indexer import ContentIndexer, RevisionIndexer, OriginIndexer | ||||
from swh.indexer.metadata_dictionary import compute_metadata | from swh.indexer.metadata_dictionary import MAPPINGS | ||||
from swh.indexer.metadata_detector import detect_metadata | from swh.indexer.metadata_detector import detect_metadata | ||||
from swh.indexer.metadata_detector import extract_minimal_metadata_dict | from swh.indexer.metadata_detector import extract_minimal_metadata_dict | ||||
from swh.indexer.storage import INDEXER_CFG_KEY | from swh.indexer.storage import INDEXER_CFG_KEY | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
class ContentMetadataIndexer(ContentIndexer): | class ContentMetadataIndexer(ContentIndexer): | ||||
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines | def index(self, id, data): | ||||
""" | """ | ||||
result = { | result = { | ||||
'id': id, | 'id': id, | ||||
'indexer_configuration_id': self.tool['id'], | 'indexer_configuration_id': self.tool['id'], | ||||
'translated_metadata': None | 'translated_metadata': None | ||||
} | } | ||||
try: | try: | ||||
context = self.tool['tool_configuration']['context'] | mapping_name = self.tool['tool_configuration']['context'] | ||||
result['translated_metadata'] = compute_metadata(context, data) | result['translated_metadata'] = MAPPINGS[mapping_name] \ | ||||
.translate(data) | |||||
# a twisted way to keep result with indexer object for get_results | # a twisted way to keep result with indexer object for get_results | ||||
self.results.append(result) | self.results.append(result) | ||||
except Exception: | except Exception: | ||||
self.log.exception( | self.log.exception( | ||||
"Problem during tool retrieval of metadata translation") | "Problem during tool retrieval of metadata translation") | ||||
return result | return result | ||||
def persist_index_computations(self, results, policy_update): | def persist_index_computations(self, results, policy_update): | ||||
Show All 36 Lines | class RevisionMetadataIndexer(RevisionIndexer): | ||||
- store the results for revision | - store the results for revision | ||||
""" | """ | ||||
CONFIG_BASE_FILENAME = 'indexer/metadata' | CONFIG_BASE_FILENAME = 'indexer/metadata' | ||||
ADDITIONAL_CONFIG = { | ADDITIONAL_CONFIG = { | ||||
'tools': ('dict', { | 'tools': ('dict', { | ||||
'name': 'swh-metadata-detector', | 'name': 'swh-metadata-detector', | ||||
'version': '0.0.1', | 'version': '0.0.2', | ||||
'configuration': { | 'configuration': { | ||||
'type': 'local', | 'type': 'local', | ||||
'context': ['npm', 'codemeta'] | 'context': ['NpmMapping', 'CodemetaMapping'] | ||||
}, | }, | ||||
}), | }), | ||||
} | } | ||||
ContentMetadataIndexer = ContentMetadataIndexer | ContentMetadataIndexer = ContentMetadataIndexer | ||||
def prepare(self): | def prepare(self): | ||||
super().prepare() | super().prepare() | ||||
▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines | def translate_revision_metadata(self, detected_files): | ||||
Returns: | Returns: | ||||
dict: dict with translated metadata according to the CodeMeta | dict: dict with translated metadata according to the CodeMeta | ||||
vocabulary | vocabulary | ||||
""" | """ | ||||
translated_metadata = [] | translated_metadata = [] | ||||
tool = { | tool = { | ||||
'name': 'swh-metadata-translator', | 'name': 'swh-metadata-translator', | ||||
'version': '0.0.1', | 'version': '0.0.2', | ||||
'configuration': { | 'configuration': { | ||||
'type': 'local', | 'type': 'local', | ||||
'context': None | 'context': None | ||||
}, | }, | ||||
} | } | ||||
# TODO: iterate on each context, on each file | # TODO: iterate on each context, on each file | ||||
# -> get raw_contents | # -> get raw_contents | ||||
# -> translate each content | # -> translate each content | ||||
▲ Show 20 Lines • Show All 111 Lines • Show Last 20 Lines |