Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata.py
# Copyright (C) 2017-2018 The Software Heritage developers | # Copyright (C) 2017-2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from copy import deepcopy | from copy import deepcopy | ||||
from swh.core.utils import grouper | from swh.core.utils import grouper | ||||
from swh.indexer.codemeta import merge_documents | |||||
from swh.indexer.indexer import ContentIndexer, RevisionIndexer, OriginIndexer | from swh.indexer.indexer import ContentIndexer, RevisionIndexer, OriginIndexer | ||||
from swh.indexer.origin_head import OriginHeadIndexer | from swh.indexer.origin_head import OriginHeadIndexer | ||||
from swh.indexer.metadata_dictionary import MAPPINGS | from swh.indexer.metadata_dictionary import MAPPINGS | ||||
from swh.indexer.metadata_detector import detect_metadata | from swh.indexer.metadata_detector import detect_metadata | ||||
from swh.indexer.metadata_detector import extract_minimal_metadata_dict | |||||
from swh.indexer.storage import INDEXER_CFG_KEY | from swh.indexer.storage import INDEXER_CFG_KEY | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
REVISION_GET_BATCH_SIZE = 10 | REVISION_GET_BATCH_SIZE = 10 | ||||
ORIGIN_GET_BATCH_SIZE = 10 | ORIGIN_GET_BATCH_SIZE = 10 | ||||
▲ Show 20 Lines • Show All 235 Lines • ▼ Show 20 Lines | def translate_revision_intrinsic_metadata( | ||||
for result in c_metadata_indexer.results: | for result in c_metadata_indexer.results: | ||||
local_metadata = result['metadata'] | local_metadata = result['metadata'] | ||||
metadata.append(local_metadata) | metadata.append(local_metadata) | ||||
except Exception: | except Exception: | ||||
self.log.exception( | self.log.exception( | ||||
"Exception while indexing metadata on contents") | "Exception while indexing metadata on contents") | ||||
# transform metadata into min set with swh-metadata-detector | metadata = merge_documents(metadata) | ||||
min_metadata = extract_minimal_metadata_dict(metadata) | return (used_mappings, metadata) | ||||
return (used_mappings, min_metadata) | |||||
class OriginMetadataIndexer(OriginIndexer): | class OriginMetadataIndexer(OriginIndexer): | ||||
ADDITIONAL_CONFIG = RevisionMetadataIndexer.ADDITIONAL_CONFIG | ADDITIONAL_CONFIG = RevisionMetadataIndexer.ADDITIONAL_CONFIG | ||||
USE_TOOLS = False | USE_TOOLS = False | ||||
def __init__(self, config=None, **kwargs): | def __init__(self, config=None, **kwargs): | ||||
▲ Show 20 Lines • Show All 81 Lines • Show Last 20 Lines |