Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/origin_head.py
# Copyright (C) 2018 The Software Heritage developers | # Copyright (C) 2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import re | import re | ||||
import click | import click | ||||
import logging | import logging | ||||
from celery import chain | |||||
from swh.indexer.indexer import OriginIndexer | from swh.indexer.indexer import OriginIndexer | ||||
from swh.indexer.tasks import SWHOriginMetadataTask, SWHRevisionMetadataTask | |||||
class OriginHeadIndexer(OriginIndexer): | class OriginHeadIndexer(OriginIndexer): | ||||
"""Origin-level indexer. | """Origin-level indexer. | ||||
This indexer is in charge of looking up the revision that acts as the | This indexer is in charge of looking up the revision that acts as the | ||||
"head" of an origin. | "head" of an origin. | ||||
In git, this is usually the commit pointed to by the 'master' branch.""" | In git, this is usually the commit pointed to by the 'master' branch.""" | ||||
ADDITIONAL_CONFIG = { | ADDITIONAL_CONFIG = { | ||||
'tools': ('dict', { | 'tools': ('dict', { | ||||
'name': 'origin-metadata', | 'name': 'origin-metadata', | ||||
'version': '0.0.1', | 'version': '0.0.1', | ||||
'configuration': {}, | 'configuration': {}, | ||||
}), | }), | ||||
} | } | ||||
CONFIG_BASE_FILENAME = 'indexer/origin_head' | |||||
revision_metadata_task = SWHRevisionMetadataTask() | |||||
origin_intrinsic_metadata_task = SWHOriginMetadataTask() | |||||
def filter(self, ids): | def filter(self, ids): | ||||
yield from ids | yield from ids | ||||
def persist_index_computations(self, results, policy_update): | def persist_index_computations(self, results, policy_update): | ||||
"""Do nothing. The indexer's results are not persistant, they | """Do nothing. The indexer's results are not persistant, they | ||||
should only be piped to another indexer via the orchestrator.""" | should only be piped to another indexer via the orchestrator.""" | ||||
pass | pass | ||||
def next_step(self, results): | |||||
"""Once the head is found, call the RevisionMetadataIndexer | |||||
on these revisions, then call the OriginMetadataIndexer with | |||||
both the origin_id and the revision metadata, so it can copy the | |||||
revision metadata to the origin's metadata. | |||||
Args: | |||||
results (Iterable[dict]): Iterable of return values from `index`. | |||||
""" | |||||
if self.revision_metadata_task is None or \ | |||||
self.origin_intrinsic_metadata_task is None: | |||||
return | |||||
assert self.revision_metadata_task is not None | |||||
assert self.origin_intrinsic_metadata_task is not None | |||||
return chain( | |||||
self.revision_metadata_task.s( | |||||
ids=[res['revision_id'] for res in results], | |||||
policy_update='update-dups'), | |||||
self.origin_intrinsic_metadata_task.s( | |||||
origin_head_pairs=results, | |||||
policy_update='update-dups'), | |||||
)() | |||||
# Dispatch | # Dispatch | ||||
def index(self, origin): | def index(self, origin): | ||||
origin_id = origin['id'] | origin_id = origin['id'] | ||||
latest_snapshot = self.storage.snapshot_get_latest(origin_id) | latest_snapshot = self.storage.snapshot_get_latest(origin_id) | ||||
method = getattr(self, '_try_get_%s_head' % origin['type'], None) | method = getattr(self, '_try_get_%s_head' % origin['type'], None) | ||||
if method is None: | if method is None: | ||||
method = self._try_get_head_generic | method = self._try_get_head_generic | ||||
▲ Show 20 Lines • Show All 119 Lines • Show Last 20 Lines |