Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/origin_head.py
# Copyright (C) 2018 The Software Heritage developers | # Copyright (C) 2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import re | import re | ||||
import click | import click | ||||
import logging | import logging | ||||
from swh.scheduler import get_scheduler | from swh.scheduler import get_scheduler | ||||
from swh.scheduler.utils import create_task_dict | from swh.scheduler.utils import create_task_dict | ||||
from swh.indexer.indexer import OriginIndexer | from swh.indexer.indexer import OriginIndexer | ||||
from swh.model.hashutil import hash_to_hex | |||||
class OriginHeadIndexer(OriginIndexer): | class OriginHeadIndexer(OriginIndexer): | ||||
"""Origin-level indexer. | """Origin-level indexer. | ||||
This indexer is in charge of looking up the revision that acts as the | This indexer is in charge of looking up the revision that acts as the | ||||
"head" of an origin. | "head" of an origin. | ||||
In git, this is usually the commit pointed to by the 'master' branch.""" | In git, this is usually the commit pointed to by the 'master' branch.""" | ||||
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | def next_step(self, results, task): | ||||
# Second task to run after this one: copy the revision's metadata | # Second task to run after this one: copy the revision's metadata | ||||
# to the origin | # to the origin | ||||
sub_task = create_task_dict( | sub_task = create_task_dict( | ||||
origin_intrinsic_metadata_task, | origin_intrinsic_metadata_task, | ||||
'oneshot', | 'oneshot', | ||||
origin_head={ | origin_head={ | ||||
str(result['origin_id']): | str(result['origin_id']): | ||||
result['revision_id'].decode() | hash_to_hex(result['revision_id']) | ||||
for result in results}, | for result in results}, | ||||
policy_update='update-dups', | policy_update='update-dups', | ||||
) | ) | ||||
del sub_task['next_run'] # Not json-serializable | del sub_task['next_run'] # Not json-serializable | ||||
# First task to run after this one: index the metadata of the | # First task to run after this one: index the metadata of the | ||||
# revision | # revision | ||||
task = create_task_dict( | task = create_task_dict( | ||||
revision_metadata_task, | revision_metadata_task, | ||||
'oneshot', | 'oneshot', | ||||
ids=[res['revision_id'].decode() for res in results], | ids=[hash_to_hex(res['revision_id']) for res in results], | ||||
policy_update='update-dups', | policy_update='update-dups', | ||||
next_step=sub_task, | next_step=sub_task, | ||||
) | ) | ||||
if getattr(self, 'scheduler', None): | if getattr(self, 'scheduler', None): | ||||
scheduler = self.scheduler | scheduler = self.scheduler | ||||
else: | else: | ||||
scheduler = get_scheduler(**self.config['scheduler']) | scheduler = get_scheduler(**self.config['scheduler']) | ||||
scheduler.create_tasks([task]) | scheduler.create_tasks([task]) | ||||
▲ Show 20 Lines • Show All 128 Lines • Show Last 20 Lines |