Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata.py
Show First 20 Lines • Show All 158 Lines • ▼ Show 20 Lines | def index(self, rev): | ||||
dict: dictionary representing a revision_metadata, with keys: | dict: dictionary representing a revision_metadata, with keys: | ||||
- id (str): rev's identifier (sha1_git) | - id (str): rev's identifier (sha1_git) | ||||
- indexer_configuration_id (bytes): tool used | - indexer_configuration_id (bytes): tool used | ||||
- translated_metadata: dict of retrieved metadata | - translated_metadata: dict of retrieved metadata | ||||
""" | """ | ||||
result = { | result = { | ||||
'id': rev['id'].decode(), | 'id': rev['id'], | ||||
'indexer_configuration_id': self.tool['id'], | 'indexer_configuration_id': self.tool['id'], | ||||
'translated_metadata': None | 'translated_metadata': None | ||||
} | } | ||||
try: | try: | ||||
root_dir = rev['directory'] | root_dir = rev['directory'] | ||||
dir_ls = self.storage.directory_ls(root_dir, recursive=False) | dir_ls = self.storage.directory_ls(root_dir, recursive=False) | ||||
files = [entry for entry in dir_ls if entry['type'] == 'file'] | files = [entry for entry in dir_ls if entry['type'] == 'file'] | ||||
▲ Show 20 Lines • Show All 106 Lines • ▼ Show 20 Lines | def filter(self, ids): | ||||
return ids | return ids | ||||
def run(self, origin_head, policy_update): | def run(self, origin_head, policy_update): | ||||
"""Expected to be called with the result of RevisionMetadataIndexer | """Expected to be called with the result of RevisionMetadataIndexer | ||||
as first argument; ie. not a list of ids as other indexers would. | as first argument; ie. not a list of ids as other indexers would. | ||||
Args: | Args: | ||||
* `origin_head` (dict): {str(origin_id): rev_id.encode()} | * `origin_head` (dict): {str(origin_id): rev_id} | ||||
keys `origin_id` and `revision_id`, which is the result | keys `origin_id` and `revision_id`, which is the result | ||||
of OriginHeadIndexer. | of OriginHeadIndexer. | ||||
* `policy_update`: `'ignore-dups'` or `'update-dups'` | * `policy_update`: `'ignore-dups'` or `'update-dups'` | ||||
""" | """ | ||||
origin_head_map = {int(origin_id): rev_id | origin_head_map = {int(origin_id): hashutil.hash_to_bytes(rev_id) | ||||
for (origin_id, rev_id) in origin_head.items()} | for (origin_id, rev_id) in origin_head.items()} | ||||
# Fix up the argument order. revisions_metadata has to be the | # Fix up the argument order. revisions_metadata has to be the | ||||
# first argument because of celery.chain; the next line calls | # first argument because of celery.chain; the next line calls | ||||
# run() with the usual order, ie. origin ids first. | # run() with the usual order, ie. origin ids first. | ||||
return super().run(ids=list(origin_head_map), | return super().run(ids=list(origin_head_map), | ||||
policy_update=policy_update, | policy_update=policy_update, | ||||
parse_ids=False, | parse_ids=False, | ||||
Show All 37 Lines |