Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata.py
Show First 20 Lines • Show All 297 Lines • ▼ Show 20 Lines | def index_list(self, origins): | ||||
'mappings': rev_metadata['mappings'], | 'mappings': rev_metadata['mappings'], | ||||
'indexer_configuration_id': | 'indexer_configuration_id': | ||||
rev_metadata['indexer_configuration_id'], | rev_metadata['indexer_configuration_id'], | ||||
} | } | ||||
results.append((orig_metadata, rev_metadata)) | results.append((orig_metadata, rev_metadata)) | ||||
return results | return results | ||||
def persist_index_computations(self, results, policy_update):
    """Store the computed revision and origin metadata in the indexer storage.

    Args:
        results: iterable of ``(orig_item, rev_item)`` pairs.
        policy_update: when equal to ``'update-dups'``, both storage calls
            are made with ``conflict_update=True``; any other value yields
            ``conflict_update=False``.

    """
    conflict_update = (policy_update == 'update-dups')

    # Deduplicate revisions and origins in a single pass over the results.
    # Membership is tested on lists (equality comparison) rather than sets
    # because the items appear to be dicts, which are not hashable.
    rev_metadata = []
    orig_metadata = []
    for (orig_item, rev_item) in results:
        if rev_item not in rev_metadata:
            rev_metadata.append(rev_item)
        # Each list must be checked against its own kind of item: testing
        # rev_item here would never match and let duplicate origins through.
        if orig_item not in orig_metadata:
            orig_metadata.append(orig_item)

    self.idx_storage.revision_metadata_add(
        rev_metadata, conflict_update=conflict_update)
    self.idx_storage.origin_intrinsic_metadata_add(
        orig_metadata, conflict_update=conflict_update)
# Command-line entry point: index the revisions whose sha1_git identifiers
# are given through --revs/-i. Documented with comments rather than a
# docstring so that the text click shows for --help is left unchanged.
@click.command()
@click.option('--revs', '-i',
              help='Default sha1_git to lookup', multiple=True)
def main(revs):
    # Convert every identifier with hashutil.hash_to_bytes before the
    # indexer is instantiated, keeping the original evaluation order.
    sha1_git_ids = [hashutil.hash_to_bytes(rev) for rev in revs]
    indexer = RevisionMetadataIndexer()
    indexer.run(sha1_git_ids, 'update-dups')


if __name__ == '__main__':
    # Configure INFO-level logging only when executed as a script.
    logging.basicConfig(level=logging.INFO)
    main()
ardumont (inline review comment): why not build the two lists (rev_metadata, orig_metadata) in one go, since you already iterate over it here?