Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata.py
Show First 20 Lines • Show All 291 Lines • ▼ Show 20 Lines | def index(self, origin): | ||||
'metadata': rev_metadata['translated_metadata'], | 'metadata': rev_metadata['translated_metadata'], | ||||
'mappings': rev_metadata['mappings'], | 'mappings': rev_metadata['mappings'], | ||||
'indexer_configuration_id': | 'indexer_configuration_id': | ||||
rev_metadata['indexer_configuration_id'], | rev_metadata['indexer_configuration_id'], | ||||
} | } | ||||
return (orig_metadata, rev_metadata) | return (orig_metadata, rev_metadata) | ||||
def persist_index_computations(self, results, policy_update): | def persist_index_computations(self, results, policy_update): | ||||
conflict_update = (policy_update == 'update-dups') | |||||
# Deduplicate revisions | |||||
rev_metadata = [] | |||||
for (orig_item, rev_item) in results: | |||||
if rev_item not in rev_metadata: | |||||
rev_metadata.append(rev_item) | |||||
ardumont: why not build the 2 lists (rev_metadata, orig_metadata) in one go since you iterate over it here already? | |||||
vlorentz (Author, Unsubmitted, Done): Good catch | |||||
self.idx_storage.revision_metadata_add( | self.idx_storage.revision_metadata_add( | ||||
[rev_item for (orig_item, rev_item) in results], | rev_metadata, conflict_update=conflict_update) | ||||
conflict_update=(policy_update == 'update-dups')) | |||||
self.idx_storage.origin_intrinsic_metadata_add( | self.idx_storage.origin_intrinsic_metadata_add( | ||||
[orig_item for (orig_item, rev_item) in results], | [orig_item for (orig_item, rev_item) in results], | ||||
conflict_update=(policy_update == 'update-dups')) | conflict_update=conflict_update) | ||||
@click.command() | @click.command() | ||||
@click.option('--revs', '-i', | @click.option('--revs', '-i', | ||||
help='Default sha1_git to lookup', multiple=True) | help='Default sha1_git to lookup', multiple=True) | ||||
def main(revs): | def main(revs): | ||||
_git_sha1s = list(map(hashutil.hash_to_bytes, revs)) | _git_sha1s = list(map(hashutil.hash_to_bytes, revs)) | ||||
rev_metadata_indexer = RevisionMetadataIndexer() | rev_metadata_indexer = RevisionMetadataIndexer() | ||||
rev_metadata_indexer.run(_git_sha1s, 'update-dups') | rev_metadata_indexer.run(_git_sha1s, 'update-dups') | ||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||
logging.basicConfig(level=logging.INFO) | logging.basicConfig(level=logging.INFO) | ||||
main() | main() |
ardumont: Why not build the 2 lists (rev_metadata, orig_metadata) in one go, since you already iterate over it here?