Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata.py
Show First 20 Lines • Show All 298 Lines • ▼ Show 20 Lines | def persist_index_computations(self, results, policy_update): | ||||
conflict_update = (policy_update == 'update-dups') | conflict_update = (policy_update == 'update-dups') | ||||
# Deduplicate revisions | # Deduplicate revisions | ||||
rev_metadata = [] | rev_metadata = [] | ||||
orig_metadata = [] | orig_metadata = [] | ||||
revs_to_delete = [] | revs_to_delete = [] | ||||
origs_to_delete = [] | origs_to_delete = [] | ||||
for (orig_item, rev_item) in results: | for (orig_item, rev_item) in results: | ||||
assert rev_item['mappings'] == orig_item['mappings'] | assert rev_item['metadata'] == orig_item['metadata'] | ||||
if rev_item['mappings']: | if not rev_item['metadata'] or \ | ||||
olasd: This can be rewritten as `if rev_item['metadata'].keys() <= {'@context'}` | |||||
Done Inline Actions — vlorentz: TIL | |||||
Done Inline Actions — vlorentz: (though I still have to check for `None`ness) | |||||
# Only store translated metadata if we found a metadata file. | rev_item['metadata'].keys() <= {'@context'}: | ||||
# Otherwise it's just an empty dict with a "@context" key. | # If we didn't find any metadata, don't store a DB record | ||||
if rev_item not in rev_metadata: | # (and delete existing ones, if any) | ||||
rev_metadata.append(rev_item) | |||||
if orig_item not in orig_metadata: | |||||
orig_metadata.append(orig_item) | |||||
else: | |||||
if rev_item not in revs_to_delete: | if rev_item not in revs_to_delete: | ||||
revs_to_delete.append(rev_item) | revs_to_delete.append(rev_item) | ||||
if orig_item not in origs_to_delete: | if orig_item not in origs_to_delete: | ||||
origs_to_delete.append(orig_item) | origs_to_delete.append(orig_item) | ||||
else: | |||||
if rev_item not in rev_metadata: | |||||
rev_metadata.append(rev_item) | |||||
if orig_item not in orig_metadata: | |||||
orig_metadata.append(orig_item) | |||||
Not Done Inline Actions — olasd: Couldn't we turn these into sets rather than doing `O(n^2)` equality lookups? (I'm guessing n is tiny and it doesn't really matter, but still :p) | |||||
Done Inline Actions — vlorentz: No, they contain unhashable data, and `n` is too small to do something clever. | |||||
if rev_metadata: | if rev_metadata: | ||||
self.idx_storage.revision_intrinsic_metadata_add( | self.idx_storage.revision_intrinsic_metadata_add( | ||||
rev_metadata, conflict_update=conflict_update) | rev_metadata, conflict_update=conflict_update) | ||||
if orig_metadata: | if orig_metadata: | ||||
self.idx_storage.origin_intrinsic_metadata_add( | self.idx_storage.origin_intrinsic_metadata_add( | ||||
orig_metadata, conflict_update=conflict_update) | orig_metadata, conflict_update=conflict_update) | ||||
# revs_to_delete should always be empty unless we changed a mapping | # revs_to_delete should always be empty unless we changed a mapping | ||||
# to detect less files. | # to detect less files or less content. | ||||
# However, origs_to_delete may be non-empty whenever an upstream deletes | # However, origs_to_delete may be non-empty whenever an upstream deletes | ||||
# a metadata file. | # a metadata file. | ||||
if origs_to_delete: | if origs_to_delete: | ||||
self.idx_storage.origin_intrinsic_metadata_delete(origs_to_delete) | self.idx_storage.origin_intrinsic_metadata_delete(origs_to_delete) | ||||
if revs_to_delete: | if revs_to_delete: | ||||
self.idx_storage.revision_intrinsic_metadata_delete(revs_to_delete) | self.idx_storage.revision_intrinsic_metadata_delete(revs_to_delete) |
This can be rewritten as `if rev_item['metadata'].keys() <= {'@context'}`