Page Menu · Home · Software Heritage

D1081.diff
No One · Temporary

D1081.diff

diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -303,13 +303,22 @@
return results
def persist_index_computations(self, results, policy_update):
+ conflict_update = (policy_update == 'update-dups')
+ # results holds (orig_item, rev_item) pairs; a batch may repeat items.
+ # Deduplicate revisions and origins, each against its own list
+ rev_metadata = []
+ orig_metadata = []
+ for (orig_item, rev_item) in results:
+ if rev_item not in rev_metadata:
+ rev_metadata.append(rev_item)
+ if orig_item not in orig_metadata:
+ orig_metadata.append(orig_item)
+
self.idx_storage.revision_metadata_add(
- [rev_item for (orig_item, rev_item) in results],
- conflict_update=(policy_update == 'update-dups'))
+ rev_metadata, conflict_update=conflict_update)
self.idx_storage.origin_intrinsic_metadata_add(
- [orig_item for (orig_item, rev_item) in results],
- conflict_update=(policy_update == 'update-dups'))
+ orig_metadata, conflict_update=conflict_update)
@click.command()
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -75,3 +75,25 @@
for result in results:
del result['tool']
assert results == [origin_metadata]
+
+
+def test_origin_metadata_indexer_duplicates(
+ idx_storage, storage, obj_storage, origin_metadata_indexer):
+ indexer = OriginMetadataIndexer()
+ indexer.storage = storage
+ indexer.idx_storage = idx_storage
+ indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ # Index the same origin again, listed twice within a single batch.
+ indexer.run(["git+https://github.com/librariesio/yarn-parser"]*2)
+
+ origin = storage.origin_get({
+ 'type': 'git',
+ 'url': 'https://github.com/librariesio/yarn-parser'})
+ rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
+ # Exactly one metadata entry must come back for this revision id...
+ results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ assert len(results) == 1
+ # ...and exactly one for the origin, despite the repeated runs.
+ results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
+ origin['id']]))
+ assert len(results) == 1

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 30, 9:01 AM (20 h, 12 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226324

Event Timeline