diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py --- a/swh/indexer/metadata.py +++ b/swh/indexer/metadata.py @@ -311,7 +311,7 @@ for (orig_item, rev_item) in results: if rev_item not in rev_metadata: rev_metadata.append(rev_item) - if rev_item not in orig_metadata: + if orig_item not in orig_metadata: orig_metadata.append(orig_item) self.idx_storage.revision_metadata_add( diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py --- a/swh/indexer/storage/in_memory.py +++ b/swh/indexer/storage/in_memory.py @@ -614,10 +614,12 @@ """ + items = [] for item in metadata: item = item.copy() item['id'] = item.pop('origin_id') - self._origin_intrinsic_metadata.add([item], conflict_update) + items.append(item) + self._origin_intrinsic_metadata.add(items, conflict_update) def origin_intrinsic_metadata_search_fulltext( self, conjunction, limit=100): diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py --- a/swh/indexer/tests/storage/test_storage.py +++ b/swh/indexer/tests/storage/test_storage.py @@ -1053,6 +1053,48 @@ # metadata did change as the v2 was used to overwrite v1 self.assertEqual(actual_metadata, expected_metadata_v2) + def test_origin_intrinsic_metadata_add__duplicate_twice(self): + # given + tool_id = self.tools['swh-metadata-detector']['id'] + + metadata = { + 'developmentStatus': None, + 'version': None, + 'operatingSystem': None, + 'description': None, + 'keywords': None, + 'issueTracker': None, + 'name': None, + 'author': None, + 'relatedLink': None, + 'url': None, + 'license': None, + 'maintainer': None, + 'email': None, + 'softwareRequirements': None, + 'identifier': None, + } + metadata_rev = { + 'id': self.revision_id_2, + 'translated_metadata': metadata, + 'mappings': ['mapping1'], + 'indexer_configuration_id': tool_id, + } + metadata_origin = { + 'origin_id': self.origin_id_1, + 'metadata': metadata, + 'indexer_configuration_id': tool_id, + 'mappings': ['mapping1'], + 'from_revision': self.revision_id_2, + } + + # when + self.storage.revision_metadata_add([metadata_rev]) + + with self.assertRaises(ValueError): + self.storage.origin_intrinsic_metadata_add([ + metadata_origin, metadata_origin]) + def test_origin_intrinsic_metadata_search_fulltext(self): # given tool_id = self.tools['swh-metadata-detector']['id']