Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9347970
D1206.id3803.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D1206.id3803.diff
View Options
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -293,14 +293,37 @@
# Deduplicate revisions
rev_metadata = []
orig_metadata = []
+ revs_to_delete = set()
+ origs_to_delete = set()
for (orig_item, rev_item) in results:
- if rev_item not in rev_metadata:
- rev_metadata.append(rev_item)
- if orig_item not in orig_metadata:
- orig_metadata.append(orig_item)
-
- self.idx_storage.revision_metadata_add(
- rev_metadata, conflict_update=conflict_update)
-
- self.idx_storage.origin_intrinsic_metadata_add(
- orig_metadata, conflict_update=conflict_update)
+ assert rev_item['mappings'] == orig_item['mappings']
+ if rev_item['mappings']:
+ if rev_item not in rev_metadata:
+ rev_metadata.append(rev_item)
+ if orig_item not in orig_metadata:
+ orig_metadata.append(orig_item)
+ else:
+ revs_to_delete.add((
+ rev_item['id'],
+ rev_item['indexer_configuration_id'],
+ ))
+ origs_to_delete.add((
+ orig_item['origin_id'],
+ orig_item['indexer_configuration_id'],
+ ))
+
+ if rev_metadata:
+ self.idx_storage.revision_metadata_add(
+ rev_metadata, conflict_update=conflict_update)
+ if orig_metadata:
+ self.idx_storage.origin_intrinsic_metadata_add(
+ orig_metadata, conflict_update=conflict_update)
+
+ # revs_to_delete should always be empty unless we changed a mapping
+ # to detect fewer files.
+ # However, origs_to_delete may be non-empty whenever an upstream deletes
+ # a metadata file.
+ for (orig_id, tool_id) in origs_to_delete:
+ self.idx_storage.origin_intrinsic_metadata_delete(orig_id, tool_id)
+ for (rev_id, tool_id) in revs_to_delete:
+ self.idx_storage.revision_metadata_delete(rev_id, tool_id)
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -3,6 +3,8 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from unittest.mock import patch
+
from swh.model.hashutil import hash_to_bytes
from swh.indexer.metadata import OriginMetadataIndexer
@@ -155,3 +157,54 @@
results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
origin1['id'], origin2['id']]))
assert len(results) == 2
+
+
+def test_origin_metadata_indexer_no_metadata(
+ idx_storage, storage, obj_storage):
+
+ indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
+ with patch('swh.indexer.metadata_dictionary.NpmMapping.filename',
+ b'foo.json'):
+ indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+
+ origin = storage.origin_get({
+ 'type': 'git',
+ 'url': 'https://github.com/librariesio/yarn-parser'})
+ rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
+
+ results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ assert results == []
+
+ results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
+ origin['id']]))
+ assert results == []
+
+
+def test_origin_metadata_indexer_delete_metadata(
+ idx_storage, storage, obj_storage):
+
+ indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
+ indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+
+ origin = storage.origin_get({
+ 'type': 'git',
+ 'url': 'https://github.com/librariesio/yarn-parser'})
+ rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
+
+ results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ assert results != []
+
+ results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
+ origin['id']]))
+ assert results != []
+
+ with patch('swh.indexer.metadata_dictionary.NpmMapping.filename',
+ b'foo.json'):
+ indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+
+ results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ assert results == []
+
+ results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
+ origin['id']]))
+ assert results == []
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Jul 3 2025, 6:05 PM (4 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226239
Attached To
D1206: Prevent origin metadata indexer from writing empty records
Event Timeline
Log In to Comment