Page Menu | Home | Software Heritage

D8388.diff
No One | Temporary

D8388.diff

diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
from copy import deepcopy
+import itertools
from typing import (
Any,
Callable,
@@ -73,19 +74,19 @@
def process_journal_objects(self, objects: ObjectsDict) -> Dict:
summary: Dict[str, Any] = {"status": "uneventful"}
try:
- results = []
+ results = {}
for item in objects.get("raw_extrinsic_metadata", []):
remd = RawExtrinsicMetadata.from_dict(item)
sentry_sdk.set_tag("swh-indexer-remd-swhid", remd.swhid())
- results.extend(self.index(remd.id, data=remd))
+ results[remd.target] = self.index(remd.id, data=remd)
except Exception:
if not self.catch_exceptions:
raise
summary["status"] = "failed"
return summary
- summary_persist = self.persist_index_computations(results)
- self.results = results
+ self.results = list(itertools.chain.from_iterable(results.values()))
+ summary_persist = self.persist_index_computations(self.results)
if summary_persist:
for value in summary_persist.values():
if value > 0:
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -281,3 +281,32 @@
call.origin_get_by_sha1([b"\x01" * 20])
]
assert results == []
+
+ def test_extrinsic_metadata_indexer_duplicate_origin(self, mocker):
+ """Two REMD objects targeting the same origin store a single row, taken from the last one"""
+ origin = "https://example.org/jdoe/myrepo"
+
+ metadata_indexer = ExtrinsicMetadataIndexer(config=DIRECTORY_METADATA_CONFIG)
+ metadata_indexer.catch_exceptions = False
+ metadata_indexer.storage = mocker.patch.object(metadata_indexer, "storage")
+ metadata_indexer.storage.origin_get_by_sha1.return_value = [{"url": origin}]
+
+ tool = metadata_indexer.idx_storage.indexer_configuration_get(
+ {f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()}
+ )
+ assert tool is not None
+
+ assert metadata_indexer.process_journal_objects(
+ {
+ "raw_extrinsic_metadata": [
+ REMD.to_dict(),
+ {**REMD.to_dict(), "id": b"\x00" * 20},
+ ]
+ }
+ ) == {"status": "eventful", "origin_extrinsic_metadata:add": 1}
+
+ results = list(
+ metadata_indexer.idx_storage.origin_extrinsic_metadata_get([origin])
+ )
+ assert len(results) == 1, results
+ assert results[0].from_remd_id == b"\x00" * 20

File Metadata

Mime Type
text/plain
Expires
Fri, Jun 20, 9:05 PM (4 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217584

Event Timeline