Page MenuHomeSoftware Heritage

D8863.id.diff
No OneTemporary

D8863.id.diff

diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
from copy import deepcopy
+import hashlib
import itertools
import logging
import time
@@ -109,7 +110,14 @@
raise NotImplementedError(
"ExtrinsicMetadataIndexer.index() without RawExtrinsicMetadata data"
)
- if data.target.object_type != ExtendedObjectType.ORIGIN:
+ if data.target.object_type == ExtendedObjectType.ORIGIN:
+ origin_sha1 = data.target.object_id
+ elif data.origin is not None:
+ # HACK: As swh-search does not (yet?) support searching on directories
+ # and traversing back to origins, we index metadata on non-origins that
+ # have an origin context as if it were attached to the origin itself.
+ origin_sha1 = hashlib.sha1(data.origin.encode()).digest()
+ else:
# other types are not supported yet
return []
@@ -136,7 +144,7 @@
# TODO: batch requests to origin_get_by_sha1()
for _ in range(6):
- origins = self.storage.origin_get_by_sha1([data.target.object_id])
+ origins = self.storage.origin_get_by_sha1([origin_sha1])
try:
(origin,) = origins
if origin is not None:
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -61,8 +61,8 @@
DEPOSIT_REMD = RawExtrinsicMetadata(
target=ExtendedSWHID(
- object_type=ExtendedObjectType.ORIGIN,
- object_id=b"\x01" * 20,
+ object_type=ExtendedObjectType.DIRECTORY,
+ object_id=b"\x02" * 20,
),
discovery_date=datetime.datetime.now(tz=datetime.timezone.utc),
authority=MetadataAuthority(
@@ -87,6 +87,7 @@
</author>
</atom:entry>
""".encode(),
+ origin="https://example.org/jdoe/myrepo",
)
GITHUB_REMD = RawExtrinsicMetadata(
@@ -295,7 +296,9 @@
) == {"status": "eventful", "origin_extrinsic_metadata:add": 1}
assert metadata_indexer.storage.method_calls == [
- call.origin_get_by_sha1([b"\x01" * 20])
+ call.origin_get_by_sha1(
+ [b"\xb1\x0c\\\xd2w\x1b\xdd\xac\x07\xdb\xdf>\x93O1\xd0\xc9L\x0c\xcf"]
+ )
]
results = list(
@@ -337,7 +340,9 @@
) == {"status": "uneventful", "origin_extrinsic_metadata:add": 0}
assert metadata_indexer.storage.method_calls == [
- call.origin_get_by_sha1([b"\x01" * 20])
+ call.origin_get_by_sha1(
+ [b"\xb1\x0c\\\xd2w\x1b\xdd\xac\x07\xdb\xdf>\x93O1\xd0\xc9L\x0c\xcf"]
+ )
]
results = list(

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 6:25 AM (8 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216875

Event Timeline