Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066348
D8863.id31947.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
2 KB
Subscribers
None
D8863.id31947.diff
View Options
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
from copy import deepcopy
+import hashlib
import itertools
import logging
import time
@@ -109,7 +110,14 @@
raise NotImplementedError(
"ExtrinsicMetadataIndexer.index() without RawExtrinsicMetadata data"
)
- if data.target.object_type != ExtendedObjectType.ORIGIN:
+ if data.target.object_type == ExtendedObjectType.ORIGIN:
+ origin_sha1 = data.target.object_id
+ elif data.origin is not None:
+ # HACK: As swh-search does not (yet?) support searching on directories
+ # and traversing back to origins, we index metadata on non-origins with
+ # an origin context as if they were on the origin itself.
+ origin_sha1 = hashlib.sha1(data.origin.encode()).digest()
+ else:
# other types are not supported yet
return []
@@ -136,7 +144,7 @@
# TODO: batch requests to origin_get_by_sha1()
for _ in range(6):
- origins = self.storage.origin_get_by_sha1([data.target.object_id])
+ origins = self.storage.origin_get_by_sha1([origin_sha1])
try:
(origin,) = origins
if origin is not None:
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -61,8 +61,8 @@
DEPOSIT_REMD = RawExtrinsicMetadata(
target=ExtendedSWHID(
- object_type=ExtendedObjectType.ORIGIN,
- object_id=b"\x01" * 20,
+ object_type=ExtendedObjectType.DIRECTORY,
+ object_id=b"\x02" * 20,
),
discovery_date=datetime.datetime.now(tz=datetime.timezone.utc),
authority=MetadataAuthority(
@@ -87,6 +87,7 @@
</author>
</atom:entry>
""".encode(),
+ origin="https://example.org/jdoe/myrepo",
)
GITHUB_REMD = RawExtrinsicMetadata(
@@ -295,7 +296,9 @@
) == {"status": "eventful", "origin_extrinsic_metadata:add": 1}
assert metadata_indexer.storage.method_calls == [
- call.origin_get_by_sha1([b"\x01" * 20])
+ call.origin_get_by_sha1(
+ [b"\xb1\x0c\\\xd2w\x1b\xdd\xac\x07\xdb\xdf>\x93O1\xd0\xc9L\x0c\xcf"]
+ )
]
results = list(
@@ -337,7 +340,9 @@
) == {"status": "uneventful", "origin_extrinsic_metadata:add": 0}
assert metadata_indexer.storage.method_calls == [
- call.origin_get_by_sha1([b"\x01" * 20])
+ call.origin_get_by_sha1(
+ [b"\xb1\x0c\\\xd2w\x1b\xdd\xac\x07\xdb\xdf>\x93O1\xd0\xc9L\x0c\xcf"]
+ )
]
results = list(
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 6:25 AM (8 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216875
Attached To
D8863: ExtrinsicMetadataIndexer: Add support for metadata with origin in context
Event Timeline
Log In to Comment