diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py --- a/swh/indexer/metadata.py +++ b/swh/indexer/metadata.py @@ -4,6 +4,7 @@ # See top-level LICENSE file for more information from copy import deepcopy +import hashlib import itertools import logging import time @@ -109,7 +110,14 @@ raise NotImplementedError( "ExtrinsicMetadataIndexer.index() without RawExtrinsicMetadata data" ) - if data.target.object_type != ExtendedObjectType.ORIGIN: + if data.target.object_type == ExtendedObjectType.ORIGIN: + origin_sha1 = data.target.object_id + elif data.origin is not None: + # HACK: As swh-search does (yet?) not support searching on directories + # and traversing back to origins, we index metadata on non-origins with + # an origin context as if they were on the origin itself. + origin_sha1 = hashlib.sha1(data.origin.encode()).digest() + else: # other types are not supported yet return [] @@ -136,7 +144,7 @@ # TODO: batch requests to origin_get_by_sha1() for _ in range(6): - origins = self.storage.origin_get_by_sha1([data.target.object_id]) + origins = self.storage.origin_get_by_sha1([origin_sha1]) try: (origin,) = origins if origin is not None: diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -61,8 +61,8 @@ DEPOSIT_REMD = RawExtrinsicMetadata( target=ExtendedSWHID( - object_type=ExtendedObjectType.ORIGIN, - object_id=b"\x01" * 20, + object_type=ExtendedObjectType.DIRECTORY, + object_id=b"\x02" * 20, ), discovery_date=datetime.datetime.now(tz=datetime.timezone.utc), authority=MetadataAuthority( @@ -87,6 +87,7 @@ """.encode(), + origin="https://example.org/jdoe/myrepo", ) GITHUB_REMD = RawExtrinsicMetadata( @@ -295,7 +296,9 @@ ) == {"status": "eventful", "origin_extrinsic_metadata:add": 1} assert metadata_indexer.storage.method_calls == [ - call.origin_get_by_sha1([b"\x01" * 20]) + call.origin_get_by_sha1( + [b"\xb1\x0c\\\xd2w\x1b\xdd\xac\x07\xdb\xdf>\x93O1\xd0\xc9L\x0c\xcf"] + ) ] results = list( @@ -337,7 +340,9 @@ ) == {"status": "uneventful", "origin_extrinsic_metadata:add": 0} assert metadata_indexer.storage.method_calls == [ - call.origin_get_by_sha1([b"\x01" * 20]) + call.origin_get_by_sha1( + [b"\xb1\x0c\\\xd2w\x1b\xdd\xac\x07\xdb\xdf>\x93O1\xd0\xc9L\x0c\xcf"] + ) ] results = list(