diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py --- a/swh/indexer/metadata.py +++ b/swh/indexer/metadata.py @@ -112,7 +112,7 @@ # other types are not supported yet return [] - if data.authority.type != MetadataAuthorityType.FORGE: + if data.authority.type == MetadataAuthorityType.REGISTRY: # metadata provided by a third-party; don't trust it # (technically this could be handled below, but we check it here # to return early; sparing a translation and origin lookup) diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py --- a/swh/indexer/tests/test_cli.py +++ b/swh/indexer/tests/test_cli.py @@ -28,7 +28,7 @@ from swh.model.hashutil import hash_to_bytes from swh.model.model import Content, Origin, OriginVisitStatus -from .test_metadata import REMD +from .test_metadata import GITHUB_REMD from .utils import ( DIRECTORY2, RAW_CONTENT_IDS, @@ -711,7 +711,7 @@ origin = Origin("http://example.org/repo.git") storage.origin_add([origin]) - raw_extrinsic_metadata = attr.evolve(REMD, target=origin.swhid()) + raw_extrinsic_metadata = attr.evolve(GITHUB_REMD, target=origin.swhid()) raw_extrinsic_metadata = attr.evolve( raw_extrinsic_metadata, id=raw_extrinsic_metadata.compute_hash() ) diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -59,7 +59,37 @@ "tools": TRANSLATOR_TOOL, } -REMD = RawExtrinsicMetadata( +DEPOSIT_REMD = RawExtrinsicMetadata( + target=ExtendedSWHID( + object_type=ExtendedObjectType.ORIGIN, + object_id=b"\x01" * 20, + ), + discovery_date=datetime.datetime.now(tz=datetime.timezone.utc), + authority=MetadataAuthority( + type=MetadataAuthorityType.DEPOSIT_CLIENT, + url="https://example.org/", + ), + fetcher=MetadataFetcher( + name="example-fetcher", + version="1.0.0", + ), + format="sword-v2-atom-codemeta-v2", + metadata="""<?xml version="1.0"?> + <atom:entry xmlns:atom="http://www.w3.org/2005/Atom" + xmlns="https://doi.org/10.5063/schema/codemeta-2.0"> + <name>My Software</name> + <author> + <name>Author 1</name> + <email>foo@example.org</email> + </author> + <author> + <name>Author 2</name> + </author> + </atom:entry> + """.encode(), +) + 
+GITHUB_REMD = RawExtrinsicMetadata( target=ExtendedSWHID( object_type=ExtendedObjectType.ORIGIN, object_id=b"\x01" * 20, @@ -199,7 +229,7 @@ metadata_indexer = ExtrinsicMetadataIndexer(config=DIRECTORY_METADATA_CONFIG) metadata_indexer.storage = mocker.patch.object(metadata_indexer, "storage") - remd = attr.evolve(REMD, format="unknown format") + remd = attr.evolve(GITHUB_REMD, format="unknown format") results = metadata_indexer.index(remd.id, data=remd) @@ -221,7 +251,7 @@ assert tool is not None assert metadata_indexer.process_journal_objects( - {"raw_extrinsic_metadata": [REMD.to_dict()]} + {"raw_extrinsic_metadata": [GITHUB_REMD.to_dict()]} ) == {"status": "eventful", "origin_extrinsic_metadata:add": 1} assert metadata_indexer.storage.method_calls == [ @@ -241,19 +271,90 @@ "type": "https://forgefed.org/ns#Repository", "name": "test software", }, - from_remd_id=REMD.id, + from_remd_id=GITHUB_REMD.id, mappings=["github"], ) ] + def test_extrinsic_metadata_indexer_firstparty_deposit(self, mocker): + """Also nominal case, calling the mapping and storing the result""" + origin = "https://example.org/jdoe/myrepo" + + metadata_indexer = ExtrinsicMetadataIndexer(config=DIRECTORY_METADATA_CONFIG) + metadata_indexer.catch_exceptions = False + metadata_indexer.storage = mocker.patch.object(metadata_indexer, "storage") + metadata_indexer.storage.origin_get_by_sha1.return_value = [{"url": origin}] + + tool = metadata_indexer.idx_storage.indexer_configuration_get( + {f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()} + ) + assert tool is not None + + assert metadata_indexer.process_journal_objects( + {"raw_extrinsic_metadata": [DEPOSIT_REMD.to_dict()]} + ) == {"status": "eventful", "origin_extrinsic_metadata:add": 1} + + assert metadata_indexer.storage.method_calls == [ + call.origin_get_by_sha1([b"\x01" * 20]) + ] + + results = list( + metadata_indexer.idx_storage.origin_extrinsic_metadata_get([origin]) + ) + assert results == [ + OriginExtrinsicMetadataRow( + 
id="https://example.org/jdoe/myrepo", + tool={"id": tool["id"], **TRANSLATOR_TOOL}, + metadata={ + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "author": [ + {"email": "foo@example.org", "name": "Author 1"}, + {"name": "Author 2"}, + ], + "name": "My Software", + }, + from_remd_id=DEPOSIT_REMD.id, + mappings=["sword-codemeta"], + ) + ] + + def test_extrinsic_metadata_indexer_thirdparty_deposit(self, mocker): + """Metadata-only deposit: currently ignored""" + origin = "https://not-from-example.org/jdoe/myrepo" + + metadata_indexer = ExtrinsicMetadataIndexer(config=DIRECTORY_METADATA_CONFIG) + metadata_indexer.catch_exceptions = False + metadata_indexer.storage = mocker.patch.object(metadata_indexer, "storage") + metadata_indexer.storage.origin_get_by_sha1.return_value = [{"url": origin}] + + tool = metadata_indexer.idx_storage.indexer_configuration_get( + {f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()} + ) + assert tool is not None + + assert metadata_indexer.process_journal_objects( + {"raw_extrinsic_metadata": [DEPOSIT_REMD.to_dict()]} + ) == {"status": "uneventful", "origin_extrinsic_metadata:add": 0} + + assert metadata_indexer.storage.method_calls == [ + call.origin_get_by_sha1([b"\x01" * 20]) + ] + + results = list( + metadata_indexer.idx_storage.origin_extrinsic_metadata_get([origin]) + ) + assert results == [] + def test_extrinsic_metadata_indexer_nonforge_authority(self, mocker): - """Early abort on non-forge authorities""" + """Early abort on registry (third-party) authorities""" metadata_indexer = ExtrinsicMetadataIndexer(config=DIRECTORY_METADATA_CONFIG) metadata_indexer.storage = mocker.patch.object(metadata_indexer, "storage") remd = attr.evolve( - REMD, - authority=attr.evolve(REMD.authority, type=MetadataAuthorityType.REGISTRY), + GITHUB_REMD, + authority=attr.evolve( + GITHUB_REMD.authority, type=MetadataAuthorityType.REGISTRY + ), ) results = metadata_indexer.index(remd.id, data=remd) @@ -276,7 +377,7 @@ ) assert tool is not None - results = metadata_indexer.index(REMD.id, data=REMD) 
+ results = metadata_indexer.index(GITHUB_REMD.id, data=GITHUB_REMD) assert metadata_indexer.storage.method_calls == [ call.origin_get_by_sha1([b"\x01" * 20]) @@ -300,8 +401,8 @@ assert metadata_indexer.process_journal_objects( { "raw_extrinsic_metadata": [ - REMD.to_dict(), - {**REMD.to_dict(), "id": b"\x00" * 20}, + GITHUB_REMD.to_dict(), + {**GITHUB_REMD.to_dict(), "id": b"\x00" * 20}, ] } ) == {"status": "eventful", "origin_extrinsic_metadata:add": 1}