diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -27,7 +27,12 @@
 from swh.deposit.models import Deposit
 from swh.deposit.utils import compute_metadata_context
 from swh.model import hashutil
-from swh.model.identifiers import ExtendedSWHID, QualifiedSWHID, ValidationError
+from swh.model.identifiers import (
+    ExtendedObjectType,
+    ExtendedSWHID,
+    QualifiedSWHID,
+    ValidationError,
+)
 from swh.model.model import (
     MetadataAuthority,
     MetadataAuthorityType,
@@ -681,7 +686,48 @@
 
             target_swhid = extended_swhid_from_qualified(swhid_reference)
 
-        # store that metadata to the metadata storage
+        # Check the target object already exists in the archive
+        if target_swhid.object_type in (ExtendedObjectType.CONTENT,):
+            # contents are looked up through the sha1_git-keyed "missing" endpoint
+            if list(
+                self.storage.content_missing_per_sha1_git([target_swhid.object_id])
+            ):
+                raise DepositError(
+                    BAD_REQUEST,
+                    f"Cannot load metadata on {target_swhid}, this content "
+                    f"object does not exist in the archive (yet?).",
+                )
+        elif target_swhid.object_type in (
+            ExtendedObjectType.DIRECTORY,
+            ExtendedObjectType.REVISION,
+            ExtendedObjectType.RELEASE,
+            ExtendedObjectType.SNAPSHOT,
+        ):
+            # the storage method is resolved by name: "<type>_missing"; a
+            # non-empty result is treated as "object absent"
+            target_type_name = target_swhid.object_type.name.lower()
+            method = getattr(self.storage, target_type_name + "_missing")
+            if list(method([target_swhid.object_id])):
+                raise DepositError(
+                    BAD_REQUEST,
+                    f"Cannot load metadata on {target_swhid}, this {target_type_name} "
+                    f"object does not exist in the archive (yet?).",
+                )
+        elif target_swhid.object_type in (ExtendedObjectType.ORIGIN,):
+            # a None entry in the lookup result is treated as an unknown origin
+            if None in list(self.storage.origin_get_by_sha1([target_swhid.object_id])):
+                raise DepositError(
+                    BAD_REQUEST,
+                    "Cannot load metadata on origin, it is not (yet?) known to the "
+                    "archive.",
+                )
+        else:
+            # no existence check is implemented for any other object type
+            raise DepositError(
+                BAD_REQUEST,
+                f"Cannot deposit metadata on {target_swhid.object_type.value}.",
+            )
+
         metadata_object = RawExtrinsicMetadata(
             target=target_swhid,  # core swhid or origin
             discovery_date=deposit_request.date,
diff --git a/swh/deposit/config.py b/swh/deposit/config.py
--- a/swh/deposit/config.py
+++ b/swh/deposit/config.py
@@ -104,6 +104,7 @@
             "version": __version__,
             "configuration": {"sword_version": "2"},
         }
+        self.storage: StorageInterface = get_storage(**self.config["storage"])
         self.storage_metadata: StorageInterface = get_storage(
             **self.config["storage_metadata"]
         )
diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py
--- a/swh/deposit/tests/api/test_collection_post_atom.py
+++ b/swh/deposit/tests/api/test_collection_post_atom.py
@@ -7,6 +7,7 @@
 
 from io import BytesIO
 import uuid
+import warnings
 
 import attr
 from django.urls import reverse_lazy as reverse
@@ -23,8 +24,16 @@
 from swh.deposit.parsers import parse_xml
 from swh.deposit.tests.common import post_atom
 from swh.deposit.utils import compute_metadata_context, extended_swhid_from_qualified
-from swh.model.identifiers import QualifiedSWHID
+from swh.model.hypothesis_strategies import (
+    directories,
+    present_contents,
+    releases,
+    revisions,
+    snapshots,
+)
+from swh.model.identifiers import ObjectType, QualifiedSWHID
 from swh.model.model import (
+    Content,
     MetadataAuthority,
     MetadataAuthorityType,
     MetadataFetcher,
@@ -34,6 +43,30 @@
 from swh.storage.interface import PagedResult
 
 
+def _insert_object(swh_storage, swhid):
+    """Insert an object with the given swhid in the archive"""
+    if swhid.object_type == ObjectType.CONTENT:
+        with warnings.catch_warnings():
+            # hypothesis doesn't like us using .example(), but we know what we're doing
+            warnings.simplefilter("ignore")
+            obj = present_contents().example()
+        swh_storage.content_add([attr.evolve(obj, sha1_git=swhid.object_id)])
+    else:
+        object_type_name = swhid.object_type.name.lower()
+        strategy = {
+            "directory": directories,
+            "revision": revisions,
+            "release": releases,
+            "snapshot": snapshots,
+        }[object_type_name]
+        method = getattr(swh_storage, object_type_name + "_add")
+        with warnings.catch_warnings():
+            # hypothesis doesn't like us using .example(), but we know what we're doing
+            warnings.simplefilter("ignore")
+            obj = strategy().example()
+        method([attr.evolve(obj, id=swhid.object_id)])
+
+
 def test_post_deposit_atom_201_even_with_decimal(
     authenticated_client, deposit_collection, atom_dataset
 ):
@@ -478,13 +511,15 @@
     xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
     deposit_client = authenticated_client.deposit_client
 
+    _insert_object(swh_storage, swhid_reference)
+
     response = post_atom(
         authenticated_client,
         reverse(COL_IRI, args=[deposit_collection.name]),
         data=xml_data,
     )
 
-    assert response.status_code == status.HTTP_201_CREATED
+    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
     response_content = parse_xml(BytesIO(response.content))
 
     # Ensure the deposit is finalized
@@ -558,13 +593,14 @@
     xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url)
     origin_swhid = Origin(url).swhid()
     deposit_client = authenticated_client.deposit_client
+    swh_storage.origin_add([Origin(url)])
     response = post_atom(
         authenticated_client,
         reverse(COL_IRI, args=[deposit_collection.name]),
         data=xml_data,
     )
 
-    assert response.status_code == status.HTTP_201_CREATED
+    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
     response_content = parse_xml(BytesIO(response.content))
     # Ensure the deposit is finalized
     deposit_id = int(response_content["swh:deposit_id"])
@@ -622,3 +658,60 @@
         next_page_token=None,
     )
     assert deposit.complete_date == discovery_date
+
+
+@pytest.mark.parametrize(
+    "swhid",
+    [
+        "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+        "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+        "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+        "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+        "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+        "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+        "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/",  # noqa
+        "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+        "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+        "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+    ],
+)
+def test_deposit_metadata_unknown_swhid(
+    swhid, authenticated_client, deposit_collection, atom_dataset, swh_storage,
+):
+    """Posting a swhid reference is rejected if the referenced object is unknown
+
+    """
+    xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
+
+    response = post_atom(
+        authenticated_client,
+        reverse(COL_IRI, args=[deposit_collection.name]),
+        data=xml_data,
+    )
+
+    assert (
+        response.status_code == status.HTTP_400_BAD_REQUEST
+    ), response.content.decode()
+    response_content = parse_xml(BytesIO(response.content))
+    assert "object does not exist" in response_content["sword:error"]["atom:summary"]
+
+
+def test_deposit_metadata_unknown_origin(
+    authenticated_client, deposit_collection, atom_dataset, swh_storage,
+):
+    """Posting an origin reference is rejected if the origin is unknown
+
+    """
+    url = "https://gitlab.org/user/repo"
+    xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url)
+    response = post_atom(
+        authenticated_client,
+        reverse(COL_IRI, args=[deposit_collection.name]),
+        data=xml_data,
+    )
+
+    assert (
+        response.status_code == status.HTTP_400_BAD_REQUEST
+    ), response.content.decode()
+    response_content = parse_xml(BytesIO(response.content))
+    assert "known to the archive" in response_content["sword:error"]["atom:summary"]
diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py
--- a/swh/deposit/tests/conftest.py
+++ b/swh/deposit/tests/conftest.py
@@ -88,6 +88,7 @@
         "extraction_dir": "/tmp/swh-deposit/test/extraction-dir",
         "checks": False,
         "scheduler": {"cls": "local", **swh_scheduler_config,},
+        "storage": swh_storage_backend_config,
         "storage_metadata": swh_storage_backend_config,
     }
 