diff --git a/docs/specs/swh.xsd b/docs/specs/swh.xsd --- a/docs/specs/swh.xsd +++ b/docs/specs/swh.xsd @@ -14,6 +14,13 @@ + + + + + + + diff --git a/swh/deposit/api/collection.py b/swh/deposit/api/collection.py --- a/swh/deposit/api/collection.py +++ b/swh/deposit/api/collection.py @@ -111,6 +111,7 @@ deposit_parent: Optional[Deposit] = None if external_id: + # TODO: delete this when clients stopped relying on the slug try: # find a deposit parent (same external id, status load to success) deposit_parent = ( diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -147,6 +147,15 @@ return "%s/%s" % (deposit.client.provider_url.rstrip("/"), external_id) +def check_client_origin(client: DepositClient, origin_url: str): + provider_url = client.provider_url.rstrip("/") + "/" + if not origin_url.startswith(provider_url): + raise DepositError( + FORBIDDEN, + f"Cannot create origin {origin_url}, it must start with " f"{provider_url}", + ) + + class AuthenticatedAPIView(APIView): """Mixin intended as a based API view to enforce the basic authentication check @@ -737,18 +746,34 @@ ) create_origin = metadata.get("swh:deposit", {}).get("swh:create_origin") + add_to_origin = metadata.get("swh:deposit", {}).get("swh:add_to_origin") + + if create_origin and add_to_origin: + raise DepositError( + BAD_REQUEST, + " and are mutually exclusive, " + "as they respectively create a new origin and add to an existing " + "origin.", + ) + if create_origin: origin_url = create_origin["swh:origin"]["@url"] - if origin_url is not None: - provider_url = deposit.client.provider_url.rstrip("/") + "/" - if not origin_url.startswith(provider_url): - raise DepositError( - FORBIDDEN, - f"Cannot create origin {origin_url}, it must start with " - f"{provider_url}", - ) + check_client_origin(deposit.client, origin_url) deposit.origin_url = origin_url + if add_to_origin: + origin_url = add_to_origin["swh:origin"]["@url"] + check_client_origin(deposit.client, origin_url) + deposit.parent = ( + Deposit.objects.filter( + client=deposit.client, + origin_url=origin_url, + status=DEPOSIT_STATUS_LOAD_SUCCESS, + ) + .order_by("-id")[0:1] + .get() + ) + if "atom:external_identifier" in metadata: # Deprecated tag. # When clients stopped using it, this should raise an error @@ -761,6 +786,11 @@ " from now on.", ) + if deposit.parent: + raise DepositError( + BAD_REQUEST, " is deprecated.", + ) + if headers.slug and metadata["atom:external_identifier"] != headers.slug: raise DepositError( BAD_REQUEST, @@ -782,8 +812,8 @@ raise DepositError( BAD_REQUEST, " is for metadata-only deposits and " - " / Slug are for code deposits, " - "only one may be used on a given deposit.", + " / / Slug are for " + "code deposits, only one may be used on a given deposit.", ) self._deposit_put( diff --git a/swh/deposit/tests/api/test_collection.py b/swh/deposit/tests/api/test_collection.py --- a/swh/deposit/tests/api/test_collection.py +++ b/swh/deposit/tests/api/test_collection.py @@ -207,6 +207,43 @@ assert new_deposit.parent == deposit +def test_add_deposit_with_add_to_origin( + authenticated_client, + deposit_collection, + completed_deposit, + atom_dataset, + deposit_user, +): + """Posting deposit with creates a new deposit with parent + + """ + # given multiple deposit already loaded + deposit = completed_deposit + assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS + origin_url = deposit_user.provider_url + deposit.external_id + + # adding a new deposit with the same external id as a completed deposit + # creates the parenting chain + response = authenticated_client.post( + reverse(COL_IRI, args=[deposit_collection.name]), + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data-with-add-to-origin"] % origin_url, + ) + + assert response.status_code == status.HTTP_201_CREATED + response_content = parse_xml(BytesIO(response.content)) + deposit_id = response_content["swh:deposit_id"] + + assert deposit_id != deposit.id + + new_deposit = Deposit.objects.get(pk=deposit_id) + assert deposit.collection == new_deposit.collection + assert deposit.origin_url == origin_url + + assert new_deposit != deposit + assert new_deposit.parent == deposit + + def test_add_deposit_external_id_conflict_no_parent( authenticated_client, another_authenticated_client, @@ -265,6 +302,8 @@ of a different client creates a parent relationship with the deposit of the right client instead of the last matching deposit + This test does not have an equivalent for origin url conflicts, as these + can not happen (assuming clients do not have provider_url overlaps) """ # given multiple deposit already loaded deposit = completed_deposit @@ -301,3 +340,60 @@ assert new_deposit != deposit assert new_deposit.parent == deposit + + +def test_add_deposit_add_to_origin_conflict( + authenticated_client, + another_authenticated_client, + deposit_collection, + deposit_another_collection, + atom_dataset, + sample_archive, + deposit_user, + deposit_another_user, +): + """Posting a deposit with an referencing an origin + owned by a different client raises an error + + """ + external_id = "foobar" + origin_url = deposit_another_user.provider_url + external_id + + # create a deposit for that other user, with the same slug + create_deposit( + another_authenticated_client, + deposit_another_collection.name, + sample_archive, + external_id, + DEPOSIT_STATUS_LOAD_SUCCESS, + ) + + # adding a new deposit with the same external id as a completed deposit + response = authenticated_client.post( + reverse(COL_IRI, args=[deposit_collection.name]), + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data0"] % origin_url, + ) + + assert response.status_code == status.HTTP_403_FORBIDDEN + assert b"must start with" in response.content + + +def test_add_deposit_add_to_wrong_origin( + authenticated_client, deposit_collection, atom_dataset, sample_archive, +): + """Posting a deposit with an referencing an origin + not starting with the provider_url raises an error + + """ + origin_url = "http://example.org/foo" + + # adding a new deposit with the same external id as a completed deposit + response = authenticated_client.post( + reverse(COL_IRI, args=[deposit_collection.name]), + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data0"] % origin_url, + ) + + assert response.status_code == status.HTTP_403_FORBIDDEN + assert b"must start with" in response.content diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py --- a/swh/deposit/tests/api/test_collection_post_atom.py +++ b/swh/deposit/tests/api/test_collection_post_atom.py @@ -96,7 +96,51 @@ assert b"Malformed xml metadata" in response.content -def test_post_deposit_atom_403_wrong_origin_url_prefix( +def test_post_deposit_atom_400_both_create_origin_and_add_to_origin( + authenticated_client, deposit_collection, atom_dataset +): + """Posting a badly formatted atom should return a 400 response + + """ + response = authenticated_client.post( + reverse(COL_IRI, args=[deposit_collection.name]), + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data-with-both-create-origin-and-add-to-origin"], + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert ( + b"<swh:create_origin> and <swh:add_to_origin> " + b"are mutually exclusive" + ) in response.content + + +def test_add_deposit_with_add_to_origin_and_external_identifier( + authenticated_client, + deposit_collection, + completed_deposit, + atom_dataset, + deposit_user, +): + """Posting deposit with creates a new deposit with parent + + """ + # given multiple deposit already loaded + origin_url = deposit_user.provider_url + completed_deposit.external_id + + # adding a new deposit with the same external id as a completed deposit + # creates the parenting chain + response = authenticated_client.post( + reverse(COL_IRI, args=[deposit_collection.name]), + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data-with-both-add-to-origin-and-external-id"] + % origin_url, + ) + + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert b"<external_identifier> is deprecated." in response.content + + +def test_post_deposit_atom_403_create_wrong_origin_url_prefix( authenticated_client, deposit_collection, atom_dataset, deposit_user ): """Creating an origin for a prefix not owned by the client is forbidden @@ -118,6 +162,28 @@ assert expected_msg in response.content.decode() +def test_post_deposit_atom_403_add_to_wrong_origin_url_prefix( + authenticated_client, deposit_collection, atom_dataset, deposit_user +): + """Creating an origin for a prefix not owned by the client is forbidden + + """ + origin_url = "http://example.org/foo" + + response = authenticated_client.post( + reverse(COL_IRI, args=[deposit_collection.name]), + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data-with-add-to-origin"] % origin_url, + HTTP_IN_PROGRESS="true", + ) + assert response.status_code == status.HTTP_403_FORBIDDEN + expected_msg = ( + f"Cannot create origin {origin_url}, " + f"it must start with {deposit_user.provider_url}" + ) + assert expected_msg in response.content.decode() + + def test_post_deposit_atom_use_slug_header( authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker ): @@ -178,10 +244,11 @@ assert deposit.status == DEPOSIT_STATUS_DEPOSITED -def test_post_deposit_atom_with_external_identifier( +def test_post_deposit_atom_with_mismatched_slug_and_external_identifier( authenticated_client, deposit_collection, atom_dataset ): - """Posting an atom entry without a slug header should return a 400 + """Posting an atom entry with mismatched slug header and external_identifier + should return a 400 """ external_id = "foobar" diff --git a/swh/deposit/tests/api/test_collection_post_metadata.py b/swh/deposit/tests/api/test_collection_post_metadata.py --- a/swh/deposit/tests/api/test_collection_post_metadata.py +++ b/swh/deposit/tests/api/test_collection_post_metadata.py @@ -206,7 +206,7 @@ """Posting a swhid reference is stored on raw extrinsic metadata storage """ - xml_data = atom_dataset["entry-data-with-origin"].format(url=url) + xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url) deposit_client = authenticated_client.deposit_client response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), diff --git a/swh/deposit/tests/data/atom/entry-data-with-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml rename from swh/deposit/tests/data/atom/entry-data-with-origin.xml rename to swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml --- a/swh/deposit/tests/data/atom/entry-data-with-origin.xml +++ b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml @@ -6,8 +6,8 @@ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a dudess - - - + + + diff --git a/swh/deposit/tests/data/atom/entry-data-with-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-both-add-to-origin-and-external-id.xml copy from swh/deposit/tests/data/atom/entry-data-with-origin.xml copy to swh/deposit/tests/data/atom/entry-data-with-both-add-to-origin-and-external-id.xml --- a/swh/deposit/tests/data/atom/entry-data-with-origin.xml +++ b/swh/deposit/tests/data/atom/entry-data-with-both-add-to-origin-and-external-id.xml @@ -5,9 +5,10 @@ Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a dudess + foo - - - + + + diff --git a/swh/deposit/tests/data/atom/entry-data-with-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-both-create-origin-and-add-to-origin.xml rename from swh/deposit/tests/data/atom/entry-data-with-origin.xml rename to swh/deposit/tests/data/atom/entry-data-with-both-create-origin-and-add-to-origin.xml --- a/swh/deposit/tests/data/atom/entry-data-with-origin.xml +++ b/swh/deposit/tests/data/atom/entry-data-with-both-create-origin-and-add-to-origin.xml @@ -6,8 +6,11 @@ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a dudess - - - + + + + + +