diff --git a/docs/specs/swh.xsd b/docs/specs/swh.xsd
--- a/docs/specs/swh.xsd
+++ b/docs/specs/swh.xsd
@@ -14,6 +14,13 @@
+
+
+
+
+
+
+
diff --git a/swh/deposit/api/collection.py b/swh/deposit/api/collection.py
--- a/swh/deposit/api/collection.py
+++ b/swh/deposit/api/collection.py
@@ -111,6 +111,7 @@
deposit_parent: Optional[Deposit] = None
if external_id:
+ # TODO: delete this when clients stopped relying on the slug
try:
# find a deposit parent (same external id, status load to success)
deposit_parent = (
diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -147,6 +147,15 @@
return "%s/%s" % (deposit.client.provider_url.rstrip("/"), external_id)
+def check_client_origin(client: DepositClient, origin_url: str):
+ provider_url = client.provider_url.rstrip("/") + "/"
+ if not origin_url.startswith(provider_url):
+ raise DepositError(
+ FORBIDDEN,
+ f"Cannot create origin {origin_url}, it must start with " f"{provider_url}",
+ )
+
+
class AuthenticatedAPIView(APIView):
"""Mixin intended as a based API view to enforce the basic
authentication check
@@ -737,18 +746,34 @@
)
create_origin = metadata.get("swh:deposit", {}).get("swh:create_origin")
+ add_to_origin = metadata.get("swh:deposit", {}).get("swh:add_to_origin")
+
+ if create_origin and add_to_origin:
+ raise DepositError(
+ BAD_REQUEST,
+ " and are mutually exclusive, "
+ "as they respectively create a new origin and add to an existing "
+ "origin.",
+ )
+
if create_origin:
origin_url = create_origin["swh:origin"]["@url"]
- if origin_url is not None:
- provider_url = deposit.client.provider_url.rstrip("/") + "/"
- if not origin_url.startswith(provider_url):
- raise DepositError(
- FORBIDDEN,
- f"Cannot create origin {origin_url}, it must start with "
- f"{provider_url}",
- )
+ check_client_origin(deposit.client, origin_url)
deposit.origin_url = origin_url
+ if add_to_origin:
+ origin_url = add_to_origin["swh:origin"]["@url"]
+ check_client_origin(deposit.client, origin_url)
+ deposit.parent = (
+ Deposit.objects.filter(
+ client=deposit.client,
+ origin_url=origin_url,
+ status=DEPOSIT_STATUS_LOAD_SUCCESS,
+ )
+ .order_by("-id")[0:1]
+ .get()
+ )
+
if "atom:external_identifier" in metadata:
# Deprecated tag.
# When clients stopped using it, this should raise an error
@@ -761,6 +786,11 @@
" from now on.",
)
+ if deposit.parent:
+ raise DepositError(
+ BAD_REQUEST, " is deprecated.",
+ )
+
if headers.slug and metadata["atom:external_identifier"] != headers.slug:
raise DepositError(
BAD_REQUEST,
@@ -782,8 +812,8 @@
raise DepositError(
BAD_REQUEST,
" is for metadata-only deposits and "
- " / Slug are for code deposits, "
- "only one may be used on a given deposit.",
+ " / / Slug are for "
+ "code deposits, only one may be used on a given deposit.",
)
self._deposit_put(
diff --git a/swh/deposit/tests/api/test_collection.py b/swh/deposit/tests/api/test_collection.py
--- a/swh/deposit/tests/api/test_collection.py
+++ b/swh/deposit/tests/api/test_collection.py
@@ -207,6 +207,43 @@
assert new_deposit.parent == deposit
+def test_add_deposit_with_add_to_origin(
+ authenticated_client,
+ deposit_collection,
+ completed_deposit,
+ atom_dataset,
+ deposit_user,
+):
+ """Posting deposit with creates a new deposit with parent
+
+ """
+ # given multiple deposit already loaded
+ deposit = completed_deposit
+ assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
+ origin_url = deposit_user.provider_url + deposit.external_id
+
+ # adding a new deposit with the same external id as a completed deposit
+ # creates the parenting chain
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data-with-add-to-origin"] % origin_url,
+ )
+
+ assert response.status_code == status.HTTP_201_CREATED
+ response_content = parse_xml(BytesIO(response.content))
+ deposit_id = response_content["swh:deposit_id"]
+
+ assert deposit_id != deposit.id
+
+ new_deposit = Deposit.objects.get(pk=deposit_id)
+ assert deposit.collection == new_deposit.collection
+ assert deposit.origin_url == origin_url
+
+ assert new_deposit != deposit
+ assert new_deposit.parent == deposit
+
+
def test_add_deposit_external_id_conflict_no_parent(
authenticated_client,
another_authenticated_client,
@@ -265,6 +302,8 @@
of a different client creates a parent relationship with the deposit
of the right client instead of the last matching deposit
+ This test does not have an equivalent for origin url conflicts, as these
+ can not happen (assuming clients do not have provider_url overlaps)
"""
# given multiple deposit already loaded
deposit = completed_deposit
@@ -301,3 +340,60 @@
assert new_deposit != deposit
assert new_deposit.parent == deposit
+
+
+def test_add_deposit_add_to_origin_conflict(
+ authenticated_client,
+ another_authenticated_client,
+ deposit_collection,
+ deposit_another_collection,
+ atom_dataset,
+ sample_archive,
+ deposit_user,
+ deposit_another_user,
+):
+ """Posting a deposit with an referencing an origin
+ owned by a different client raises an error
+
+ """
+ external_id = "foobar"
+ origin_url = deposit_another_user.provider_url + external_id
+
+ # create a deposit for that other user, with the same slug
+ create_deposit(
+ another_authenticated_client,
+ deposit_another_collection.name,
+ sample_archive,
+ external_id,
+ DEPOSIT_STATUS_LOAD_SUCCESS,
+ )
+
+ # adding a new deposit with the same external id as a completed deposit
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data0"] % origin_url,
+ )
+
+ assert response.status_code == status.HTTP_403_FORBIDDEN
+ assert b"must start with" in response.content
+
+
+def test_add_deposit_add_to_wrong_origin(
+ authenticated_client, deposit_collection, atom_dataset, sample_archive,
+):
+ """Posting a deposit with an referencing an origin
+ not starting with the provider_url raises an error
+
+ """
+ origin_url = "http://example.org/foo"
+
+ # adding a new deposit with the same external id as a completed deposit
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data0"] % origin_url,
+ )
+
+ assert response.status_code == status.HTTP_403_FORBIDDEN
+ assert b"must start with" in response.content
diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py
--- a/swh/deposit/tests/api/test_collection_post_atom.py
+++ b/swh/deposit/tests/api/test_collection_post_atom.py
@@ -96,7 +96,7 @@
assert b"Malformed xml metadata" in response.content
-def test_post_deposit_atom_403_wrong_origin_url_prefix(
+def test_post_deposit_atom_403_create_wrong_origin_url_prefix(
authenticated_client, deposit_collection, atom_dataset, deposit_user
):
"""Creating an origin for a prefix not owned by the client is forbidden
@@ -118,6 +118,28 @@
assert expected_msg in response.content.decode()
+def test_post_deposit_atom_403_add_to_wrong_origin_url_prefix(
+ authenticated_client, deposit_collection, atom_dataset, deposit_user
+):
+ """Creating an origin for a prefix not owned by the client is forbidden
+
+ """
+ origin_url = "http://example.org/foo"
+
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data-with-add-to-origin"] % origin_url,
+ HTTP_IN_PROGRESS="true",
+ )
+ assert response.status_code == status.HTTP_403_FORBIDDEN
+ expected_msg = (
+ f"Cannot create origin {origin_url}, "
+ f"it must start with {deposit_user.provider_url}"
+ )
+ assert expected_msg in response.content.decode()
+
+
def test_post_deposit_atom_use_slug_header(
authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker
):
@@ -178,10 +200,11 @@
assert deposit.status == DEPOSIT_STATUS_DEPOSITED
-def test_post_deposit_atom_with_external_identifier(
+def test_post_deposit_atom_with_mismatched_slug_and_external_identifier(
authenticated_client, deposit_collection, atom_dataset
):
- """Posting an atom entry without a slug header should return a 400
+ """Posting an atom entry with mismatched slug header and external_identifier
+ should return a 400
"""
external_id = "foobar"
diff --git a/swh/deposit/tests/api/test_collection_post_metadata.py b/swh/deposit/tests/api/test_collection_post_metadata.py
--- a/swh/deposit/tests/api/test_collection_post_metadata.py
+++ b/swh/deposit/tests/api/test_collection_post_metadata.py
@@ -206,7 +206,7 @@
"""Posting a swhid reference is stored on raw extrinsic metadata storage
"""
- xml_data = atom_dataset["entry-data-with-origin"].format(url=url)
+ xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url)
deposit_client = authenticated_client.deposit_client
response = authenticated_client.post(
reverse(COL_IRI, args=[deposit_collection.name]),
diff --git a/swh/deposit/tests/data/atom/entry-data-with-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
rename from swh/deposit/tests/data/atom/entry-data-with-origin.xml
rename to swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
--- a/swh/deposit/tests/data/atom/entry-data-with-origin.xml
+++ b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
@@ -6,8 +6,8 @@
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
dudess
-
-
-
+
+
+