diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -27,9 +27,14 @@
from swh.deposit.api.checks import check_metadata
from swh.deposit.api.converters import convert_status_detail
from swh.deposit.auth import HasDepositPermission, KeycloakBasicAuthentication
-from swh.deposit.models import Deposit, DEPOSIT_METADATA_ONLY
+from swh.deposit.models import DEPOSIT_METADATA_ONLY, Deposit
from swh.deposit.parsers import parse_xml
-from swh.deposit.utils import NAMESPACES, compute_metadata_context
+from swh.deposit.utils import (
+ NAMESPACES,
+ check_url_match_provider,
+ compute_metadata_context,
+ parse_swh_metadata_provenance,
+)
from swh.model import hashutil
from swh.model.model import (
MetadataAuthority,
@@ -159,15 +164,6 @@
return "%s/%s" % (deposit.client.provider_url.rstrip("/"), external_id)
-def check_client_origin(client: DepositClient, origin_url: str):
- provider_url = client.provider_url.rstrip("/") + "/"
- if not origin_url.startswith(provider_url):
- raise DepositError(
- FORBIDDEN,
- f"Cannot create origin {origin_url}, it must start with {provider_url}",
- )
-
-
class APIBase(APIConfig, APIView, metaclass=ABCMeta):
"""Base deposit request class sharing multiple common behaviors.
@@ -859,6 +855,14 @@
)
if swhid_ref is not None:
+ # It's suggested to user to provide it
+ metadata_provenance_url = parse_swh_metadata_provenance(metadata_tree)
+ if metadata_provenance_url:
+ # If the provenance is provided, ensure it matches client provider url
+ check_url_match_provider(
+ metadata_provenance_url, deposit.client.provider_url
+ )
+
deposit.save() # We need a deposit id
target_swhid, depo, depo_request = self._store_metadata_deposit(
deposit, swhid_ref, metadata_tree, raw_metadata
@@ -911,12 +915,12 @@
if create_origin:
origin_url = create_origin
- check_client_origin(deposit.client, origin_url)
+ check_url_match_provider(origin_url, deposit.client.provider_url)
deposit.origin_url = origin_url
if add_to_origin:
origin_url = add_to_origin
- check_client_origin(deposit.client, origin_url)
+ check_url_match_provider(origin_url, deposit.client.provider_url)
deposit.parent = (
Deposit.objects.filter(
client=deposit.client,
diff --git a/swh/deposit/tests/api/test_collection_add_to_origin.py b/swh/deposit/tests/api/test_collection_add_to_origin.py
--- a/swh/deposit/tests/api/test_collection_add_to_origin.py
+++ b/swh/deposit/tests/api/test_collection_add_to_origin.py
@@ -149,8 +149,4 @@
HTTP_IN_PROGRESS="true",
)
assert response.status_code == status.HTTP_403_FORBIDDEN
- expected_msg = (
- f"Cannot create origin {origin_url}, "
- f"it must start with {deposit_user.provider_url}"
- )
- assert expected_msg in response.content.decode()
+ assert "URL mismatch" in response.content.decode()
diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py
--- a/swh/deposit/tests/api/test_collection_post_atom.py
+++ b/swh/deposit/tests/api/test_collection_post_atom.py
@@ -238,11 +238,7 @@
HTTP_IN_PROGRESS="true",
)
assert response.status_code == status.HTTP_403_FORBIDDEN
- expected_msg = (
- f"Cannot create origin {origin_url}, "
- f"it must start with {deposit_user.provider_url}"
- )
- assert expected_msg in response.content.decode()
+ assert "URL mismatch" in response.content.decode()
def test_post_deposit_atom_use_slug_header(
@@ -510,7 +506,7 @@
"""
invalid_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
- xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid)
+ xml_data = atom_dataset["entry-data-with-swhid-no-prov"].format(swhid=invalid_swhid)
response = post_atom(
authenticated_client,
@@ -521,6 +517,29 @@
assert b"Invalid SWHID reference" in response.content
+def test_deposit_metadata_invalid_metadata_provenance(
+ authenticated_client, deposit_collection, atom_dataset
+):
+ """Posting invalid metadata provenance is bad request returned to client
+
+ """
+ invalid_swhid = "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
+ xml_data = atom_dataset["entry-data-with-swhid"].format(
+ swhid=invalid_swhid,
+ metadata_provenance_url=(
+ "https://inria.halpreprod.archives-ouvertes.fr/hal-abcdefgh"
+ ),
+ )
+
+ response = post_atom(
+ authenticated_client,
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ data=xml_data,
+ )
+ assert response.status_code == status.HTTP_403_FORBIDDEN
+ assert b"URL mismatch" in response.content
+
+
def test_deposit_metadata_fails_functional_checks(
authenticated_client, deposit_collection, atom_dataset
):
@@ -565,7 +584,10 @@
swhid_reference = QualifiedSWHID.from_string(swhid)
swhid_target = extended_swhid_from_qualified(swhid_reference)
- xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
+ xml_data = atom_dataset["entry-data-with-swhid"].format(
+ swhid=swhid,
+ metadata_provenance_url="https://hal-test.archives-ouvertes.fr/hal-abcdefgh",
+ )
deposit_client = authenticated_client.deposit_client
_insert_object(swh_storage, swhid_reference)
@@ -732,7 +754,7 @@
"""Posting a swhid reference is rejected if the referenced object is unknown
"""
- xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
+ xml_data = atom_dataset["entry-data-with-swhid-no-prov"].format(swhid=swhid)
response = post_atom(
authenticated_client,
@@ -763,7 +785,7 @@
for an extended object type
"""
- xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
+ xml_data = atom_dataset["entry-data-with-swhid-no-prov"].format(swhid=swhid)
response = post_atom(
authenticated_client,
diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py
--- a/swh/deposit/tests/cli/test_client.py
+++ b/swh/deposit/tests/cli/test_client.py
@@ -880,7 +880,16 @@
"""
api_url_basename = "deposit.test.metadataonly"
swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"
- metadata = atom_dataset[metadata_entry_key].format(swhid=swhid)
+ atom_data = atom_dataset[metadata_entry_key]
+ if metadata_entry_key == "entry-data-with-swhid":
+ metadata = atom_data.format(
+ swhid=swhid,
+ metadata_provenance_url=(
+ "https://inria.halpreprod.archives-ouvertes.fr/hal-abcdefgh"
+ ),
+ )
+ else:
+ metadata = atom_data.format(swhid=swhid)
metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml")
with open(metadata_path, "w") as m:
m.write(metadata)
@@ -933,7 +942,7 @@
"""
api_url_basename = "deposit.test.metadataonly"
invalid_swhid = "ssh:2:sth:xxx"
- metadata = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid)
+ metadata = atom_dataset["entry-data-with-swhid-no-prov"].format(swhid=invalid_swhid)
metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml")
with open(metadata_path, "w") as f:
f.write(metadata)
diff --git a/swh/deposit/tests/data/atom/entry-data-with-swhid.xml b/swh/deposit/tests/data/atom/entry-data-with-swhid.xml
--- a/swh/deposit/tests/data/atom/entry-data-with-swhid.xml
+++ b/swh/deposit/tests/data/atom/entry-data-with-swhid.xml
@@ -11,7 +11,7 @@
- https://inria.halpreprod.archives-ouvertes.fr/hal-abcdefgh
+ {metadata_provenance_url}
diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py
--- a/swh/deposit/tests/test_utils.py
+++ b/swh/deposit/tests/test_utils.py
@@ -141,7 +141,7 @@
@pytest.fixture
def xml_with_swhid(atom_dataset):
- return atom_dataset["entry-data-with-swhid"]
+ return atom_dataset["entry-data-with-swhid-no-prov"]
@pytest.mark.parametrize(
@@ -156,7 +156,7 @@
],
)
def test_parse_swh_reference_swhid(swhid, xml_with_swhid):
- xml_data = xml_with_swhid.format(swhid=swhid)
+ xml_data = xml_with_swhid.format(swhid=swhid,)
metadata = ElementTree.fromstring(xml_data)
actual_swhid = utils.parse_swh_reference(metadata)
diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py
--- a/swh/deposit/utils.py
+++ b/swh/deposit/utils.py
@@ -9,6 +9,7 @@
import iso8601
+from swh.deposit.errors import FORBIDDEN, DepositError
from swh.model.exceptions import ValidationError
from swh.model.model import TimestampWithTimezone
from swh.model.swhids import ExtendedSWHID, ObjectType, QualifiedSWHID
@@ -85,9 +86,7 @@
)
-def parse_swh_metadata_provenance(
- metadata: ElementTree.Element,
-) -> Optional[Union[QualifiedSWHID, str]]:
+def parse_swh_metadata_provenance(metadata: ElementTree.Element,) -> Optional[str]:
"""Parse swh metadata-provenance within the metadata dict reference if found, None
otherwise.
@@ -257,3 +256,16 @@
"""
return f'<{link}>; rel="{link_name}"'
+
+
+def check_url_match_provider(url: str, provider_url: str) -> None:
+ """Check url matches the provider url.
+
+ Raises DepositError in case of mismatch
+
+ """
+ provider_url = provider_url.rstrip("/") + "/"
+ if not url.startswith(provider_url):
+ raise DepositError(
+ FORBIDDEN, f"URL mismatch: {url} must start with {provider_url}",
+ )