diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -27,9 +27,14 @@ from swh.deposit.api.checks import check_metadata from swh.deposit.api.converters import convert_status_detail from swh.deposit.auth import HasDepositPermission, KeycloakBasicAuthentication -from swh.deposit.models import Deposit, DEPOSIT_METADATA_ONLY +from swh.deposit.models import DEPOSIT_METADATA_ONLY, Deposit from swh.deposit.parsers import parse_xml -from swh.deposit.utils import NAMESPACES, compute_metadata_context +from swh.deposit.utils import ( + NAMESPACES, + check_url_match_provider, + compute_metadata_context, + parse_swh_metadata_provenance, +) from swh.model import hashutil from swh.model.model import ( MetadataAuthority, @@ -159,15 +164,6 @@ return "%s/%s" % (deposit.client.provider_url.rstrip("/"), external_id) -def check_client_origin(client: DepositClient, origin_url: str): - provider_url = client.provider_url.rstrip("/") + "/" - if not origin_url.startswith(provider_url): - raise DepositError( - FORBIDDEN, - f"Cannot create origin {origin_url}, it must start with {provider_url}", - ) - - class APIBase(APIConfig, APIView, metaclass=ABCMeta): """Base deposit request class sharing multiple common behaviors. @@ -859,6 +855,14 @@ ) if swhid_ref is not None: + # It's suggested to user to provide it + metadata_provenance_url = parse_swh_metadata_provenance(metadata_tree) + if metadata_provenance_url: + # If the provenance is provided, ensure it matches client provider url + check_url_match_provider( + metadata_provenance_url, deposit.client.provider_url + ) + deposit.save() # We need a deposit id target_swhid, depo, depo_request = self._store_metadata_deposit( deposit, swhid_ref, metadata_tree, raw_metadata @@ -911,12 +915,12 @@ if create_origin: origin_url = create_origin - check_client_origin(deposit.client, origin_url) + check_url_match_provider(origin_url, deposit.client.provider_url) deposit.origin_url = origin_url if add_to_origin: origin_url = add_to_origin - check_client_origin(deposit.client, origin_url) + check_url_match_provider(origin_url, deposit.client.provider_url) deposit.parent = ( Deposit.objects.filter( client=deposit.client, diff --git a/swh/deposit/tests/api/test_collection_add_to_origin.py b/swh/deposit/tests/api/test_collection_add_to_origin.py --- a/swh/deposit/tests/api/test_collection_add_to_origin.py +++ b/swh/deposit/tests/api/test_collection_add_to_origin.py @@ -149,8 +149,4 @@ HTTP_IN_PROGRESS="true", ) assert response.status_code == status.HTTP_403_FORBIDDEN - expected_msg = ( - f"Cannot create origin {origin_url}, " - f"it must start with {deposit_user.provider_url}" - ) - assert expected_msg in response.content.decode() + assert "URL mismatch" in response.content.decode() diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py --- a/swh/deposit/tests/api/test_collection_post_atom.py +++ b/swh/deposit/tests/api/test_collection_post_atom.py @@ -238,11 +238,7 @@ HTTP_IN_PROGRESS="true", ) assert response.status_code == status.HTTP_403_FORBIDDEN - expected_msg = ( - f"Cannot create origin {origin_url}, " - f"it must start with {deposit_user.provider_url}" - ) - assert expected_msg in response.content.decode() + assert "URL mismatch" in response.content.decode() def test_post_deposit_atom_use_slug_header( @@ -510,7 +506,7 @@ """ invalid_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49" - xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid) + xml_data = atom_dataset["entry-data-with-swhid-no-prov"].format(swhid=invalid_swhid) response = post_atom( authenticated_client, @@ -521,6 +517,29 @@ assert b"Invalid SWHID reference" in response.content +def test_deposit_metadata_invalid_metadata_provenance( + authenticated_client, deposit_collection, atom_dataset +): + """Posting invalid metadata provenance is bad request returned to client + + """ + invalid_swhid = "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49" + xml_data = atom_dataset["entry-data-with-swhid"].format( + swhid=invalid_swhid, + metadata_provenance_url=( + "https://inria.halpreprod.archives-ouvertes.fr/hal-abcdefgh" + ), + ) + + response = post_atom( + authenticated_client, + reverse(COL_IRI, args=[deposit_collection.name]), + data=xml_data, + ) + assert response.status_code == status.HTTP_403_FORBIDDEN + assert b"URL mismatch" in response.content + + def test_deposit_metadata_fails_functional_checks( authenticated_client, deposit_collection, atom_dataset ): @@ -565,7 +584,10 @@ swhid_reference = QualifiedSWHID.from_string(swhid) swhid_target = extended_swhid_from_qualified(swhid_reference) - xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid) + xml_data = atom_dataset["entry-data-with-swhid"].format( + swhid=swhid, + metadata_provenance_url="https://hal-test.archives-ouvertes.fr/hal-abcdefgh", + ) deposit_client = authenticated_client.deposit_client _insert_object(swh_storage, swhid_reference) @@ -732,7 +754,7 @@ """Posting a swhid reference is rejected if the referenced object is unknown """ - xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid) + xml_data = atom_dataset["entry-data-with-swhid-no-prov"].format(swhid=swhid) response = post_atom( authenticated_client, @@ -763,7 +785,7 @@ for an extended object type """ - xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid) + xml_data = atom_dataset["entry-data-with-swhid-no-prov"].format(swhid=swhid) response = post_atom( authenticated_client, diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py --- a/swh/deposit/tests/cli/test_client.py +++ b/swh/deposit/tests/cli/test_client.py @@ -880,7 +880,16 @@ """ api_url_basename = "deposit.test.metadataonly" swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea" - metadata = atom_dataset[metadata_entry_key].format(swhid=swhid) + atom_data = atom_dataset[metadata_entry_key] + if metadata_entry_key == "entry-data-with-swhid": + metadata = atom_data.format( + swhid=swhid, + metadata_provenance_url=( + "https://inria.halpreprod.archives-ouvertes.fr/hal-abcdefgh" + ), + ) + else: + metadata = atom_data.format(swhid=swhid) metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml") with open(metadata_path, "w") as m: m.write(metadata) @@ -933,7 +942,7 @@ """ api_url_basename = "deposit.test.metadataonly" invalid_swhid = "ssh:2:sth:xxx" - metadata = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid) + metadata = atom_dataset["entry-data-with-swhid-no-prov"].format(swhid=invalid_swhid) metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml") with open(metadata_path, "w") as f: f.write(metadata) diff --git a/swh/deposit/tests/data/atom/entry-data-with-swhid.xml b/swh/deposit/tests/data/atom/entry-data-with-swhid.xml --- a/swh/deposit/tests/data/atom/entry-data-with-swhid.xml +++ b/swh/deposit/tests/data/atom/entry-data-with-swhid.xml @@ -11,7 +11,7 @@ - https://inria.halpreprod.archives-ouvertes.fr/hal-abcdefgh + {metadata_provenance_url} diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py --- a/swh/deposit/tests/test_utils.py +++ b/swh/deposit/tests/test_utils.py @@ -141,7 +141,7 @@ @pytest.fixture def xml_with_swhid(atom_dataset): - return atom_dataset["entry-data-with-swhid"] + return atom_dataset["entry-data-with-swhid-no-prov"] @pytest.mark.parametrize( @@ -156,7 +156,7 @@ ], ) def test_parse_swh_reference_swhid(swhid, xml_with_swhid): - xml_data = xml_with_swhid.format(swhid=swhid) + xml_data = xml_with_swhid.format(swhid=swhid,) metadata = ElementTree.fromstring(xml_data) actual_swhid = utils.parse_swh_reference(metadata) diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py --- a/swh/deposit/utils.py +++ b/swh/deposit/utils.py @@ -9,6 +9,7 @@ import iso8601 +from swh.deposit.errors import FORBIDDEN, DepositError from swh.model.exceptions import ValidationError from swh.model.model import TimestampWithTimezone from swh.model.swhids import ExtendedSWHID, ObjectType, QualifiedSWHID @@ -85,9 +86,7 @@ ) -def parse_swh_metadata_provenance( - metadata: ElementTree.Element, -) -> Optional[Union[QualifiedSWHID, str]]: +def parse_swh_metadata_provenance(metadata: ElementTree.Element,) -> Optional[str]: """Parse swh metadata-provenance within the metadata dict reference if found, None otherwise. @@ -257,3 +256,16 @@ """ return f'<{link}>; rel="{link_name}"' + + +def check_url_match_provider(url: str, provider_url: str) -> None: + """Check url matches the provider url. + + Raises DepositError in case of mismatch + + """ + provider_url = provider_url.rstrip("/") + "/" + if not url.startswith(provider_url): + raise DepositError( + FORBIDDEN, f"URL mismatch: {url} must start with {provider_url}", + )