diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -75,7 +75,11 @@ ) from ..models import DepositClient, DepositCollection, DepositRequest from ..parsers import parse_xml -from ..utils import extended_swhid_from_qualified, parse_swh_reference +from ..utils import ( + extended_swhid_from_qualified, + parse_swh_deposit_origin, + parse_swh_reference, +) ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"] ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"] @@ -911,14 +915,9 @@ ) def _set_deposit_origin_from_metadata(self, deposit, metadata, headers): - create_origin = metadata.find( - "swh:deposit/swh:create_origin/swh:origin", namespaces=NAMESPACES - ) - add_to_origin = metadata.find( - "swh:deposit/swh:add_to_origin/swh:origin", namespaces=NAMESPACES - ) + (create_origin, add_to_origin) = parse_swh_deposit_origin(metadata) - if create_origin is not None and add_to_origin is not None: + if create_origin and add_to_origin: raise DepositError( BAD_REQUEST, " and are mutually exclusive, " @@ -926,13 +925,13 @@ "origin.", ) - if create_origin is not None: - origin_url = create_origin.attrib["url"] + if create_origin: + origin_url = create_origin check_client_origin(deposit.client, origin_url) deposit.origin_url = origin_url - if add_to_origin is not None: - origin_url = add_to_origin.attrib["url"] + if add_to_origin: + origin_url = add_to_origin check_client_origin(deposit.client, origin_url) deposit.parent = ( Deposit.objects.filter( diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py --- a/swh/deposit/cli/client.py +++ b/swh/deposit/cli/client.py @@ -278,23 +278,25 @@ if metadata: from xml.etree import ElementTree - from swh.deposit.utils import parse_swh_metadata_provenance, parse_xml - - metadata_raw = open(metadata, "r").read() - metadata_dict = parse_xml(metadata_raw) - metadata_swh = metadata_dict.get("swh:deposit", {}) - if ( - "swh:create_origin" not in metadata_swh - and "swh:add_to_origin" not in metadata_swh - ): + from swh.deposit.utils import ( + parse_swh_deposit_origin, + parse_swh_metadata_provenance, + ) + + metadata_tree = ElementTree.fromstring(open(metadata).read()) + (create_origin, add_to_origin) = parse_swh_deposit_origin(metadata_tree) + if create_origin and add_to_origin: + logger.error( + "The metadata file provided must not contain both " + '"" and "" tags', + ) + elif not create_origin and not add_to_origin: logger.warning( "The metadata file provided should contain " '"" or "" tag', ) - meta_prov_url = parse_swh_metadata_provenance( - ElementTree.fromstring(metadata_raw) - ) + meta_prov_url = parse_swh_metadata_provenance(metadata_tree) if not meta_prov_url: logger.warning( diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py --- a/swh/deposit/utils.py +++ b/swh/deposit/utils.py @@ -4,7 +4,7 @@ # See top-level LICENSE file for more information import logging -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional, Tuple, Union from xml.etree import ElementTree import iso8601 @@ -109,7 +109,7 @@ - https://url.org/metadata/url + https://example.org/metadata/url @@ -131,11 +131,49 @@ return None +def parse_swh_deposit_origin( + metadata: ElementTree.Element, +) -> Tuple[Optional[str], Optional[str]]: + """Parses and from metadata document, + if any. + + .. code-block:: xml + + + + + + + + .. code-block:: xml + + + + + + + + Returns: + tuple of (origin_to_create, origin_to_add). If both are non-None, this + should typically be an error raised to the user. + """ + create_origin = metadata.find( + "swh:deposit/swh:create_origin/swh:origin", namespaces=NAMESPACES + ) + add_to_origin = metadata.find( + "swh:deposit/swh:add_to_origin/swh:origin", namespaces=NAMESPACES + ) + + return ( + None if create_origin is None else create_origin.attrib["url"], + None if add_to_origin is None else add_to_origin.attrib["url"], + ) + + def parse_swh_reference( metadata: ElementTree.Element, ) -> Optional[Union[QualifiedSWHID, str]]: - """Parse swh reference within the metadata dict (or origin) reference if found, - None otherwise. + """Parse within the metadata document, if any. .. code-block:: xml