diff --git a/swh/deposit/tests/data/atom/entry-data-with-metadata-provenance.xml b/swh/deposit/tests/data/atom/entry-data-with-metadata-provenance.xml new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/data/atom/entry-data-with-metadata-provenance.xml @@ -0,0 +1,13 @@ + + + Awesome stuff + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + ssedud + + + {url} + + + diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py --- a/swh/deposit/tests/test_utils.py +++ b/swh/deposit/tests/test_utils.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2020 The Software Heritage developers +# Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -196,11 +196,12 @@ @pytest.fixture -def xml_with_empty_reference(): +def xml_swh_deposit_template(): xml_data = """ - + - {swh_reference} + {swh_deposit} """ @@ -216,8 +217,8 @@ """""", ], ) -def test_parse_swh_reference_empty(xml_with_empty_reference, xml_ref): - xml_body = xml_with_empty_reference.format(swh_reference=xml_ref) +def test_parse_swh_reference_empty(xml_swh_deposit_template, xml_ref): + xml_body = xml_swh_deposit_template.format(swh_deposit=xml_ref) metadata = utils.parse_xml(xml_body) assert utils.parse_swh_reference(metadata) is None @@ -271,3 +272,32 @@ with pytest.raises(ValidationError): utils.parse_swh_reference(metadata) + + +@pytest.mark.parametrize( + "xml_ref", + [ + "", + "", + "", + ], +) +def test_parse_swh_metatada_provenance_empty(xml_swh_deposit_template, xml_ref): + xml_body = xml_swh_deposit_template.format(swh_deposit=xml_ref) + metadata = utils.parse_xml(xml_body) + + assert utils.parse_swh_metadata_provenance(metadata) is None + + +@pytest.fixture +def xml_with_metadata_provenance(atom_dataset): + return atom_dataset["entry-data-with-metadata-provenance"] + + +def 
def parse_swh_metadata_provenance(metadata: Dict) -> Optional[str]:
    """Return the metadata-provenance URL declared in a deposit, if any.

    The lookup follows the namespace-prefixed keys of the parsed document,
    i.e. the dict equivalent of:

    .. code-block:: xml

       <swh:deposit>
         <swh:metadata-provenance>
           <schema:url>https://url.org/metadata/url</schema:url>
         </swh:metadata-provenance>
       </swh:deposit>

    Args:
        metadata: result of parsing an Atom document with :func:`parse_xml`

    Returns:
        The value stored under ``schema:url`` when the
        ``swh:deposit`` / ``swh:metadata-provenance`` chain is present,
        None otherwise. A node of that chain which is missing, empty, or not
        a mapping (for instance a text-only element) is treated as absent.

    """
    node = metadata
    for key in ("swh:deposit", "swh:metadata-provenance"):
        node = node.get(key)
        # Anything that is not a non-empty mapping cannot hold the next level;
        # bail out instead of failing on ``.get`` with an AttributeError.
        if not node or not isinstance(node, dict):
            return None

    return node.get("schema:url")