diff --git a/swh/deposit/tests/data/atom/entry-data-with-metadata-provenance.xml b/swh/deposit/tests/data/atom/entry-data-with-metadata-provenance.xml
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/atom/entry-data-with-metadata-provenance.xml
@@ -0,0 +1,13 @@
+
+
+ Awesome stuff
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+ ssedud
+
+
+ {url}
+
+
+
diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py
--- a/swh/deposit/tests/test_utils.py
+++ b/swh/deposit/tests/test_utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2020 The Software Heritage developers
+# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -196,11 +196,12 @@
@pytest.fixture
-def xml_with_empty_reference():
+def xml_swh_deposit_template():
xml_data = """
-
+
- {swh_reference}
+ {swh_deposit}
"""
@@ -216,8 +217,8 @@
"""""",
],
)
-def test_parse_swh_reference_empty(xml_with_empty_reference, xml_ref):
- xml_body = xml_with_empty_reference.format(swh_reference=xml_ref)
+def test_parse_swh_reference_empty(xml_swh_deposit_template, xml_ref):
+ xml_body = xml_swh_deposit_template.format(swh_deposit=xml_ref)
metadata = utils.parse_xml(xml_body)
assert utils.parse_swh_reference(metadata) is None
@@ -271,3 +272,32 @@
with pytest.raises(ValidationError):
utils.parse_swh_reference(metadata)
+
+
+@pytest.mark.parametrize(
+    "xml_ref",
+    [
+        "",
+        "",
+        "",
+    ],
+)
+def test_parse_swh_metatada_provenance_empty(xml_swh_deposit_template, xml_ref):
+    """Deposits without a usable metadata-provenance entry must parse to None."""
+    parsed = utils.parse_xml(xml_swh_deposit_template.format(swh_deposit=xml_ref))
+
+    assert utils.parse_swh_metadata_provenance(parsed) is None
+
+
+@pytest.fixture
+def xml_with_metadata_provenance(atom_dataset):
+    """Atom entry template whose ``{url}`` placeholder is filled in by tests."""
+    template = atom_dataset["entry-data-with-metadata-provenance"]
+    return template
+
+
+def test_parse_swh_metadata_provenance2(xml_with_metadata_provenance):
+    """The provenance url present in the deposit is returned unchanged."""
+    expected_url = "https://url.org/metadata/url"
+    metadata = utils.parse_xml(xml_with_metadata_provenance.format(url=expected_url))
+
+    assert utils.parse_swh_metadata_provenance(metadata) == expected_url
diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py
--- a/swh/deposit/utils.py
+++ b/swh/deposit/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2020 The Software Heritage developers
+# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -25,6 +25,7 @@
"https://doi.org/10.5063/SCHEMA/CODEMETA-2.0": "codemeta",
"http://purl.org/net/sword/terms/": "sword",
"https://www.softwareheritage.org/schema/2018/deposit": "swh",
+ "https://schema.org/": "schema",
}
data = xmltodict.parse(
@@ -146,6 +147,42 @@
)
+def parse_swh_metadata_provenance(metadata: Dict) -> Optional[str]:
+    """Extract the metadata-provenance url from a parsed deposit, if any.
+
+    The url is looked up under ``swh:deposit`` > ``swh:metadata-provenance`` >
+    ``schema:url``:
+
+    .. code-block:: xml
+
+       <swh:deposit>
+         <swh:metadata-provenance>
+           <schema:url>https://url.org/metadata/url</schema:url>
+         </swh:metadata-provenance>
+       </swh:deposit>
+
+    Args:
+        metadata: result of parsing an Atom document with :func:`parse_xml`
+
+    Returns:
+        The metadata provenance url if any, None otherwise (including when an
+        intermediate element is missing, empty, or not a mapping)
+
+    """
+    swh_deposit = metadata.get("swh:deposit")
+    # An element holding only text (or repeated elements) is not a mapping;
+    # treat it as "no provenance" rather than failing on ``.get``.
+    if not isinstance(swh_deposit, dict):
+        return None
+
+    swh_metadata_provenance = swh_deposit.get("swh:metadata-provenance")
+    if not isinstance(swh_metadata_provenance, dict):
+        return None
+
+    return swh_metadata_provenance.get("schema:url")
+
+
+
def parse_swh_reference(metadata: Dict,) -> Optional[Union[QualifiedSWHID, str]]:
"""Parse swh reference within the metadata dict (or origin) reference if found,
None otherwise.