diff --git a/swh/deposit/api/checks.py b/swh/deposit/api/checks.py --- a/swh/deposit/api/checks.py +++ b/swh/deposit/api/checks.py @@ -19,11 +19,10 @@ from typing import Dict, Optional, Tuple from xml.etree import ElementTree -import iso8601 import pkg_resources import xmlschema -from swh.deposit.utils import NAMESPACES, normalize_date, parse_swh_metadata_provenance +from swh.deposit.utils import NAMESPACES, parse_swh_metadata_provenance MANDATORY_FIELDS_MISSING = "Mandatory fields are missing" INVALID_DATE_FORMAT = "Invalid date format" @@ -35,6 +34,7 @@ @dataclasses.dataclass class Schemas: swh: xmlschema.XMLSchema11 + codemeta: xmlschema.XMLSchema11 @functools.lru_cache(1) @@ -44,7 +44,7 @@ pkg_resources.resource_string("swh.deposit", f"xsd/{name}.xsd").decode() ) - return Schemas(swh=load_xsd("swh")) + return Schemas(swh=load_xsd("swh"), codemeta=load_xsd("codemeta")) def check_metadata(metadata: ElementTree.Element) -> Tuple[bool, Optional[Dict]]: @@ -93,25 +93,21 @@ except xmlschema.exceptions.XMLSchemaException as e: return False, {"metadata": [{"fields": ["swh:deposit"], "summary": str(e)}]} - fields = [] - - for commit_date in metadata.findall( - "codemeta:datePublished", namespaces=NAMESPACES - ): - try: - normalize_date(commit_date.text) - except iso8601.iso8601.ParseError: - fields.append("codemeta:datePublished") - - for author_date in metadata.findall("codemeta:dateCreated", namespaces=NAMESPACES): + detail = [] + for child in metadata: + for schema_element in schemas().codemeta.root_elements: + if child.tag in schema_element.name: + break + else: + # Tag is not specified in the schema, don't validate it + continue try: - normalize_date(author_date.text) - except iso8601.iso8601.ParseError: - fields.append("codemeta:dateCreated") + schemas().codemeta.validate(child) + except xmlschema.exceptions.XMLSchemaException as e: + detail.append({"fields": [schema_element.prefixed_name], "summary": str(e)}) - if fields: - detail = [{"summary": INVALID_DATE_FORMAT, "fields": fields}] - return False, {"metadata": detail + suggested_fields} + if detail: + return False, {"metadata": detail} if suggested_fields: # it's fine but warn about missing suggested fields return True, {"metadata": suggested_fields} diff --git a/swh/deposit/tests/api/test_checks.py b/swh/deposit/tests/api/test_checks.py --- a/swh/deposit/tests/api/test_checks.py +++ b/swh/deposit/tests/api/test_checks.py @@ -253,23 +253,6 @@ "fields": ["atom:author or codemeta:author"], }, ), - ( - f"""\ - - something - something-else - bar - someone - 2020-aa-21 - 2020-12-bb - {PROVENANCE_XML} - - """, - { - "summary": "Invalid date format", - "fields": ["codemeta:datePublished", "codemeta:dateCreated"], - }, - ), ] ] @@ -286,6 +269,29 @@ _parameters3 = [ (textwrap.dedent(metadata_ko), expected_summary) for (metadata_ko, expected_summary) in [ + ( + f"""\ + + something + something-else + bar + someone + 2020-aa-21 + 2020-12-bb + {PROVENANCE_XML} + + """, + [ + { + "summary": ".*Reason: invalid value '2020-aa-21'.*", + "fields": ["codemeta:datePublished"], + }, + { + "summary": ".*Reason: invalid value '2020-12-bb'.*", + "fields": ["codemeta:dateCreated"], + }, + ], + ), ( f"""\ @@ -297,30 +303,17 @@ 2020-12-bb """, - { - "summary": "Invalid date format", - "fields": ["codemeta:datePublished", "codemeta:dateCreated"], - }, + [ + { + "summary": ".*Reason: invalid value '2020-aa-21'.*", + "fields": ["codemeta:datePublished"], + }, + { + "summary": ".*Reason: invalid value '2020-12-bb'.*", + "fields": ["codemeta:dateCreated"], + }, + ], ), - ] -] - - -@pytest.mark.parametrize("metadata_ko,expected_invalid_summary", _parameters3) -def test_api_checks_check_metadata_fields_ko_and_missing_suggested_fields( - metadata_ko, expected_invalid_summary, swh_checks_deposit -): - actual_check, error_detail = check_metadata(ElementTree.fromstring(metadata_ko)) - assert actual_check is False - assert error_detail == { - "metadata": [expected_invalid_summary] - + [{"fields": [METADATA_PROVENANCE_KEY], "summary": SUGGESTED_FIELDS_MISSING,}] - } - - -_parameters4 = [ - (textwrap.dedent(metadata_ko), expected_summary) - for (metadata_ko, expected_summary) in [ ( f"""\ @@ -335,13 +328,15 @@ """, - { - "summary": ( - r".*Reason: Unexpected child with tag 'swh:invalid'.*" - r"Instance:.*swh:invalid.*" - ), - "fields": ["swh:deposit"], - }, + [ + { + "summary": ( + r".*Reason: Unexpected child with tag 'swh:invalid'.*" + r"Instance:.*swh:invalid.*" + ), + "fields": ["swh:deposit"], + } + ], ), ( f"""\ @@ -360,12 +355,14 @@ """, - { - "summary": ( - r".*Reason: Unexpected child with tag 'swh:add_to_origin'.*" - ), - "fields": ["swh:deposit"], - }, + [ + { + "summary": ( + r".*Reason: Unexpected child with tag 'swh:add_to_origin'.*" + ), + "fields": ["swh:deposit"], + } + ], ), ( f"""\ @@ -384,14 +381,16 @@ """, - { - "summary": ( - r".*Reason: assertion test if false.*" - r"Schema:\n*" - r' *]+ id="swhdeposit-incompatible-create-and-add".*' - ), - "fields": ["swh:deposit"], - }, + [ + { + "summary": ( + r".*Reason: assertion test if false.*" + r"Schema:\n*" + r' *]+ id="swhdeposit-incompatible-create-and-add".*' + ), + "fields": ["swh:deposit"], + } + ], ), ( f"""\ @@ -410,14 +409,16 @@ """, - { - "summary": ( - r".*Reason: assertion test if false.*" - r"Schema:\n*" - r' *]+ id="swhdeposit-incompatible-create-and-reference".*' - ), - "fields": ["swh:deposit"], - }, + [ + { + "summary": ( + r".*Reason: assertion test if false.*" + r"Schema:\n*" + r' *]+ id="swhdeposit-incompatible-create-and-reference".*' + ), + "fields": ["swh:deposit"], + } + ], ), ( f"""\ @@ -436,14 +437,16 @@ """, - { - "summary": ( - r".*Reason: assertion test if false.*" - r"Schema:\n*" - r' *]+ id="swhdeposit-incompatible-add-and-reference".*' - ), - "fields": ["swh:deposit"], - }, + [ + { + "summary": ( + r".*Reason: assertion test if false.*" + r"Schema:\n*" + r' *]+ id="swhdeposit-incompatible-add-and-reference".*' + ), + "fields": ["swh:deposit"], + } + ], ), ( f"""\ @@ -460,10 +463,12 @@ """, - { - "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*", - "fields": ["swh:deposit"], - }, + [ + { + "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*", + "fields": ["swh:deposit"], + }, + ], ), ( f"""\ @@ -480,10 +485,12 @@ """, - { - "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*", - "fields": ["swh:deposit"], - }, + [ + { + "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*", + "fields": ["swh:deposit"], + }, + ], ), ( f"""\ @@ -500,25 +507,33 @@ """, - { - "summary": r".*Reason: Unexpected child with tag 'swh:object'.*", - "fields": ["swh:deposit"], - }, + [ + { + "summary": r".*Reason: Unexpected child with tag 'swh:object'.*", + "fields": ["swh:deposit"], + }, + ], ), ] ] -@pytest.mark.parametrize("metadata_ko,expected_summary", _parameters4) +@pytest.mark.parametrize("metadata_ko,expected_summaries", _parameters3) def test_api_checks_check_metadata_ko_schema( - metadata_ko, expected_summary, swh_checks_deposit + metadata_ko, expected_summaries, swh_checks_deposit ): actual_check, error_detail = check_metadata(ElementTree.fromstring(metadata_ko)) assert actual_check is False - assert len(error_detail["metadata"]) == 1, error_detail["metadata"] - assert error_detail["metadata"][0]["fields"] == expected_summary["fields"] + assert len(error_detail["metadata"]) == len(expected_summaries), error_detail[ + "metadata" + ] + + for (detail, expected_summary) in zip(error_detail["metadata"], expected_summaries): + assert detail["fields"] == expected_summary["fields"] - # xmlschema returns very detailed errors, we cannot reasonably test them - # for equality - summary = error_detail["metadata"][0]["summary"] - assert re.match(expected_summary["summary"], summary, re.DOTALL), summary + # xmlschema returns very detailed errors, we cannot reasonably test them + # for equality + summary = detail["summary"] + assert re.match( + expected_summary["summary"], summary, re.DOTALL + ), f"Failed to match {expected_summary['summary']!r} with:\n{summary}" diff --git a/swh/deposit/xsd/codemeta.xsd b/swh/deposit/xsd/codemeta.xsd new file mode 100644 --- /dev/null +++ b/swh/deposit/xsd/codemeta.xsd @@ -0,0 +1,16 @@ + + + + + + + + + + +