diff --git a/swh/deposit/api/checks.py b/swh/deposit/api/checks.py
--- a/swh/deposit/api/checks.py
+++ b/swh/deposit/api/checks.py
@@ -19,11 +19,10 @@
from typing import Dict, Optional, Tuple
from xml.etree import ElementTree
-import iso8601
import pkg_resources
import xmlschema
-from swh.deposit.utils import NAMESPACES, normalize_date, parse_swh_metadata_provenance
+from swh.deposit.utils import NAMESPACES, parse_swh_metadata_provenance
MANDATORY_FIELDS_MISSING = "Mandatory fields are missing"
INVALID_DATE_FORMAT = "Invalid date format"
@@ -35,6 +34,8 @@
@dataclasses.dataclass
class Schemas:
+ """Holds the loaded XSD schemas, one attribute per ``xsd/<name>.xsd`` file."""
swh: xmlschema.XMLSchema11
+ codemeta: xmlschema.XMLSchema11
@functools.lru_cache(1)
@@ -44,7 +44,7 @@
pkg_resources.resource_string("swh.deposit", f"xsd/{name}.xsd").decode()
)
- return Schemas(swh=load_xsd("swh"))
+ return Schemas(swh=load_xsd("swh"), codemeta=load_xsd("codemeta"))
def check_metadata(metadata: ElementTree.Element) -> Tuple[bool, Optional[Dict]]:
@@ -93,25 +93,21 @@
except xmlschema.exceptions.XMLSchemaException as e:
return False, {"metadata": [{"fields": ["swh:deposit"], "summary": str(e)}]}
- fields = []
-
- for commit_date in metadata.findall(
- "codemeta:datePublished", namespaces=NAMESPACES
- ):
- try:
- normalize_date(commit_date.text)
- except iso8601.iso8601.ParseError:
- fields.append("codemeta:datePublished")
-
- for author_date in metadata.findall("codemeta:dateCreated", namespaces=NAMESPACES):
+    detail = []
+    # Index the codemeta root elements by qualified name: exact tag lookup,
+    # where a substring test would also accept fragments of a declared name.
+    known_elements = {elt.name: elt for elt in schemas().codemeta.root_elements}
+    for child in metadata:
+        schema_element = known_elements.get(child.tag)
+        if schema_element is None:
+            continue  # Tag is not specified in the schema, don't validate it
         try:
-            normalize_date(author_date.text)
-        except iso8601.iso8601.ParseError:
-            fields.append("codemeta:dateCreated")
+            schemas().codemeta.validate(child)
+        except xmlschema.exceptions.XMLSchemaException as e:
+            detail.append({"fields": [schema_element.prefixed_name], "summary": str(e)})
- if fields:
- detail = [{"summary": INVALID_DATE_FORMAT, "fields": fields}]
- return False, {"metadata": detail + suggested_fields}
+ if detail:
+ return False, {"metadata": detail}
if suggested_fields: # it's fine but warn about missing suggested fields
return True, {"metadata": suggested_fields}
diff --git a/swh/deposit/tests/api/test_checks.py b/swh/deposit/tests/api/test_checks.py
--- a/swh/deposit/tests/api/test_checks.py
+++ b/swh/deposit/tests/api/test_checks.py
@@ -253,23 +253,6 @@
"fields": ["atom:author or codemeta:author"],
},
),
- (
- f"""\
-
- something
- something-else
- bar
- someone
- 2020-aa-21
- 2020-12-bb
- {PROVENANCE_XML}
-
- """,
- {
- "summary": "Invalid date format",
- "fields": ["codemeta:datePublished", "codemeta:dateCreated"],
- },
- ),
]
]
@@ -286,6 +269,29 @@
_parameters3 = [
(textwrap.dedent(metadata_ko), expected_summary)
for (metadata_ko, expected_summary) in [
+ (
+ f"""\
+
+ something
+ something-else
+ bar
+ someone
+ 2020-aa-21
+ 2020-12-bb
+ {PROVENANCE_XML}
+
+ """,
+ [
+ {
+ "summary": ".*Reason: invalid value '2020-aa-21'.*",
+ "fields": ["codemeta:datePublished"],
+ },
+ {
+ "summary": ".*Reason: invalid value '2020-12-bb'.*",
+ "fields": ["codemeta:dateCreated"],
+ },
+ ],
+ ),
(
f"""\
@@ -297,30 +303,17 @@
2020-12-bb
""",
- {
- "summary": "Invalid date format",
- "fields": ["codemeta:datePublished", "codemeta:dateCreated"],
- },
+ [
+ {
+ "summary": ".*Reason: invalid value '2020-aa-21'.*",
+ "fields": ["codemeta:datePublished"],
+ },
+ {
+ "summary": ".*Reason: invalid value '2020-12-bb'.*",
+ "fields": ["codemeta:dateCreated"],
+ },
+ ],
),
- ]
-]
-
-
-@pytest.mark.parametrize("metadata_ko,expected_invalid_summary", _parameters3)
-def test_api_checks_check_metadata_fields_ko_and_missing_suggested_fields(
- metadata_ko, expected_invalid_summary, swh_checks_deposit
-):
- actual_check, error_detail = check_metadata(ElementTree.fromstring(metadata_ko))
- assert actual_check is False
- assert error_detail == {
- "metadata": [expected_invalid_summary]
- + [{"fields": [METADATA_PROVENANCE_KEY], "summary": SUGGESTED_FIELDS_MISSING,}]
- }
-
-
-_parameters4 = [
- (textwrap.dedent(metadata_ko), expected_summary)
- for (metadata_ko, expected_summary) in [
(
f"""\
@@ -335,13 +328,15 @@
""",
- {
- "summary": (
- r".*Reason: Unexpected child with tag 'swh:invalid'.*"
- r"Instance:.*swh:invalid.*"
- ),
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": (
+ r".*Reason: Unexpected child with tag 'swh:invalid'.*"
+ r"Instance:.*swh:invalid.*"
+ ),
+ "fields": ["swh:deposit"],
+ }
+ ],
),
(
f"""\
@@ -360,12 +355,14 @@
""",
- {
- "summary": (
- r".*Reason: Unexpected child with tag 'swh:add_to_origin'.*"
- ),
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": (
+ r".*Reason: Unexpected child with tag 'swh:add_to_origin'.*"
+ ),
+ "fields": ["swh:deposit"],
+ }
+ ],
),
(
f"""\
@@ -384,14 +381,16 @@
""",
- {
- "summary": (
- r".*Reason: assertion test if false.*"
- r"Schema:\n*"
- r' *]+ id="swhdeposit-incompatible-create-and-add".*'
- ),
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": (
+ r".*Reason: assertion test if false.*"
+ r"Schema:\n*"
+ r' *]+ id="swhdeposit-incompatible-create-and-add".*'
+ ),
+ "fields": ["swh:deposit"],
+ }
+ ],
),
(
f"""\
@@ -410,14 +409,16 @@
""",
- {
- "summary": (
- r".*Reason: assertion test if false.*"
- r"Schema:\n*"
- r' *]+ id="swhdeposit-incompatible-create-and-reference".*'
- ),
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": (
+ r".*Reason: assertion test if false.*"
+ r"Schema:\n*"
+ r' *]+ id="swhdeposit-incompatible-create-and-reference".*'
+ ),
+ "fields": ["swh:deposit"],
+ }
+ ],
),
(
f"""\
@@ -436,14 +437,16 @@
""",
- {
- "summary": (
- r".*Reason: assertion test if false.*"
- r"Schema:\n*"
- r' *]+ id="swhdeposit-incompatible-add-and-reference".*'
- ),
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": (
+ r".*Reason: assertion test if false.*"
+ r"Schema:\n*"
+ r' *]+ id="swhdeposit-incompatible-add-and-reference".*'
+ ),
+ "fields": ["swh:deposit"],
+ }
+ ],
),
(
f"""\
@@ -460,10 +463,12 @@
""",
- {
- "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
+ "fields": ["swh:deposit"],
+ },
+ ],
),
(
f"""\
@@ -480,10 +485,12 @@
""",
- {
- "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
+ "fields": ["swh:deposit"],
+ },
+ ],
),
(
f"""\
@@ -500,25 +507,35 @@
""",
- {
- "summary": r".*Reason: Unexpected child with tag 'swh:object'.*",
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": r".*Reason: Unexpected child with tag 'swh:object'.*",
+ "fields": ["swh:deposit"],
+ },
+ ],
),
]
]
-@pytest.mark.parametrize("metadata_ko,expected_summary", _parameters4)
+@pytest.mark.parametrize("metadata_ko,expected_summaries", _parameters3)
def test_api_checks_check_metadata_ko_schema(
- metadata_ko, expected_summary, swh_checks_deposit
+ metadata_ko, expected_summaries, swh_checks_deposit
):
+ """Invalid metadata must be rejected with one error detail per expected
+ summary: identical ``fields`` and a ``summary`` matching the given regex."""
actual_check, error_detail = check_metadata(ElementTree.fromstring(metadata_ko))
assert actual_check is False
- assert len(error_detail["metadata"]) == 1, error_detail["metadata"]
- assert error_detail["metadata"][0]["fields"] == expected_summary["fields"]
+ assert len(error_detail["metadata"]) == len(expected_summaries), error_detail[
+ "metadata"
+ ]
+
+ for (detail, expected_summary) in zip(error_detail["metadata"], expected_summaries):
+ assert detail["fields"] == expected_summary["fields"]
- # xmlschema returns very detailed errors, we cannot reasonably test them
- # for equality
- summary = error_detail["metadata"][0]["summary"]
- assert re.match(expected_summary["summary"], summary, re.DOTALL), summary
+ # xmlschema returns very detailed errors, we cannot reasonably test them
+ # for equality
+ summary = detail["summary"]
+ assert re.match(
+ expected_summary["summary"], summary, re.DOTALL
+ ), f"Failed to match {expected_summary['summary']!r} with:\n{summary}"
diff --git a/swh/deposit/xsd/codemeta.xsd b/swh/deposit/xsd/codemeta.xsd
new file mode 100644
--- /dev/null
+++ b/swh/deposit/xsd/codemeta.xsd
@@ -0,0 +1,16 @@
+
+
+
+
+
+
+
+
+
+
+