Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124075
D7249.id26317.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
13 KB
Subscribers
None
D7249.id26317.diff
View Options
diff --git a/swh/deposit/api/checks.py b/swh/deposit/api/checks.py
--- a/swh/deposit/api/checks.py
+++ b/swh/deposit/api/checks.py
@@ -19,11 +19,10 @@
from typing import Dict, Optional, Tuple
from xml.etree import ElementTree
-import iso8601
import pkg_resources
import xmlschema
-from swh.deposit.utils import NAMESPACES, normalize_date, parse_swh_metadata_provenance
+from swh.deposit.utils import NAMESPACES, parse_swh_metadata_provenance
MANDATORY_FIELDS_MISSING = "Mandatory fields are missing"
INVALID_DATE_FORMAT = "Invalid date format"
@@ -35,6 +34,7 @@
@dataclasses.dataclass
class Schemas:
swh: xmlschema.XMLSchema11
+ codemeta: xmlschema.XMLSchema11
@functools.lru_cache(1)
@@ -44,7 +44,7 @@
pkg_resources.resource_string("swh.deposit", f"xsd/{name}.xsd").decode()
)
- return Schemas(swh=load_xsd("swh"))
+ return Schemas(swh=load_xsd("swh"), codemeta=load_xsd("codemeta"))
def check_metadata(metadata: ElementTree.Element) -> Tuple[bool, Optional[Dict]]:
@@ -93,25 +93,21 @@
except xmlschema.exceptions.XMLSchemaException as e:
return False, {"metadata": [{"fields": ["swh:deposit"], "summary": str(e)}]}
- fields = []
-
- for commit_date in metadata.findall(
- "codemeta:datePublished", namespaces=NAMESPACES
- ):
- try:
- normalize_date(commit_date.text)
- except iso8601.iso8601.ParseError:
- fields.append("codemeta:datePublished")
-
- for author_date in metadata.findall("codemeta:dateCreated", namespaces=NAMESPACES):
+ detail = []
+ for child in metadata:
+ for schema_element in schemas().codemeta.root_elements:
+ if child.tag in schema_element.name:
+ break
+ else:
+ # Tag is not declared in the codemeta schema, so skip validating it
+ continue
try:
- normalize_date(author_date.text)
- except iso8601.iso8601.ParseError:
- fields.append("codemeta:dateCreated")
+ schemas().codemeta.validate(child)
+ except xmlschema.exceptions.XMLSchemaException as e:
+ detail.append({"fields": [schema_element.prefixed_name], "summary": str(e)})
- if fields:
- detail = [{"summary": INVALID_DATE_FORMAT, "fields": fields}]
- return False, {"metadata": detail + suggested_fields}
+ if detail:
+ return False, {"metadata": detail}
if suggested_fields: # it's fine but warn about missing suggested fields
return True, {"metadata": suggested_fields}
diff --git a/swh/deposit/tests/api/test_checks.py b/swh/deposit/tests/api/test_checks.py
--- a/swh/deposit/tests/api/test_checks.py
+++ b/swh/deposit/tests/api/test_checks.py
@@ -253,23 +253,6 @@
"fields": ["atom:author or codemeta:author"],
},
),
- (
- f"""\
- <entry {XMLNS}>
- <url>something</url>
- <external_identifier>something-else</external_identifier>
- <title>bar</title>
- <author>someone</author>
- <codemeta:datePublished>2020-aa-21</codemeta:datePublished>
- <codemeta:dateCreated>2020-12-bb</codemeta:dateCreated>
- {PROVENANCE_XML}
- </entry>
- """,
- {
- "summary": "Invalid date format",
- "fields": ["codemeta:datePublished", "codemeta:dateCreated"],
- },
- ),
]
]
@@ -286,6 +269,29 @@
_parameters3 = [
(textwrap.dedent(metadata_ko), expected_summary)
for (metadata_ko, expected_summary) in [
+ (
+ f"""\
+ <entry {XMLNS}>
+ <url>something</url>
+ <external_identifier>something-else</external_identifier>
+ <title>bar</title>
+ <author>someone</author>
+ <codemeta:datePublished>2020-aa-21</codemeta:datePublished>
+ <codemeta:dateCreated>2020-12-bb</codemeta:dateCreated>
+ {PROVENANCE_XML}
+ </entry>
+ """,
+ [
+ {
+ "summary": ".*Reason: invalid value '2020-aa-21'.*",
+ "fields": ["codemeta:datePublished"],
+ },
+ {
+ "summary": ".*Reason: invalid value '2020-12-bb'.*",
+ "fields": ["codemeta:dateCreated"],
+ },
+ ],
+ ),
(
f"""\
<entry {XMLNS}>
@@ -297,30 +303,17 @@
<codemeta:dateCreated>2020-12-bb</codemeta:dateCreated>
</entry>
""",
- {
- "summary": "Invalid date format",
- "fields": ["codemeta:datePublished", "codemeta:dateCreated"],
- },
+ [
+ {
+ "summary": ".*Reason: invalid value '2020-aa-21'.*",
+ "fields": ["codemeta:datePublished"],
+ },
+ {
+ "summary": ".*Reason: invalid value '2020-12-bb'.*",
+ "fields": ["codemeta:dateCreated"],
+ },
+ ],
),
- ]
-]
-
-
-@pytest.mark.parametrize("metadata_ko,expected_invalid_summary", _parameters3)
-def test_api_checks_check_metadata_fields_ko_and_missing_suggested_fields(
- metadata_ko, expected_invalid_summary, swh_checks_deposit
-):
- actual_check, error_detail = check_metadata(ElementTree.fromstring(metadata_ko))
- assert actual_check is False
- assert error_detail == {
- "metadata": [expected_invalid_summary]
- + [{"fields": [METADATA_PROVENANCE_KEY], "summary": SUGGESTED_FIELDS_MISSING,}]
- }
-
-
-_parameters4 = [
- (textwrap.dedent(metadata_ko), expected_summary)
- for (metadata_ko, expected_summary) in [
(
f"""\
<entry {XMLNS}>
@@ -335,13 +328,15 @@
</swh:deposit>
</entry>
""",
- {
- "summary": (
- r".*Reason: Unexpected child with tag 'swh:invalid'.*"
- r"Instance:.*swh:invalid.*"
- ),
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": (
+ r".*Reason: Unexpected child with tag 'swh:invalid'.*"
+ r"Instance:.*swh:invalid.*"
+ ),
+ "fields": ["swh:deposit"],
+ }
+ ],
),
(
f"""\
@@ -360,12 +355,14 @@
</swh:deposit>
</entry>
""",
- {
- "summary": (
- r".*Reason: Unexpected child with tag 'swh:add_to_origin'.*"
- ),
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": (
+ r".*Reason: Unexpected child with tag 'swh:add_to_origin'.*"
+ ),
+ "fields": ["swh:deposit"],
+ }
+ ],
),
(
f"""\
@@ -384,14 +381,16 @@
</swh:deposit>
</entry>
""",
- {
- "summary": (
- r".*Reason: assertion test if false.*"
- r"Schema:\n*"
- r' *<xsd:assert[^>]+ id="swhdeposit-incompatible-create-and-add".*'
- ),
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": (
+ r".*Reason: assertion test if false.*"
+ r"Schema:\n*"
+ r' *<xsd:assert[^>]+ id="swhdeposit-incompatible-create-and-add".*'
+ ),
+ "fields": ["swh:deposit"],
+ }
+ ],
),
(
f"""\
@@ -410,14 +409,16 @@
</swh:deposit>
</entry>
""",
- {
- "summary": (
- r".*Reason: assertion test if false.*"
- r"Schema:\n*"
- r' *<xsd:assert[^>]+ id="swhdeposit-incompatible-create-and-reference".*'
- ),
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": (
+ r".*Reason: assertion test if false.*"
+ r"Schema:\n*"
+ r' *<xsd:assert[^>]+ id="swhdeposit-incompatible-create-and-reference".*'
+ ),
+ "fields": ["swh:deposit"],
+ }
+ ],
),
(
f"""\
@@ -436,14 +437,16 @@
</swh:deposit>
</entry>
""",
- {
- "summary": (
- r".*Reason: assertion test if false.*"
- r"Schema:\n*"
- r' *<xsd:assert[^>]+ id="swhdeposit-incompatible-add-and-reference".*'
- ),
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": (
+ r".*Reason: assertion test if false.*"
+ r"Schema:\n*"
+ r' *<xsd:assert[^>]+ id="swhdeposit-incompatible-add-and-reference".*'
+ ),
+ "fields": ["swh:deposit"],
+ }
+ ],
),
(
f"""\
@@ -460,10 +463,12 @@
</swh:deposit>
</entry>
""",
- {
- "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
+ "fields": ["swh:deposit"],
+ },
+ ],
),
(
f"""\
@@ -480,10 +485,12 @@
</swh:deposit>
</entry>
""",
- {
- "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
+ "fields": ["swh:deposit"],
+ },
+ ],
),
(
f"""\
@@ -500,25 +507,33 @@
</swh:deposit>
</entry>
""",
- {
- "summary": r".*Reason: Unexpected child with tag 'swh:object'.*",
- "fields": ["swh:deposit"],
- },
+ [
+ {
+ "summary": r".*Reason: Unexpected child with tag 'swh:object'.*",
+ "fields": ["swh:deposit"],
+ },
+ ],
),
]
]
-@pytest.mark.parametrize("metadata_ko,expected_summary", _parameters4)
+@pytest.mark.parametrize("metadata_ko,expected_summaries", _parameters3)
def test_api_checks_check_metadata_ko_schema(
- metadata_ko, expected_summary, swh_checks_deposit
+ metadata_ko, expected_summaries, swh_checks_deposit
):
actual_check, error_detail = check_metadata(ElementTree.fromstring(metadata_ko))
assert actual_check is False
- assert len(error_detail["metadata"]) == 1, error_detail["metadata"]
- assert error_detail["metadata"][0]["fields"] == expected_summary["fields"]
+ assert len(error_detail["metadata"]) == len(expected_summaries), error_detail[
+ "metadata"
+ ]
+
+ for (detail, expected_summary) in zip(error_detail["metadata"], expected_summaries):
+ assert detail["fields"] == expected_summary["fields"]
- # xmlschema returns very detailed errors, we cannot reasonably test them
- # for equality
- summary = error_detail["metadata"][0]["summary"]
- assert re.match(expected_summary["summary"], summary, re.DOTALL), summary
+ # xmlschema returns very detailed errors, so we cannot reasonably test
+ # them for equality
+ summary = detail["summary"]
+ assert re.match(
+ expected_summary["summary"], summary, re.DOTALL
+ ), f"Failed to match {expected_summary['summary']!r} with:\n{summary}"
diff --git a/swh/deposit/xsd/codemeta.xsd b/swh/deposit/xsd/codemeta.xsd
new file mode 100644
--- /dev/null
+++ b/swh/deposit/xsd/codemeta.xsd
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="utf-8"?>
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+ targetNamespace="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
+ xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
+ elementFormDefault="qualified"
+ >
+ <!-- This is a non-exhaustive schema used to find likely errors in XML-encoded
+ Codemeta documents -->
+
+ <xsd:element name="datePublished" type="codemeta:DateOrDateTime" />
+ <xsd:element name="dateCreated" type="codemeta:DateOrDateTime" />
+
+ <xsd:simpleType name="DateOrDateTime">
+ <xsd:union memberTypes="xsd:date xsd:dateTime"/>
+ </xsd:simpleType>
+</xsd:schema>
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 20 2024, 12:13 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221040
Attached To
D7249: Use xmlschema to validate dates, instead of custom code.
Event Timeline
Log In to Comment