Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123338
D7243.id26258.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
D7243.id26258.diff
View Options
diff --git a/MANIFEST.in b/MANIFEST.in
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -6,6 +6,7 @@
recursive-include swh/deposit/templates *
recursive-include swh/deposit/tests/data *
recursive-include swh/deposit/tests/*/data *
+recursive-include swh/deposit/xsd *
recursive-include swh py.typed
include tox.ini
include pytest.ini
diff --git a/docs/specs/protocol-reference.rst b/docs/specs/protocol-reference.rst
--- a/docs/specs/protocol-reference.rst
+++ b/docs/specs/protocol-reference.rst
@@ -351,7 +351,7 @@
Here is an XML schema to summarize the syntax described in this document:
-.. literalinclude:: swh.xsd
+.. literalinclude:: ../swh/deposit/xsd/swh.xsd
:language: xml
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -33,6 +33,9 @@
[mypy-rest_framework.*]
ignore_missing_imports = True
+[mypy-xmlschema.*]
+ignore_missing_imports = True
+
[mypy-xmltodict.*]
ignore_missing_imports = True
diff --git a/requirements-server.txt b/requirements-server.txt
--- a/requirements-server.txt
+++ b/requirements-server.txt
@@ -2,3 +2,4 @@
djangorestframework
psycopg2 < 2.9
setuptools
+xmlschema
diff --git a/swh/deposit/api/checks.py b/swh/deposit/api/checks.py
--- a/swh/deposit/api/checks.py
+++ b/swh/deposit/api/checks.py
@@ -14,10 +14,14 @@
"""
+import dataclasses
+import functools
from typing import Dict, Optional, Tuple
from xml.etree import ElementTree
import iso8601
+import pkg_resources
+import xmlschema
from swh.deposit.utils import NAMESPACES, normalize_date, parse_swh_metadata_provenance
@@ -28,6 +32,21 @@
METADATA_PROVENANCE_KEY = "swh:metadata-provenance"
+@dataclasses.dataclass
+class Schemas:
+    """Container for the compiled XML schemas used to validate deposits."""
+
+    # Compiled swh.xsd. It is built with xmlschema.XMLSchema11 (XSD 1.1), so
+    # the annotation names that class rather than the XSD 1.0 XMLSchema alias.
+    swh: xmlschema.XMLSchema11
+
+
+@functools.lru_cache(maxsize=1)
+def schemas() -> Schemas:
+    """Return the compiled XML schemas, building them on the first call only.
+
+    The result is memoized by ``functools.lru_cache``, so the XSD file is read
+    and compiled once; later calls return the same ``Schemas`` object.
+    """
+
+    def load_xsd(name) -> xmlschema.XMLSchema11:
+        # Schema files are shipped as package data under swh/deposit/xsd/.
+        xsd_bytes = pkg_resources.resource_string("swh.deposit", f"xsd/{name}.xsd")
+        return xmlschema.XMLSchema11(xsd_bytes.decode())
+
+    swh_schema = load_xsd("swh")
+    return Schemas(swh=swh_schema)
+
+
def check_metadata(metadata: ElementTree.Element) -> Tuple[bool, Optional[Dict]]:
"""Check metadata for mandatory field presence and date format.
@@ -67,6 +86,13 @@
detail = [{"summary": MANDATORY_FIELDS_MISSING, "fields": mandatory_result}]
return False, {"metadata": detail + suggested_fields}
+    # Validate the <swh:deposit> element, when present, against swh.xsd.
+    deposit_elt = metadata.find("swh:deposit", namespaces=NAMESPACES)
+    # Compare with None explicitly: an ElementTree element without children
+    # is falsy, so a bare truthiness test would silently skip validation of
+    # an empty <swh:deposit/> element.
+    if deposit_elt is not None:
+        try:
+            schemas().swh.validate(deposit_elt)
+        except xmlschema.exceptions.XMLSchemaException as e:
+            # Report the validator's message as the summary for swh:deposit.
+            return False, {"metadata": [{"fields": ["swh:deposit"], "summary": str(e)}]}
+
fields = []
for commit_date in metadata.findall(
@@ -89,4 +115,5 @@
if suggested_fields: # it's fine but warn about missing suggested fields
return True, {"metadata": suggested_fields}
+
return True, None
diff --git a/swh/deposit/tests/api/test_checks.py b/swh/deposit/tests/api/test_checks.py
--- a/swh/deposit/tests/api/test_checks.py
+++ b/swh/deposit/tests/api/test_checks.py
@@ -3,6 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+# disable flake8 on this file because of line length
+# flake8: noqa
+
+import re
import textwrap
from typing import Any, Dict
from xml.etree import ElementTree
@@ -81,6 +85,70 @@
</entry>
""",
),
+ (
+ f"""\
+ <entry {XMLNS}>
+ <url>something</url>
+ <external_identifier>something-else</external_identifier>
+ <title>bar</title>
+ <author>someone</author>
+ </entry>
+ """,
+ ),
+ (
+ f"""\
+ <entry {XMLNS}>
+ <url>something</url>
+ <external_identifier>something-else</external_identifier>
+ <title>bar</title>
+ <author>someone</author>
+ <swh:deposit>
+ <swh:add_to_origin>
+ <swh:origin url="http://example.org" />
+ </swh:add_to_origin>
+ <swh:metadata-provenance>
+ <schema:url>some-metadata-provenance-url</schema:url>
+ </swh:metadata-provenance>
+ </swh:deposit>
+ </entry>
+ """,
+ ),
+ (
+ f"""\
+ <entry {XMLNS}>
+ <url>something</url>
+ <external_identifier>something-else</external_identifier>
+ <title>bar</title>
+ <author>someone</author>
+ <swh:deposit>
+ <swh:reference>
+ <swh:origin url="http://example.org" />
+ </swh:reference>
+ <swh:metadata-provenance>
+ <schema:url>some-metadata-provenance-url</schema:url>
+ </swh:metadata-provenance>
+ </swh:deposit>
+ </entry>
+ """,
+ ),
+ (
+ f"""\
+ <entry {XMLNS}>
+ <url>something</url>
+ <external_identifier>something-else</external_identifier>
+ <title>bar</title>
+ <author>someone</author>
+ <swh:deposit>
+ <swh:reference>
+ <swh:object swhid="swh:1:dir:0000000000000000000000000000000000000000" />
+ </swh:reference>
+ <swh:metadata-provenance>
+ <schema:url>some-metadata-provenance-url</schema:url>
+ </swh:metadata-provenance>
+ </swh:deposit>
+ </entry>
+ """,
+ ),
]
]
@@ -248,3 +316,131 @@
"metadata": [expected_invalid_summary]
+ [{"fields": [METADATA_PROVENANCE_KEY], "summary": SUGGESTED_FIELDS_MISSING,}]
}
+
+
+# Invalid <swh:deposit> payloads for check_metadata. Each entry pairs a
+# dedented XML document with the expected error detail: "fields" is compared
+# for equality, while "summary" is a regular expression that the consuming
+# test applies with re.match(..., re.DOTALL) to the actual summary.
+_parameters4 = [
+ (textwrap.dedent(metadata_ko), expected_summary)
+ for (metadata_ko, expected_summary) in [
+ # <swh:deposit> containing an unknown <swh:invalid> child
+ (
+ f"""\
+ <entry {XMLNS}>
+ <url>something</url>
+ <external_identifier>something-else</external_identifier>
+ <title>bar</title>
+ <author>someone</author>
+ <swh:deposit>
+ <swh:invalid>
+ <swh:origin url="http://example.org" />
+ </swh:invalid>
+ </swh:deposit>
+ </entry>
+ """,
+ {
+ "summary": (
+ r".*Reason: Unexpected child with tag 'swh:invalid'.*"
+ r"Instance:.*swh:invalid.*"
+ ),
+ "fields": ["swh:deposit"],
+ },
+ ),
+ # <swh:add_to_origin> given twice in the same <swh:deposit>
+ (
+ f"""\
+ <entry {XMLNS}>
+ <url>something</url>
+ <external_identifier>something-else</external_identifier>
+ <title>bar</title>
+ <author>someone</author>
+ <swh:deposit>
+ <swh:add_to_origin>
+ <swh:origin url="http://example.org" />
+ </swh:add_to_origin>
+ <swh:add_to_origin>
+ <swh:origin url="http://example.org" />
+ </swh:add_to_origin>
+ </swh:deposit>
+ </entry>
+ """,
+ {
+ "summary": (
+ r".*Reason: Unexpected child with tag 'swh:add_to_origin'.*"
+ ),
+ "fields": ["swh:deposit"],
+ },
+ ),
+ # <swh:reference> holding a <swh:object> followed by a <swh:origin>
+ (
+ f"""\
+ <entry {XMLNS}>
+ <url>something</url>
+ <external_identifier>something-else</external_identifier>
+ <title>bar</title>
+ <author>someone</author>
+ <swh:deposit>
+ <swh:reference>
+ <swh:object swhid="swh:1:dir:0000000000000000000000000000000000000000" />
+ <swh:origin url="http://example.org" />
+ </swh:reference>
+ </swh:deposit>
+ </entry>
+ """,
+ {
+ "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
+ "fields": ["swh:deposit"],
+ },
+ ),
+ # <swh:reference> holding two <swh:origin> children
+ (
+ f"""\
+ <entry {XMLNS}>
+ <url>something</url>
+ <external_identifier>something-else</external_identifier>
+ <title>bar</title>
+ <author>someone</author>
+ <swh:deposit>
+ <swh:reference>
+ <swh:origin url="http://example.org" />
+ <swh:origin url="http://example.org" />
+ </swh:reference>
+ </swh:deposit>
+ </entry>
+ """,
+ {
+ "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
+ "fields": ["swh:deposit"],
+ },
+ ),
+ # <swh:reference> holding a <swh:origin> followed by a <swh:object>
+ (
+ f"""\
+ <entry {XMLNS}>
+ <url>something</url>
+ <external_identifier>something-else</external_identifier>
+ <title>bar</title>
+ <author>someone</author>
+ <swh:deposit>
+ <swh:reference>
+ <swh:origin url="http://example.org" />
+ <swh:object swhid="swh:1:dir:0000000000000000000000000000000000000000" />
+ </swh:reference>
+ </swh:deposit>
+ </entry>
+ """,
+ {
+ "summary": r".*Reason: Unexpected child with tag 'swh:object'.*",
+ "fields": ["swh:deposit"],
+ },
+ ),
+ ]
+]
+
+
+@pytest.mark.parametrize("metadata_ko,expected_summary", _parameters4)
+def test_api_checks_check_metadata_ko_schema(
+    metadata_ko, expected_summary, swh_checks_deposit
+):
+    """check_metadata rejects the document and reports exactly one error
+    whose fields and summary match the expectation."""
+    document = ElementTree.fromstring(metadata_ko)
+    actual_check, error_detail = check_metadata(document)
+    assert actual_check is False
+
+    errors = error_detail["metadata"]
+    assert len(errors) == 1, errors
+    error = errors[0]
+    assert error["fields"] == expected_summary["fields"]
+
+    # xmlschema error messages are very detailed, so the expected summary is
+    # a regular expression rather than a string compared for equality
+    actual_summary = error["summary"]
+    pattern = expected_summary["summary"]
+    assert re.match(pattern, actual_summary, re.DOTALL), actual_summary
diff --git a/docs/specs/swh.xsd b/swh/deposit/xsd/swh.xsd
rename from docs/specs/swh.xsd
rename to swh/deposit/xsd/swh.xsd
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 18, 11:29 PM (21 h, 43 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3233557
Attached To
D7243: Add schema validation of <swh:deposit> using swh.xsd
Event Timeline
Log In to Comment