diff --git a/swh/deposit/api/checks.py b/swh/deposit/api/checks.py
--- a/swh/deposit/api/checks.py
+++ b/swh/deposit/api/checks.py
@@ -16,6 +16,7 @@
 
 import dataclasses
 import functools
+import re
 from typing import Dict, Iterator, Optional, Tuple, cast
 import urllib
 from xml.etree import ElementTree
@@ -55,22 +56,41 @@
         #
         # However, this would give an unreadable error, so we implement it here
         # in Python instead.
-        try:
-            url = urllib.parse.urlparse(element.text)
-        except ValueError:
+        yield from absolute_uri_validator(element, xsd_element)
+    elif type_name == "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}identifierType":
+        # Made-up type, that allows both absolute URIs and HAL-IDs
+        # The whole text must be a HAL-ID to skip the URI check; the optional
+        # "vN" version suffix is an assumption about HAL-IDs -- TODO confirm.
+        if not re.fullmatch(r"hal-[0-9]+(v[0-9]+)?", element.text or ""):
+            yield from absolute_uri_validator(element, xsd_element)
+
+
+def absolute_uri_validator(
+    element: ElementTree.Element,
+    xsd_element: xmlschema.validators.elements.Xsd11Element,
+) -> Iterator[xmlschema.XMLSchemaValidationError]:
+    """Yields a validation error if the element's text is not an absolute URI.
+
+    The text is rejected when :func:`urllib.parse.urlparse` raises
+    :exc:`ValueError`, when it has no scheme or no network location, or when
+    its network location contains a space. Yields nothing otherwise.
+    """
+    try:
+        url = urllib.parse.urlparse(element.text)
+    except ValueError:
+        yield xmlschema.XMLSchemaValidationError(
+            xsd_element, element, f"{element.text!r} is not a valid URI",
+        )
+    else:
+        if not url.scheme or not url.netloc:
+            yield xmlschema.XMLSchemaValidationError(
+                xsd_element, element, f"{element.text!r} is not an absolute URI",
+            )
+        elif " " in url.netloc:
+            # urllib is a little too permissive...
             yield xmlschema.XMLSchemaValidationError(
                 xsd_element, element, f"{element.text!r} is not a valid URI",
             )
-        else:
-            if not url.scheme or not url.netloc:
-                yield xmlschema.XMLSchemaValidationError(
-                    xsd_element, element, f"{element.text!r} is not an absolute URI",
-                )
-            elif " " in url.netloc:
-                # urllib is a little too permissive...
- yield xmlschema.XMLSchemaValidationError( - xsd_element, element, f"{element.text!r} is not a valid URI", - ) @dataclasses.dataclass diff --git a/swh/deposit/tests/api/test_checks.py b/swh/deposit/tests/api/test_checks.py --- a/swh/deposit/tests/api/test_checks.py +++ b/swh/deposit/tests/api/test_checks.py @@ -146,6 +146,38 @@ """, ), + ( + "identifier-is-halid", + f"""\ + + some url + bar + + The Author + + hal-12345 + {PROVENANCE_XML} + + """, + ), + ( + "identifier-is-propertyvalue", + f"""\ + + some url + bar + + The Author + + + schema:PropertyValue + HAL-ID + hal-02527911 + + {PROVENANCE_XML} + + """, + ), ( "codemeta-dates", f"""\ diff --git a/swh/deposit/xsd/codemeta.xsd b/swh/deposit/xsd/codemeta.xsd --- a/swh/deposit/xsd/codemeta.xsd +++ b/swh/deposit/xsd/codemeta.xsd @@ -52,11 +52,23 @@ Therefore, more custom checks are implemented in swh/deposit/api/checks.py in order to allow either. --> + + + + + + + + + + + + -