Page MenuHomeSoftware Heritage

D7320.diff
No OneTemporary

D7320.diff

diff --git a/swh/deposit/api/checks.py b/swh/deposit/api/checks.py
--- a/swh/deposit/api/checks.py
+++ b/swh/deposit/api/checks.py
@@ -32,6 +32,8 @@
SUGGESTED_FIELDS_MISSING = "Suggested fields are missing"
METADATA_PROVENANCE_KEY = "swh:metadata-provenance"
+AFFILIATION_NO_NAME = "Reason: affiliation does not have a <codemeta:name> element"
+
def extra_validator(element, xsd_element):
"""Performs extra checks on Atom elements that cannot be implemented purely
@@ -142,6 +144,44 @@
schemas().codemeta.validate(child, extra_validator=extra_validator)
except xmlschema.exceptions.XMLSchemaException as e:
detail.append({"fields": [schema_element.prefixed_name], "summary": str(e)})
+ else:
+ # Manually validate <codemeta:affiliation>. Unfortunately, this cannot be
+ # validated by codemeta.xsd, because Codemeta has conflicting requirements:
+ # 1. https://codemeta.github.io/terms/ requires it to be Text (represented
+ # by simple content), but
+ # 2. https://doi.org/10.5063/SCHEMA/CODEMETA-2.0 requires it to be an
+ # Organization (represented by complex content)
+ # And this is (legitimately) not representable in XML Schema.
+ #
+ # See https://github.com/codemeta/codemeta/pull/239 for a discussion about
+ # this issue.
+ for affiliation in child.findall(
+ "codemeta:affiliation", namespaces=NAMESPACES
+ ):
+ if len(affiliation) > 0:
+ # This is a complex element (as required by
+ # https://doi.org/10.5063/SCHEMA/CODEMETA-2.0), so make sure
+ # there is at least a name.
+ if not affiliation.findtext("codemeta:name", namespaces=NAMESPACES):
+ detail.append(
+ {
+ "fields": [schema_element.prefixed_name],
+ "summary": AFFILIATION_NO_NAME,
+ }
+ )
+ break
+ else:
+ # This is a simple element (as required by
+ # https://codemeta.github.io/terms/)
+ if affiliation.text is None or not affiliation.text.strip():
+ # Completely empty element
+ detail.append(
+ {
+ "fields": [schema_element.prefixed_name],
+ "summary": AFFILIATION_NO_NAME,
+ }
+ )
+ break
if detail:
return False, {"metadata": detail + suggested_fields}
diff --git a/swh/deposit/tests/api/test_checks.py b/swh/deposit/tests/api/test_checks.py
--- a/swh/deposit/tests/api/test_checks.py
+++ b/swh/deposit/tests/api/test_checks.py
@@ -226,7 +226,9 @@
""",
),
(
- "codemeta-affiliation",
+ # Required by codemeta.jsonld, but forbidden by
+ # https://codemeta.github.io/terms/
+ "element-in--affiliation",
f"""\
<entry {XMLNS}>
<url>some url</url>
@@ -241,6 +243,22 @@
</entry>
""",
),
+ (
+ # Forbidden by codemeta.jsonld, but required by
+ # https://codemeta.github.io/terms/
+ "chardata-in-affiliation",
+ f"""\
+ <entry {XMLNS}>
+ <url>some url</url>
+ <codemeta:name>bar</codemeta:name>
+ <codemeta:author>
+ <codemeta:name>someone</codemeta:name>
+ <codemeta:affiliation>My Orga</codemeta:affiliation>
+ </codemeta:author>
+ {PROVENANCE_XML}
+ </entry>
+ """,
+ ),
(
"swh:add_to_origin",
f"""\
@@ -494,21 +512,44 @@
],
),
(
- "chardata-in-affiliation",
+ "affiliation-with-no-name",
f"""\
<entry {XMLNS}>
<url>some url</url>
<codemeta:name>bar</codemeta:name>
<codemeta:author>
<codemeta:name>someone</codemeta:name>
- <codemeta:affiliation>My Orga</codemeta:affiliation>
+ <codemeta:affiliation>
+ <codemeta:url>http://example.org</codemeta:url>
+ </codemeta:affiliation>
</codemeta:author>
{PROVENANCE_XML}
</entry>
""",
[
{
- "summary": ".*Reason: character data between child elements.*",
+ "summary": ".*Reason: affiliation does not have a <codemeta:name> element.*",
+ "fields": ["codemeta:author"],
+ },
+ ],
+ ),
+ (
+ "empty-affiliation",
+ f"""\
+ <entry {XMLNS}>
+ <url>some url</url>
+ <codemeta:name>bar</codemeta:name>
+ <codemeta:author>
+ <codemeta:name>someone</codemeta:name>
+ <codemeta:affiliation>
+ </codemeta:affiliation>
+ </codemeta:author>
+ {PROVENANCE_XML}
+ </entry>
+ """,
+ [
+ {
+ "summary": ".*Reason: affiliation does not have a <codemeta:name> element.*",
"fields": ["codemeta:author"],
},
],
diff --git a/swh/deposit/xsd/codemeta.xsd b/swh/deposit/xsd/codemeta.xsd
--- a/swh/deposit/xsd/codemeta.xsd
+++ b/swh/deposit/xsd/codemeta.xsd
@@ -48,8 +48,9 @@
<!-- note that codemeta:affiliation expects Organization as value,
but https://codemeta.github.io/terms/ mistakenly documents it as Text.
- See https://github.com/codemeta/codemeta/pull/239 -->
- <xsd:element name="affiliation" type="codemeta:PersonOrOrganization" />
+ See https://github.com/codemeta/codemeta/pull/239
+ Therefore, more custom checks are implemented in swh/deposit/api/checks.py
+ in order to allow either. -->
<xsd:element name="name" type="xsd:string" />
<xsd:element name="givenName" type="xsd:string" />

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 7:17 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217259

Event Timeline