diff --git a/swh/deposit/api/checks.py b/swh/deposit/api/checks.py --- a/swh/deposit/api/checks.py +++ b/swh/deposit/api/checks.py @@ -32,6 +32,8 @@ SUGGESTED_FIELDS_MISSING = "Suggested fields are missing" METADATA_PROVENANCE_KEY = "swh:metadata-provenance" +AFFILIATION_NO_NAME = "Reason: affiliation does not have a element" + def extra_validator(element, xsd_element): """Performs extra checks on Atom elements that cannot be implemented purely @@ -142,6 +144,44 @@ schemas().codemeta.validate(child, extra_validator=extra_validator) except xmlschema.exceptions.XMLSchemaException as e: detail.append({"fields": [schema_element.prefixed_name], "summary": str(e)}) + else: + # Manually validate <codemeta:affiliation>. Unfortunately, this cannot be + # validated by codemeta.xsd, because Codemeta has conflicting requirements: + # 1. https://codemeta.github.io/terms/ requires it to be Text (represented + # by simple content), but + # 2. https://doi.org/10.5063/SCHEMA/CODEMETA-2.0 requires it to be an + # Organization (represented by complex content) + # And this is (legitimately) not representable in XML Schema. + # + # See https://github.com/codemeta/codemeta/pull/239 for a discussion about + # this issue. + for affiliation in child.findall( + "codemeta:affiliation", namespaces=NAMESPACES + ): + if len(affiliation) > 0: + # This is a complex element (as required by + # https://doi.org/10.5063/SCHEMA/CODEMETA-2.0), so we want to make + # sure there is at least a name. 
+ if not affiliation.findtext("codemeta:name", namespaces=NAMESPACES): + detail.append( + { + "fields": [schema_element.prefixed_name], + "summary": AFFILIATION_NO_NAME, + } + ) + break + else: + # This is a simple element (as required by + # https://codemeta.github.io/terms/) + if affiliation.text is None or not affiliation.text.strip(): + # Completely empty element + detail.append( + { + "fields": [schema_element.prefixed_name], + "summary": AFFILIATION_NO_NAME, + } + ) + break if detail: return False, {"metadata": detail + suggested_fields} diff --git a/swh/deposit/tests/api/test_checks.py b/swh/deposit/tests/api/test_checks.py --- a/swh/deposit/tests/api/test_checks.py +++ b/swh/deposit/tests/api/test_checks.py @@ -226,7 +226,9 @@ """, ), ( - "codemeta-affiliation", + # Required by codemeta.jsonld, but forbidden by + # https://codemeta.github.io/terms/ + "element-in--affiliation", f"""\ some url @@ -241,6 +243,22 @@ """, ), + ( + # Forbidden by codemeta.jsonld, but required by + # https://codemeta.github.io/terms/ + "chardata-in-affiliation", + f"""\ + + some url + bar + + someone + My Orga + + {PROVENANCE_XML} + + """, + ), ( "swh:add_to_origin", f"""\ @@ -494,21 +512,44 @@ ], ), ( - "chardata-in-affiliation", + "affiliation-with-no-name", f"""\ some url bar someone - My Orga + + http://example.org + {PROVENANCE_XML} """, [ { - "summary": ".*Reason: character data between child elements.*", + "summary": ".*Reason: affiliation does not have a element.*", "fields": ["codemeta:author"], }, ], ), + ( + "empty-affiliation", + f"""\ + + some url + bar + + someone + + + + {PROVENANCE_XML} + + """, + [ + { + "summary": ".*Reason: affiliation does not have a element.*", + "fields": ["codemeta:author"], + }, + ], diff --git a/swh/deposit/xsd/codemeta.xsd b/swh/deposit/xsd/codemeta.xsd --- a/swh/deposit/xsd/codemeta.xsd +++ b/swh/deposit/xsd/codemeta.xsd @@ -48,8 +48,9 @@ - + See https://github.com/codemeta/codemeta/pull/239 + Therefore, more custom checks 
are implemented in swh/deposit/api/checks.py + in order to allow either. -->