diff --git a/swh/indexer/metadata_dictionary/codemeta.py b/swh/indexer/metadata_dictionary/codemeta.py
--- a/swh/indexer/metadata_dictionary/codemeta.py
+++ b/swh/indexer/metadata_dictionary/codemeta.py
@@ -6,7 +6,7 @@
import collections
import json
import re
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple, Union
import xml.etree.ElementTree as ET
import xmltodict
@@ -61,8 +61,8 @@
def supported_terms(cls) -> List[str]:
return [term for term in CODEMETA_TERMS if not term.startswith("@")]
- def xml_to_jsonld(self, e: ET.Element) -> Dict[str, Any]:
- doc: Dict[str, List[Dict[str, Any]]] = collections.defaultdict(list)
+ def xml_to_jsonld(self, e: ET.Element) -> Union[str, Dict[str, Any]]:
+ doc: Dict[str, List[Union[str, Dict[str, Any]]]] = collections.defaultdict(list)
for child in e:
m = _TAG_RE.match(child.tag)
assert m, f"Tag with no namespace: {child}"
@@ -84,12 +84,6 @@
# expansion will convert it to a full URI based on
# "@context": CODEMETA_CONTEXT_URL
jsonld_child = self.xml_to_jsonld(child)
- if localname == "type" and isinstance(jsonld_child, dict):
- # With a codemeta context, this is later translated to a JSON-LD
- # @type, which must be either an array of strings or a string.
- if set(jsonld_child) != {"@value"}:
- raise ValueError(f'Unexpected value for "type": {jsonld_child}')
- jsonld_child = jsonld_child["@value"]
doc[localname].append(jsonld_child)
else:
# Otherwise, we already know the URI
@@ -102,7 +96,7 @@
text = e.text.strip() if e.text else None
if text:
# TODO: check doc is empty, and raise mixed-content error otherwise?
- doc_["@value"] = text
+ return text
return doc_
@@ -113,6 +107,8 @@
# Transform to JSON-LD document
doc = self.xml_to_jsonld(root)
+ assert isinstance(doc, dict), f"Root object is not a dict: {doc}"
+
# Add @context to JSON-LD expansion replaces the "codemeta:" prefix
# hash (which uses the context URL as namespace URI for historical
# reasons) into properties in `http://schema.org/` and
diff --git a/swh/indexer/tests/metadata_dictionary/test_codemeta.py b/swh/indexer/tests/metadata_dictionary/test_codemeta.py
--- a/swh/indexer/tests/metadata_dictionary/test_codemeta.py
+++ b/swh/indexer/tests/metadata_dictionary/test_codemeta.py
@@ -213,6 +213,7 @@
Author 2
+ 2022-10-26
Author 3
bar@example.org
@@ -229,6 +230,7 @@
{"name": "Author 2"},
{"name": "Author 3", "email": "bar@example.org"},
],
+ "dateCreated": "2022-10-26",
}
@@ -273,13 +275,16 @@
def test_sword_schemaorg_in_codemeta_constrained():
"""Resulting property has the compact URI 'schema:url' instead of just
the term 'url', because term 'url' is defined by the Codemeta schema
- has having type '@id'."""
+ as having type '@id'.
+ Ditto for dates (with type http://schema.org/Date)."""
content = """
My Software
http://example.org/my-software
+ foo
+ 2022-10-26
"""
@@ -288,6 +293,8 @@
"@context": "https://doi.org/10.5063/schema/codemeta-2.0",
"name": "My Software",
"schema:url": "http://example.org/my-software",
+ "schema:dateCreated": "foo",
+ "schema:dateModified": "2022-10-26",
}
@@ -388,6 +395,6 @@
],
"license": {"name": "GNU General Public License v3.0 or later"},
"name": "The assignment problem",
- "schema:url": "http://example.org/",
+ "url": "http://example.org/",
"name": "The assignment problem",
}