diff --git a/swh/indexer/metadata_dictionary/codemeta.py b/swh/indexer/metadata_dictionary/codemeta.py --- a/swh/indexer/metadata_dictionary/codemeta.py +++ b/swh/indexer/metadata_dictionary/codemeta.py @@ -6,7 +6,7 @@ import collections import json import re -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, Union import xml.etree.ElementTree as ET import xmltodict @@ -61,8 +61,8 @@ def supported_terms(cls) -> List[str]: return [term for term in CODEMETA_TERMS if not term.startswith("@")] - def xml_to_jsonld(self, e: ET.Element) -> Dict[str, Any]: - doc: Dict[str, List[Dict[str, Any]]] = collections.defaultdict(list) + def xml_to_jsonld(self, e: ET.Element) -> Union[str, Dict[str, Any]]: + doc: Dict[str, List[Union[str, Dict[str, Any]]]] = collections.defaultdict(list) for child in e: m = _TAG_RE.match(child.tag) assert m, f"Tag with no namespace: {child}" @@ -84,12 +84,6 @@ # expansion will convert it to a full URI based on # "@context": CODEMETA_CONTEXT_URL jsonld_child = self.xml_to_jsonld(child) - if localname == "type" and isinstance(jsonld_child, dict): - # With a codemeta context, this is later translated to a JSON-LD - # @type, which must be either an array of strings or a string. - if set(jsonld_child) != {"@value"}: - raise ValueError(f'Unexpected value for "type": {jsonld_child}') - jsonld_child = jsonld_child["@value"] doc[localname].append(jsonld_child) else: # Otherwise, we already know the URI @@ -102,7 +96,7 @@ text = e.text.strip() if e.text else None if text: # TODO: check doc is empty, and raise mixed-content error otherwise? - doc_["@value"] = text + return text return doc_ @@ -113,6 +107,8 @@ # Transform to JSON-LD document doc = self.xml_to_jsonld(root) + assert isinstance(doc, dict), f"Root object is not a dict: {doc}" + # Add @context to JSON-LD expansion replaces the "codemeta:" prefix # hash (which uses the context URL as namespace URI for historical # reasons) into properties in `http://schema.org/` and diff --git a/swh/indexer/tests/metadata_dictionary/test_codemeta.py b/swh/indexer/tests/metadata_dictionary/test_codemeta.py --- a/swh/indexer/tests/metadata_dictionary/test_codemeta.py +++ b/swh/indexer/tests/metadata_dictionary/test_codemeta.py @@ -213,6 +213,7 @@ Author 2 + 2022-10-26 Author 3 bar@example.org @@ -229,6 +230,7 @@ {"name": "Author 2"}, {"name": "Author 3", "email": "bar@example.org"}, ], + "dateCreated": "2022-10-26", } @@ -388,6 +390,6 @@ ], "license": {"name": "GNU General Public License v3.0 or later"}, "name": "The assignment problem", - "schema:url": "http://example.org/", + "url": "http://example.org/", "name": "The assignment problem", }