Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary/codemeta.py
Show First 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | def extrinsic_metadata_formats(cls) -> Tuple[str, ...]: | ||||||||
"sword-v2-atom-codemeta-v2", | "sword-v2-atom-codemeta-v2", | ||||||||
) | ) | ||||||||
@classmethod | @classmethod | ||||||||
def supported_terms(cls) -> List[str]: | def supported_terms(cls) -> List[str]: | ||||||||
return [term for term in CODEMETA_TERMS if not term.startswith("@")] | return [term for term in CODEMETA_TERMS if not term.startswith("@")] | ||||||||
def xml_to_jsonld(self, e: ET.Element) -> Union[str, Dict[str, Any]]: | def xml_to_jsonld(self, e: ET.Element) -> Union[str, Dict[str, Any]]: | ||||||||
# Keys are JSON-LD property names (URIs or terms). | |||||||||
# Values are either a single string (if key is "type") or list of | |||||||||
# other dicts with the same type recursively. | |||||||||
ardumontUnsubmitted Not Done Inline Actions
ardumont: | |||||||||
Done Inline Actions — vlorentz: oops, missed your comment | |||||||||
# To simplify annotations, we omit the single string case here. | |||||||||
doc: Dict[str, List[Union[str, Dict[str, Any]]]] = collections.defaultdict(list) | doc: Dict[str, List[Union[str, Dict[str, Any]]]] = collections.defaultdict(list) | ||||||||
for child in e: | for child in e: | ||||||||
m = _TAG_RE.match(child.tag) | m = _TAG_RE.match(child.tag) | ||||||||
assert m, f"Tag with no namespace: {child}" | assert m, f"Tag with no namespace: {child}" | ||||||||
namespace = m.group("namespace") | namespace = m.group("namespace") | ||||||||
localname = m.group("localname") | localname = m.group("localname") | ||||||||
if namespace == ATOM_URI and localname in ("title", "name"): | if namespace == ATOM_URI and localname in ("title", "name"): | ||||||||
# Convert Atom to Codemeta name; in case codemeta:name | # Convert Atom to Codemeta name; in case codemeta:name | ||||||||
# is not provided or different | # is not provided or different | ||||||||
Show All 19 Lines | def xml_to_jsonld(self, e: ET.Element) -> Union[str, Dict[str, Any]]: | ||||||||
) | ) | ||||||||
and isinstance(jsonld_child, str) | and isinstance(jsonld_child, str) | ||||||||
and _DATE_RE.match(jsonld_child) | and _DATE_RE.match(jsonld_child) | ||||||||
): | ): | ||||||||
# Dates missing a leading zero for their day/month, used | # Dates missing a leading zero for their day/month, used | ||||||||
# to be allowed by the deposit; so we need to reformat them | # to be allowed by the deposit; so we need to reformat them | ||||||||
# to be valid ISO8601. | # to be valid ISO8601. | ||||||||
jsonld_child = iso8601.parse_date(jsonld_child).date().isoformat() | jsonld_child = iso8601.parse_date(jsonld_child).date().isoformat() | ||||||||
if localname == "id": | |||||||||
# JSON-LD only allows a single id, and they have to be strings. | |||||||||
if localname in doc or not isinstance(jsonld_child, str): | |||||||||
continue | |||||||||
olasd (Unsubmitted, Done) Inline Actions — olasd: Shouldn't this spit a warning of some sort? | |||||||||
else: | |||||||||
doc[localname] = jsonld_child # type: ignore[assignment] | |||||||||
else: | |||||||||
doc[localname].append(jsonld_child) | doc[localname].append(jsonld_child) | ||||||||
else: | else: | ||||||||
# Otherwise, we already know the URI | # Otherwise, we already know the URI | ||||||||
doc[f"{namespace}{localname}"].append(self.xml_to_jsonld(child)) | doc[f"{namespace}{localname}"].append(self.xml_to_jsonld(child)) | ||||||||
# The above needed doc values to be list to work; now we allow any type | # The above needed doc values to be list to work; now we allow any type | ||||||||
# of value as key "@value" cannot have a list as value. | # of value as key "@value" cannot have a list as value. | ||||||||
doc_: Dict[str, Any] = doc | doc_: Dict[str, Any] = doc | ||||||||
▲ Show 20 Lines • Show All 57 Lines • Show Last 20 Lines |