diff --git a/docs/metadata-workflow.rst b/docs/metadata-workflow.rst
--- a/docs/metadata-workflow.rst
+++ b/docs/metadata-workflow.rst
@@ -166,7 +166,7 @@
 (other than the `codemeta` mapping, which is the identity function, and
 therefore supports all properties):
 
-.. program-output:: python3 -m swh.indexer.cli mapping list-terms --exclude-mapping codemeta --exclude-mapping sword-codemeta
+.. program-output:: python3 -m swh.indexer.cli mapping list-terms --exclude-mapping codemeta --exclude-mapping json-sword-codemeta --exclude-mapping sword-codemeta
     :nostderr:
 
 
diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py
--- a/swh/indexer/metadata_dictionary/__init__.py
+++ b/swh/indexer/metadata_dictionary/__init__.py
@@ -25,6 +25,7 @@
 
 EXTRINSIC_MAPPINGS: Dict[str, Type[BaseExtrinsicMapping]] = {
     "GitHubMapping": github.GitHubMapping,
+    "JsonSwordCodemetaMapping": codemeta.JsonSwordCodemetaMapping,
     "SwordCodemetaMapping": codemeta.SwordCodemetaMapping,
 }
 
diff --git a/swh/indexer/metadata_dictionary/codemeta.py b/swh/indexer/metadata_dictionary/codemeta.py
--- a/swh/indexer/metadata_dictionary/codemeta.py
+++ b/swh/indexer/metadata_dictionary/codemeta.py
@@ -9,6 +9,8 @@
 from typing import Any, Dict, List, Optional, Tuple
 import xml.etree.ElementTree as ET
 
+import xmltodict
+
 from swh.indexer.codemeta import CODEMETA_CONTEXT_URL, CODEMETA_TERMS, compact, expand
 
 from .base import BaseExtrinsicMapping, SingleFileIntrinsicMapping
@@ -115,3 +117,34 @@
 
     def normalize_translation(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
         return compact(metadata, forgefed=False)
+
+
+class JsonSwordCodemetaMapping(SwordCodemetaMapping):
+    """
+    Variant of :class:`SwordCodemetaMapping` that reads the legacy
+    ``sword-v2-atom-codemeta-v2-in-json`` format and converts it back to
+    ``sword-v2-atom-codemeta-v2`` XML.
+    """
+
+    name = "json-sword-codemeta"
+
+    @classmethod
+    def extrinsic_metadata_formats(cls) -> Tuple[str, ...]:
+        return ("sword-v2-atom-codemeta-v2-in-json",)
+
+    def translate(self, content: bytes) -> Optional[Dict[str, Any]]:
+        # ``content`` was generated by calling ``xmltodict.parse()`` on an XML
+        # document, so ``xmltodict.unparse()`` is guaranteed to return a document
+        # that is semantically equivalent to the original; that document is then
+        # passed to SwordCodemetaMapping.
+        json_doc = json.loads(content)
+
+        if json_doc.get("@xmlns") != ATOM_URI:
+            # Technically, non-default XMLNS were allowed, but it does not seem like
+            # anyone used them, so they do not need to be implemented here.
+            raise NotImplementedError(f"Unexpected XMLNS set: {json_doc}")
+
+        # Root tag was stripped by swh-deposit
+        json_doc = {"entry": json_doc}
+
+        return super().translate(xmltodict.unparse(json_doc))
diff --git a/swh/indexer/tests/metadata_dictionary/test_codemeta.py b/swh/indexer/tests/metadata_dictionary/test_codemeta.py
--- a/swh/indexer/tests/metadata_dictionary/test_codemeta.py
+++ b/swh/indexer/tests/metadata_dictionary/test_codemeta.py
@@ -349,3 +349,18 @@
             "Codemeta Name 2",
         ],
     }
+
+
+def test_json_sword():
+    content = """{"id": "hal-01243573", "@xmlns": "http://www.w3.org/2005/Atom", "author": {"name": "Author 1", "email": "foo@example.org"}, "client": "hal", "codemeta:url": "http://example.org/", "codemeta:name": "The assignment problem", "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0", "codemeta:author": {"codemeta:name": "Author 2"}, "codemeta:license": {"codemeta:name": "GNU General Public License v3.0 or later"}}"""  # noqa
+    result = MAPPINGS["JsonSwordCodemetaMapping"]().translate(content)
+    assert result == {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "author": [
+            {"name": "Author 1", "email": "foo@example.org"},
+            {"name": "Author 2"},
+        ],
+        "license": {"name": "GNU General Public License v3.0 or later"},
+        "name": "The assignment problem",
+        "schema:url": "http://example.org/",
+    }
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -111,6 +111,7 @@
             "composer",
             "gemspec",
             "github",
+            "json-sword-codemeta",
             "maven",
             "npm",
             "nuget",
@@ -150,6 +151,8 @@
             "--exclude-mapping",
             "codemeta",
             "--exclude-mapping",
+            "json-sword-codemeta",
+            "--exclude-mapping",
             "sword-codemeta",
         ],
         catch_exceptions=False,