diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py
--- a/swh/indexer/metadata_dictionary/__init__.py
+++ b/swh/indexer/metadata_dictionary/__init__.py
@@ -2,7 +2,7 @@
 
 import click
 
-from . import codemeta, maven, npm, python, ruby
+from . import citation, codemeta, maven, npm, python, ruby
 
 MAPPINGS = {
     "CodemetaMapping": codemeta.CodemetaMapping,
@@ -10,6 +10,7 @@
     "NpmMapping": npm.NpmMapping,
     "PythonPkginfoMapping": python.PythonPkginfoMapping,
     "GemspecMapping": ruby.GemspecMapping,
+    "CitationMapping": citation.CitationMapping,
 }
 
 
diff --git a/swh/indexer/metadata_dictionary/citation.py b/swh/indexer/metadata_dictionary/citation.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/metadata_dictionary/citation.py
@@ -0,0 +1,82 @@
+import yaml
+
+from swh.indexer.codemeta import CROSSWALK_TABLE
+
+from .base import DictMapping, SingleFileMapping
+
+# NOTE: this rebinds a class attribute of yaml.SafeLoader itself, so the
+# timestamp resolver is removed for any other code using yaml.SafeLoader too,
+# not only for this mapping.
+yaml.SafeLoader.yaml_implicit_resolvers = {
+    k: [r for r in v if r[0] != "tag:yaml.org,2002:timestamp"]
+    for k, v in yaml.SafeLoader.yaml_implicit_resolvers.items()
+}
+
+
+class CitationMapping(DictMapping, SingleFileMapping):
+    """Dedicated class for Citation (CITATION.cff) mapping and translation"""
+
+    name = "citation"
+    filename = b"CITATION.cff"
+    mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"]
+    string_fields = [
+        "date-released",
+        "keywords",
+        "license",
+        "abstract",
+        "version",
+        "doi",
+        "repository-code",
+        "authors",
+    ]
+
+    def translate(self, raw_content):
+        """Translate the raw bytes of a CITATION.cff file.
+
+        The content is decoded, parsed as YAML and passed to
+        ``_translate_dict``; one ``Person`` entry per author mapping found
+        under the ``authors`` key is then appended to ``schema:author``.
+
+        A missing or malformed ``authors`` value is ignored instead of
+        raising ``KeyError``/``TypeError``/``AttributeError``.
+        """
+        raw_content = raw_content.decode()  # bytes to str
+        content_dict = yaml.load(raw_content, Loader=yaml.SafeLoader)  # str to dict
+        metadata = self._translate_dict(content_dict)  # convert to CodeMeta
+
+        # The file content is not validated here: the document may not be a
+        # mapping and "authors" may be absent or not a list.
+        authors = None
+        if isinstance(content_dict, dict):
+            authors = content_dict.get("authors")
+        if not isinstance(authors, list):
+            authors = []
+
+        for author in authors:
+            if not isinstance(author, dict):
+                continue  # e.g. a bare string instead of a mapping
+            # setdefault: do not assume _translate_dict created the list
+            # NOTE(review): absent fields are emitted as None, "@id" included
+            metadata.setdefault("schema:author", []).append(
+                {
+                    "@id": author.get("orcid"),
+                    "@type": "Person",
+                    "affiliation": {
+                        "@type": "Organization",
+                        "legalName": author.get("affiliation"),
+                    },
+                    "familyName": author.get("family-names"),
+                    "givenName": author.get("given-names"),
+                }
+            )
+        return metadata
+
+    def normalize_license(self, s):
+        """Return ``s`` as an SPDX license URI node; None if not a string."""
+        if isinstance(s, str):
+            return {"@id": "https://spdx.org/licenses/" + s}
+
+    def normalize_doi(self, s):
+        """Return ``s`` as a doi.org URI node; None if not a string."""
+        if isinstance(s, str):
+            return {"@id": "https://doi.org/" + s}
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -86,7 +86,15 @@
         catch_exceptions=False,
     )
     expected_output = "\n".join(
-        ["codemeta", "gemspec", "maven", "npm", "pkg-info", "",]
+        [
+            "citation",
+            "codemeta",
+            "gemspec",
+            "maven",
+            "npm",
+            "pkg-info",
+            "",
+        ]  # must be sorted for test to pass
     )
     assert result.exit_code == 0, result.output
     assert result.output == expected_output