diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py
--- a/swh/indexer/metadata_dictionary/__init__.py
+++ b/swh/indexer/metadata_dictionary/__init__.py
@@ -2,7 +2,7 @@
 
 import click
 
-from . import codemeta, maven, npm, python, ruby
+from . import cff, codemeta, maven, npm, python, ruby
 
 MAPPINGS = {
     "CodemetaMapping": codemeta.CodemetaMapping,
@@ -10,6 +10,7 @@
     "NpmMapping": npm.NpmMapping,
     "PythonPkginfoMapping": python.PythonPkginfoMapping,
     "GemspecMapping": ruby.GemspecMapping,
+    "CffMapping": cff.CffMapping,
 }
 
 
diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/metadata_dictionary/cff.py
@@ -0,0 +1,122 @@
+import logging
+
+import yaml
+
+from swh.indexer.codemeta import CROSSWALK_TABLE
+
+from .base import DictMapping, SingleFileMapping
+
+logger = logging.getLogger(__name__)
+
+
+class SafeLoader(yaml.SafeLoader):
+    """YAML safe loader that keeps ISO dates as plain strings.
+
+    The implicit timestamp resolver is dropped on this subclass only, so
+    values such as ``date-released: 2019-11-12`` load as ``str`` while other
+    users of ``yaml.SafeLoader`` in the same process are left untouched.
+    """
+
+    yaml_implicit_resolvers = {
+        k: [r for r in v if r[0] != "tag:yaml.org,2002:timestamp"]
+        for k, v in yaml.SafeLoader.yaml_implicit_resolvers.items()
+    }
+
+
+class CffMapping(DictMapping, SingleFileMapping):
+    """Dedicated class for Citation (CITATION.cff) mapping and translation"""
+
+    name = "cff"
+    filename = b"CITATION.cff"
+    mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"]
+    string_fields = [
+        "keywords",
+        "license",
+        "abstract",
+        "version",
+        "doi",
+    ]
+
+    def translate(self, raw_content):
+        """Translate the bytes of a CITATION.cff file into metadata.
+
+        Args:
+            raw_content (bytes): content of the CITATION.cff file
+
+        Returns:
+            dict: the translated metadata, or None when the content cannot
+            be decoded, is not valid YAML, or is not a YAML mapping.
+        """
+        try:
+            content_dict = yaml.load(raw_content.decode(), Loader=SafeLoader)
+        except (UnicodeDecodeError, yaml.YAMLError):
+            # Repository files are untrusted input: report and give up
+            # instead of crashing the indexer on a malformed file.
+            logger.warning("Could not decode or parse CITATION.cff content")
+            return None
+        if not isinstance(content_dict, dict):
+            return None
+
+        metadata = self._translate_dict(content_dict)
+        metadata["@context"] = "https://doi.org/10.5063/schema/codemeta-2.0"
+
+        authors = self._build_authors(content_dict.get("authors"))
+        if authors:
+            metadata["author"] = authors
+
+        # These keys may be absent; copy them only when they are present
+        # instead of raising KeyError.
+        for cff_key, codemeta_key in (
+            ("repository-code", "codeRepository"),
+            ("date-released", "datePublished"),
+        ):
+            value = content_dict.get(cff_key)
+            if isinstance(value, str):
+                metadata[codemeta_key] = value
+
+        return metadata
+
+    @staticmethod
+    def _build_authors(authors):
+        """Convert the CFF ``authors`` value into a list of Person dicts.
+
+        A missing or non-list value yields an empty list; entries that are
+        not mappings are skipped.
+        """
+        if not isinstance(authors, list):
+            return []
+
+        persons = []
+        for author in authors:
+            if not isinstance(author, dict):
+                continue
+
+            person = {}
+            if "orcid" in author:
+                person["@id"] = author["orcid"]
+            person["@type"] = "Person"
+
+            if "affiliation" in author:
+                person["affiliation"] = {
+                    "@type": "Organization",
+                    "name": author["affiliation"],
+                }
+            if "family-names" in author:
+                person["familyName"] = author["family-names"]
+            if "given-names" in author:
+                person["givenName"] = author["given-names"]
+
+            persons.append(person)
+        return persons
+
+    def normalize_license(self, s):
+        """Return the SPDX URI node for license id ``s``, None if not a str."""
+        if isinstance(s, str):
+            return {"@id": "https://spdx.org/licenses/" + s}
+        return None
+
+    def normalize_doi(self, s):
+        """Return the doi.org URI node for DOI ``s``, or None if not a str."""
+        if isinstance(s, str):
+            return {"@id": "https://doi.org/" + s}
+        return None
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -38,7 +38,7 @@
 INDEXER_CFG_KEY = "indexer_storage"
 
 
-MAPPING_NAMES = ["codemeta", "gemspec", "maven", "npm", "pkg-info"]
+MAPPING_NAMES = ["cff", "codemeta", "gemspec", "maven", "npm", "pkg-info"]
 
 
 SERVER_IMPLEMENTATIONS: Dict[str, str] = {
diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py
--- a/swh/indexer/tests/storage/test_storage.py
+++ b/swh/indexer/tests/storage/test_storage.py
@@ -1583,6 +1583,7 @@
         result = storage.origin_intrinsic_metadata_stats()
         assert result == {
             "per_mapping": {
+                "cff": 0,
                 "gemspec": 1,
                 "npm": 2,
                 "pkg-info": 1,
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -86,7 +86,15 @@
         catch_exceptions=False,
     )
     expected_output = "\n".join(
-        ["codemeta", "gemspec", "maven", "npm", "pkg-info", "",]
+        [
+            "cff",
+            "codemeta",
+            "gemspec",
+            "maven",
+            "npm",
+            "pkg-info",
+            "",
+        ]  # must be sorted for test to pass
     )
     assert result.exit_code == 0, result.output
     assert result.output == expected_output
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -66,6 +66,7 @@
         self.maven_mapping = MAPPINGS["MavenMapping"]()
         self.pkginfo_mapping = MAPPINGS["PythonPkginfoMapping"]()
         self.gemspec_mapping = MAPPINGS["GemspecMapping"]()
+        self.cff_mapping = MAPPINGS["CffMapping"]()
 
     def test_compute_metadata_none(self):
         """
@@ -82,6 +83,85 @@
         # then
         self.assertEqual(declared_metadata, result)
 
+    def test_compute_metadata_cff(self):
+        """
+        testing CITATION.cff translation
+        """
+        # given
+        content = """# YAML 1.2
+---
+abstract: "Command line program to convert from Citation File \
+Format to various other formats such as BibTeX, EndNote, RIS, \
+schema.org, CodeMeta, and .zenodo.json."
+authors:
+  -
+    affiliation: "Netherlands eScience Center"
+    family-names: Klaver
+    given-names: Tom
+  -
+    affiliation: "Humboldt-Universität zu Berlin"
+    family-names: Druskat
+    given-names: Stephan
+    orcid: https://orcid.org/0000-0003-4925-7248
+cff-version: "1.0.3"
+date-released: 2019-11-12
+doi: 10.5281/zenodo.1162057
+keywords:
+  - "citation"
+  - "bibliography"
+  - "cff"
+  - "CITATION.cff"
+license: Apache-2.0
+message: "If you use this software, please cite it using these metadata."
+repository-code: "https://github.com/citation-file-format/cff-converter-python"
+title: cffconvert
+version: "1.4.0-alpha0"
+        """.encode(
+            "utf-8"
+        )
+
+        expected = {
+            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+            "type": "SoftwareSourceCode",
+            "author": [
+                {
+                    "@type": "Person",
+                    "affiliation": {
+                        "@type": "Organization",
+                        "name": "Netherlands eScience Center",
+                    },
+                    "familyName": "Klaver",
+                    "givenName": "Tom",
+                },
+                {
+                    "@id": "https://orcid.org/0000-0003-4925-7248",
+                    "@type": "Person",
+                    "affiliation": {
+                        "@type": "Organization",
+                        "name": "Humboldt-Universit\u00e4t zu Berlin",
+                    },
+                    "familyName": "Druskat",
+                    "givenName": "Stephan",
+                },
+            ],
+            "codeRepository": (
+                "https://github.com/" "citation-file-format/cff-converter-python"
+            ),
+            "datePublished": "2019-11-12",
+            "description": """Command line program to convert from \
+Citation File Format to various other formats such as BibTeX, EndNote, \
+RIS, schema.org, CodeMeta, and .zenodo.json.""",
+            "identifier": "https://doi.org/10.5281/zenodo.1162057",
+            "keywords": ["citation", "bibliography", "cff", "CITATION.cff"],
+            "license": "https://spdx.org/licenses/Apache-2.0",
+            "version": "1.4.0-alpha0",
+        }
+
+        # when
+        result = self.cff_mapping.translate(content)
+        # then
+        self.assertEqual(expected, result)
+
     def test_compute_metadata_npm(self):
         """
         testing only computation of metadata with hard_mapping_npm