Changeset View
Standalone View
swh/indexer/metadata_dictionary/cff.py
- This file was added.
import yaml | |||||
from swh.indexer.codemeta import CODEMETA_CONTEXT_URL, CROSSWALK_TABLE, SCHEMA_URI | |||||
from .base import DictMapping, SingleFileMapping | |||||
# Rebuild SafeLoader's implicit-resolver table without the entries tagged as
# YAML timestamps. NOTE(review): presumably this keeps date-like scalars such as
# ``date-released`` as plain strings (normalize_date_released only accepts
# ``str``) -- confirm against PyYAML's resolver behaviour.
# The assignment is made on the ``yaml.SafeLoader`` class itself, so it applies
# to every later use of that loader in the process, not only to this module.
yaml.SafeLoader.yaml_implicit_resolvers = {
    key: [
        resolver
        for resolver in resolvers
        if resolver[0] != "tag:yaml.org,2002:timestamp"
    ]
    for key, resolvers in yaml.SafeLoader.yaml_implicit_resolvers.items()
}
class CffMapping(DictMapping, SingleFileMapping):
    """Dedicated class for Citation (CITATION.cff) mapping and translation.

    Field names are looked up in the "Citation File Format Core (CFF-Core)
    1.0.2" column of the crosswalk table. The ``normalize_*`` methods are named
    after CFF keys (``authors``, ``doi``, ``license``, ``repository-code``,
    ``date-released``); presumably the ``DictMapping`` base class calls them
    with the value of the matching key -- confirm against ``base.py``.
    """

    name = "cff"
    filename = b"CITATION.cff"
    mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"]
    string_fields = ["keywords", "license", "abstract", "version", "doi"]

    def translate(self, raw_content):
        """Translate the raw content of a ``CITATION.cff`` file.

        Args:
            raw_content (bytes): content of the file, decoded with the default
                codec of ``bytes.decode``.

        Returns:
            The dict returned by ``_translate_dict`` with ``"@context"`` set to
            ``CODEMETA_CONTEXT_URL``, or ``None`` when the content cannot be
            decoded, is not valid YAML, or is not a mapping at top level
            (e.g. an empty file, for which ``yaml.load`` yields ``None``).
        """
        try:
            text = raw_content.decode()
        except UnicodeDecodeError:
            # Not text in the expected encoding: nothing to translate.
            return None

        try:
            content_dict = yaml.load(text, Loader=yaml.SafeLoader)
        except yaml.YAMLError:
            # Malformed YAML must not abort the caller.
            return None

        # A CFF document is a mapping; scalars, lists and empty documents
        # cannot be handed to _translate_dict.
        if not isinstance(content_dict, dict):
            return None

        metadata = self._translate_dict(content_dict)
        metadata["@context"] = CODEMETA_CONTEXT_URL
        return metadata

    def normalize_authors(self, d):
        """Build a JSON-LD ``@list`` of ``Person`` nodes from CFF authors.

        Args:
            d: value of the ``authors`` key, expected to be a list of mappings
                with optional ``orcid``, ``affiliation``, ``family-names`` and
                ``given-names`` string entries.

        Returns:
            ``{"@list": [...]}`` with one ``Person`` node per well-formed
            author, in input order, or ``None`` when ``d`` is not a list.
            Entries that are not mappings are skipped, and values that are not
            strings are ignored.
        """
        if not isinstance(d, list):
            return None

        people = []
        for author in d:
            if not isinstance(author, dict):
                # e.g. a bare string instead of a mapping: skip it rather than
                # fail on the key lookups below.
                continue

            person = {"@type": SCHEMA_URI + "Person"}

            orcid = author.get("orcid")
            if isinstance(orcid, str):
                # The ORCID value is used as-is as the node identifier.
                person["@id"] = orcid

            affiliation = author.get("affiliation")
            if isinstance(affiliation, str):
                person[SCHEMA_URI + "affiliation"] = {
                    "@type": SCHEMA_URI + "Organization",
                    SCHEMA_URI + "name": affiliation,
                }

            family_names = author.get("family-names")
            if isinstance(family_names, str):
                person[SCHEMA_URI + "familyName"] = family_names

            given_names = author.get("given-names")
            if isinstance(given_names, str):
                person[SCHEMA_URI + "givenName"] = given_names

            people.append(person)

        # "@list" is JSON-LD's ordered-collection keyword.
        return {"@list": people}

    def normalize_doi(self, s):
        """Return the DOI as an ``@id`` node under ``https://doi.org/``.

        Returns ``None`` when ``s`` is not a string.
        """
        if isinstance(s, str):
            return {"@id": "https://doi.org/" + s}
        return None

    def normalize_license(self, s):
        """Return the license as an ``@id`` node under ``https://spdx.org/licenses/``.

        Review rationale for the ``@id`` form: the codemeta context puts a type
        constraint on such terms, and a value that does not satisfy it is not
        compacted to the codemeta term (it stays as ``schema:...``).
        NOTE(review): the review only hints that ``license`` is constrained
        like ``codeRepository``; confirm against the codemeta context.

        Returns ``None`` when ``s`` is not a string.
        """
        if isinstance(s, str):
            return {"@id": "https://spdx.org/licenses/" + s}
        return None

    def normalize_repository_code(self, s):
        """Return the repository URL as an ``@id`` node.

        Per the review discussion, codemeta defines ``codeRepository`` as
        ``schema:codeRepository`` with ``"@type": "@id"``, so the value has to
        be an identifier node for compaction to yield the codemeta term.

        Returns ``None`` when ``s`` is not a string.
        """
        if isinstance(s, str):
            return {"@id": s}
        return None

    def normalize_date_released(self, s):
        """Return the release date as a value typed ``schema:Date``.

        Per the review discussion, codemeta defines ``datePublished`` as
        ``schema:datePublished`` with ``"@type": "schema:Date"``, so the value
        has to carry that type for compaction to yield the codemeta term.

        Returns ``None`` when ``s`` is not a string.
        """
        if isinstance(s, str):
            return {"@value": s, "@type": SCHEMA_URI + "Date"}
        return None
Could you move this part to a method? (`normalize_authors`, iirc)