Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary/cff.py
Show All 16 Lines | |||||
class CffMapping(DictMapping, SingleFileMapping): | class CffMapping(DictMapping, SingleFileMapping): | ||||
"""Dedicated class for Citation (CITATION.cff) mapping and translation""" | """Dedicated class for Citation (CITATION.cff) mapping and translation""" | ||||
name = "cff" | name = "cff" | ||||
filename = b"CITATION.cff" | filename = b"CITATION.cff" | ||||
mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"] | mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"] | ||||
string_fields = ["keywords", "license", "abstract", "version", "doi"] | string_fields = ["keywords", "license", "abstract", "version", "doi"] | ||||
def translate(self, raw_content: bytes) -> Dict[str, str]: | def translate(self, raw_content: bytes) -> Optional[Dict[str, str]]: | ||||
raw_content_string: str = raw_content.decode() | raw_content_string: str = raw_content.decode() | ||||
try: | |||||
content_dict = yaml.load(raw_content_string, Loader=SafeLoader) | content_dict = yaml.load(raw_content_string, Loader=SafeLoader) | ||||
metadata = self._translate_dict(content_dict) | except yaml.scanner.ScannerError: | ||||
ardumont: Add a warning log to make it explicit that the YAML is invalid and that we skip it. | |||||
Done Inline Actions I disagree: there is nothing we can do about them, so there is no point in logging. We also do not warn about syntax errors in JSON files. vlorentz: I disagree: there is nothing we can do about them, so there is no point in logging. We also do not warn… | |||||
Done Inline Actions To get a trend or rough statistics on how often that happens, isn't it worth it? ardumont: To get a trend or rough statistics on how often that happens, isn't it worth it?
Either way… | |||||
return None | |||||
if isinstance(content_dict, dict): | |||||
metadata = self._translate_dict(content_dict) | |||||
metadata["@context"] = CODEMETA_CONTEXT_URL | metadata["@context"] = CODEMETA_CONTEXT_URL | ||||
return metadata | return metadata | ||||
return None | |||||
def normalize_authors(self, d: List[dict]) -> Dict[str, list]: | def normalize_authors(self, d: List[dict]) -> Dict[str, list]: | ||||
Done Inline Actions Well, with some more info though ;) ardumont: Well, with some more info though ;)
But like I said, I won't push too much. | |||||
result = [] | result = [] | ||||
for author in d: | for author in d: | ||||
author_data: Dict[str, Optional[Union[str, Dict]]] = { | author_data: Dict[str, Optional[Union[str, Dict]]] = { | ||||
"@type": SCHEMA_URI + "Person" | "@type": SCHEMA_URI + "Person" | ||||
} | } | ||||
if "orcid" in author: | if "orcid" in author: | ||||
author_data["@id"] = author["orcid"] | author_data["@id"] = author["orcid"] | ||||
if "affiliation" in author: | if "affiliation" in author: | ||||
Show All 29 Lines |
Add a warning log to make it explicit that the YAML is invalid and that we skip it.