diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py --- a/swh/indexer/metadata_dictionary/__init__.py +++ b/swh/indexer/metadata_dictionary/__init__.py @@ -27,7 +27,7 @@ @click.command() @click.argument("mapping_name") @click.argument("file_name") -def main(mapping_name, file_name): +def main(mapping_name: str, file_name: str): from pprint import pprint with open(file_name, "rb") as fd: diff --git a/swh/indexer/metadata_dictionary/base.py b/swh/indexer/metadata_dictionary/base.py --- a/swh/indexer/metadata_dictionary/base.py +++ b/swh/indexer/metadata_dictionary/base.py @@ -5,7 +5,7 @@ import json import logging -from typing import List +from typing import List, Union from swh.indexer.codemeta import SCHEMA_URI, compact, merge_values @@ -32,7 +32,7 @@ raise NotImplementedError(f"{self.__class__.__name__}.name") @classmethod - def detect_metadata_files(cls, files): + def detect_metadata_files(cls, files: list): """ Detects files potentially containing metadata @@ -150,7 +150,7 @@ class JsonMapping(DictMapping, SingleFileMapping): """Base class for all mappings that use a JSON file as input.""" - def translate(self, raw_content): + def translate(self, raw_content) -> Union[dict, None]: """ Translates content by parsing content from a bytestring containing json data and translating with the appropriate mapping @@ -167,11 +167,12 @@ raw_content = raw_content.decode() except UnicodeDecodeError: self.log.warning("Error unidecoding from %s", self.log_suffix) - return + return None try: content_dict = json.loads(raw_content) except json.JSONDecodeError: self.log.warning("Error unjsoning from %s", self.log_suffix) - return + return None if isinstance(content_dict, dict): return self._translate_dict(content_dict) + return None diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py --- a/swh/indexer/metadata_dictionary/cff.py +++ b/swh/indexer/metadata_dictionary/cff.py @@ -1,3 +1,5 @@ 
+from typing import Dict, List, Optional, Union + import yaml from swh.indexer.codemeta import CODEMETA_CONTEXT_URL, CROSSWALK_TABLE, SCHEMA_URI @@ -18,19 +20,23 @@ mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"] string_fields = ["keywords", "license", "abstract", "version", "doi"] - def translate(self, raw_content): - raw_content = raw_content.decode() - content_dict = yaml.load(raw_content, Loader=yaml.SafeLoader) + def translate(self, raw_content: bytes) -> Dict[str, str]: + raw_content_string: str = raw_content.decode() + content_dict = yaml.load(raw_content_string, Loader=yaml.SafeLoader) metadata = self._translate_dict(content_dict) metadata["@context"] = CODEMETA_CONTEXT_URL return metadata - def normalize_authors(self, d): + def normalize_authors( + self, d: List[dict] + ) -> Optional[Union[List, Dict[str, list]]]: result = [] for author in d: - author_data = {"@type": SCHEMA_URI + "Person"} + author_data: Dict[str, Optional[Union[str, dict]]] = { + "@type": SCHEMA_URI + "Person" + } if "orcid" in author: author_data["@id"] = author["orcid"] if "affiliation" in author: @@ -45,21 +51,21 @@ result.append(author_data) - result = {"@list": result} - return result + result_final = {"@list": result} + return result_final - def normalize_doi(self, s): + def normalize_doi(self, s: str) -> Optional[Dict[str, str]]: if isinstance(s, str): return {"@id": "https://doi.org/" + s} - def normalize_license(self, s): + def normalize_license(self, s: str) -> Optional[Dict[str, str]]: if isinstance(s, str): return {"@id": "https://spdx.org/licenses/" + s} - def normalize_repository_code(self, s): + def normalize_repository_code(self, s: str) -> Optional[Dict[str, str]]: if isinstance(s, str): return {"@id": s} - def normalize_date_released(self, s): + def normalize_date_released(self, s: str) -> Optional[Dict[str, str]]: if isinstance(s, str): return {"@value": s, "@type": SCHEMA_URI + "Date"}