diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py
--- a/swh/indexer/metadata_dictionary/__init__.py
+++ b/swh/indexer/metadata_dictionary/__init__.py
@@ -27,7 +27,7 @@
 @click.command()
 @click.argument("mapping_name")
 @click.argument("file_name")
-def main(mapping_name, file_name):
+def main(mapping_name: str, file_name: str):
     from pprint import pprint
 
     with open(file_name, "rb") as fd:
diff --git a/swh/indexer/metadata_dictionary/base.py b/swh/indexer/metadata_dictionary/base.py
--- a/swh/indexer/metadata_dictionary/base.py
+++ b/swh/indexer/metadata_dictionary/base.py
@@ -5,7 +5,7 @@
 
 import json
 import logging
-from typing import List
+from typing import Any, Dict, List, Optional
 
 from swh.indexer.codemeta import SCHEMA_URI, compact, merge_values
 
@@ -32,7 +32,7 @@
         raise NotImplementedError(f"{self.__class__.__name__}.name")
 
     @classmethod
-    def detect_metadata_files(cls, files):
+    def detect_metadata_files(cls, files: list):
         """
         Detects files potentially containing metadata
 
@@ -60,9 +60,9 @@
         raise NotImplementedError(f"{self.__class__.__name__}.filename")
 
     @classmethod
-    def detect_metadata_files(cls, file_entries):
+    def detect_metadata_files(cls, file_entries: List[Dict[str, str]]) -> List[Any]:
         for entry in file_entries:
-            if entry["name"].lower() == cls.filename.lower():
+            if entry["name"].lower() == cls.filename.lower():
                 return [entry["sha1"]]
         return []
 
@@ -94,7 +94,9 @@
             or hasattr(cls, "normalize_" + cls._normalize_method_name(key))
         }
 
-    def _translate_dict(self, content_dict, *, normalize=True):
+    def _translate_dict(
+        self, content_dict: Dict, *, normalize: bool = True
+    ) -> Dict[str, str]:
         """
         Translates content by parsing content from a dict object
         and translating with the appropriate mapping
@@ -150,7 +152,7 @@
 class JsonMapping(DictMapping, SingleFileMapping):
     """Base class for all mappings that use a JSON file as input."""
 
-    def translate(self, raw_content):
+    def translate(self, raw_content: bytes) -> Optional[Dict]:
         """
         Translates content by parsing content from a bytestring
         containing json data and translating with the appropriate mapping
@@ -164,14 +166,15 @@
         """
         try:
-            raw_content = raw_content.decode()
+            raw_content_string: str = raw_content.decode()
         except UnicodeDecodeError:
             self.log.warning("Error unidecoding from %s", self.log_suffix)
-            return
+            return None
         try:
-            content_dict = json.loads(raw_content)
+            content_dict = json.loads(raw_content_string)
         except json.JSONDecodeError:
             self.log.warning("Error unjsoning from %s", self.log_suffix)
-            return
+            return None
         if isinstance(content_dict, dict):
             return self._translate_dict(content_dict)
+        return None
 
diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py
--- a/swh/indexer/metadata_dictionary/cff.py
+++ b/swh/indexer/metadata_dictionary/cff.py
@@ -1,3 +1,5 @@
+from typing import Dict, List, Optional, Union
+
 import yaml
 
 from swh.indexer.codemeta import CODEMETA_CONTEXT_URL, CROSSWALK_TABLE, SCHEMA_URI
@@ -18,17 +20,21 @@
     mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"]
     string_fields = ["keywords", "license", "abstract", "version", "doi"]
 
-    def translate(self, raw_content):
-        raw_content = raw_content.decode()
-        content_dict = yaml.load(raw_content, Loader=yaml.SafeLoader)
+    def translate(self, raw_content: bytes) -> Dict[str, str]:
+        raw_content_string: str = raw_content.decode()
+        content_dict = yaml.load(raw_content_string, Loader=yaml.SafeLoader)
         metadata = self._translate_dict(content_dict)
         metadata["@context"] = CODEMETA_CONTEXT_URL
         return metadata
 
-    def normalize_authors(self, d):
+    def normalize_authors(
+        self, d: List[dict]
+    ) -> Optional[Union[List, Dict[str, list]]]:
         result = []
         for author in d:
-            author_data = {"@type": SCHEMA_URI + "Person"}
+            author_data: Dict[str, Optional[Union[str, Dict]]] = {
+                "@type": SCHEMA_URI + "Person"
+            }
             if "orcid" in author:
                 author_data["@id"] = author["orcid"]
             if "affiliation" in author:
@@ -45,21 +51,21 @@
         result.append(author_data)
 
-        result = {"@list": result}
-        return result
+        result_final = {"@list": result}
+        return result_final
 
-    def normalize_doi(self, s):
+    def normalize_doi(self, s: str) -> Optional[Dict[str, str]]:
         if isinstance(s, str):
             return {"@id": "https://doi.org/" + s}
 
-    def normalize_license(self, s):
+    def normalize_license(self, s: str) -> Optional[Dict[str, str]]:
         if isinstance(s, str):
             return {"@id": "https://spdx.org/licenses/" + s}
 
-    def normalize_repository_code(self, s):
+    def normalize_repository_code(self, s: str) -> Optional[Dict[str, str]]:
         if isinstance(s, str):
             return {"@id": s}
 
-    def normalize_date_released(self, s):
+    def normalize_date_released(self, s: str) -> Optional[Dict[str, str]]:
         if isinstance(s, str):
             return {"@value": s, "@type": SCHEMA_URI + "Date"}