diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py
--- a/swh/indexer/metadata_dictionary/__init__.py
+++ b/swh/indexer/metadata_dictionary/__init__.py
@@ -27,7 +27,7 @@
 @click.command()
 @click.argument("mapping_name")
 @click.argument("file_name")
-def main(mapping_name, file_name):
+def main(mapping_name: str, file_name: str):
     from pprint import pprint
 
     with open(file_name, "rb") as fd:
diff --git a/swh/indexer/metadata_dictionary/base.py b/swh/indexer/metadata_dictionary/base.py
--- a/swh/indexer/metadata_dictionary/base.py
+++ b/swh/indexer/metadata_dictionary/base.py
@@ -5,7 +5,7 @@
 
 import json
 import logging
-from typing import List
+from typing import Any, Dict, List, Optional
 
 from swh.indexer.codemeta import SCHEMA_URI, compact, merge_values
 
@@ -32,7 +32,7 @@
         raise NotImplementedError(f"{self.__class__.__name__}.name")
 
     @classmethod
-    def detect_metadata_files(cls, files):
+    def detect_metadata_files(cls, files: List[Dict[str, str]]) -> List[str]:
         """
         Detects files potentially containing metadata
 
@@ -44,10 +44,10 @@
         """
         raise NotImplementedError(f"{cls.__name__}.detect_metadata_files")
 
-    def translate(self, file_content):
+    def translate(self, file_content: bytes) -> Optional[Dict]:
         raise NotImplementedError(f"{self.__class__.__name__}.translate")
 
-    def normalize_translation(self, metadata):
+    def normalize_translation(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
         return compact(metadata)
 
 
@@ -60,9 +60,12 @@
         raise NotImplementedError(f"{self.__class__.__name__}.filename")
 
     @classmethod
-    def detect_metadata_files(cls, file_entries):
+    def detect_metadata_files(cls, file_entries: List[Dict[str, str]]) -> List[str]:
+        # Fold case on both sides: comparing a lowered entry name against the
+        # raw class attribute would never match a mixed-case filename.
+        filename: Any = cls.filename
         for entry in file_entries:
-            if entry["name"].lower() == cls.filename.lower():
+            if entry["name"].lower() == filename.lower():
                 return [entry["sha1"]]
         return []
 
@@ -81,7 +84,7 @@
         raise NotImplementedError(f"{self.__class__.__name__}.mapping")
 
     @staticmethod
-    def _normalize_method_name(name):
+    def _normalize_method_name(name: str) -> str:
         return name.replace("-", "_")
 
     @classmethod
@@ -94,7 +97,9 @@
             or hasattr(cls, "normalize_" + cls._normalize_method_name(key))
         }
 
-    def _translate_dict(self, content_dict, *, normalize=True):
+    def _translate_dict(
+        self, content_dict: Dict, *, normalize: bool = True
+    ) -> Dict[str, Any]:
         """
         Translates content by parsing content from a dict object
         and translating with the appropriate mapping
@@ -150,7 +155,7 @@
 class JsonMapping(DictMapping, SingleFileMapping):
     """Base class for all mappings that use a JSON file as input."""
 
-    def translate(self, raw_content):
+    def translate(self, raw_content: bytes) -> Optional[Dict]:
         """
         Translates content by parsing content from a bytestring containing
         json data and translating with the appropriate mapping
@@ -164,14 +169,15 @@
 
         """
         try:
-            raw_content = raw_content.decode()
+            raw_content_string: str = raw_content.decode()
         except UnicodeDecodeError:
             self.log.warning("Error unidecoding from %s", self.log_suffix)
-            return
+            return None
         try:
-            content_dict = json.loads(raw_content)
+            content_dict = json.loads(raw_content_string)
         except json.JSONDecodeError:
             self.log.warning("Error unjsoning from %s", self.log_suffix)
-            return
+            return None
         if isinstance(content_dict, dict):
             return self._translate_dict(content_dict)
+        return None
diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py
--- a/swh/indexer/metadata_dictionary/cff.py
+++ b/swh/indexer/metadata_dictionary/cff.py
@@ -1,3 +1,5 @@
+from typing import Dict, List, Optional, Union
+
 import yaml
 
 from swh.indexer.codemeta import CODEMETA_CONTEXT_URL, CROSSWALK_TABLE, SCHEMA_URI
@@ -18,19 +20,21 @@
     mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"]
     string_fields = ["keywords", "license", "abstract", "version", "doi"]
 
-    def translate(self, raw_content):
-        raw_content = raw_content.decode()
-        content_dict = yaml.load(raw_content, Loader=yaml.SafeLoader)
+    def translate(self, raw_content: bytes) -> Dict[str, str]:
+        raw_content_string: str = raw_content.decode()
+        content_dict = yaml.load(raw_content_string, Loader=yaml.SafeLoader)
         metadata = self._translate_dict(content_dict)
 
         metadata["@context"] = CODEMETA_CONTEXT_URL
 
         return metadata
 
-    def normalize_authors(self, d):
+    def normalize_authors(self, d: List[dict]) -> Dict[str, list]:
         result = []
         for author in d:
-            author_data = {"@type": SCHEMA_URI + "Person"}
+            author_data: Dict[str, Optional[Union[str, Dict]]] = {
+                "@type": SCHEMA_URI + "Person"
+            }
             if "orcid" in author:
                 author_data["@id"] = author["orcid"]
             if "affiliation" in author:
@@ -45,21 +49,21 @@
 
             result.append(author_data)
 
-        result = {"@list": result}
-        return result
+        result_final = {"@list": result}
+        return result_final
 
-    def normalize_doi(self, s):
+    def normalize_doi(self, s: str) -> Optional[Dict[str, str]]:
         if isinstance(s, str):
             return {"@id": "https://doi.org/" + s}
 
-    def normalize_license(self, s):
+    def normalize_license(self, s: str) -> Optional[Dict[str, str]]:
         if isinstance(s, str):
             return {"@id": "https://spdx.org/licenses/" + s}
 
-    def normalize_repository_code(self, s):
+    def normalize_repository_code(self, s: str) -> Optional[Dict[str, str]]:
         if isinstance(s, str):
             return {"@id": s}
 
-    def normalize_date_released(self, s):
+    def normalize_date_released(self, s: str) -> Optional[Dict[str, str]]:
         if isinstance(s, str):
             return {"@value": s, "@type": SCHEMA_URI + "Date"}
diff --git a/swh/indexer/metadata_dictionary/codemeta.py b/swh/indexer/metadata_dictionary/codemeta.py
--- a/swh/indexer/metadata_dictionary/codemeta.py
+++ b/swh/indexer/metadata_dictionary/codemeta.py
@@ -4,6 +4,7 @@
 # See top-level LICENSE file for more information
 
 import json
+from typing import Any, Dict, List, Optional
 
 from swh.indexer.codemeta import CODEMETA_TERMS, expand
 
@@ -20,10 +21,10 @@
     string_fields = None
 
     @classmethod
-    def supported_terms(cls):
+    def supported_terms(cls) -> List[str]:
         return [term for term in CODEMETA_TERMS if not term.startswith("@")]
 
-    def translate(self, content):
+    def translate(self, content: bytes) -> Optional[Dict[str, Any]]:
         try:
             return self.normalize_translation(expand(json.loads(content.decode())))
         except Exception:
diff --git a/swh/indexer/metadata_dictionary/maven.py b/swh/indexer/metadata_dictionary/maven.py
--- a/swh/indexer/metadata_dictionary/maven.py
+++ b/swh/indexer/metadata_dictionary/maven.py
@@ -4,6 +4,7 @@
 # See top-level LICENSE file for more information
 
 import os
+from typing import Any, Dict, Optional
 import xml.parsers.expat
 
 import xmltodict
@@ -23,7 +24,7 @@
     mapping = CROSSWALK_TABLE["Java (Maven)"]
     string_fields = ["name", "version", "description", "email"]
 
-    def translate(self, content):
+    def translate(self, content: bytes) -> Optional[Dict[str, Any]]:
         try:
             d = xmltodict.parse(content).get("project") or {}
         except xml.parsers.expat.ExpatError: