diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,14 +1,15 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.4.0 + rev: fd9a0c6cfd9f74f38f05161de5e860ba924baf58 hooks: - id: trailing-whitespace - id: flake8 + args: [--ignore=TYP101] - id: check-json - id: check-yaml - repo: https://github.com/codespell-project/codespell - rev: v1.16.0 + rev: 40ba097529c8a37e30184300b814c93bfd2e2a54 hooks: - id: codespell exclude: ^(swh/indexer/data/codemeta/crosswalk.csv)$ diff --git a/swh/indexer/mimetype.py b/swh/indexer/mimetype.py --- a/swh/indexer/mimetype.py +++ b/swh/indexer/mimetype.py @@ -1,11 +1,11 @@ -# Copyright (C) 2016-2018 The Software Heritage developers +# Copyright (C) 2016-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import magic -from typing import Optional +from typing import Optional, Dict, Any, List, Tuple, Iterator from .indexer import ContentIndexer, ContentRangeIndexer @@ -15,7 +15,7 @@ 'was imported instead.') -def compute_mimetype_encoding(raw_content): +def compute_mimetype_encoding(raw_content: bytes) -> Dict[str, bytes]: """Determine mimetype and encoding from the raw content. Args: @@ -36,6 +36,8 @@ class MixinMimetypeIndexer: + tool: Dict[str, Any] + idx_storage: Any """Mixin mimetype indexer. See :class:`MimetypeIndexer` and :class:`MimetypeRangeIndexer` @@ -55,7 +57,7 @@ CONFIG_BASE_FILENAME = 'indexer/mimetype' # type: Optional[str] - def index(self, id, data): + def index(self, id: bytes, data: bytes) -> Dict[str, Any]: """Index sha1s' content and store result. Args: @@ -74,10 +76,11 @@ properties.update({ 'id': id, 'indexer_configuration_id': self.tool['id'], - }) + }) return properties - def persist_index_computations(self, results, policy_update): + def persist_index_computations(self, results: List[Dict[str, Any]], + policy_update: List[str]) -> None: """Persist the results in storage. Args: @@ -104,7 +107,8 @@ - stores result in storage """ - def filter(self, ids): + def filter(self, ids: Dict[str, bytes]) -> Iterator[Tuple + [bytes, int]]: """Filter out known sha1s and return only missing ones. """ @@ -128,7 +132,11 @@ - stores result in storage """ - def indexed_contents_in_range(self, start, end): + + def indexed_contents_in_range(self, + start: bytes, + end: bytes + ) -> Dict['str', Optional[bytes]]: """Retrieve indexed content id within range [start, end]. Args: