diff --git a/.gitignore b/.gitignore --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,5 @@ /sql/createdb-stamp /sql/filldb-stamp .tox/ -.hypothesis/ \ No newline at end of file +.hypothesis/ +.mypy_cache/ diff --git a/MANIFEST.in b/MANIFEST.in --- a/MANIFEST.in +++ b/MANIFEST.in @@ -6,3 +6,4 @@ recursive-include sql * recursive-include swh/indexer/sql *.sql recursive-include swh/indexer/data * +recursive-include swh py.typed diff --git a/mypy.ini b/mypy.ini new file mode 100644 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,27 @@ +[mypy] +namespace_packages = True +warn_unused_ignores = True + + +# 3rd party libraries without stubs (yet) + +[mypy-celery.*] +ignore_missing_imports = True + +[mypy-magic.*] +ignore_missing_imports = True + +[mypy-pkg_resources.*] +ignore_missing_imports = True + +[mypy-psycopg2.*] +ignore_missing_imports = True + +[mypy-pyld.*] +ignore_missing_imports = True + +[mypy-pytest.*] +ignore_missing_imports = True + +[mypy-xmltodict.*] +ignore_missing_imports = True diff --git a/swh/__init__.py b/swh/__init__.py --- a/swh/__init__.py +++ b/swh/__init__.py @@ -1 +1,4 @@ -__path__ = __import__('pkgutil').extend_path(__path__, __name__) +from pkgutil import extend_path +from typing import Iterable + +__path__ = extend_path(__path__, __name__) # type: Iterable[str] diff --git a/swh/indexer/cli.py b/swh/indexer/cli.py --- a/swh/indexer/cli.py +++ b/swh/indexer/cli.py @@ -238,7 +238,7 @@ @cli.command('rpc-serve') -@click.argument('config-path', required=1) +@click.argument('config-path', required=True) @click.option('--host', default='0.0.0.0', help="Host to run the server") @click.option('--port', default=5007, type=click.INT, help="Binding port of the server") diff --git a/swh/indexer/fossology_license.py b/swh/indexer/fossology_license.py --- a/swh/indexer/fossology_license.py +++ b/swh/indexer/fossology_license.py @@ -5,8 +5,9 @@ import subprocess -from swh.model import hashutil +from typing import Optional +from swh.model import hashutil from .indexer import ContentIndexer, ContentRangeIndexer, write_to_temp @@ -66,7 +67,7 @@ 'write_batch_size': ('int', 1000), } - CONFIG_BASE_FILENAME = 'indexer/fossology_license' + CONFIG_BASE_FILENAME = 'indexer/fossology_license' # type: Optional[str] def prepare(self): super().prepare() diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py --- a/swh/indexer/indexer.py +++ b/swh/indexer/indexer.py @@ -9,8 +9,10 @@ import shutil import tempfile import datetime + from copy import deepcopy from contextlib import contextmanager +from typing import Any, Dict, Tuple from swh.scheduler import get_scheduler from swh.scheduler import CONFIG as SWH_CONFIG @@ -124,7 +126,7 @@ }) } - ADDITIONAL_CONFIG = {} + ADDITIONAL_CONFIG = {} # type: Dict[str, Tuple[str, Any]] USE_TOOLS = True diff --git a/swh/indexer/metadata_dictionary/base.py b/swh/indexer/metadata_dictionary/base.py --- a/swh/indexer/metadata_dictionary/base.py +++ b/swh/indexer/metadata_dictionary/base.py @@ -7,6 +7,8 @@ import json import logging +from typing import List + from swh.indexer.codemeta import SCHEMA_URI from swh.indexer.codemeta import compact @@ -109,7 +111,7 @@ """Base class for mappings that take as input a file that is mostly a key-value store (eg. a shallow JSON dict).""" - string_fields = [] + string_fields = [] # type: List[str] '''List of fields that are simple strings, and don't need any normalization.''' diff --git a/swh/indexer/mimetype.py b/swh/indexer/mimetype.py --- a/swh/indexer/mimetype.py +++ b/swh/indexer/mimetype.py @@ -5,6 +5,8 @@ import magic +from typing import Optional + from .indexer import ContentIndexer, ContentRangeIndexer if not hasattr(magic.Magic, 'from_buffer'): @@ -51,7 +53,7 @@ 'write_batch_size': ('int', 1000), } - CONFIG_BASE_FILENAME = 'indexer/mimetype' + CONFIG_BASE_FILENAME = 'indexer/mimetype' # type: Optional[str] def index(self, id, data): """Index sha1s' content and store result. diff --git a/swh/indexer/py.typed b/swh/indexer/py.typed new file mode 100644 --- /dev/null +++ b/swh/indexer/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561. diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -7,6 +7,7 @@ import unittest from hypothesis import given, strategies, settings, HealthCheck +from typing import cast from swh.model.hashutil import hash_to_bytes @@ -14,6 +15,9 @@ from swh.indexer.codemeta import merge_documents from swh.indexer.metadata_dictionary import MAPPINGS from swh.indexer.metadata_dictionary.base import merge_values +from swh.indexer.metadata_dictionary.maven import MavenMapping +from swh.indexer.metadata_dictionary.npm import NpmMapping +from swh.indexer.metadata_dictionary.ruby import GemspecMapping from swh.indexer.metadata_detector import ( detect_metadata ) @@ -1075,7 +1079,7 @@ @settings(suppress_health_check=[HealthCheck.too_slow]) @given(json_document_strategy( - keys=list(MAPPINGS['NpmMapping'].mapping))) + keys=list(cast(NpmMapping, MAPPINGS['NpmMapping']).mapping))) def test_npm_adversarial(self, doc): raw = json.dumps(doc).encode() self.npm_mapping.translate(raw) @@ -1088,7 +1092,7 @@ @settings(suppress_health_check=[HealthCheck.too_slow]) @given(xml_document_strategy( - keys=list(MAPPINGS['MavenMapping'].mapping), + keys=list(cast(MavenMapping, MAPPINGS['MavenMapping']).mapping), root='project', xmlns='http://maven.apache.org/POM/4.0.0')) def test_maven_adversarial(self, doc): @@ -1099,7 +1103,8 @@ # keys strategies.one_of( strategies.text(), - *map(strategies.just, MAPPINGS['GemspecMapping'].mapping) + *map(strategies.just, + cast(GemspecMapping, MAPPINGS['GemspecMapping']).mapping) ), # values strategies.recursive( diff --git a/tox.ini b/tox.ini --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist=flake8,py3 +envlist=flake8,mypy,py3 [testenv:py3] deps = @@ -39,3 +39,11 @@ flake8 commands = {envpython} -m flake8 + +[testenv:mypy] +skip_install = true +deps = + .[testing] + mypy +commands = + mypy swh