diff --git a/.gitignore b/.gitignore --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,5 @@ /sql/createdb-stamp /sql/filldb-stamp .tox/ -.hypothesis/ \ No newline at end of file +.hypothesis/ +.mypy_cache/ diff --git a/MANIFEST.in b/MANIFEST.in --- a/MANIFEST.in +++ b/MANIFEST.in @@ -6,3 +6,4 @@ recursive-include sql * recursive-include swh/indexer/sql *.sql recursive-include swh/indexer/data * +recursive-include swh py.typed diff --git a/mypy.ini b/mypy.ini new file mode 100644 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,27 @@ +[mypy] +namespace_packages = True +warn_unused_ignores = True + + +# 3rd party libraries without stubs (yet) + +[mypy-celery.*] +ignore_missing_imports = True + +[mypy-magic.*] +ignore_missing_imports = True + +[mypy-pkg_resources.*] +ignore_missing_imports = True + +[mypy-psycopg2.*] +ignore_missing_imports = True + +[mypy-pyld.*] +ignore_missing_imports = True + +[mypy-pytest.*] +ignore_missing_imports = True + +[mypy-xmltodict.*] +ignore_missing_imports = True diff --git a/swh/__init__.py b/swh/__init__.py --- a/swh/__init__.py +++ b/swh/__init__.py @@ -1 +1,4 @@ -__path__ = __import__('pkgutil').extend_path(__path__, __name__) +from pkgutil import extend_path +from typing import Iterable + +__path__ = extend_path(__path__, __name__) # type: Iterable[str] diff --git a/swh/indexer/cli.py b/swh/indexer/cli.py --- a/swh/indexer/cli.py +++ b/swh/indexer/cli.py @@ -238,7 +238,7 @@ @cli.command('rpc-serve') -@click.argument('config-path', required=1) +@click.argument('config-path', required=True) @click.option('--host', default='0.0.0.0', help="Host to run the server") @click.option('--port', default=5007, type=click.INT, help="Binding port of the server") diff --git a/swh/indexer/fossology_license.py b/swh/indexer/fossology_license.py --- a/swh/indexer/fossology_license.py +++ b/swh/indexer/fossology_license.py @@ -5,8 +5,9 @@ import subprocess -from swh.model import hashutil +from typing import Optional +from swh.model import hashutil from .indexer import ContentIndexer, ContentRangeIndexer, write_to_temp @@ -66,7 +67,7 @@ 'write_batch_size': ('int', 1000), } - CONFIG_BASE_FILENAME = 'indexer/fossology_license' + CONFIG_BASE_FILENAME = 'indexer/fossology_license' # type: Optional[str] def prepare(self): super().prepare() diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py --- a/swh/indexer/indexer.py +++ b/swh/indexer/indexer.py @@ -9,8 +9,10 @@ import shutil import tempfile import datetime + from copy import deepcopy from contextlib import contextmanager +from typing import Any, Dict, Tuple from swh.scheduler import get_scheduler from swh.scheduler import CONFIG as SWH_CONFIG @@ -124,7 +126,7 @@ }) } - ADDITIONAL_CONFIG = {} + ADDITIONAL_CONFIG = {} # type: Dict[str, Tuple[str, Any]] USE_TOOLS = True diff --git a/swh/indexer/metadata_dictionary/base.py b/swh/indexer/metadata_dictionary/base.py --- a/swh/indexer/metadata_dictionary/base.py +++ b/swh/indexer/metadata_dictionary/base.py @@ -7,6 +7,8 @@ import json import logging +from typing import List + from swh.indexer.codemeta import SCHEMA_URI from swh.indexer.codemeta import compact @@ -109,7 +111,7 @@ """Base class for mappings that take as input a file that is mostly a key-value store (eg. a shallow JSON dict).""" - string_fields = [] + string_fields = [] # type: List[str] '''List of fields that are simple strings, and don't need any normalization.''' diff --git a/swh/indexer/mimetype.py b/swh/indexer/mimetype.py --- a/swh/indexer/mimetype.py +++ b/swh/indexer/mimetype.py @@ -5,6 +5,8 @@ import magic +from typing import Optional + from .indexer import ContentIndexer, ContentRangeIndexer if not hasattr(magic.Magic, 'from_buffer'): @@ -51,7 +53,7 @@ 'write_batch_size': ('int', 1000), } - CONFIG_BASE_FILENAME = 'indexer/mimetype' + CONFIG_BASE_FILENAME = 'indexer/mimetype' # type: Optional[str] def index(self, id, data): """Index sha1s' content and store result. diff --git a/swh/indexer/py.typed b/swh/indexer/py.typed new file mode 100644 --- /dev/null +++ b/swh/indexer/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561. diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py --- a/swh/indexer/storage/db.py +++ b/swh/indexer/storage/db.py @@ -388,7 +388,6 @@ "ORDER BY ts_rank(oim.metadata_tsvector, tsq, 1) DESC " "LIMIT %s;" ).format(keys=', '.join(keys), - regconfig=regconfig, tsquery_template=tsquery_template) cur.execute(query, tsquery_args + [limit]) yield from cur diff --git a/swh/indexer/tests/conftest.py b/swh/indexer/tests/conftest.py --- a/swh/indexer/tests/conftest.py +++ b/swh/indexer/tests/conftest.py @@ -63,7 +63,7 @@ yield objstorage -@pytest.fixture(scope='session') +@pytest.fixture(scope='session') # type: ignore # expected redefinition def celery_includes(): return [ 'swh.indexer.tests.tasks', diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py --- a/swh/indexer/tests/storage/test_storage.py +++ b/swh/indexer/tests/storage/test_storage.py @@ -426,7 +426,7 @@ """Base class for Indexer Storage testing. """ - def setUp(self): + def setUp(self, *args, **kwargs): super().setUp() self.storage = get_indexer_storage(**self.storage_config) tools = self.storage.indexer_configuration_add(TOOLS) diff --git a/swh/indexer/tests/test_fossology_license.py b/swh/indexer/tests/test_fossology_license.py --- a/swh/indexer/tests/test_fossology_license.py +++ b/swh/indexer/tests/test_fossology_license.py @@ -4,10 +4,11 @@ # See top-level LICENSE file for more information import unittest -from unittest.mock import patch - import pytest +from unittest.mock import patch +from typing import Any, Dict + from swh.indexer import fossology_license from swh.indexer.fossology_license import ( FossologyLicenseIndexer, FossologyLicenseRangeIndexer, @@ -66,7 +67,7 @@ 'command_line': 'nomossa ', }, }, -} +} # type: Dict[str, Any] RANGE_CONFIG = dict(list(CONFIG.items()) + [('write_batch_size', 100)]) diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -7,6 +7,7 @@ import unittest from hypothesis import given, strategies, settings, HealthCheck +from typing import cast from swh.model.hashutil import hash_to_bytes @@ -14,6 +15,9 @@ from swh.indexer.codemeta import merge_documents from swh.indexer.metadata_dictionary import MAPPINGS from swh.indexer.metadata_dictionary.base import merge_values +from swh.indexer.metadata_dictionary.maven import MavenMapping +from swh.indexer.metadata_dictionary.npm import NpmMapping +from swh.indexer.metadata_dictionary.ruby import GemspecMapping from swh.indexer.metadata_detector import ( detect_metadata ) @@ -1075,7 +1079,7 @@ @settings(suppress_health_check=[HealthCheck.too_slow]) @given(json_document_strategy( - keys=list(MAPPINGS['NpmMapping'].mapping))) + keys=list(cast(NpmMapping, MAPPINGS['NpmMapping']).mapping))) def test_npm_adversarial(self, doc): raw = json.dumps(doc).encode() self.npm_mapping.translate(raw) @@ -1088,7 +1092,7 @@ @settings(suppress_health_check=[HealthCheck.too_slow]) @given(xml_document_strategy( - keys=list(MAPPINGS['MavenMapping'].mapping), + keys=list(cast(MavenMapping, MAPPINGS['MavenMapping']).mapping), root='project', xmlns='http://maven.apache.org/POM/4.0.0')) def test_maven_adversarial(self, doc): @@ -1099,7 +1103,8 @@ # keys strategies.one_of( strategies.text(), - *map(strategies.just, MAPPINGS['GemspecMapping'].mapping) + *map(strategies.just, + cast(GemspecMapping, MAPPINGS['GemspecMapping']).mapping) ), # values strategies.recursive( diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py --- a/swh/indexer/tests/test_mimetype.py +++ b/swh/indexer/tests/test_mimetype.py @@ -6,6 +6,8 @@ import pytest import unittest +from typing import Any, Dict + from swh.indexer.mimetype import ( MimetypeIndexer, MimetypeRangeIndexer, compute_mimetype_encoding ) @@ -40,7 +42,7 @@ "debian-package": "python3-magic" }, }, -} +} # type: Dict[str, Any] class TestMimetypeIndexer(CommonContentIndexerTest, unittest.TestCase): diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py --- a/swh/indexer/tests/utils.py +++ b/swh/indexer/tests/utils.py @@ -397,10 +397,16 @@ json_dict_keys = strategies.one_of( strategies.characters(), - *map(strategies.just, ['type', 'url', 'name', 'email', '@id', - '@context', 'repository', 'license', - 'repositories', 'licenses' - ]), + strategies.just('type'), + strategies.just('url'), + strategies.just('name'), + strategies.just('email'), + strategies.just('@id'), + strategies.just('@context'), + strategies.just('repository'), + strategies.just('license'), + strategies.just('repositories'), + strategies.just('licenses'), ) """Hypothesis strategy that generates strings, with an emphasis on those that are often used as dictionary keys in metadata files.""" diff --git a/tox.ini b/tox.ini --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist=flake8,py3 +envlist=flake8,mypy,py3 [testenv:py3] deps = @@ -39,3 +39,11 @@ flake8 commands = {envpython} -m flake8 + +[testenv:mypy] +skip_install = true +deps = + .[testing] + mypy +commands = + mypy swh