diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,7 @@ psycopg2 pyyaml setuptools +typing-extensions # test dependencies # hypothesis diff --git a/swh/scheduler/__init__.py b/swh/scheduler/__init__.py --- a/swh/scheduler/__init__.py +++ b/swh/scheduler/__init__.py @@ -1,9 +1,13 @@ -# Copyright (C) 2018 The Software Heritage developers +# Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import Any, Dict +from __future__ import annotations + +from importlib import import_module +from typing import TYPE_CHECKING, Any, Dict +import warnings # Percentage of tasks with priority to schedule PRIORITY_SLOT = 0.6 @@ -11,13 +15,17 @@ DEFAULT_CONFIG = { "scheduler": ( "dict", - {"cls": "local", "args": {"db": "dbname=softwareheritage-scheduler-dev",},}, + {"cls": "local", "db": "dbname=softwareheritage-scheduler-dev",}, ) } # current configuration. To be set by the config loading mechanism CONFIG = {} # type: Dict[str, Any] +if TYPE_CHECKING: + from swh.scheduler.interface import SchedulerInterface + + def compute_nb_tasks_from(num_tasks): """Compute and returns the tuple, number of tasks without priority, number of tasks with priority. @@ -35,16 +43,19 @@ return (int((1 - PRIORITY_SLOT) * num_tasks), int(PRIORITY_SLOT * num_tasks)) -def get_scheduler(cls, args={}): +BACKEND_TYPES: Dict[str, str] = { + "local": ".backend.SchedulerBackend", + "remote": ".api.client.RemoteScheduler", +} + + +def get_scheduler(cls: str, **kwargs) -> SchedulerInterface: """ - Get a scheduler object of class `scheduler_class` with arguments - `scheduler_args`. + Get a scheduler object of class `cls` with arguments `**kwargs`. Args: - scheduler (dict): dictionary with keys: - - cls (str): scheduler's class, either 'local' or 'remote' - args (dict): dictionary with keys, default to empty. + cls: scheduler's class, either 'local' or 'remote' + kwargs: arguments to pass to the class' constructor Returns: an instance of swh.scheduler, either local or remote: @@ -57,11 +68,21 @@ """ - if cls == "remote": - from .api.client import RemoteScheduler as SchedulerBackend - elif cls == "local": - from .backend import SchedulerBackend - else: - raise ValueError("Unknown swh.scheduler class `%s`" % cls) - - return SchedulerBackend(**args) + if "args" in kwargs: + warnings.warn( + 'Explicit "args" key is deprecated, use keys directly instead.', + DeprecationWarning, + ) + kwargs = kwargs["args"] + + class_path = BACKEND_TYPES.get(cls) + if class_path is None: + raise ValueError( + f"Unknown Scheduler class `{cls}`. " + f"Supported: {', '.join(BACKEND_TYPES)}" + ) + + (module_path, class_name) = class_path.rsplit(".", 1) + module = import_module(module_path, package=__package__) + BackendClass = getattr(module, class_name) + return BackendClass(**kwargs) diff --git a/swh/scheduler/api/server.py b/swh/scheduler/api/server.py --- a/swh/scheduler/api/server.py +++ b/swh/scheduler/api/server.py @@ -82,12 +82,12 @@ return links -def load_and_check_config(config_file, type="local"): +def load_and_check_config(config_path, type="local"): """Check the minimal configuration is set to run the api or raise an error explanation. Args: - config_file (str): Path to the configuration file to load + config_path (str): Path to the configuration file to load type (str): configuration type. For 'local' type, more checks are done. @@ -98,13 +98,13 @@ configuration as a dict """ - if not config_file: + if not config_path: raise EnvironmentError("Configuration file must be defined") - if not os.path.exists(config_file): - raise FileNotFoundError("Configuration file %s does not exist" % (config_file,)) + if not os.path.exists(config_path): + raise FileNotFoundError(f"Configuration file {config_path} does not exist") - cfg = config.read(config_file) + cfg = config.read(config_path) vcfg = cfg.get("scheduler") if not vcfg: @@ -118,11 +118,7 @@ "configuration" ) - args = vcfg.get("args") - if not args: - raise KeyError("Invalid configuration; missing 'args' config entry") - - db = args.get("db") + db = vcfg.get("db") if not db: raise KeyError("Invalid configuration; missing 'db' config entry") @@ -142,8 +138,8 @@ """ global api_cfg if not api_cfg: - config_file = os.environ.get("SWH_CONFIG_FILENAME") - api_cfg = load_and_check_config(config_file) + config_path = os.environ.get("SWH_CONFIG_FILENAME") + api_cfg = load_and_check_config(config_path) app.config.update(api_cfg) handler = logging.StreamHandler() app.logger.addHandler(handler) diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py --- a/swh/scheduler/interface.py +++ b/swh/scheduler/interface.py @@ -6,6 +6,8 @@ from typing import Any, Dict, Iterable, List, Optional from uuid import UUID +from typing_extensions import Protocol, runtime_checkable + from swh.core.api import remote_api_endpoint from swh.scheduler.model import ( ListedOrigin, @@ -15,7 +17,8 @@ ) -class SchedulerInterface: +@runtime_checkable +class SchedulerInterface(Protocol): @remote_api_endpoint("task_type/create") def create_task_type(self, task_type): """Create a new task type ready for scheduling. diff --git a/swh/scheduler/pytest_plugin.py b/swh/scheduler/pytest_plugin.py --- a/swh/scheduler/pytest_plugin.py +++ b/swh/scheduler/pytest_plugin.py @@ -43,7 +43,7 @@ @pytest.fixture def swh_scheduler(swh_scheduler_config): - scheduler = get_scheduler("local", swh_scheduler_config) + scheduler = get_scheduler("local", **swh_scheduler_config) for taskname in TASK_NAMES: scheduler.create_task_type( { diff --git a/swh/scheduler/tests/es/conftest.py b/swh/scheduler/tests/es/conftest.py --- a/swh/scheduler/tests/es/conftest.py +++ b/swh/scheduler/tests/es/conftest.py @@ -12,7 +12,7 @@ @pytest.fixture def swh_sched_config(swh_scheduler_config): return { - "scheduler": {"cls": "local", "args": swh_scheduler_config,}, + "scheduler": {"cls": "local", **swh_scheduler_config,}, "elasticsearch": { "cls": "memory", "args": {"index_name_prefix": "swh-tasks",}, diff --git a/swh/scheduler/tests/test_cli_task_type.py b/swh/scheduler/tests/test_cli_task_type.py --- a/swh/scheduler/tests/test_cli_task_type.py +++ b/swh/scheduler/tests/test_cli_task_type.py @@ -48,7 +48,7 @@ """Expose the local scheduler configuration """ - return {"scheduler": {"cls": "local", "args": swh_scheduler_config}} + return {"scheduler": {"cls": "local", **swh_scheduler_config}} @pytest.fixture diff --git a/swh/scheduler/tests/test_init.py b/swh/scheduler/tests/test_init.py new file mode 100644 --- /dev/null +++ b/swh/scheduler/tests/test_init.py @@ -0,0 +1,78 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import inspect + +import pytest + +from swh.scheduler import get_scheduler +from swh.scheduler.api.client import RemoteScheduler +from swh.scheduler.backend import SchedulerBackend +from swh.scheduler.interface import SchedulerInterface + +SERVER_IMPLEMENTATIONS = [ + ("remote", RemoteScheduler, {"url": "localhost"}), + ("local", SchedulerBackend, {"db": "something"}), +] + + +@pytest.fixture +def mock_psycopg2(mocker): + mocker.patch("swh.scheduler.backend.psycopg2.pool") + return mocker + + +def test_init_get_scheduler_failure(): + with pytest.raises(ValueError, match="Unknown Scheduler class"): + get_scheduler("unknown-scheduler-storage") + + +@pytest.mark.parametrize("class_name,expected_class,kwargs", SERVER_IMPLEMENTATIONS) +def test_init_get_scheduler(class_name, expected_class, kwargs, mock_psycopg2): + concrete_scheduler = get_scheduler(class_name, **kwargs) + assert isinstance(concrete_scheduler, expected_class) + assert isinstance(concrete_scheduler, SchedulerInterface) + + +@pytest.mark.parametrize("class_name,expected_class,kwargs", SERVER_IMPLEMENTATIONS) +def test_init_get_scheduler_deprecation_warning( + class_name, expected_class, kwargs, mock_psycopg2 +): + with pytest.warns(DeprecationWarning): + concrete_scheduler = get_scheduler(class_name, args=kwargs) + assert isinstance(concrete_scheduler, expected_class) + + +def test_types(swh_scheduler) -> None: + """Checks all methods of SchedulerInterface are implemented by this + backend, and that they have the same signature.""" + # Create an instance of the protocol (which cannot be instantiated + # directly, so this creates a subclass, then instantiates it) + interface = type("_", (SchedulerInterface,), {})() + + missing_methods = [] + + for meth_name in dir(interface): + if meth_name.startswith("_"): + continue + interface_meth = getattr(interface, meth_name) + try: + concrete_meth = getattr(swh_scheduler, meth_name) + except AttributeError: + missing_methods.append(meth_name) + continue + + expected_signature = inspect.signature(interface_meth) + actual_signature = inspect.signature(concrete_meth) + + assert expected_signature == actual_signature, meth_name + + assert missing_methods == [] + + # If all the assertions above succeed, then this one should too. + # But there's no harm in double-checking. + # And we could replace the assertions above by this one, but unlike + # the assertions above, it doesn't explain what is missing. + assert isinstance(swh_scheduler, SchedulerInterface) diff --git a/swh/scheduler/tests/test_server.py b/swh/scheduler/tests/test_server.py --- a/swh/scheduler/tests/test_server.py +++ b/swh/scheduler/tests/test_server.py @@ -3,8 +3,6 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import copy - import pytest import yaml @@ -57,7 +55,7 @@ def test_load_and_check_config_remote_config_local_type_raise(tmpdir): """Configuration without 'local' storage is rejected""" - config = {"scheduler": {"cls": "remote", "args": {}}} + config = {"scheduler": {"cls": "remote"}} config_path = prepare_config_file(tmpdir, config) expected_error = ( "The scheduler backend can only be started with a 'local'" " configuration" @@ -68,29 +66,17 @@ def test_load_and_check_config_local_incomplete_configuration(tmpdir): """Incomplete 'local' configuration should raise""" - config = { - "scheduler": { - "cls": "local", - "args": {"db": "database", "something": "needed-for-test",}, - } - } - - for key in ["db", "args"]: - c = copy.deepcopy(config) - if key == "db": - source = c["scheduler"]["args"] - else: - source = c["scheduler"] - source.pop(key) - config_path = prepare_config_file(tmpdir, c) - expected_error = f"Invalid configuration; missing '{key}' config entry" - with pytest.raises(KeyError, match=expected_error): - load_and_check_config(config_path) + config = {"scheduler": {"cls": "local", "something": "needed-for-test",}} + + config_path = prepare_config_file(tmpdir, config) + expected_error = "Invalid configuration; missing 'db' config entry" + with pytest.raises(KeyError, match=expected_error): + load_and_check_config(config_path) def test_load_and_check_config_local_config_fine(tmpdir): """Local configuration is fine""" - config = {"scheduler": {"cls": "local", "args": {"db": "db",}}} + config = {"scheduler": {"cls": "local", "db": "db",}} config_path = prepare_config_file(tmpdir, config) cfg = load_and_check_config(config_path, type="local") assert cfg == config @@ -98,8 +84,7 @@ def test_load_and_check_config_remote_config_fine(tmpdir): """Remote configuration is fine""" - config = {"scheduler": {"cls": "remote", "args": {}}} + config = {"scheduler": {"cls": "remote"}} config_path = prepare_config_file(tmpdir, config) cfg = load_and_check_config(config_path, type="any") - assert cfg == config