Software Heritage scheduler RPC server

diff --git a/swh/scheduler/api/server.py b/swh/scheduler/api/server.py index abc3ab8..a067589 100644 --- a/swh/scheduler/api/server.py +++ b/swh/scheduler/api/server.py @@ -1,150 +1,151 @@ -# Copyright (C) 2018-2019 The Software Heritage developers +# Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import os +from typing import Dict from swh.core import config from swh.core.api import JSONFormatter, MsgpackFormatter, RPCServerApp from swh.core.api import encode_data_server as encode_data from swh.core.api import error_handler, negotiate from swh.scheduler import get_scheduler from swh.scheduler.exc import SchedulerException from swh.scheduler.interface import SchedulerInterface from .serializers import DECODERS, ENCODERS scheduler = None def get_global_scheduler(): global scheduler if not scheduler: scheduler = get_scheduler(**app.config["scheduler"]) return scheduler class SchedulerServerApp(RPCServerApp): extra_type_decoders = DECODERS extra_type_encoders = ENCODERS app = SchedulerServerApp( __name__, backend_class=SchedulerInterface, backend_factory=get_global_scheduler ) @app.errorhandler(SchedulerException) def argument_error_handler(exception): return error_handler(exception, encode_data, status_code=400) @app.errorhandler(Exception) def my_error_handler(exception): return error_handler(exception, encode_data) def has_no_empty_params(rule): return len(rule.defaults or ()) >= len(rule.arguments or ()) @app.route("/") def index(): return """ Software Heritage scheduler RPC server

You have reached the Software Heritage scheduler RPC server.
See its documentation and API for more information

""" @app.route("/site-map") @negotiate(MsgpackFormatter) @negotiate(JSONFormatter) def site_map(): links = [] for rule in app.url_map.iter_rules(): if has_no_empty_params(rule) and hasattr(SchedulerInterface, rule.endpoint): links.append( dict( rule=rule.rule, description=getattr(SchedulerInterface, rule.endpoint).__doc__, ) ) # links is now a list of url, endpoint tuples return links -def load_and_check_config(config_path, type="local"): +def load_and_check_config(config_path: str, type: str = "postgresql") -> Dict: """Check the minimal configuration is set to run the api or raise an error explanation. Args: - config_path (str): Path to the configuration file to load - type (str): configuration type. For 'local' type, more - checks are done. + config_path: Configuration file path to load + type: Configuration type, for 'postgresql' type (the default), more checks are + done. Raises: Error if the setup is not as expected Returns: configuration as a dict """ if not config_path: raise EnvironmentError("Configuration file must be defined") if not os.path.exists(config_path): raise FileNotFoundError(f"Configuration file {config_path} does not exist") cfg = config.read(config_path) vcfg = cfg.get("scheduler") if not vcfg: raise KeyError("Missing '%scheduler' configuration") - if type == "local": + if type == "postgresql": cls = vcfg.get("cls") if cls not in ("local", "postgresql"): raise ValueError( "The scheduler backend can only be started with a 'postgresql' " "configuration" ) db = vcfg.get("db") if not db: raise KeyError("Invalid configuration; missing 'db' config entry") return cfg api_cfg = None def make_app_from_configfile(): """Run the WSGI app from the webserver, loading the configuration from a configuration file. SWH_CONFIG_FILENAME environment variable defines the configuration path to load. """ global api_cfg if not api_cfg: config_path = os.environ.get("SWH_CONFIG_FILENAME") api_cfg = load_and_check_config(config_path) app.config.update(api_cfg) handler = logging.StreamHandler() app.logger.addHandler(handler) return app if __name__ == "__main__": print('Please use the "swh-scheduler api-server" command') diff --git a/swh/scheduler/tests/test_server.py b/swh/scheduler/tests/test_server.py index 50c5b41..4156fac 100644 --- a/swh/scheduler/tests/test_server.py +++ b/swh/scheduler/tests/test_server.py @@ -1,100 +1,97 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest import yaml from swh.scheduler.api.server import load_and_check_config def prepare_config_file(tmpdir, content, name="config.yml"): """Prepare configuration file in `$tmpdir/name` with content `content`. Args: tmpdir (LocalPath): root directory content (str/dict): Content of the file either as string or as a dict. If a dict, converts the dict into a yaml string. name (str): configuration filename Returns path (str) of the configuration file prepared. """ config_path = tmpdir / name if isinstance(content, dict): # convert if needed content = yaml.dump(content) config_path.write_text(content, encoding="utf-8") # pytest on python3.5 does not support LocalPath manipulation, so # convert path to string return str(config_path) @pytest.mark.parametrize("scheduler_class", [None, ""]) def test_load_and_check_config_no_configuration(scheduler_class): """Inexistent configuration files raises""" with pytest.raises(EnvironmentError, match="Configuration file must be defined"): load_and_check_config(scheduler_class) def test_load_and_check_config_inexistent_fil(): """Inexistent config filepath should raise""" config_path = "/some/inexistent/config.yml" expected_error = f"Configuration file {config_path} does not exist" with pytest.raises(FileNotFoundError, match=expected_error): load_and_check_config(config_path) def test_load_and_check_config_wrong_configuration(tmpdir): """Wrong configuration raises""" config_path = prepare_config_file(tmpdir, "something: useless") with pytest.raises(KeyError, match="Missing '%scheduler' configuration"): load_and_check_config(config_path) def test_load_and_check_config_remote_config_local_type_raise(tmpdir): - """Configuration without 'local' storage is rejected""" - config = {"scheduler": {"cls": "remote"}} - config_path = prepare_config_file(tmpdir, config) - expected_error = ( - "The scheduler backend can only be started with a 'postgresql'" " configuration" - ) - with pytest.raises(ValueError, match=expected_error): - load_and_check_config(config_path, type="local") + """Configuration without 'postgresql' storage is rejected""" + config_path = prepare_config_file(tmpdir, {"scheduler": {"cls": "remote"}}) + with pytest.raises(ValueError, match="'postgresql'"): + load_and_check_config(config_path) def test_load_and_check_config_local_incomplete_configuration(tmpdir): """Incomplete 'local' configuration should raise""" config = { "scheduler": { "cls": "postgresql", "something": "needed-for-test", } } config_path = prepare_config_file(tmpdir, config) expected_error = "Invalid configuration; missing 'db' config entry" with pytest.raises(KeyError, match=expected_error): load_and_check_config(config_path) -def test_load_and_check_config_local_config_fine(tmpdir): +@pytest.mark.parametrize("clazz", ["local", "postgresql"]) +def test_load_and_check_config_local_config_fine(tmpdir, clazz): """Local configuration is fine""" config = { "scheduler": { - "cls": "postgresql", + "cls": clazz, "db": "db", } } config_path = prepare_config_file(tmpdir, config) - cfg = load_and_check_config(config_path, type="local") + cfg = load_and_check_config(config_path, type="postgresql") assert cfg == config def test_load_and_check_config_remote_config_fine(tmpdir): """Remote configuration is fine""" config = {"scheduler": {"cls": "remote"}} config_path = prepare_config_file(tmpdir, config) cfg = load_and_check_config(config_path, type="any") assert cfg == config