diff --git a/swh/search/api/server.py b/swh/search/api/server.py index e0ab43f..effd2ed 100644 --- a/swh/search/api/server.py +++ b/swh/search/api/server.py @@ -1,90 +1,91 @@ -# Copyright (C) 2019-2020 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import os +from typing import Any, Dict from swh.core import config from swh.core.api import RPCServerApp from swh.core.api import encode_data_server as encode_data from swh.core.api import error_handler from swh.search.metrics import timed from .. import get_search from ..interface import SearchInterface def _get_search(): global search if not search: search = get_search(**app.config["search"]) return search app = RPCServerApp(__name__, backend_class=SearchInterface, backend_factory=_get_search) search = None @app.errorhandler(Exception) def my_error_handler(exception): return error_handler(exception, encode_data) @app.route("/") @timed def index(): return "SWH Search API server" api_cfg = None -def load_and_check_config(config_file, type="elasticsearch"): +def load_and_check_config(config_file: str) -> Dict[str, Any]: """Check the minimal configuration is set to run the api or raise an error explanation. Args: - config_file (str): Path to the configuration file to load - type (str): configuration type. For 'local' type, more + config_file: Path to the configuration file to load + Only its 'search' section is required; no further content checks are done. 
Raises: Error if the setup is not as expected Returns: configuration as a dict """ if not config_file: raise EnvironmentError("Configuration file must be defined") if not os.path.exists(config_file): raise FileNotFoundError("Configuration file %s does not exist" % (config_file,)) cfg = config.read(config_file) if "search" not in cfg: raise KeyError("Missing 'search' configuration") return cfg def make_app_from_configfile(): """Run the WSGI app from the webserver, loading the configuration from a configuration file. SWH_CONFIG_FILENAME environment variable defines the configuration path to load. """ global api_cfg if not api_cfg: config_file = os.environ.get("SWH_CONFIG_FILENAME") api_cfg = load_and_check_config(config_file) app.config.update(api_cfg) handler = logging.StreamHandler() app.logger.addHandler(handler) return app diff --git a/swh/search/cli.py b/swh/search/cli.py index ae67b1c..8901d88 100644 --- a/swh/search/cli.py +++ b/swh/search/cli.py @@ -1,130 +1,130 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # WARNING: do not import unnecessary things here to keep cli startup time under # control import click from swh.core.cli import CONTEXT_SETTINGS from swh.core.cli import swh as swh_cli_group @swh_cli_group.group(name="search", context_settings=CONTEXT_SETTINGS) @click.option( "--config-file", "-C", default=None, type=click.Path(exists=True, dir_okay=False,), help="Configuration file.", ) @click.pass_context def search_cli_group(ctx, config_file): """Software Heritage Search tools.""" from swh.core import config ctx.ensure_object(dict) conf = config.read(config_file) ctx.obj["config"] = conf @search_cli_group.command("initialize") @click.pass_context def initialize(ctx): """Creates Elasticsearch indices.""" from . 
import get_search search = get_search(**ctx.obj["config"]["search"]) search.initialize() print("Done.") @search_cli_group.group("journal-client") @click.pass_context def journal_client(ctx): """""" pass @journal_client.command("objects") @click.option( "--stop-after-objects", "-m", default=None, type=int, help="Maximum number of objects to replay. Default is to run forever.", ) @click.option( "--object-type", "-o", multiple=True, help="Default list of object types to subscribe to", ) @click.option( "--prefix", "-p", help="Topic prefix to use (e.g swh.journal.indexed)", ) @click.pass_context def journal_client_objects(ctx, stop_after_objects, object_type, prefix): """Listens for new objects from the SWH Journal, and schedules tasks to run relevant indexers (currently, origin and origin_visit) on these new objects. """ import functools from swh.journal.client import get_journal_client from . import get_search from .journal_client import process_journal_objects config = ctx.obj["config"] journal_cfg = config["journal"] journal_cfg["object_types"] = object_type or journal_cfg.get("object_types", []) journal_cfg["prefix"] = prefix or journal_cfg.get("prefix") journal_cfg["stop_after_objects"] = stop_after_objects or journal_cfg.get( "stop_after_objects" ) if len(journal_cfg["object_types"]) == 0: raise ValueError("'object_types' must be specified by cli or configuration") if journal_cfg["prefix"] is None: raise ValueError("'prefix' must be specified by cli or configuration") client = get_journal_client(cls="kafka", **journal_cfg,) search = get_search(**config["search"]) worker_fn = functools.partial(process_journal_objects, search=search,) nb_messages = 0 try: nb_messages = client.process(worker_fn) print("Processed %d messages." 
% nb_messages) except KeyboardInterrupt: ctx.exit(0) else: print("Done.") finally: client.close() @search_cli_group.command("rpc-serve") @click.argument("config-path", required=True) @click.option("--host", default="0.0.0.0", help="Host to run the server") @click.option("--port", default=5010, type=click.INT, help="Binding port of the server") @click.option( "--index-prefix", required=False, help="The prefix to add before the index names" ) @click.option( "--debug/--nodebug", default=True, help="Indicates if the server should run in debug mode", ) def rpc_server(config_path, host, port, index_prefix, debug): """Starts a Software Heritage Indexer RPC HTTP server.""" from .api.server import app, load_and_check_config - api_cfg = load_and_check_config(config_path, type="any") + api_cfg = load_and_check_config(config_path) app.config.update(api_cfg) app.run(host, port=int(port), index_prefix=index_prefix, debug=bool(debug)) diff --git a/swh/search/tests/test_server.py b/swh/search/tests/test_server.py new file mode 100644 index 0000000..187b171 --- /dev/null +++ b/swh/search/tests/test_server.py @@ -0,0 +1,88 @@ +# Copyright (C) 2021 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import os +from typing import Any, Dict + +import pytest +import yaml + +from swh.core.api import RPCServerApp +from swh.core.config import load_from_envvar +from swh.search.api.server import load_and_check_config, make_app_from_configfile + + +@pytest.fixture +def swh_search_server_config() -> Dict[str, Any]: + return {"search": {"cls": "elasticsearch", "hosts": ["es1"],}} + + +@pytest.fixture +def swh_search_config(monkeypatch, swh_search_server_config, tmp_path): + conf_path = os.path.join(str(tmp_path), "search.yml") + with open(conf_path, "w") as f: + f.write(yaml.dump(swh_search_server_config)) + 
monkeypatch.setenv("SWH_CONFIG_FILENAME", conf_path) + return conf_path + + +def prepare_config_file(tmpdir, config_dict: Dict, name: str = "config.yml") -> str: + """Prepare configuration file in `$tmpdir/name` with content `config_dict`. + + Args: + tmpdir (LocalPath): root directory + config_dict: Content of the file as a dict; it is dumped + to a yaml string before being written. + name: configuration filename + + Returns: + path of the configuration file prepared. + + """ + config_path = tmpdir / name + config_path.write_text(yaml.dump(config_dict), encoding="utf-8") + # pytest on python3.5 does not support LocalPath manipulation, so + # convert path to string + return str(config_path) + + +@pytest.mark.parametrize("config_file", [None, ""]) +def test_load_and_check_config_no_configuration(config_file): + """Undefined (None or empty) configuration file path raises""" + with pytest.raises(EnvironmentError, match="Configuration file must be defined"): + load_and_check_config(config_file) + + +def test_load_and_check_config_inexistent_file(): + config_path = "/some/inexistent/config.yml" + expected_error = f"Configuration file {config_path} does not exist" + with pytest.raises(EnvironmentError, match=expected_error): + load_and_check_config(config_path) + + +def test_load_and_check_config_wrong_configuration(tmpdir): + """Configuration lacking the 'search' key raises""" + config_path = prepare_config_file(tmpdir, {"something": "useless"}) + with pytest.raises(KeyError, match="Missing 'search' configuration"): + load_and_check_config(config_path) + + +def test_load_and_check_config_local_config_fine(swh_search_server_config, tmpdir): + """Complete configuration is loaded and returned unchanged""" + config_path = prepare_config_file(tmpdir, swh_search_server_config) + cfg = load_and_check_config(config_path) + assert cfg == swh_search_server_config + + +def test_server_make_app_from_config_file(swh_search_config): + app = make_app_from_configfile() + expected_cfg = 
load_from_envvar() + + assert app is not None 
+ assert isinstance(app, RPCServerApp) + assert app.config["search"] == expected_cfg["search"] + + app2 = make_app_from_configfile() + assert app is app2