diff --git a/swh/vault/api/server.py b/swh/vault/api/server.py index 31ef3cc..881a390 100644 --- a/swh/vault/api/server.py +++ b/swh/vault/api/server.py @@ -1,124 +1,132 @@ # Copyright (C) 2016-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from __future__ import annotations + import asyncio import os from typing import Any, Dict, Optional import aiohttp.web from swh.core.api.asynchronous import RPCServerApp from swh.core.config import config_basepath, merge_configs, read_raw_config from swh.vault import get_vault as get_swhvault from swh.vault.backend import NotFoundExc from swh.vault.interface import VaultInterface DEFAULT_CONFIG = { "storage": {"cls": "remote", "url": "http://localhost:5002/"}, "cache": { "cls": "pathslicing", "args": {"root": "/srv/softwareheritage/vault", "slicing": "0:1/1:5"}, }, "client_max_size": 1024 ** 3, "vault": {"cls": "local", "args": {"db": "dbname=softwareheritage-vault-dev",}}, "scheduler": {"cls": "remote", "url": "http://localhost:5008/"}, } vault = None app = None def get_vault(config: Optional[Dict[str, Any]] = None) -> VaultInterface: global vault if not vault: assert config is not None vault = get_swhvault(**config) return vault class VaultServerApp(RPCServerApp): client_exception_classes = (NotFoundExc,) @asyncio.coroutine def index(request): return aiohttp.web.Response(body="SWH Vault API server") def check_config(cfg: Dict[str, Any]) -> Dict[str, Any]: - """Ensure the configuration is ok to run a local vault server + """Ensure the configuration is ok to run a local vault server, and propagate defaults. Raises: EnvironmentError if the configuration is not for local instance ValueError if one of the following keys is missing: vault, cache, storage, scheduler Returns: - Configuration dict to instantiate a local vault server instance + New configuration dict to instantiate a local vault server instance. """ + cfg = cfg.copy() + if "vault" not in cfg: raise ValueError("missing 'vault' configuration") vcfg = cfg["vault"] if vcfg["cls"] != "local": raise EnvironmentError( "The vault backend can only be started with a 'local' configuration", ) - args = vcfg["args"] - if "cache" not in args: - args["cache"] = cfg.get("cache") - if "storage" not in args: - args["storage"] = cfg.get("storage") - if "scheduler" not in args: - args["scheduler"] = cfg.get("scheduler") + + # TODO: Soft-deprecation of args key. Remove when ready. + vcfg.update(vcfg.get("args", {})) + + # Default to top-level value if any + if "cache" not in vcfg: + vcfg["cache"] = cfg.get("cache") + if "storage" not in vcfg: + vcfg["storage"] = cfg.get("storage") + if "scheduler" not in vcfg: + vcfg["scheduler"] = cfg.get("scheduler") for key in ("cache", "storage", "scheduler"): - if not args.get(key): + if not vcfg.get(key): raise ValueError(f"invalid configuration: missing {key} config entry.") return cfg -def make_app(config_to_check: Dict[str, Any]) -> VaultServerApp: +def make_app(config: Dict[str, Any]) -> VaultServerApp: """Ensure the configuration is ok, then instantiate the server application """ - config_ok = check_config(config_to_check) + config = check_config(config) app = VaultServerApp( __name__, backend_class=VaultInterface, - backend_factory=lambda: get_vault(config_ok["vault"]), - client_max_size=config_ok["client_max_size"], + backend_factory=lambda: get_vault(config["vault"]), + client_max_size=config["client_max_size"], ) app.router.add_route("GET", "/", index) return app def make_app_from_configfile( config_path: Optional[str] = None, **kwargs ) -> VaultServerApp: """Load and check configuration if ok, then instantiate (once) a vault server application. """ global app if not app: config_path = os.environ.get("SWH_CONFIG_FILENAME", config_path) if not config_path: raise ValueError("Missing configuration path.") if not os.path.isfile(config_path): raise ValueError(f"Configuration path {config_path} should exist.") app_config = read_raw_config(config_basepath(config_path)) app_config = merge_configs(DEFAULT_CONFIG, app_config) app = make_app(app_config) return app if __name__ == "__main__": print("Deprecated. Use swh-vault ") diff --git a/swh/vault/cookers/__init__.py b/swh/vault/cookers/__init__.py index 04ca9e5..0a9df00 100644 --- a/swh/vault/cookers/__init__.py +++ b/swh/vault/cookers/__init__.py @@ -1,68 +1,96 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information + +from __future__ import annotations + import os +from typing import Any, Dict from swh.core.config import load_named_config from swh.core.config import read as read_config from swh.storage import get_storage from swh.vault import get_vault from swh.vault.cookers.base import DEFAULT_CONFIG, DEFAULT_CONFIG_PATH from swh.vault.cookers.directory import DirectoryCooker from swh.vault.cookers.revision_flat import RevisionFlatCooker from swh.vault.cookers.revision_gitfast import RevisionGitfastCooker COOKER_TYPES = { "directory": DirectoryCooker, "revision_flat": RevisionFlatCooker, "revision_gitfast": RevisionGitfastCooker, } def get_cooker_cls(obj_type): return COOKER_TYPES[obj_type] +def check_config(cfg: Dict[str, Any]) -> Dict[str, Any]: + """Ensure the configuration is ok to run a vault worker, and propagate defaults + + Raises: + EnvironmentError if the configuration is not for remote instance + ValueError if one of the following keys is missing: vault, storage + + Returns: + New configuration dict to instantiate a vault worker instance + + """ + cfg = cfg.copy() + + if "vault" not in cfg: + raise ValueError("missing 'vault' configuration") + + vcfg = cfg["vault"] + if vcfg["cls"] != "remote": + raise EnvironmentError( + "This vault backend can only be a 'remote' configuration" + ) + + # TODO: Soft-deprecation of args key. Remove when ready. + vcfg.update(vcfg.get("args", {})) + + # Default to top-level value if any + if "storage" not in vcfg: + vcfg["storage"] = cfg.get("storage") + + if not vcfg.get("storage"): + raise ValueError("invalid configuration: missing 'storage' config entry.") + + return cfg + + def get_cooker(obj_type: str, obj_id: str): """Instantiate a cooker class of type obj_type. Returns: Cooker class in charge of cooking the obj_type with id obj_id. Raises: ValueError in case of a missing top-level vault key configuration or a storage key. EnvironmentError in case the vault configuration reference a non remote class. """ if "SWH_CONFIG_FILENAME" in os.environ: cfg = read_config(os.environ["SWH_CONFIG_FILENAME"], DEFAULT_CONFIG) else: cfg = load_named_config(DEFAULT_CONFIG_PATH, DEFAULT_CONFIG) cooker_cls = get_cooker_cls(obj_type) - if "vault" not in cfg: - raise ValueError("missing 'vault' configuration") + cfg = check_config(cfg) vcfg = cfg["vault"] - if vcfg["cls"] != "remote": - raise EnvironmentError( - "This vault backend can only be a 'remote' configuration" - ) - args = vcfg["args"] - if "storage" not in args: - args["storage"] = cfg.get("storage") - - if not args.get("storage"): - raise ValueError("invalid configuration: missing 'storage' config entry.") - storage = get_storage(**args.pop("storage")) + storage = get_storage(**vcfg.pop("storage")) backend = get_vault(**vcfg) return cooker_cls( obj_type, obj_id, backend=backend, storage=storage, max_bundle_size=cfg["max_bundle_size"], ) diff --git a/swh/vault/tests/conftest.py b/swh/vault/tests/conftest.py index ebfc0c9..67e531f 100644 --- a/swh/vault/tests/conftest.py +++ b/swh/vault/tests/conftest.py @@ -1,97 +1,97 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Any, Dict import pkg_resources.extern.packaging.version import pytest import yaml from swh.core.db.pytest_plugin import postgresql_fact from swh.storage.tests import SQL_DIR as STORAGE_SQL_DIR import swh.vault from swh.vault import get_vault os.environ["LC_ALL"] = "C.UTF-8" pytest_v = pkg_resources.get_distribution("pytest").parsed_version if pytest_v < pkg_resources.extern.packaging.version.parse("3.9"): @pytest.fixture def tmp_path(): import pathlib import tempfile with tempfile.TemporaryDirectory() as tmpdir: yield pathlib.Path(tmpdir) def db_url(name, postgresql_proc): return "postgresql://{user}@{host}:{port}/{dbname}".format( host=postgresql_proc.host, port=postgresql_proc.port, user="postgres", dbname=name, ) VAULT_SQL_DIR = os.path.join(os.path.dirname(swh.vault.__file__), "sql") postgres_vault = postgresql_fact( "postgresql_proc", db_name="vault", dump_files=f"{VAULT_SQL_DIR}/*.sql" ) postgres_storage = postgresql_fact( "postgresql_proc", db_name="storage", dump_files=f"{STORAGE_SQL_DIR}/*.sql" ) @pytest.fixture def swh_vault_config(postgres_vault, postgres_storage, tmp_path) -> Dict[str, Any]: tmp_path = str(tmp_path) return { "db": postgres_vault.dsn, "storage": { "cls": "local", "db": postgres_storage.dsn, "objstorage": { "cls": "pathslicing", "args": {"root": tmp_path, "slicing": "0:1/1:5",}, }, }, "cache": { "cls": "pathslicing", "args": {"root": tmp_path, "slicing": "0:1/1:5", "allow_delete": True}, }, "scheduler": {"cls": "remote", "url": "http://swh-scheduler:5008",}, } @pytest.fixture def swh_local_vault_config(swh_vault_config: Dict[str, Any]) -> Dict[str, Any]: return { - "vault": {"cls": "local", "args": swh_vault_config}, + "vault": {"cls": "local", **swh_vault_config}, "client_max_size": 1024 ** 3, } @pytest.fixture def swh_vault_config_file(swh_local_vault_config, monkeypatch, tmp_path): conf_path = os.path.join(str(tmp_path), "vault-server.yml") with open(conf_path, "w") as f: f.write(yaml.dump(swh_local_vault_config)) monkeypatch.setenv("SWH_CONFIG_FILENAME", conf_path) return conf_path @pytest.fixture def swh_vault(swh_vault_config): return get_vault("local", **swh_vault_config) @pytest.fixture def swh_storage(swh_vault): return swh_vault.storage diff --git a/swh/vault/tests/test_init_cookers.py b/swh/vault/tests/test_init_cookers.py index 51d6bb1..57a9f28 100644 --- a/swh/vault/tests/test_init_cookers.py +++ b/swh/vault/tests/test_init_cookers.py @@ -1,109 +1,112 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Dict import pytest import yaml from swh.vault.cookers import COOKER_TYPES, get_cooker from swh.vault.tests.test_backend import TEST_HEX_ID @pytest.fixture def swh_cooker_config(): return { "vault": { "cls": "remote", "args": { "url": "mock://vault-backend", "storage": {"cls": "remote", "url": "mock://storage-url"}, }, } } def write_config_to_env(config: Dict, tmp_path, monkeypatch) -> str: """Write the configuration dict into a temporary file, then reference that path to SWH_CONFIG_FILENAME environment variable. """ conf_path = os.path.join(str(tmp_path), "cooker.yml") with open(conf_path, "w") as f: f.write(yaml.dump(config)) monkeypatch.setenv("SWH_CONFIG_FILENAME", conf_path) return conf_path def test_write_to_env(swh_cooker_config, tmp_path, monkeypatch): actual_path = write_config_to_env(swh_cooker_config, tmp_path, monkeypatch) assert os.path.exists(actual_path) is True assert os.environ["SWH_CONFIG_FILENAME"] == actual_path with open(actual_path, "r") as f: actual_config = yaml.safe_load(f.read()) assert actual_config == swh_cooker_config @pytest.mark.parametrize( "config_ko,exception_class,exception_msg", [ ({}, ValueError, "missing 'vault' configuration"), ( {"vault": {"cls": "local"}}, EnvironmentError, "This vault backend can only be a 'remote' configuration", ), ( {"vault": {"cls": "remote", "args": {"missing-storage-key": ""}}}, ValueError, "invalid configuration: missing 'storage' config entry", ), - ({"vault": {"cls": "remote"}}, KeyError, "args",), ], ) def test_get_cooker_config_ko( config_ko, exception_class, exception_msg, monkeypatch, tmp_path ): """Misconfigured cooker should fail the instantiation with exception message """ write_config_to_env(config_ko, tmp_path, monkeypatch) with pytest.raises(exception_class, match=exception_msg): get_cooker("directory", TEST_HEX_ID) @pytest.mark.parametrize( "config_ok", [ { "vault": { "cls": "remote", "args": { "url": "mock://vault-backend", "storage": {"cls": "remote", "url": "mock://storage-url"}, }, } }, { "vault": {"cls": "remote", "args": {"url": "mock://vault-backend",},}, "storage": {"cls": "remote", "url": "mock://storage-url"}, }, + { + "vault": {"cls": "remote", "url": "mock://vault-backend",}, + "storage": {"cls": "remote", "url": "mock://storage-url"}, + }, ], ) def test_get_cooker_nominal(config_ok, tmp_path, monkeypatch): """Correct configuration should allow the instantiation of the cookers """ for cooker_type in COOKER_TYPES.keys(): write_config_to_env(config_ok, tmp_path, monkeypatch) cooker = get_cooker(cooker_type, TEST_HEX_ID) assert cooker is not None assert isinstance(cooker, COOKER_TYPES[cooker_type]) diff --git a/swh/vault/tests/test_server.py b/swh/vault/tests/test_server.py index 98af6e9..0164cb6 100644 --- a/swh/vault/tests/test_server.py +++ b/swh/vault/tests/test_server.py @@ -1,161 +1,161 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import copy import os from typing import Any, Dict import pytest import yaml from swh.core.api.serializers import json_dumps, msgpack_dumps, msgpack_loads from swh.vault.api.server import ( VaultServerApp, check_config, make_app, make_app_from_configfile, ) from swh.vault.tests.test_backend import TEST_HEX_ID def test_make_app_from_file_missing(): with pytest.raises(ValueError, match="Missing configuration path."): make_app_from_configfile() def test_make_app_from_file_does_not_exist(tmp_path): conf_path = os.path.join(str(tmp_path), "vault-server.yml") assert os.path.exists(conf_path) is False with pytest.raises( ValueError, match=f"Configuration path {conf_path} should exist." ): make_app_from_configfile(conf_path) def test_make_app_from_env_variable(swh_vault_config_file): """Instantiation of the server should happen once (through environment variable) """ app0 = make_app_from_configfile() assert app0 is not None app1 = make_app_from_configfile() assert app1 == app0 def test_make_app_from_file(swh_local_vault_config, tmp_path): """Instantiation of the server should happen once (through environment variable) """ conf_path = os.path.join(str(tmp_path), "vault-server.yml") with open(conf_path, "w") as f: f.write(yaml.dump(swh_local_vault_config)) app0 = make_app_from_configfile(conf_path) assert app0 is not None app1 = make_app_from_configfile(conf_path) assert app1 == app0 @pytest.fixture def async_app(swh_local_vault_config: Dict[str, Any],) -> VaultServerApp: """Instantiate the vault server application. Note: This requires the db setup to run (fixture swh_vault in charge of this) """ return make_app(swh_local_vault_config) @pytest.fixture def cli(async_app, aiohttp_client, loop): return loop.run_until_complete(aiohttp_client(async_app)) async def test_client_index(cli): resp = await cli.get("/") assert resp.status == 200 async def test_client_cook_notfound(cli): resp = await cli.post( "/cook", data=json_dumps({"obj_type": "directory", "obj_id": TEST_HEX_ID}), headers=[("Content-Type", "application/json")], ) assert resp.status == 400 content = msgpack_loads(await resp.content.read()) assert content["exception"]["type"] == "NotFoundExc" assert content["exception"]["args"] == [f"directory {TEST_HEX_ID} was not found."] async def test_client_progress_notfound(cli): resp = await cli.post( "/progress", data=json_dumps({"obj_type": "directory", "obj_id": TEST_HEX_ID}), headers=[("Content-Type", "application/json")], ) assert resp.status == 400 content = msgpack_loads(await resp.content.read()) assert content["exception"]["type"] == "NotFoundExc" assert content["exception"]["args"] == [f"directory {TEST_HEX_ID} was not found."] async def test_client_batch_cook_invalid_type(cli): resp = await cli.post( "/batch_cook", data=msgpack_dumps({"batch": [("foobar", [])]}), headers={"Content-Type": "application/x-msgpack"}, ) assert resp.status == 400 content = msgpack_loads(await resp.content.read()) assert content["exception"]["type"] == "NotFoundExc" assert content["exception"]["args"] == ["foobar is an unknown type."] async def test_client_batch_progress_notfound(cli): resp = await cli.post( "/batch_progress", data=msgpack_dumps({"batch_id": 1}), headers={"Content-Type": "application/x-msgpack"}, ) assert resp.status == 400 content = msgpack_loads(await resp.content.read()) assert content["exception"]["type"] == "NotFoundExc" assert content["exception"]["args"] == ["Batch 1 does not exist."] def test_check_config_missing_vault_configuration() -> None: """Irrelevant configuration file path raises""" with pytest.raises(ValueError, match="missing 'vault' configuration"): check_config({}) def test_check_config_not_local() -> None: """Wrong configuration raises""" expected_error = ( "The vault backend can only be started with a 'local' configuration" ) with pytest.raises(EnvironmentError, match=expected_error): check_config({"vault": {"cls": "remote"}}) @pytest.mark.parametrize("missing_key", ["storage", "cache", "scheduler"]) def test_check_config_missing_key(missing_key, swh_vault_config) -> None: """Any other configuration than 'local' (the default) is rejected""" - config_ok = {"vault": {"cls": "local", "args": swh_vault_config}} + config_ok = {"vault": {"cls": "local", **swh_vault_config}} config_ko = copy.deepcopy(config_ok) - config_ko["vault"]["args"].pop(missing_key, None) + config_ko["vault"].pop(missing_key, None) expected_error = f"invalid configuration: missing {missing_key} config entry" with pytest.raises(ValueError, match=expected_error): check_config(config_ko) @pytest.mark.parametrize("missing_key", ["storage", "cache", "scheduler"]) def test_check_config_ok(missing_key, swh_vault_config) -> None: """Any other configuration than 'local' (the default) is rejected""" - config_ok = {"vault": {"cls": "local", "args": swh_vault_config}} + config_ok = {"vault": {"cls": "local", **swh_vault_config}} assert check_config(config_ok) is not None