diff --git a/PKG-INFO b/PKG-INFO index 04a580f..085f9fe 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,42 +1,42 @@ Metadata-Version: 2.1 Name: swh.vault -Version: 1.6.0 +Version: 1.6.1 Summary: Software Heritage vault Home-page: https://forge.softwareheritage.org/diffusion/DVAU/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-vault Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-vault/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing Provides-Extra: graph License-File: LICENSE License-File: AUTHORS Software Heritage - Vault ========================= User-facing service that allows to retrieve parts of the archive as self-contained bundles (e.g., individual releases, entire repository snapshots, etc.) The creation of a bundle is called "cooking" a bundle. Architecture ------------ The vault is made of two main parts: 1. a stateful RPC server called the **backend** 2. Celery tasks, called **cookers** diff --git a/conftest.py b/conftest.py index 8a741d1..b6c0a26 100644 --- a/conftest.py +++ b/conftest.py @@ -1,10 +1,9 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information pytest_plugins = [ - "aiohttp.pytest_plugin", "swh.storage.pytest_plugin", "swh.loader.pytest_plugin", ] diff --git a/swh.vault.egg-info/PKG-INFO b/swh.vault.egg-info/PKG-INFO index 04a580f..085f9fe 100644 --- a/swh.vault.egg-info/PKG-INFO +++ b/swh.vault.egg-info/PKG-INFO @@ -1,42 +1,42 @@ Metadata-Version: 2.1 Name: swh.vault -Version: 1.6.0 +Version: 1.6.1 Summary: Software Heritage vault Home-page: https://forge.softwareheritage.org/diffusion/DVAU/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-vault Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-vault/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing Provides-Extra: graph License-File: LICENSE License-File: AUTHORS Software Heritage - Vault ========================= User-facing service that allows to retrieve parts of the archive as self-contained bundles (e.g., individual releases, entire repository snapshots, etc.) The creation of a bundle is called "cooking" a bundle. Architecture ------------ The vault is made of two main parts: 1. a stateful RPC server called the **backend** 2. Celery tasks, called **cookers** diff --git a/swh/vault/api/server.py b/swh/vault/api/server.py index 15f6f75..0630a0c 100644 --- a/swh/vault/api/server.py +++ b/swh/vault/api/server.py @@ -1,116 +1,117 @@ # Copyright (C) 2016-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import annotations import os from typing import Any, Dict, Optional from swh.core.api import RPCServerApp from swh.core.api import encode_data_server as encode_data from swh.core.api import error_handler from swh.core.config import config_basepath, merge_configs, read_raw_config from swh.vault import get_vault as get_swhvault from swh.vault.backend import NotFoundExc from swh.vault.interface import VaultInterface from .serializers import DECODERS, ENCODERS # do not define default services here DEFAULT_CONFIG = { "client_max_size": 1024 ** 3, } def get_vault(): global vault if not vault: vault = get_swhvault(**app.config["vault"]) return vault class VaultServerApp(RPCServerApp): client_exception_classes = (NotFoundExc,) extra_type_decoders = DECODERS extra_type_encoders = ENCODERS vault = None app = VaultServerApp(__name__, backend_class=VaultInterface, backend_factory=get_vault,) @app.errorhandler(NotFoundExc) def argument_error_handler(exception): return error_handler(exception, encode_data, status_code=400) @app.errorhandler(Exception) def my_error_handler(exception): return error_handler(exception, encode_data) @app.route("/") def index(): return "SWH Vault API server" def check_config(cfg: Dict[str, Any]) -> Dict[str, Any]: """Ensure the configuration is ok to run a local vault server, and propagate defaults. Raises: EnvironmentError if the configuration is not for local instance ValueError if one of the following keys is missing: vault, cache, storage, scheduler Returns: New configuration dict to instantiate a local vault server instance. """ cfg = cfg.copy() if "vault" not in cfg: raise ValueError("missing 'vault' configuration") vcfg = cfg["vault"] if vcfg["cls"] != "local": raise EnvironmentError( "The vault backend can only be started with a 'local' configuration", ) # TODO: Soft-deprecation of args key. Remove when ready. vcfg.update(vcfg.get("args", {})) # Default to top-level value if any vcfg = {**cfg, **vcfg} for key in ("cache", "storage", "scheduler"): if not vcfg.get(key): raise ValueError(f"invalid configuration: missing {key} config entry.") return vcfg def make_app_from_configfile( config_path: Optional[str] = None, **kwargs ) -> VaultServerApp: """Load and check configuration if ok, then instantiate (once) a vault server application. """ config_path = os.environ.get("SWH_CONFIG_FILENAME", config_path) if not config_path: raise ValueError("Missing configuration path.") if not os.path.isfile(config_path): raise ValueError(f"Configuration path {config_path} should exist.") app_config = read_raw_config(config_basepath(config_path)) + app_config["vault"] = check_config(app_config) app.config.update(merge_configs(DEFAULT_CONFIG, app_config)) return app if __name__ == "__main__": print("Deprecated. Use swh-vault ") diff --git a/swh/vault/cli.py b/swh/vault/cli.py index 5808c3a..2881117 100644 --- a/swh/vault/cli.py +++ b/swh/vault/cli.py @@ -1,189 +1,189 @@ # Copyright (C) 2015-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import annotations # WARNING: do not import unnecessary things here to keep cli startup time under # control import logging from typing import TYPE_CHECKING, Optional import click from swh.core.cli import CONTEXT_SETTINGS, AliasedGroup from swh.core.cli import swh as swh_cli_group if TYPE_CHECKING: import io from swh.model.swhids import CoreSWHID class SwhidParamType(click.ParamType): name = "swhid" def convert(self, value, param, ctx): from swh.model.exceptions import ValidationError from swh.model.swhids import CoreSWHID try: return CoreSWHID.from_string(value) except ValidationError: self.fail(f"expected core SWHID, got {value!r}", param, ctx) @swh_cli_group.group(name="vault", context_settings=CONTEXT_SETTINGS, cls=AliasedGroup) @click.pass_context def vault(ctx): """Software Heritage Vault tools.""" @vault.command() @click.option( "--config-file", "-C", default=None, metavar="CONFIGFILE", type=click.Path(exists=True, dir_okay=False,), help="Configuration file.", ) @click.argument("swhid", type=SwhidParamType()) @click.argument("outfile", type=click.File("wb")) @click.option( "--bundle-type", type=click.Choice(["flat", "gitfast", "git_bare"]), help="Selects which cooker to use, when there is more than one available " "for the given object type.", ) @click.pass_context def cook( ctx, config_file: str, swhid: CoreSWHID, outfile: io.RawIOBase, bundle_type: Optional[str], ): """ Runs a vault cooker for a single object (identified by a SWHID), and outputs it to the given file. """ from swh.core import config from swh.model.swhids import ObjectType from swh.objstorage.exc import ObjNotFoundError from swh.objstorage.factory import get_objstorage from swh.storage import get_storage from .cookers import get_cooker_cls from .in_memory_backend import InMemoryVaultBackend conf = config.read(config_file) try: from swh.graph.client import RemoteGraphClient # optional dependency graph = RemoteGraphClient(**conf["graph"]) if conf.get("graph") else None except ModuleNotFoundError: if conf.get("graph"): raise EnvironmentError( "Graph configuration required but module is not installed." ) else: graph = None backend = InMemoryVaultBackend() if bundle_type is None: if swhid.object_type in (ObjectType.RELEASE, ObjectType.SNAPSHOT,): bundle_type = "git_bare" elif swhid.object_type in (ObjectType.DIRECTORY,): bundle_type = "flat" else: raise click.ClickException( "No default bundle type for this kind of object, " "use --bundle-type to choose one" ) try: cooker_cls = get_cooker_cls(bundle_type, swhid.object_type) except ValueError as e: raise click.ClickException(*e.args) storage = get_storage(**conf["storage"]) objstorage = get_objstorage(**conf["objstorage"]) if "objstorage" in conf else None cooker = cooker_cls( swhid=swhid, backend=backend, storage=storage, graph=graph, objstorage=objstorage, max_bundle_size=None, # No need for a size limit, we are running locally ) cooker.cook() try: bundle = backend.fetch(cooker_cls.BUNDLE_TYPE, swhid) except ObjNotFoundError: bundle = None if bundle is None: import pdb pdb.set_trace() raise click.ClickException("Cooker did not write a bundle to the backend.") outfile.write(bundle) @vault.command(name="rpc-serve") @click.option( "--config-file", "-C", default=None, metavar="CONFIGFILE", type=click.Path(exists=True, dir_okay=False,), help="Configuration file.", ) @click.option( "--host", default="0.0.0.0", metavar="IP", show_default=True, help="Host ip address to bind the server on", ) @click.option( "--port", default=5005, type=click.INT, metavar="PORT", help="Binding port of the server", ) @click.option( "--debug/--no-debug", default=True, help="Indicates if the server should run in debug mode", ) @click.pass_context def serve(ctx, config_file, host, port, debug): """Software Heritage Vault RPC server.""" - import aiohttp - from swh.vault.api.server import make_app_from_configfile ctx.ensure_object(dict) + if "log_level" in ctx.obj: + logging.getLogger("werkzeug").setLevel(ctx.obj["log_level"]) try: app = make_app_from_configfile(config_file, debug=debug) except EnvironmentError as e: click.echo(e.msg, err=True) ctx.exit(1) - aiohttp.web.run_app(app, host=host, port=int(port)) + app.run(host, port=int(port), debug=debug) def main(): logging.basicConfig() return serve(auto_envvar_prefix="SWH_VAULT") if __name__ == "__main__": main() diff --git a/swh/vault/tests/conftest.py b/swh/vault/tests/conftest.py index be1b266..ef893c8 100644 --- a/swh/vault/tests/conftest.py +++ b/swh/vault/tests/conftest.py @@ -1,104 +1,89 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from functools import partial import os from typing import Any, Dict import pkg_resources.extern.packaging.version import pytest from pytest_postgresql import factories -import yaml from swh.core.db.pytest_plugin import initialize_database_for_module, postgresql_fact from swh.storage.postgresql.db import Db as StorageDb from swh.vault import get_vault from swh.vault.backend import VaultBackend os.environ["LC_ALL"] = "C.UTF-8" # needed for directory tests on git-cloned repositories # 022 is usually the default value, but some environments (eg. Debian builds) have # a different one. os.umask(0o022) pytest_v = pkg_resources.get_distribution("pytest").parsed_version if pytest_v < pkg_resources.extern.packaging.version.parse("3.9"): @pytest.fixture def tmp_path(): import pathlib import tempfile with tempfile.TemporaryDirectory() as tmpdir: yield pathlib.Path(tmpdir) storage_postgresql_proc = factories.postgresql_proc( dbname="storage", load=[ partial(initialize_database_for_module, "storage", StorageDb.current_version) ], ) vault_postgresql_proc = factories.postgresql_proc( dbname="vault", load=[ partial(initialize_database_for_module, "vault", VaultBackend.current_version) ], ) postgres_vault = postgresql_fact("vault_postgresql_proc") postgres_storage = postgresql_fact( "storage_postgresql_proc", no_db_drop=True, # keep the db for performance reasons ) @pytest.fixture def swh_vault_config(postgres_vault, postgres_storage, tmp_path) -> Dict[str, Any]: tmp_path = str(tmp_path) return { "db": postgres_vault.dsn, "storage": { "cls": "postgresql", "db": postgres_storage.dsn, "objstorage": { "cls": "pathslicing", - "args": {"root": tmp_path, "slicing": "0:1/1:5",}, + "root": tmp_path, + "slicing": "0:1/1:5", }, }, "cache": { "cls": "pathslicing", - "args": {"root": tmp_path, "slicing": "0:1/1:5", "allow_delete": True}, + "root": tmp_path, + "slicing": "0:1/1:5", + "allow_delete": True, }, "scheduler": {"cls": "remote", "url": "http://swh-scheduler:5008",}, } -@pytest.fixture -def swh_local_vault_config(swh_vault_config: Dict[str, Any]) -> Dict[str, Any]: - return { - "vault": {"cls": "local", **swh_vault_config}, - "client_max_size": 1024 ** 3, - } - - -@pytest.fixture -def swh_vault_config_file(swh_local_vault_config, monkeypatch, tmp_path): - conf_path = os.path.join(str(tmp_path), "vault-server.yml") - with open(conf_path, "w") as f: - f.write(yaml.dump(swh_local_vault_config)) - monkeypatch.setenv("SWH_CONFIG_FILENAME", conf_path) - return conf_path - - @pytest.fixture def swh_vault(swh_vault_config): return get_vault("local", **swh_vault_config) @pytest.fixture def swh_storage(swh_vault): return swh_vault.storage diff --git a/swh/vault/tests/test_server.py b/swh/vault/tests/test_server.py index ddad45f..28a20c6 100644 --- a/swh/vault/tests/test_server.py +++ b/swh/vault/tests/test_server.py @@ -1,170 +1,188 @@ # Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import copy import os +from typing import Any, Dict import pytest import yaml from swh.core.api.serializers import json_dumps, msgpack_dumps, msgpack_loads from swh.vault.api.serializers import ENCODERS import swh.vault.api.server -from swh.vault.api.server import app, check_config, make_app_from_configfile +from swh.vault.api.server import app, check_config, get_vault, make_app_from_configfile from swh.vault.tests.test_backend import TEST_SWHID +@pytest.fixture +def swh_vault_server_config(swh_vault_config: Dict[str, Any]) -> Dict[str, Any]: + """Returns a vault server configuration, with ``storage``, ``scheduler`` and + ``cache`` set at the toplevel""" + return { + "vault": {"cls": "local", "db": swh_vault_config["db"]}, + "client_max_size": 1024 ** 3, + **{k: v for k, v in swh_vault_config.items() if k != "db"}, + } + + +@pytest.fixture +def swh_vault_server_config_file(swh_vault_server_config, monkeypatch, tmp_path): + """Creates a vault server configuration file and sets it into SWH_CONFIG_FILENAME""" + conf_path = os.path.join(str(tmp_path), "vault-server.yml") + with open(conf_path, "w") as f: + f.write(yaml.dump(swh_vault_server_config)) + monkeypatch.setenv("SWH_CONFIG_FILENAME", conf_path) + return conf_path + + def test_make_app_from_file_missing(): with pytest.raises(ValueError, match="Missing configuration path."): make_app_from_configfile() def test_make_app_from_file_does_not_exist(tmp_path): conf_path = os.path.join(str(tmp_path), "vault-server.yml") assert os.path.exists(conf_path) is False with pytest.raises( ValueError, match=f"Configuration path {conf_path} should exist." ): make_app_from_configfile(conf_path) -def test_make_app_from_env_variable(swh_vault_config_file): +def test_make_app_from_env_variable(swh_vault_server_config_file): """Server initialization happens through env variable when no path is provided """ app = make_app_from_configfile() assert app is not None - assert "vault" in app.config + assert get_vault() is not None # Cleanup app del app.config["vault"] swh.vault.api.server.vault = None -def test_make_app_from_file(swh_local_vault_config, tmp_path): +def test_make_app_from_file(swh_vault_server_config, tmp_path): """Server initialization happens through path if provided """ conf_path = os.path.join(str(tmp_path), "vault-server.yml") with open(conf_path, "w") as f: - f.write(yaml.dump(swh_local_vault_config)) + f.write(yaml.dump(swh_vault_server_config)) app = make_app_from_configfile(conf_path) assert app is not None - assert "vault" in app.config + assert get_vault() is not None # Cleanup app del app.config["vault"] swh.vault.api.server.vault = None @pytest.fixture -def vault_app(swh_local_vault_config): - # Set app config - app.config["vault"] = swh_local_vault_config["vault"] - - yield app +def vault_app(swh_vault_server_config_file): + yield make_app_from_configfile() # Cleanup app del app.config["vault"] swh.vault.api.server.vault = None @pytest.fixture def cli(vault_app): cli = vault_app.test_client() return cli def test_client_index(cli): resp = cli.get("/") assert resp.status == "200 OK" def test_client_cook_notfound(cli): resp = cli.post( "/cook", data=json_dumps( {"bundle_type": "flat", "swhid": TEST_SWHID}, extra_encoders=ENCODERS ), headers=[("Content-Type", "application/json")], ) assert resp.status == "400 BAD REQUEST" content = msgpack_loads(resp.data) assert content["type"] == "NotFoundExc" assert content["args"] == [f"flat {TEST_SWHID} was not found."] def test_client_progress_notfound(cli): resp = cli.post( "/progress", data=json_dumps( {"bundle_type": "flat", "swhid": TEST_SWHID}, extra_encoders=ENCODERS ), headers=[("Content-Type", "application/json")], ) assert resp.status == "400 BAD REQUEST" content = msgpack_loads(resp.data) assert content["type"] == "NotFoundExc" assert content["args"] == [f"flat {TEST_SWHID} was not found."] def test_client_batch_cook_invalid_type(cli): resp = cli.post( "/batch_cook", data=msgpack_dumps({"batch": [("foobar", [])]}), headers={"Content-Type": "application/x-msgpack"}, ) assert resp.status == "400 BAD REQUEST" content = msgpack_loads(resp.data) assert content["type"] == "NotFoundExc" assert content["args"] == ["foobar is an unknown type."] def test_client_batch_progress_notfound(cli): resp = cli.post( "/batch_progress", data=msgpack_dumps({"batch_id": 1}), headers={"Content-Type": "application/x-msgpack"}, ) assert resp.status == "400 BAD REQUEST" content = msgpack_loads(resp.data) assert content["type"] == "NotFoundExc" assert content["args"] == ["Batch 1 does not exist."] def test_check_config_missing_vault_configuration() -> None: """Irrelevant configuration file path raises""" with pytest.raises(ValueError, match="missing 'vault' configuration"): check_config({}) def test_check_config_not_local() -> None: """Wrong configuration raises""" expected_error = ( "The vault backend can only be started with a 'local' configuration" ) with pytest.raises(EnvironmentError, match=expected_error): check_config({"vault": {"cls": "remote"}}) +def test_check_config_ok(swh_vault_server_config) -> None: + """Check that the default config is accepted""" + assert check_config(swh_vault_server_config) is not None + + @pytest.mark.parametrize("missing_key", ["storage", "cache", "scheduler"]) -def test_check_config_missing_key(missing_key, swh_vault_config) -> None: - """Any other configuration than 'local' (the default) is rejected""" - config_ok = {"vault": {"cls": "local", **swh_vault_config}} +def test_check_config_missing_key(missing_key, swh_vault_server_config) -> None: + """Check that configs with a missing key get rejected""" + config_ok = swh_vault_server_config config_ko = copy.deepcopy(config_ok) config_ko["vault"].pop(missing_key, None) + config_ko.pop(missing_key, None) expected_error = f"invalid configuration: missing {missing_key} config entry" with pytest.raises(ValueError, match=expected_error): check_config(config_ko) - - -@pytest.mark.parametrize("missing_key", ["storage", "cache", "scheduler"]) -def test_check_config_ok(missing_key, swh_vault_config) -> None: - """Any other configuration than 'local' (the default) is rejected""" - config_ok = {"vault": {"cls": "local", **swh_vault_config}} - assert check_config(config_ok) is not None