diff --git a/PKG-INFO b/PKG-INFO index 7cf58bb..04a580f 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,42 +1,42 @@ Metadata-Version: 2.1 Name: swh.vault -Version: 1.5.0 +Version: 1.6.0 Summary: Software Heritage vault Home-page: https://forge.softwareheritage.org/diffusion/DVAU/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-vault Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-vault/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing Provides-Extra: graph License-File: LICENSE License-File: AUTHORS Software Heritage - Vault ========================= User-facing service that allows to retrieve parts of the archive as self-contained bundles (e.g., individual releases, entire repository snapshots, etc.) The creation of a bundle is called "cooking" a bundle. Architecture ------------ The vault is made of two main parts: 1. a stateful RPC server called the **backend** 2. Celery tasks, called **cookers** diff --git a/swh.vault.egg-info/PKG-INFO b/swh.vault.egg-info/PKG-INFO index 7cf58bb..04a580f 100644 --- a/swh.vault.egg-info/PKG-INFO +++ b/swh.vault.egg-info/PKG-INFO @@ -1,42 +1,42 @@ Metadata-Version: 2.1 Name: swh.vault -Version: 1.5.0 +Version: 1.6.0 Summary: Software Heritage vault Home-page: https://forge.softwareheritage.org/diffusion/DVAU/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-vault Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-vault/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing Provides-Extra: graph License-File: LICENSE License-File: AUTHORS Software Heritage - Vault ========================= User-facing service that allows to retrieve parts of the archive as self-contained bundles (e.g., individual releases, entire repository snapshots, etc.) The creation of a bundle is called "cooking" a bundle. Architecture ------------ The vault is made of two main parts: 1. a stateful RPC server called the **backend** 2. Celery tasks, called **cookers** diff --git a/swh/vault/api/server.py b/swh/vault/api/server.py index ddd2504..15f6f75 100644 --- a/swh/vault/api/server.py +++ b/swh/vault/api/server.py @@ -1,124 +1,116 @@ -# Copyright (C) 2016-2020 The Software Heritage developers +# Copyright (C) 2016-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import annotations -import asyncio import os from typing import Any, Dict, Optional -import aiohttp.web - -from swh.core.api.asynchronous import RPCServerApp +from swh.core.api import RPCServerApp +from swh.core.api import encode_data_server as encode_data +from swh.core.api import error_handler from swh.core.config import config_basepath, merge_configs, read_raw_config from swh.vault import get_vault as get_swhvault from swh.vault.backend import NotFoundExc from swh.vault.interface import VaultInterface from .serializers import DECODERS, ENCODERS # do not define default services here DEFAULT_CONFIG = { "client_max_size": 1024 ** 3, } -vault = None -app = None - -def get_vault(config: Optional[Dict[str, Any]] = None) -> VaultInterface: +def get_vault(): global vault if not vault: - assert config is not None - vault = get_swhvault(**config) + vault = get_swhvault(**app.config["vault"]) + return vault class VaultServerApp(RPCServerApp): client_exception_classes = (NotFoundExc,) extra_type_decoders = DECODERS extra_type_encoders = ENCODERS -@asyncio.coroutine -def index(request): - return aiohttp.web.Response(body="SWH Vault API server") +vault = None +app = VaultServerApp(__name__, backend_class=VaultInterface, backend_factory=get_vault,) + + +@app.errorhandler(NotFoundExc) +def argument_error_handler(exception): + return error_handler(exception, encode_data, status_code=400) + + +@app.errorhandler(Exception) +def my_error_handler(exception): + return error_handler(exception, encode_data) + + +@app.route("/") +def index(): + return "SWH Vault API server" def check_config(cfg: Dict[str, Any]) -> Dict[str, Any]: """Ensure the configuration is ok to run a local vault server, and propagate defaults. Raises: EnvironmentError if the configuration is not for local instance ValueError if one of the following keys is missing: vault, cache, storage, scheduler Returns: New configuration dict to instantiate a local vault server instance. """ cfg = cfg.copy() if "vault" not in cfg: raise ValueError("missing 'vault' configuration") vcfg = cfg["vault"] if vcfg["cls"] != "local": raise EnvironmentError( "The vault backend can only be started with a 'local' configuration", ) # TODO: Soft-deprecation of args key. Remove when ready. vcfg.update(vcfg.get("args", {})) # Default to top-level value if any vcfg = {**cfg, **vcfg} for key in ("cache", "storage", "scheduler"): if not vcfg.get(key): raise ValueError(f"invalid configuration: missing {key} config entry.") return vcfg -def make_app(config: Dict[str, Any]) -> VaultServerApp: - """Ensure the configuration is ok, then instantiate the server application - - """ - config = check_config(config) - app = VaultServerApp( - __name__, - backend_class=VaultInterface, - backend_factory=lambda: get_vault(config), - client_max_size=config["client_max_size"], - ) - app.router.add_route("GET", "/", index) - return app - - def make_app_from_configfile( config_path: Optional[str] = None, **kwargs ) -> VaultServerApp: """Load and check configuration if ok, then instantiate (once) a vault server application. """ - global app - if not app: - config_path = os.environ.get("SWH_CONFIG_FILENAME", config_path) - if not config_path: - raise ValueError("Missing configuration path.") - if not os.path.isfile(config_path): - raise ValueError(f"Configuration path {config_path} should exist.") - - app_config = read_raw_config(config_basepath(config_path)) - app_config = merge_configs(DEFAULT_CONFIG, app_config) - app = make_app(app_config) + config_path = os.environ.get("SWH_CONFIG_FILENAME", config_path) + if not config_path: + raise ValueError("Missing configuration path.") + if not os.path.isfile(config_path): + raise ValueError(f"Configuration path {config_path} should exist.") + + app_config = read_raw_config(config_basepath(config_path)) + app.config.update(merge_configs(DEFAULT_CONFIG, app_config)) return app if __name__ == "__main__": print("Deprecated. Use swh-vault ") diff --git a/swh/vault/cache.py b/swh/vault/cache.py index f805de1..5599c29 100644 --- a/swh/vault/cache.py +++ b/swh/vault/cache.py @@ -1,47 +1,43 @@ # Copyright (C) 2016-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.model import hashutil from swh.model.swhids import CoreSWHID from swh.objstorage.factory import get_objstorage from swh.objstorage.objstorage import compute_hash class VaultCache: """The Vault cache is an object storage that stores Vault bundles. This implementation computes sha1(':') as the internal identifiers used in the underlying objstorage. """ def __init__(self, **objstorage): self.objstorage = get_objstorage(**objstorage) def add(self, bundle_type, swhid: CoreSWHID, content): sid = self._get_internal_id(bundle_type, swhid) return self.objstorage.add(content, sid) def get(self, bundle_type, swhid: CoreSWHID): sid = self._get_internal_id(bundle_type, swhid) return self.objstorage.get(hashutil.hash_to_bytes(sid)) def delete(self, bundle_type, swhid: CoreSWHID): sid = self._get_internal_id(bundle_type, swhid) return self.objstorage.delete(hashutil.hash_to_bytes(sid)) - def add_stream(self, bundle_type, swhid: CoreSWHID, content_iter): - sid = self._get_internal_id(bundle_type, swhid) - return self.objstorage.add_stream(content_iter, sid) - def get_stream(self, bundle_type, swhid: CoreSWHID): sid = self._get_internal_id(bundle_type, swhid) return self.objstorage.get_stream(hashutil.hash_to_bytes(sid)) def is_cached(self, bundle_type, swhid: CoreSWHID): sid = self._get_internal_id(bundle_type, swhid) return hashutil.hash_to_bytes(sid) in self.objstorage def _get_internal_id(self, bundle_type, swhid: CoreSWHID): return compute_hash("{}:{}".format(bundle_type, swhid).encode()) diff --git a/swh/vault/tests/test_server.py b/swh/vault/tests/test_server.py index fbd5c12..ddad45f 100644 --- a/swh/vault/tests/test_server.py +++ b/swh/vault/tests/test_server.py @@ -1,166 +1,170 @@ -# Copyright (C) 2020 The Software Heritage developers +# Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import copy import os -from typing import Any, Dict import pytest import yaml from swh.core.api.serializers import json_dumps, msgpack_dumps, msgpack_loads from swh.vault.api.serializers import ENCODERS import swh.vault.api.server -from swh.vault.api.server import ( - VaultServerApp, - check_config, - make_app, - make_app_from_configfile, -) +from swh.vault.api.server import app, check_config, make_app_from_configfile from swh.vault.tests.test_backend import TEST_SWHID def test_make_app_from_file_missing(): with pytest.raises(ValueError, match="Missing configuration path."): make_app_from_configfile() def test_make_app_from_file_does_not_exist(tmp_path): conf_path = os.path.join(str(tmp_path), "vault-server.yml") assert os.path.exists(conf_path) is False with pytest.raises( ValueError, match=f"Configuration path {conf_path} should exist." ): make_app_from_configfile(conf_path) def test_make_app_from_env_variable(swh_vault_config_file): """Server initialization happens through env variable when no path is provided """ app = make_app_from_configfile() assert app is not None + assert "vault" in app.config + + # Cleanup app + del app.config["vault"] + swh.vault.api.server.vault = None def test_make_app_from_file(swh_local_vault_config, tmp_path): """Server initialization happens through path if provided """ conf_path = os.path.join(str(tmp_path), "vault-server.yml") with open(conf_path, "w") as f: f.write(yaml.dump(swh_local_vault_config)) app = make_app_from_configfile(conf_path) assert app is not None + assert "vault" in app.config + + # Cleanup app + del app.config["vault"] + swh.vault.api.server.vault = None @pytest.fixture -def async_app(swh_local_vault_config: Dict[str, Any],) -> VaultServerApp: - """Instantiate the vault server application. +def vault_app(swh_local_vault_config): + # Set app config + app.config["vault"] = swh_local_vault_config["vault"] - Note: This requires the db setup to run (fixture swh_vault in charge of this) + yield app - """ - # make sure a new VaultBackend is instantiated for each test to prevent - # side effects between tests + # Cleanup app + del app.config["vault"] swh.vault.api.server.vault = None - return make_app(swh_local_vault_config) @pytest.fixture -def cli(async_app, aiohttp_client, loop): - return loop.run_until_complete(aiohttp_client(async_app)) +def cli(vault_app): + cli = vault_app.test_client() + return cli -async def test_client_index(cli): - resp = await cli.get("/") - assert resp.status == 200 +def test_client_index(cli): + resp = cli.get("/") + assert resp.status == "200 OK" -async def test_client_cook_notfound(cli): - resp = await cli.post( +def test_client_cook_notfound(cli): + resp = cli.post( "/cook", data=json_dumps( {"bundle_type": "flat", "swhid": TEST_SWHID}, extra_encoders=ENCODERS ), headers=[("Content-Type", "application/json")], ) - assert resp.status == 400 - content = msgpack_loads(await resp.content.read()) + assert resp.status == "400 BAD REQUEST" + content = msgpack_loads(resp.data) assert content["type"] == "NotFoundExc" assert content["args"] == [f"flat {TEST_SWHID} was not found."] -async def test_client_progress_notfound(cli): - resp = await cli.post( +def test_client_progress_notfound(cli): + resp = cli.post( "/progress", data=json_dumps( {"bundle_type": "flat", "swhid": TEST_SWHID}, extra_encoders=ENCODERS ), headers=[("Content-Type", "application/json")], ) - assert resp.status == 400 - content = msgpack_loads(await resp.content.read()) + assert resp.status == "400 BAD REQUEST" + content = msgpack_loads(resp.data) assert content["type"] == "NotFoundExc" assert content["args"] == [f"flat {TEST_SWHID} was not found."] -async def test_client_batch_cook_invalid_type(cli): - resp = await cli.post( +def test_client_batch_cook_invalid_type(cli): + resp = cli.post( "/batch_cook", data=msgpack_dumps({"batch": [("foobar", [])]}), headers={"Content-Type": "application/x-msgpack"}, ) - assert resp.status == 400 - content = msgpack_loads(await resp.content.read()) + assert resp.status == "400 BAD REQUEST" + content = msgpack_loads(resp.data) assert content["type"] == "NotFoundExc" assert content["args"] == ["foobar is an unknown type."] -async def test_client_batch_progress_notfound(cli): - resp = await cli.post( +def test_client_batch_progress_notfound(cli): + resp = cli.post( "/batch_progress", data=msgpack_dumps({"batch_id": 1}), headers={"Content-Type": "application/x-msgpack"}, ) - assert resp.status == 400 - content = msgpack_loads(await resp.content.read()) + assert resp.status == "400 BAD REQUEST" + content = msgpack_loads(resp.data) assert content["type"] == "NotFoundExc" assert content["args"] == ["Batch 1 does not exist."] def test_check_config_missing_vault_configuration() -> None: """Irrelevant configuration file path raises""" with pytest.raises(ValueError, match="missing 'vault' configuration"): check_config({}) def test_check_config_not_local() -> None: """Wrong configuration raises""" expected_error = ( "The vault backend can only be started with a 'local' configuration" ) with pytest.raises(EnvironmentError, match=expected_error): check_config({"vault": {"cls": "remote"}}) @pytest.mark.parametrize("missing_key", ["storage", "cache", "scheduler"]) def test_check_config_missing_key(missing_key, swh_vault_config) -> None: """Any other configuration than 'local' (the default) is rejected""" config_ok = {"vault": {"cls": "local", **swh_vault_config}} config_ko = copy.deepcopy(config_ok) config_ko["vault"].pop(missing_key, None) expected_error = f"invalid configuration: missing {missing_key} config entry" with pytest.raises(ValueError, match=expected_error): check_config(config_ko) @pytest.mark.parametrize("missing_key", ["storage", "cache", "scheduler"]) def test_check_config_ok(missing_key, swh_vault_config) -> None: """Any other configuration than 'local' (the default) is rejected""" config_ok = {"vault": {"cls": "local", **swh_vault_config}} assert check_config(config_ok) is not None