diff --git a/swh/objstorage/api/server.py b/swh/objstorage/api/server.py
index aecb036..a906c27 100644
--- a/swh/objstorage/api/server.py
+++ b/swh/objstorage/api/server.py
@@ -1,278 +1,271 @@
 # Copyright (C) 2015-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import json
 import os

 import aiohttp.web

 from swh.core.api.asynchronous import RPCServerApp, decode_request
 from swh.core.api.asynchronous import encode_data_server as encode_data
 from swh.core.api.serializers import SWHJSONDecoder, msgpack_loads
 from swh.core.config import read as config_read
 from swh.core.statsd import statsd
 from swh.model import hashutil
 from swh.objstorage.exc import Error, ObjNotFoundError
 from swh.objstorage.factory import get_objstorage
 from swh.objstorage.objstorage import DEFAULT_LIMIT


 def timed(f):
     async def w(*a, **kw):
         with statsd.timed(
             "swh_objstorage_request_duration_seconds", tags={"endpoint": f.__name__}
         ):
             return await f(*a, **kw)

     return w


 @timed
 async def index(request):
     return aiohttp.web.Response(body="SWH Objstorage API server")


 @timed
 async def check_config(request):
     req = await decode_request(request)
     return encode_data(request.app["objstorage"].check_config(**req))


 @timed
 async def contains(request):
     req = await decode_request(request)
     return encode_data(request.app["objstorage"].__contains__(**req))


 @timed
 async def add_bytes(request):
     req = await decode_request(request)
     statsd.increment(
         "swh_objstorage_in_bytes_total",
         len(req["content"]),
         tags={"endpoint": "add_bytes"},
     )
     return encode_data(request.app["objstorage"].add(**req))


 @timed
 async def add_batch(request):
     req = await decode_request(request)
     return encode_data(request.app["objstorage"].add_batch(**req))


 @timed
 async def get_bytes(request):
     req = await decode_request(request)
     ret = request.app["objstorage"].get(**req)
     statsd.increment(
         "swh_objstorage_out_bytes_total", len(ret), tags={"endpoint": "get_bytes"}
     )
     return encode_data(ret)


 @timed
 async def get_batch(request):
     req = await decode_request(request)
     return encode_data(request.app["objstorage"].get_batch(**req))


 @timed
 async def check(request):
     req = await decode_request(request)
     return encode_data(request.app["objstorage"].check(**req))


 @timed
 async def delete(request):
     req = await decode_request(request)
     return encode_data(request.app["objstorage"].delete(**req))


 # Management methods


 @timed
 async def get_random_contents(request):
     req = await decode_request(request)
     return encode_data(request.app["objstorage"].get_random(**req))


 # Streaming methods


 @timed
 async def add_stream(request):
     hex_id = request.match_info["hex_id"]
     obj_id = hashutil.hash_to_bytes(hex_id)
     check_pres = request.query.get("check_presence", "").lower() == "true"
     objstorage = request.app["objstorage"]

     if check_pres and obj_id in objstorage:
         return encode_data(obj_id)

     # XXX this really should go in a decode_stream_request coroutine in
     # swh.core, but since py35 does not support async generators, it cannot
     # easily be made for now
     content_type = request.headers.get("Content-Type")
     if content_type == "application/x-msgpack":
         decode = msgpack_loads
     elif content_type == "application/json":
         decode = lambda x: json.loads(x, cls=SWHJSONDecoder)  # noqa
     else:
         raise ValueError("Wrong content type `%s` for API request" % content_type)

     buffer = b""
     with objstorage.chunk_writer(obj_id) as write:
         while not request.content.at_eof():
             data, eot = await request.content.readchunk()
             buffer += data
             if eot:
                 write(decode(buffer))
                 buffer = b""

     return encode_data(obj_id)


 @timed
 async def get_stream(request):
     hex_id = request.match_info["hex_id"]
     obj_id = hashutil.hash_to_bytes(hex_id)
     response = aiohttp.web.StreamResponse()
     await response.prepare(request)
     for chunk in request.app["objstorage"].get_stream(obj_id, 2 << 20):
         await response.write(chunk)
     await response.write_eof()
     return response


 @timed
 async def list_content(request):
     last_obj_id = request.query.get("last_obj_id")
     if last_obj_id:
         last_obj_id = bytes.fromhex(last_obj_id)
     limit = int(request.query.get("limit", DEFAULT_LIMIT))
     response = aiohttp.web.StreamResponse()
     response.enable_chunked_encoding()
     await response.prepare(request)
     for obj_id in request.app["objstorage"].list_content(last_obj_id, limit=limit):
         await response.write(obj_id)
     await response.write_eof()
     return response


 def make_app(config):
     """Initialize the remote api application.

     """
     client_max_size = config.get("client_max_size", 1024 * 1024 * 1024)
     app = RPCServerApp(client_max_size=client_max_size)
     app.client_exception_classes = (ObjNotFoundError, Error)

     # retro compatibility configuration settings
     app["config"] = config
-    _cfg = config["objstorage"]
-    app["objstorage"] = get_objstorage(_cfg["cls"], _cfg["args"])
+    app["objstorage"] = get_objstorage(**config["objstorage"])

     app.router.add_route("GET", "/", index)
     app.router.add_route("POST", "/check_config", check_config)
     app.router.add_route("POST", "/content/contains", contains)
     app.router.add_route("POST", "/content/add", add_bytes)
     app.router.add_route("POST", "/content/add/batch", add_batch)
     app.router.add_route("POST", "/content/get", get_bytes)
     app.router.add_route("POST", "/content/get/batch", get_batch)
     app.router.add_route("POST", "/content/get/random", get_random_contents)
     app.router.add_route("POST", "/content/check", check)
     app.router.add_route("POST", "/content/delete", delete)
     app.router.add_route("GET", "/content", list_content)
     app.router.add_route("POST", "/content/add_stream/{hex_id}", add_stream)
     app.router.add_route("GET", "/content/get_stream/{hex_id}", get_stream)
     return app


 def load_and_check_config(config_file):
     """Check the minimal configuration is set to run the api or raise an
        error explanation.

     Args:
         config_file (str): Path to the configuration file to load

     Raises:
         Error if the setup is not as expected

     Returns:
         configuration as a dict

     """
     if not config_file:
         raise EnvironmentError("Configuration file must be defined")

     if not os.path.exists(config_file):
         raise FileNotFoundError("Configuration file %s does not exist" % (config_file,))

     cfg = config_read(config_file)
     return validate_config(cfg)


 def validate_config(cfg):
     """Check the minimal configuration is set to run the api or raise an
        explanatory error.

     Args:
         cfg (dict): Loaded configuration.

     Raises:
         Error if the setup is not as expected

     Returns:
         configuration as a dict

     """
     if "objstorage" not in cfg:
         raise KeyError("Invalid configuration; missing objstorage config entry")

     missing_keys = []
     vcfg = cfg["objstorage"]
-    for key in ("cls", "args"):
-        v = vcfg.get(key)
-        if v is None:
-            missing_keys.append(key)
-
-    if missing_keys:
-        raise KeyError(
-            "Invalid configuration; missing %s config entry"
-            % (", ".join(missing_keys),)
-        )
-
-    cls = vcfg.get("cls")
+    if "cls" not in vcfg:
+        raise KeyError("Invalid configuration; missing cls config entry")
+
+    cls = vcfg["cls"]
     if cls == "pathslicing":
-        args = vcfg["args"]
+        # Backwards-compatibility: either get the deprecated `args` from the
+        # objstorage config, or use the full config itself to check for keys
+        args = vcfg.get("args", vcfg)
         for key in ("root", "slicing"):
             v = args.get(key)
             if v is None:
                 missing_keys.append(key)

         if missing_keys:
             raise KeyError(
-                "Invalid configuration; missing args.%s config entry"
+                "Invalid configuration; missing %s config entry"
                 % (", ".join(missing_keys),)
             )

     return cfg


 def make_app_from_configfile():
     """Load configuration and then build application to run

     """
     config_file = os.environ.get("SWH_CONFIG_FILENAME")
     config = load_and_check_config(config_file)
     return make_app(config=config)


 if __name__ == "__main__":
     print("Deprecated. Use swh-objstorage")
diff --git a/swh/objstorage/factory.py b/swh/objstorage/factory.py
index a095880..0ffe652 100644
--- a/swh/objstorage/factory.py
+++ b/swh/objstorage/factory.py
@@ -1,107 +1,118 @@
 # Copyright (C) 2016-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 from typing import Callable, Dict, Union
+import warnings

 from swh.objstorage.api.client import RemoteObjStorage
 from swh.objstorage.backends.generator import RandomGeneratorObjStorage
 from swh.objstorage.backends.in_memory import InMemoryObjStorage
 from swh.objstorage.backends.pathslicing import PathSlicingObjStorage
 from swh.objstorage.backends.seaweed import WeedObjStorage
 from swh.objstorage.multiplexer import MultiplexerObjStorage, StripingObjStorage
 from swh.objstorage.multiplexer.filter import add_filters
 from swh.objstorage.objstorage import ID_HASH_LENGTH, ObjStorage  # noqa

 __all__ = ["get_objstorage", "ObjStorage"]


 _STORAGE_CLASSES: Dict[str, Union[type, Callable[..., type]]] = {
     "pathslicing": PathSlicingObjStorage,
     "remote": RemoteObjStorage,
     "memory": InMemoryObjStorage,
     "weed": WeedObjStorage,
     "random": RandomGeneratorObjStorage,
 }

 _STORAGE_CLASSES_MISSING = {}

 try:
     from swh.objstorage.backends.azure import (
         AzureCloudObjStorage,
         PrefixedAzureCloudObjStorage,
     )

     _STORAGE_CLASSES["azure"] = AzureCloudObjStorage
     _STORAGE_CLASSES["azure-prefixed"] = PrefixedAzureCloudObjStorage
 except ImportError as e:
     _STORAGE_CLASSES_MISSING["azure"] = e.args[0]
     _STORAGE_CLASSES_MISSING["azure-prefixed"] = e.args[0]

 try:
     from swh.objstorage.backends.rados import RADOSObjStorage

     _STORAGE_CLASSES["rados"] = RADOSObjStorage
 except ImportError as e:
     _STORAGE_CLASSES_MISSING["rados"] = e.args[0]

 try:
     from swh.objstorage.backends.libcloud import (
         AwsCloudObjStorage,
         OpenStackCloudObjStorage,
     )

     _STORAGE_CLASSES["s3"] = AwsCloudObjStorage
     _STORAGE_CLASSES["swift"] = OpenStackCloudObjStorage
 except ImportError as e:
     _STORAGE_CLASSES_MISSING["s3"] = e.args[0]
     _STORAGE_CLASSES_MISSING["swift"] = e.args[0]


-def get_objstorage(cls, args):
+def get_objstorage(cls: str, args=None, **kwargs):
     """ Create an ObjStorage using the given implementation class.

     Args:
-        cls (str): objstorage class unique key contained in the
+        cls: objstorage class unique key contained in the
             _STORAGE_CLASSES dict.
-        args (dict): arguments for the required class of objstorage
-                     that must match exactly the one in the `__init__` method of the
-                     class.
+        kwargs: arguments for the required class of objstorage
+                that must match exactly the one in the `__init__` method of the
+                class.

     Returns:
         subclass of ObjStorage that match the given `storage_class` argument.

     Raises:
         ValueError: if the given storage class is not a valid objstorage
             key.

     """
     if cls in _STORAGE_CLASSES:
-        return _STORAGE_CLASSES[cls](**args)
+        if args is not None:
+            warnings.warn(
+                'Explicit "args" key is deprecated for objstorage initialization, '
+                "use class arguments keys directly instead.",
+                DeprecationWarning,
+            )
+            # TODO: when removing this, drop the "args" backwards compatibility
+            # from swh.objstorage.api.server configuration checker
+            kwargs = args
+
+        return _STORAGE_CLASSES[cls](**kwargs)
     else:
         raise ValueError(
             "Storage class {} is not available: {}".format(
                 cls, _STORAGE_CLASSES_MISSING.get(cls, "unknown name")
             )
         )


 def _construct_filtered_objstorage(storage_conf, filters_conf):
     return add_filters(get_objstorage(**storage_conf), filters_conf)


 _STORAGE_CLASSES["filtered"] = _construct_filtered_objstorage


 def _construct_multiplexer_objstorage(objstorages):
     storages = [get_objstorage(**conf) for conf in objstorages]
     return MultiplexerObjStorage(storages)


 _STORAGE_CLASSES["multiplexer"] = _construct_multiplexer_objstorage


 def _construct_striping_objstorage(objstorages):
     storages = [get_objstorage(**conf) for conf in objstorages]
     return StripingObjStorage(storages)


 _STORAGE_CLASSES["striping"] = _construct_striping_objstorage
diff --git a/swh/objstorage/tests/test_multiplexer_filter.py b/swh/objstorage/tests/test_multiplexer_filter.py
index bf62208..03142a2 100644
--- a/swh/objstorage/tests/test_multiplexer_filter.py
+++ b/swh/objstorage/tests/test_multiplexer_filter.py
@@ -1,331 +1,332 @@
 # Copyright (C) 2015-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import random
 import shutil
 from string import ascii_lowercase
 import tempfile
 import unittest

 from swh.model import hashutil
 from swh.objstorage.exc import Error, ObjNotFoundError
 from swh.objstorage.factory import get_objstorage
 from swh.objstorage.multiplexer.filter import id_prefix, id_regex, read_only
 from swh.objstorage.objstorage import compute_hash


 def get_random_content():
     return bytes("".join(random.sample(ascii_lowercase, 10)), "utf8")


 class MixinTestReadFilter(unittest.TestCase):
     # Read only filter should not allow writing

     def setUp(self):
         super().setUp()
         self.tmpdir = tempfile.mkdtemp()
         pstorage = {
             "cls": "pathslicing",
-            "args": {"root": self.tmpdir, "slicing": "0:5"},
+            "root": self.tmpdir,
+            "slicing": "0:5",
         }
         base_storage = get_objstorage(**pstorage)
         base_storage.id = compute_hash
         self.storage = get_objstorage(
-            "filtered", {"storage_conf": pstorage, "filters_conf": [read_only()]}
+            "filtered", storage_conf=pstorage, filters_conf=[read_only()]
         )
         self.valid_content = b"pre-existing content"
         self.invalid_content = b"invalid_content"
         self.true_invalid_content = b"Anything that is not correct"
         self.absent_content = b"non-existent content"
         # Create a valid content.
         self.valid_id = base_storage.add(self.valid_content)
         # Create an invalid id and add a content with it.
         self.invalid_id = base_storage.id(self.true_invalid_content)
         base_storage.add(self.invalid_content, obj_id=self.invalid_id)
         # Compute an id for a non-existing content.
         self.absent_id = base_storage.id(self.absent_content)

     def tearDown(self):
         super().tearDown()
         shutil.rmtree(self.tmpdir)

     def test_can_contains(self):
         self.assertTrue(self.valid_id in self.storage)
         self.assertTrue(self.invalid_id in self.storage)
         self.assertFalse(self.absent_id in self.storage)

     def test_can_iter(self):
         self.assertIn(self.valid_id, iter(self.storage))
         self.assertIn(self.invalid_id, iter(self.storage))

     def test_can_len(self):
         self.assertEqual(2, len(self.storage))

     def test_can_get(self):
         self.assertEqual(self.valid_content, self.storage.get(self.valid_id))
         self.assertEqual(self.invalid_content, self.storage.get(self.invalid_id))

     def test_can_check(self):
         with self.assertRaises(ObjNotFoundError):
             self.storage.check(self.absent_id)
         with self.assertRaises(Error):
             self.storage.check(self.invalid_id)
         self.storage.check(self.valid_id)

     def test_can_get_random(self):
         self.assertEqual(1, len(list(self.storage.get_random(1))))
         self.assertEqual(
             len(list(self.storage)), len(set(self.storage.get_random(1000)))
         )

     def test_cannot_add(self):
         new_id = self.storage.add(b"New content")
         result = self.storage.add(self.valid_content, self.valid_id)
         self.assertIsNone(new_id, self.storage)
         self.assertIsNone(result)

     def test_cannot_restore(self):
         result = self.storage.restore(self.valid_content, self.valid_id)
         self.assertIsNone(result)


 class MixinTestIdFilter:
     """ Mixin class that tests the filters based on filter.IdFilter

     Methods "make_valid", "make_invalid" and "filter_storage" must be
     implemented by subclasses.
     """

     def setUp(self):
         super().setUp()
         # Use a hack here : as the mock uses the content as id, it is easy to
         # create contents that are filtered or not.
         self.prefix = "71"
         self.tmpdir = tempfile.mkdtemp()

         # Make the storage filtered
         self.sconf = {
             "cls": "pathslicing",
             "args": {"root": self.tmpdir, "slicing": "0:5"},
         }
         storage = get_objstorage(**self.sconf)
         self.base_storage = storage
         self.storage = self.filter_storage(self.sconf)

         # Set the id calculators
         storage.id = compute_hash

         # Present content with valid id
         self.present_valid_content = self.ensure_valid(b"yroqdtotji")
         self.present_valid_id = storage.id(self.present_valid_content)

         # Present content with invalid id
         self.present_invalid_content = self.ensure_invalid(b"glxddlmmzb")
         self.present_invalid_id = storage.id(self.present_invalid_content)

         # Missing content with valid id
         self.missing_valid_content = self.ensure_valid(b"rmzkdclkez")
         self.missing_valid_id = storage.id(self.missing_valid_content)

         # Missing content with invalid id
         self.missing_invalid_content = self.ensure_invalid(b"hlejfuginh")
         self.missing_invalid_id = storage.id(self.missing_invalid_content)

         # Present corrupted content with valid id
         self.present_corrupted_valid_content = self.ensure_valid(b"cdsjwnpaij")
         self.true_present_corrupted_valid_content = self.ensure_valid(b"mgsdpawcrr")
         self.present_corrupted_valid_id = storage.id(
             self.true_present_corrupted_valid_content
         )

         # Present corrupted content with invalid id
         self.present_corrupted_invalid_content = self.ensure_invalid(b"pspjljnrco")
         self.true_present_corrupted_invalid_content = self.ensure_invalid(b"rjocbnnbso")
         self.present_corrupted_invalid_id = storage.id(
             self.true_present_corrupted_invalid_content
         )

         # Missing (potentially) corrupted content with valid id
         self.missing_corrupted_valid_content = self.ensure_valid(b"zxkokfgtou")
         self.true_missing_corrupted_valid_content = self.ensure_valid(b"royoncooqa")
         self.missing_corrupted_valid_id = storage.id(
             self.true_missing_corrupted_valid_content
         )

         # Missing (potentially) corrupted content with invalid id
         self.missing_corrupted_invalid_content = self.ensure_invalid(b"hxaxnrmnyk")
         self.true_missing_corrupted_invalid_content = self.ensure_invalid(b"qhbolyuifr")
         self.missing_corrupted_invalid_id = storage.id(
             self.true_missing_corrupted_invalid_content
         )

         # Add the content that are supposed to be present
         self.storage.add(self.present_valid_content)
         self.storage.add(self.present_invalid_content)
         self.storage.add(
             self.present_corrupted_valid_content, obj_id=self.present_corrupted_valid_id
         )
         self.storage.add(
             self.present_corrupted_invalid_content,
             obj_id=self.present_corrupted_invalid_id,
         )

     def tearDown(self):
         super().tearDown()
         shutil.rmtree(self.tmpdir)

     def filter_storage(self, sconf):
         raise NotImplementedError(
             "Id_filter test class must have a filter_storage method"
         )

     def ensure_valid(self, content=None):
         if content is None:
             content = get_random_content()
         while not self.storage.is_valid(self.base_storage.id(content)):
             content = get_random_content()
         return content

     def ensure_invalid(self, content=None):
         if content is None:
             content = get_random_content()
         while self.storage.is_valid(self.base_storage.id(content)):
             content = get_random_content()
         return content

     def test_contains(self):
         # Both contents are present, but the invalid one should be ignored.
         self.assertTrue(self.present_valid_id in self.storage)
         self.assertFalse(self.present_invalid_id in self.storage)
         self.assertFalse(self.missing_valid_id in self.storage)
         self.assertFalse(self.missing_invalid_id in self.storage)
         self.assertTrue(self.present_corrupted_valid_id in self.storage)
         self.assertFalse(self.present_corrupted_invalid_id in self.storage)
         self.assertFalse(self.missing_corrupted_valid_id in self.storage)
         self.assertFalse(self.missing_corrupted_invalid_id in self.storage)

     def test_iter(self):
         self.assertIn(self.present_valid_id, iter(self.storage))
         self.assertNotIn(self.present_invalid_id, iter(self.storage))
         self.assertNotIn(self.missing_valid_id, iter(self.storage))
         self.assertNotIn(self.missing_invalid_id, iter(self.storage))
         self.assertIn(self.present_corrupted_valid_id, iter(self.storage))
         self.assertNotIn(self.present_corrupted_invalid_id, iter(self.storage))
         self.assertNotIn(self.missing_corrupted_valid_id, iter(self.storage))
         self.assertNotIn(self.missing_corrupted_invalid_id, iter(self.storage))

     def test_len(self):
         # Four contents are present, but only two should be valid.
         self.assertEqual(2, len(self.storage))

     def test_get(self):
         self.assertEqual(
             self.present_valid_content, self.storage.get(self.present_valid_id)
         )
         with self.assertRaises(ObjNotFoundError):
             self.storage.get(self.present_invalid_id)
         with self.assertRaises(ObjNotFoundError):
             self.storage.get(self.missing_valid_id)
         with self.assertRaises(ObjNotFoundError):
             self.storage.get(self.missing_invalid_id)
         self.assertEqual(
             self.present_corrupted_valid_content,
             self.storage.get(self.present_corrupted_valid_id),
         )
         with self.assertRaises(ObjNotFoundError):
             self.storage.get(self.present_corrupted_invalid_id)
         with self.assertRaises(ObjNotFoundError):
             self.storage.get(self.missing_corrupted_valid_id)
         with self.assertRaises(ObjNotFoundError):
             self.storage.get(self.missing_corrupted_invalid_id)

     def test_check(self):
         self.storage.check(self.present_valid_id)
         with self.assertRaises(ObjNotFoundError):
             self.storage.check(self.present_invalid_id)
         with self.assertRaises(ObjNotFoundError):
             self.storage.check(self.missing_valid_id)
         with self.assertRaises(ObjNotFoundError):
             self.storage.check(self.missing_invalid_id)
         with self.assertRaises(Error):
             self.storage.check(self.present_corrupted_valid_id)
         with self.assertRaises(ObjNotFoundError):
             self.storage.check(self.present_corrupted_invalid_id)
         with self.assertRaises(ObjNotFoundError):
             self.storage.check(self.missing_corrupted_valid_id)
         with self.assertRaises(ObjNotFoundError):
             self.storage.check(self.missing_corrupted_invalid_id)

     def test_get_random(self):
         self.assertEqual(0, len(list(self.storage.get_random(0))))
         random_content = list(self.storage.get_random(1000))
         self.assertIn(self.present_valid_id, random_content)
         self.assertNotIn(self.present_invalid_id, random_content)
         self.assertNotIn(self.missing_valid_id, random_content)
         self.assertNotIn(self.missing_invalid_id, random_content)
         self.assertIn(self.present_corrupted_valid_id, random_content)
         self.assertNotIn(self.present_corrupted_invalid_id, random_content)
         self.assertNotIn(self.missing_corrupted_valid_id, random_content)
         self.assertNotIn(self.missing_corrupted_invalid_id, random_content)

     def test_add(self):
         # Add valid and invalid contents to the storage and check their
         # presence with the unfiltered storage.
         valid_content = self.ensure_valid(b"ulepsrjbgt")
         valid_id = self.base_storage.id(valid_content)
         invalid_content = self.ensure_invalid(b"znvghkjked")
         invalid_id = self.base_storage.id(invalid_content)
         self.storage.add(valid_content)
         self.storage.add(invalid_content)
         self.assertTrue(valid_id in self.base_storage)
         self.assertFalse(invalid_id in self.base_storage)

     def test_restore(self):
         # Add corrupted content to the storage and the try to restore it
         valid_content = self.ensure_valid(b"ulepsrjbgt")
         valid_id = self.base_storage.id(valid_content)
         corrupted_content = self.ensure_valid(b"ltjkjsloyb")
         corrupted_id = self.base_storage.id(corrupted_content)
         self.storage.add(corrupted_content, obj_id=valid_id)
         with self.assertRaises(ObjNotFoundError):
             self.storage.check(corrupted_id)
         with self.assertRaises(Error):
             self.storage.check(valid_id)
         self.storage.restore(valid_content)
         self.storage.check(valid_id)


 class TestPrefixFilter(MixinTestIdFilter, unittest.TestCase):
     def setUp(self):
         self.prefix = b"71"
         super().setUp()

     def ensure_valid(self, content):
         obj_id = compute_hash(content)
         hex_obj_id = hashutil.hash_to_hex(obj_id)
         self.assertTrue(hex_obj_id.startswith(self.prefix))
         return content

     def ensure_invalid(self, content):
         obj_id = compute_hash(content)
         hex_obj_id = hashutil.hash_to_hex(obj_id)
         self.assertFalse(hex_obj_id.startswith(self.prefix))
         return content

     def filter_storage(self, sconf):
         return get_objstorage(
             "filtered",
             {"storage_conf": sconf, "filters_conf": [id_prefix(self.prefix)]},
         )


 class TestRegexFilter(MixinTestIdFilter, unittest.TestCase):
     def setUp(self):
         self.regex = r"[a-f][0-9].*"
         super().setUp()

     def filter_storage(self, sconf):
         return get_objstorage(
             "filtered", {"storage_conf": sconf, "filters_conf": [id_regex(self.regex)]}
         )
diff --git a/swh/objstorage/tests/test_objstorage_api.py b/swh/objstorage/tests/test_objstorage_api.py
index b3abc12..7abde98 100644
--- a/swh/objstorage/tests/test_objstorage_api.py
+++ b/swh/objstorage/tests/test_objstorage_api.py
@@ -1,52 +1,50 @@
 # Copyright (C) 2015-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import shutil
 import tempfile
 import unittest

 import pytest

 from swh.core.api.tests.server_testing import ServerTestFixtureAsync
 from swh.objstorage.api.server import make_app
 from swh.objstorage.factory import get_objstorage
 from swh.objstorage.tests.objstorage_testing import ObjStorageTestFixture


 class TestRemoteObjStorage(
     ServerTestFixtureAsync, ObjStorageTestFixture, unittest.TestCase
 ):
     """ Test the remote archive API.
     """

     def setUp(self):
         self.tmpdir = tempfile.mkdtemp()
         self.config = {
             "objstorage": {
                 "cls": "pathslicing",
-                "args": {
-                    "root": self.tmpdir,
-                    "slicing": "0:1/0:5",
-                    "allow_delete": True,
-                },
+                "root": self.tmpdir,
+                "slicing": "0:1/0:5",
+                "allow_delete": True,
             },
             "client_max_size": 8 * 1024 * 1024,
         }
         self.app = make_app(self.config)
         super().setUp()
         self.storage = get_objstorage("remote", {"url": self.url()})

     def tearDown(self):
         super().tearDown()
         shutil.rmtree(self.tmpdir)

     @pytest.mark.skip("makes no sense to test this for the remote api")
     def test_delete_not_allowed(self):
         pass

     @pytest.mark.skip("makes no sense to test this for the remote api")
     def test_delete_not_allowed_by_default(self):
         pass
diff --git a/swh/objstorage/tests/test_server.py b/swh/objstorage/tests/test_server.py
index 638199c..77f2ccf 100644
--- a/swh/objstorage/tests/test_server.py
+++ b/swh/objstorage/tests/test_server.py
@@ -1,115 +1,126 @@
 # Copyright (C) 2019 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import copy

 import pytest
 import yaml

 from swh.objstorage.api.server import load_and_check_config


 def prepare_config_file(tmpdir, content, name="config.yml"):
     """Prepare configuration file in `$tmpdir/name` with content `content`.

     Args:
         tmpdir (LocalPath): root directory
         content (str/dict): Content of the file either as string or as
                             a dict. If a dict, converts the dict into a
                             yaml string.
         name (str): configuration filename

     Returns
         path (str) of the configuration file prepared.

     """
     config_path = tmpdir / name
     if isinstance(content, dict):  # convert if needed
         content = yaml.dump(content)
     config_path.write_text(content, encoding="utf-8")
     # pytest on python3.5 does not support LocalPath manipulation, so
     # convert path to string
     return str(config_path)


 def test_load_and_check_config_no_configuration():
     """Inexistent configuration files raises"""
     with pytest.raises(EnvironmentError) as e:
         load_and_check_config(None)

     assert e.value.args[0] == "Configuration file must be defined"

     config_path = "/indexer/inexistent/config.yml"
     with pytest.raises(FileNotFoundError) as e:
         load_and_check_config(config_path)

     assert e.value.args[0] == "Configuration file %s does not exist" % (config_path,)


 def test_load_and_check_config_invalid_configuration_toplevel(tmpdir):
     """Invalid configuration raises"""
     config = {"something": "useless"}
     config_path = prepare_config_file(tmpdir, content=config)
     with pytest.raises(KeyError) as e:
         load_and_check_config(config_path)

     assert e.value.args[0] == "Invalid configuration; missing objstorage config entry"


 def test_load_and_check_config_invalid_configuration(tmpdir):
     """Invalid configuration raises"""
-    for data, missing_keys in [
-        ({"objstorage": {"something": "useless"}}, ["cls", "args"]),
-        ({"objstorage": {"cls": "something"}}, ["args"]),
-    ]:
-        config_path = prepare_config_file(tmpdir, content=data)
-        with pytest.raises(KeyError) as e:
-            load_and_check_config(config_path)
+    config_path = prepare_config_file(
+        tmpdir, content={"objstorage": {"something": "useless"}}
+    )
+    with pytest.raises(KeyError) as e:
+        load_and_check_config(config_path)

-        assert e.value.args[0] == "Invalid configuration; missing %s config entry" % (
-            ", ".join(missing_keys),
-        )
+    assert "missing cls config entry" in e.value.args[0]


 def test_load_and_check_config_invalid_configuration_level2(tmpdir):
     """Invalid configuration at 2nd level raises"""
     config = {
         "objstorage": {
             "cls": "pathslicing",
             "args": {"root": "root", "slicing": "slicing",},
             "client_max_size": "10",
         }
     }
     for key in ("root", "slicing"):
         c = copy.deepcopy(config)
         c["objstorage"]["args"].pop(key)
         config_path = prepare_config_file(tmpdir, c)
         with pytest.raises(KeyError) as e:
             load_and_check_config(config_path)

-        assert (
-            e.value.args[0]
-            == "Invalid configuration; missing args.%s config entry" % key
-        )
+        assert "missing %s config entry" % key in e.value.args[0]


 def test_load_and_check_config_fine(tmpdir):
     """pathslicing configuration fine loads ok"""
     config = {
         "objstorage": {
             "cls": "pathslicing",
             "args": {"root": "root", "slicing": "slicing",},
         }
     }
     config_path = prepare_config_file(tmpdir, config)
     cfg = load_and_check_config(config_path)
     assert cfg == config


+def test_load_and_check_config_no_args(tmpdir):
+    """pathslicing configuration with no args key loads ok"""
+    config = {
+        "objstorage": {"cls": "pathslicing", "root": "root", "slicing": "slicing"}
+    }
+
+    config_path = prepare_config_file(tmpdir, config)
+    cfg = load_and_check_config(config_path)
+    assert cfg == config
+
+
 def test_load_and_check_config_fine2(tmpdir):
-    config = {"client_max_size": "10", "objstorage": {"cls": "remote", "args": {}}}
+    config = {"client_max_size": "10", "objstorage": {"cls": "memory", "args": {}}}
+    config_path = prepare_config_file(tmpdir, config)
+    cfg = load_and_check_config(config_path)
+    assert cfg == config
+
+
+def test_load_and_check_config_only_cls(tmpdir):
+    config = {"client_max_size": "10", "objstorage": {"cls": "memory"}}
     config_path = prepare_config_file(tmpdir, config)
     cfg = load_and_check_config(config_path)
     assert cfg == config
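
For illustration only (not part of the patch above), here is a minimal sketch of how an objstorage would be instantiated once this change lands: backend arguments are passed directly alongside `cls`, while the legacy nested `args` dict is still accepted but triggers a `DeprecationWarning` before being unpacked the same way. The `root` path and `slicing` values below are hypothetical placeholders.

```python
from swh.objstorage.factory import get_objstorage

# New-style configuration: backend arguments sit directly next to "cls".
# The path and slicing spec are example values, not taken from the patch.
storage = get_objstorage(cls="pathslicing", root="/tmp/objstorage", slicing="0:1/0:5")

# Legacy configuration with a nested "args" dict is still accepted for now,
# but emits a DeprecationWarning (see the backwards-compatibility branch in
# swh/objstorage/factory.py above).
legacy = {
    "cls": "pathslicing",
    "args": {"root": "/tmp/objstorage", "slicing": "0:1/0:5"},
}
storage = get_objstorage(**legacy)
```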