diff --git a/swh/storage/__init__.py b/swh/storage/__init__.py --- a/swh/storage/__init__.py +++ b/swh/storage/__init__.py @@ -58,10 +58,17 @@ (module_path, class_name) = class_path.rsplit(".", 1) module = importlib.import_module(module_path, package=__package__) Storage = getattr(module, class_name) - return Storage(**kwargs) + check_config = kwargs.pop("check_config", {}) + storage = Storage(**kwargs) + if check_config: + if not storage.check_config(**check_config): + raise EnvironmentError("storage check config failed") + return storage -def get_storage_pipeline(steps: List[Dict[str, Any]]) -> StorageInterface: +def get_storage_pipeline( + steps: List[Dict[str, Any]], check_config=None +) -> StorageInterface: """Recursively get a storage object that may use other storage objects as backends. @@ -88,6 +95,7 @@ } if storage_config: step["storage"] = storage_config + step["check_config"] = check_config storage_config = step if storage_config is None: diff --git a/swh/storage/pytest_plugin.py b/swh/storage/pytest_plugin.py --- a/swh/storage/pytest_plugin.py +++ b/swh/storage/pytest_plugin.py @@ -43,6 +43,7 @@ dbname="tests", ), "objstorage": {"cls": "memory", "args": {}}, + "check_config": {"check_write": True}, } diff --git a/swh/storage/tests/test_init.py b/swh/storage/tests/test_init.py --- a/swh/storage/tests/test_init.py +++ b/swh/storage/tests/test_init.py @@ -7,9 +7,10 @@ from unittest.mock import patch +from swh.core.pytest_plugin import RPCTestAdapter from swh.storage import get_storage - -from swh.storage.api.client import RemoteStorage +from swh.storage.api import server +from swh.storage.api import client from swh.storage.postgresql.storage import Storage as DbStorage from swh.storage.in_memory import InMemoryStorage from swh.storage.buffer import BufferingProxyStorage @@ -17,49 +18,45 @@ from swh.storage.retry import RetryingProxyStorage -@patch("swh.storage.postgresql.storage.psycopg2.pool") -def test_get_storage(mock_pool): - """Instantiating an existing storage should be ok - - """ - mock_pool.ThreadedConnectionPool.return_value = None - for cls, real_class, dummy_args in [ - ("remote", RemoteStorage, {"url": "url"}), +STORAGES = [ + pytest.param(cls, real_class, kwargs, id=cls) + for (cls, real_class, kwargs) in [ + ("remote", client.RemoteStorage, {"url": "url"}), ("memory", InMemoryStorage, {}), ( "local", DbStorage, - {"db": "postgresql://db", "objstorage": {"cls": "memory", "args": {},},}, + {"db": "postgresql://db", "objstorage": {"cls": "memory", "args": {}}}, ), ("filter", FilteringProxyStorage, {"storage": {"cls": "memory"}}), ("buffer", BufferingProxyStorage, {"storage": {"cls": "memory"}}), ("retry", RetryingProxyStorage, {"storage": {"cls": "memory"}}), - ]: - actual_storage = get_storage(cls, **dummy_args) - assert actual_storage is not None - assert isinstance(actual_storage, real_class) + ] +] +@pytest.mark.parametrize("cls,real_class,args", STORAGES) @patch("swh.storage.postgresql.storage.psycopg2.pool") -def test_get_storage_legacy_args(mock_pool): +def test_get_storage(mock_pool, cls, real_class, args): + """Instantiating an existing storage should be ok + + """ + mock_pool.ThreadedConnectionPool.return_value = None + actual_storage = get_storage(cls, **args) + assert actual_storage is not None + assert isinstance(actual_storage, real_class) + + +@pytest.mark.parametrize("cls,real_class,args", STORAGES) +@patch("swh.storage.postgresql.storage.psycopg2.pool") +def test_get_storage_legacy_args(mock_pool, cls, real_class, args): """Instantiating an existing storage should be ok even with the legacy explicit 'args' keys """ mock_pool.ThreadedConnectionPool.return_value = None - for cls, real_class, dummy_args in [ - ("remote", RemoteStorage, {"url": "url"}), - ("memory", InMemoryStorage, {}), - ( - "local", - DbStorage, - {"db": "postgresql://db", "objstorage": {"cls": "memory", "args": {},},}, - ), - ("filter", FilteringProxyStorage, {"storage": {"cls": "memory", "args": {}}}), - ("buffer", BufferingProxyStorage, {"storage": {"cls": "memory", "args": {}}}), - ]: - with pytest.warns(DeprecationWarning): - actual_storage = get_storage(cls, args=dummy_args) + with pytest.warns(DeprecationWarning): + actual_storage = get_storage(cls, args=args) assert actual_storage is not None assert isinstance(actual_storage, real_class) @@ -105,3 +102,133 @@ assert isinstance(storage, FilteringProxyStorage) assert isinstance(storage.storage, BufferingProxyStorage) assert isinstance(storage.storage.storage, InMemoryStorage) + + +# get_storage's check_config argument tests + +# the "remote" and "pipeline" cases are tested in dedicated test functions below +@pytest.mark.parametrize( + "cls,real_class,kwargs", [x for x in STORAGES if x.id not in ("remote", "local")] +) +def test_get_storage_check_config(cls, real_class, kwargs, monkeypatch): + """Instantiating an existing storage with check_config should be ok + + """ + check_backend_check_config(monkeypatch, dict(cls=cls, **kwargs)) + + +@patch("swh.storage.postgresql.storage.psycopg2.pool") +def test_get_storage_local_check_config(mock_pool, monkeypatch): + """Instantiating a local storage with check_config should be ok + + """ + mock_pool.ThreadedConnectionPool.return_value = None + check_backend_check_config( + monkeypatch, + { + "cls": "local", + "db": "postgresql://db", + "objstorage": {"cls": "memory", "args": {}}, + }, + backend_storage_cls=DbStorage, + ) + + +def test_get_storage_pipeline_check_config(monkeypatch): + """Test that the check_config option works as intended for a pipelined storage""" + config = { + "cls": "pipeline", + "steps": [ + {"cls": "filter",}, + {"cls": "buffer", "min_batch_size": {"content": 10,},}, + {"cls": "memory",}, + ], + } + check_backend_check_config( + monkeypatch, config, + ) + + +def test_get_storage_remote_check_config(monkeypatch): + """Test that the check_config option works as intended for a remote storage""" + + monkeypatch.setattr( + server, "storage", get_storage(cls="memory", journal_writer={"cls": "memory"}) + ) + test_client = server.app.test_client() + + class MockedRemoteStorage(client.RemoteStorage): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.session.adapters.clear() + self.session.mount("mock://", RPCTestAdapter(test_client)) + + monkeypatch.setattr(client, "RemoteStorage", MockedRemoteStorage) + + config = { + "cls": "remote", + "url": "mock://example.com", + } + check_backend_check_config( + monkeypatch, config, + ) + + +def check_backend_check_config( + monkeypatch, config, backend_storage_cls=InMemoryStorage +): + """Check the staged/indirect storage (pipeline or remote) works + as desired with regard to the check_config option of the get_storage() + factory function. + + If set, the check_config argument is used to call the Storage.check_config() at + instantiation time in the get_storage() fctory function. This is supposed to be + passed to each step of the Storage pipeline to check if the actual backend (in + memory in this test) check_config() method is properly called and that it check for + read/write access to the backend storage. + + monkeypatch is supposed to be the monkeypatch pytest fixture to be used from the + calling test_ function. + + config is the config dict passed to get_storage() + """ + access = None + + def mockcheck(self, check_write=False): + if access == "none": + return False + if access == "read": + return check_write is False + if access == "write": + return True + + monkeypatch.setattr(backend_storage_cls, "check_config", mockcheck) + + # simulate no read nor write access to the underlying (memory) storage + access = "none" + # by default, no check, so no complain + assert get_storage(**config) + # if asked to check, complain + with pytest.raises(EnvironmentError): + get_storage(check_config={"check_write": False}, **config) + with pytest.raises(EnvironmentError): + get_storage(check_config={"check_write": True}, **config) + + # simulate no write access to the underlying (memory) storage + access = "read" + # by default, no check so no complain + assert get_storage(**config) + # if asked to check for read access, no complain + get_storage(check_config={"check_write": False}, **config) + # if asked to check for write access, complain + with pytest.raises(EnvironmentError): + get_storage(check_config={"check_write": True}, **config) + + # simulate read & write access to the underlying (memory) storage + access = "write" + # by default, no check so no complain + assert get_storage(**config) + # if asked to check for read access, no complain + get_storage(check_config={"check_write": False}, **config) + # if asked to check for write access, no complain + get_storage(check_config={"check_write": True}, **config)