diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ python-magic >= 0.4.13 pyld xmltodict +typing-extensions diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py --- a/swh/indexer/storage/__init__.py +++ b/swh/indexer/storage/__init__.py @@ -3,15 +3,17 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information - from collections import Counter +from importlib import import_module import json from typing import Dict, Iterable, List, Optional, Tuple, Union +import warnings import psycopg2 import psycopg2.pool from swh.core.db.common import db_transaction +from swh.indexer.storage.interface import IndexerStorageInterface from swh.model.hashutil import hash_to_bytes, hash_to_hex from swh.model.model import SHA1_SIZE from swh.storage.exc import StorageDBError @@ -38,32 +40,52 @@ MAPPING_NAMES = ["codemeta", "gemspec", "maven", "npm", "pkg-info"] -def get_indexer_storage(cls, args): - """Get an indexer storage object of class `storage_class` with - arguments `storage_args`. +SERVER_IMPLEMENTATIONS: Dict[str, str] = { + "local": ".IndexerStorage", + "remote": ".api.client.RemoteStorage", + "memory": ".in_memory.IndexerStorage", +} + + +def get_indexer_storage(cls: str, **kwargs) -> IndexerStorageInterface: + """Instantiate an indexer storage implementation of class `cls` with arguments + `kwargs`. Args: - cls (str): storage's class, either 'local' or 'remote' - args (dict): dictionary of arguments passed to the - storage class constructor + cls: indexer storage class (local, remote or memory) + kwargs: dictionary of arguments passed to the + indexer storage class constructor Returns: - an instance of swh.indexer's storage (either local or remote) + an instance of swh.indexer.storage Raises: ValueError if passed an unknown storage class. """ - if cls == "remote": - from .api.client import RemoteStorage as IndexerStorage - elif cls == "local": - from . import IndexerStorage - elif cls == "memory": - from .in_memory import IndexerStorage - else: - raise ValueError("Unknown indexer storage class `%s`" % cls) - - return IndexerStorage(**args) + if "args" in kwargs: + warnings.warn( + 'Explicit "args" key is deprecated, use keys directly instead.', + DeprecationWarning, + ) + kwargs = kwargs["args"] + + class_path = SERVER_IMPLEMENTATIONS.get(cls) + if class_path is None: + raise ValueError( + f"Unknown indexer storage class `{cls}`. " + f"Supported: {', '.join(SERVER_IMPLEMENTATIONS)}" + ) + + (module_path, class_name) = class_path.rsplit(".", 1) + module = import_module(module_path if module_path else ".", package=__package__) + BackendClass = getattr(module, class_name) + check_config = kwargs.pop("check_config", {}) + idx_storage = BackendClass(**kwargs) + if check_config: + if not idx_storage.check_config(**check_config): + raise EnvironmentError("Indexer storage check config failed") + return idx_storage def check_id_duplicates(data): diff --git a/swh/indexer/storage/interface.py b/swh/indexer/storage/interface.py --- a/swh/indexer/storage/interface.py +++ b/swh/indexer/storage/interface.py @@ -5,6 +5,8 @@ from typing import Dict, Iterable, List, Optional, Tuple, TypeVar, Union +from typing_extensions import Protocol, runtime_checkable + from swh.core.api import remote_api_endpoint from swh.core.api.classes import PagedResult as CorePagedResult from swh.indexer.storage.model import ( @@ -24,7 +26,8 @@ Sha1 = bytes -class IndexerStorageInterface: +@runtime_checkable +class IndexerStorageInterface(Protocol): @remote_api_endpoint("check_config") def check_config(self, *, check_write): """Check that the storage is configured and ready to go.""" diff --git a/swh/indexer/tests/conftest.py b/swh/indexer/tests/conftest.py --- a/swh/indexer/tests/conftest.py +++ b/swh/indexer/tests/conftest.py @@ -42,7 +42,7 @@ indexers classes. """ - idx_storage = get_indexer_storage("memory", {}) + idx_storage = get_indexer_storage("memory") with patch("swh.indexer.storage.in_memory.IndexerStorage") as idx_storage_mock: idx_storage_mock.return_value = idx_storage yield idx_storage diff --git a/swh/indexer/tests/storage/conftest.py b/swh/indexer/tests/storage/conftest.py --- a/swh/indexer/tests/storage/conftest.py +++ b/swh/indexer/tests/storage/conftest.py @@ -73,8 +73,4 @@ @pytest.fixture def swh_indexer_storage(swh_indexer_storage_postgresql): - storage_config = { - "cls": "local", - "args": {"db": swh_indexer_storage_postgresql.dsn,}, - } - return get_indexer_storage(**storage_config) + return get_indexer_storage("local", db=swh_indexer_storage_postgresql.dsn) diff --git a/swh/indexer/tests/storage/test_api_client.py b/swh/indexer/tests/storage/test_api_client.py --- a/swh/indexer/tests/storage/test_api_client.py +++ b/swh/indexer/tests/storage/test_api_client.py @@ -14,11 +14,7 @@ @pytest.fixture def app(swh_indexer_storage_postgresql): - storage_config = { - "cls": "local", - "args": {"db": swh_indexer_storage_postgresql.dsn,}, - } - server.storage = get_indexer_storage(**storage_config) + server.storage = get_indexer_storage("local", db=swh_indexer_storage_postgresql.dsn) return server.app diff --git a/swh/indexer/tests/storage/test_in_memory.py b/swh/indexer/tests/storage/test_in_memory.py --- a/swh/indexer/tests/storage/test_in_memory.py +++ b/swh/indexer/tests/storage/test_in_memory.py @@ -12,8 +12,4 @@ @pytest.fixture def swh_indexer_storage(): - storage_config = { - "cls": "memory", - "args": {}, - } - return get_indexer_storage(**storage_config) + return get_indexer_storage("memory") diff --git a/swh/indexer/tests/storage/test_init.py b/swh/indexer/tests/storage/test_init.py new file mode 100644 --- /dev/null +++ b/swh/indexer/tests/storage/test_init.py @@ -0,0 +1,89 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import inspect + +import pytest + +from swh.indexer.storage import IndexerStorage, get_indexer_storage +from swh.indexer.storage.api.client import RemoteStorage +from swh.indexer.storage.in_memory import IndexerStorage as MemoryIndexerStorage +from swh.indexer.storage.interface import IndexerStorageInterface + +SERVER_IMPLEMENTATIONS_KWARGS = [ + ("remote", RemoteStorage, {"url": "localhost"}), + ("local", IndexerStorage, {"db": "something"}), +] + +SERVER_IMPLEMENTATIONS = SERVER_IMPLEMENTATIONS_KWARGS + [ + ("memory", MemoryIndexerStorage, {}), +] + + +@pytest.fixture +def mock_psycopg2(mocker): + mocker.patch("swh.indexer.storage.psycopg2.pool") + return mocker + + +def test_init_get_indexer_storage_failure(): + with pytest.raises(ValueError, match="Unknown indexer storage class"): + get_indexer_storage("unknown-idx-storage") + + +@pytest.mark.parametrize("class_name,expected_class,kwargs", SERVER_IMPLEMENTATIONS) +def test_init_get_indexer_storage(class_name, expected_class, kwargs, mock_psycopg2): + if kwargs: + concrete_idx_storage = get_indexer_storage(class_name, **kwargs) + else: + concrete_idx_storage = get_indexer_storage(class_name) + assert isinstance(concrete_idx_storage, expected_class) + assert isinstance(concrete_idx_storage, IndexerStorageInterface) + + +@pytest.mark.parametrize( + "class_name,expected_class,kwargs", SERVER_IMPLEMENTATIONS_KWARGS +) +def test_init_get_indexer_storage_deprecation_warning( + class_name, expected_class, kwargs, mock_psycopg2 +): + with pytest.warns(DeprecationWarning): + concrete_idx_storage = get_indexer_storage(class_name, args=kwargs) + assert isinstance(concrete_idx_storage, expected_class) + + +def test_types(swh_indexer_storage) -> None: + """Checks all methods of StorageInterface are implemented by this + backend, and that they have the same signature.""" + # Create an instance of the protocol (which cannot be instantiated + # directly, so this creates a subclass, then instantiates it) + interface = type("_", (IndexerStorageInterface,), {})() + + assert "content_mimetype_add" in dir(interface) + + missing_methods = [] + + for meth_name in dir(interface): + if meth_name.startswith("_"): + continue + interface_meth = getattr(interface, meth_name) + try: + concrete_meth = getattr(swh_indexer_storage, meth_name) + except AttributeError: + missing_methods.append(meth_name) + continue + + expected_signature = inspect.signature(interface_meth) + actual_signature = inspect.signature(concrete_meth) + + assert expected_signature == actual_signature, meth_name + + assert missing_methods == [] + + # If all the assertions above succeed, then this one should too. + # But there's no harm in double-checking. + # And we could replace the assertions above by this one, but unlike + # the assertions above, it doesn't explain what is missing. + assert isinstance(swh_indexer_storage, IndexerStorageInterface) diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py --- a/swh/indexer/tests/storage/test_storage.py +++ b/swh/indexer/tests/storage/test_storage.py @@ -3,7 +3,6 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import inspect import math import threading from typing import Any, Dict, List, Tuple, Type, cast @@ -82,35 +81,6 @@ assert swh_indexer_storage.check_config(check_write=False) -def test_types(swh_indexer_storage) -> None: - """Checks all methods of StorageInterface are implemented by this - backend, and that they have the same signature.""" - # Create an instance of the protocol (which cannot be instantiated - # directly, so this creates a subclass, then instantiates it) - interface = type("_", (IndexerStorageInterface,), {})() - - assert "content_mimetype_add" in dir(interface) - - missing_methods = [] - - for meth_name in dir(interface): - if meth_name.startswith("_"): - continue - interface_meth = getattr(interface, meth_name) - try: - concrete_meth = getattr(swh_indexer_storage, meth_name) - except AttributeError: - missing_methods.append(meth_name) - continue - - expected_signature = inspect.signature(interface_meth) - actual_signature = inspect.signature(concrete_meth) - - assert expected_signature == actual_signature, meth_name - - assert missing_methods == [] - - class StorageETypeTester: """Base class for testing a series of common behaviour between a bunch of endpoint types supported by an IndexerStorage.