diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@
 python-magic >= 0.4.13
 pyld
 xmltodict
+typing-extensions
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -3,10 +3,13 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from __future__ import annotations
 
 from collections import Counter
+from importlib import import_module
 import json
-from typing import Dict, Iterable, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union
+import warnings
 
 import psycopg2
 import psycopg2.pool
@@ -32,38 +35,68 @@
     RevisionIntrinsicMetadataRow,
 )
 
+if TYPE_CHECKING:
+    from swh.indexer.storage.interface import IndexerStorageInterface
+
 INDEXER_CFG_KEY = "indexer_storage"
 
 
 MAPPING_NAMES = ["codemeta", "gemspec", "maven", "npm", "pkg-info"]
 
 
-def get_indexer_storage(cls, args):
-    """Get an indexer storage object of class `storage_class` with
-    arguments `storage_args`.
+SERVER_IMPLEMENTATIONS: Dict[str, str] = {
+    "local": ".IndexerStorage",
+    "remote": ".api.client.RemoteStorage",
+    "memory": ".in_memory.IndexerStorage",
+}
+
+
+def get_indexer_storage(cls: str, **kwargs) -> IndexerStorageInterface:
+    """Get an indexer storage object of class `cls`, built from `kwargs`.
 
     Args:
-        cls (str): storage's class, either 'local' or 'remote'
-        args (dict): dictionary of arguments passed to the
-            storage class constructor
+        cls: storage's class, one of 'local', 'remote' or 'memory'
+        kwargs: keyword arguments passed to the indexer storage class
+            constructor, except for two keys handled here: `check_config`
+            (dict), whose items are passed as keyword arguments to the new
+            storage's `check_config` method when non-empty, and the
+            deprecated `args` (dict), which replaces all other keyword
+            arguments when present
 
     Returns:
-        an instance of swh.indexer's storage (either local or remote)
+        an instance of swh.indexer.storage (either local, remote, memory)
 
     Raises:
         ValueError if passed an unknown storage class.
+        EnvironmentError if the requested `check_config` check fails.
 
     """
-    if cls == "remote":
-        from .api.client import RemoteStorage as IndexerStorage
-    elif cls == "local":
-        from . import IndexerStorage
-    elif cls == "memory":
-        from .in_memory import IndexerStorage
-    else:
-        raise ValueError("Unknown indexer storage class `%s`" % cls)
-
-    return IndexerStorage(**args)
+    if "args" in kwargs:
+        warnings.warn(
+            'Explicit "args" key is deprecated, use keys directly instead.',
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        # Copy so that popping "check_config" below does not mutate the
+        # caller's dict.
+        kwargs = dict(kwargs["args"])
+
+    class_path = SERVER_IMPLEMENTATIONS.get(cls)
+    if class_path is None:
+        raise ValueError(
+            f"Unknown indexer storage class `{cls}`. "
+            f"Supported: {', '.join(SERVER_IMPLEMENTATIONS)}"
+        )
+
+    (module_path, class_name) = class_path.rsplit(".", 1)
+    module = import_module(module_path if module_path else ".", package=__package__)
+    IndexerStorage = getattr(module, class_name)
+    check_config = kwargs.pop("check_config", {})
+    idx_storage = IndexerStorage(**kwargs)
+    if check_config:
+        if not idx_storage.check_config(**check_config):
+            raise EnvironmentError("Indexer storage check config failed")
+    return idx_storage
 
 
 def check_id_duplicates(data):
diff --git a/swh/indexer/storage/interface.py b/swh/indexer/storage/interface.py
--- a/swh/indexer/storage/interface.py
+++ b/swh/indexer/storage/interface.py
@@ -5,6 +5,8 @@
 
 from typing import Dict, Iterable, List, Optional, Tuple, TypeVar, Union
 
+from typing_extensions import Protocol, runtime_checkable
+
 from swh.core.api import remote_api_endpoint
 from swh.core.api.classes import PagedResult as CorePagedResult
 from swh.indexer.storage.model import (
@@ -24,7 +26,8 @@
 Sha1 = bytes
 
 
-class IndexerStorageInterface:
+@runtime_checkable
+class IndexerStorageInterface(Protocol):
     @remote_api_endpoint("check_config")
     def check_config(self, *, check_write):
         """Check that the storage is configured and ready to go."""
diff --git a/swh/indexer/tests/storage/test_init.py b/swh/indexer/tests/storage/test_init.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/storage/test_init.py
@@ -0,0 +1,52 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+from swh.indexer.storage import IndexerStorage, get_indexer_storage
+from swh.indexer.storage.api.client import RemoteStorage
+from swh.indexer.storage.in_memory import IndexerStorage as MemoryIndexerStorage
+from swh.indexer.storage.interface import IndexerStorageInterface
+
+SERVER_IMPLEMENTATIONS_KWARGS = [
+    ("remote", RemoteStorage, {"url": "localhost"}),
+    ("local", IndexerStorage, {"db": "something"}),
+]
+
+SERVER_IMPLEMENTATIONS = SERVER_IMPLEMENTATIONS_KWARGS + [
+    ("memory", MemoryIndexerStorage, {}),
+]
+
+
+@pytest.fixture
+def mock_psycopg2(mocker):
+    mocker.patch("swh.indexer.storage.psycopg2.pool")
+    return mocker
+
+
+def test_init_get_indexer_storage_failure():
+    with pytest.raises(ValueError, match="Unknown indexer storage class"):
+        get_indexer_storage("unknown-idx-storage")
+
+
+@pytest.mark.parametrize("class_name,expected_class,kwargs", SERVER_IMPLEMENTATIONS)
+def test_init_get_indexer_storage(class_name, expected_class, kwargs, mock_psycopg2):
+    if kwargs:
+        concrete_idx_storage = get_indexer_storage(class_name, **kwargs)
+    else:
+        concrete_idx_storage = get_indexer_storage(class_name)
+    assert isinstance(concrete_idx_storage, expected_class)
+    assert isinstance(concrete_idx_storage, IndexerStorageInterface)
+
+
+@pytest.mark.parametrize(
+    "class_name,expected_class,kwargs", SERVER_IMPLEMENTATIONS_KWARGS
+)
+def test_init_get_indexer_storage_deprecation_warning(
+    class_name, expected_class, kwargs, mock_psycopg2
+):
+    with pytest.warns(DeprecationWarning):
+        concrete_idx_storage = get_indexer_storage(class_name, args=kwargs)
+    assert isinstance(concrete_idx_storage, expected_class)