Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/storage/__init__.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import Counter | from collections import Counter | ||||
from importlib import import_module | |||||
import json | import json | ||||
from typing import Dict, Iterable, List, Optional, Tuple, Union | from typing import Dict, Iterable, List, Optional, Tuple, Union | ||||
import warnings | |||||
import psycopg2 | import psycopg2 | ||||
import psycopg2.pool | import psycopg2.pool | ||||
from swh.core.db.common import db_transaction | from swh.core.db.common import db_transaction | ||||
from swh.indexer.storage.interface import IndexerStorageInterface | |||||
from swh.model.hashutil import hash_to_bytes, hash_to_hex | from swh.model.hashutil import hash_to_bytes, hash_to_hex | ||||
from swh.model.model import SHA1_SIZE | from swh.model.model import SHA1_SIZE | ||||
from swh.storage.exc import StorageDBError | from swh.storage.exc import StorageDBError | ||||
from swh.storage.utils import get_partition_bounds_bytes | from swh.storage.utils import get_partition_bounds_bytes | ||||
from . import converters | from . import converters | ||||
from .db import Db | from .db import Db | ||||
from .exc import DuplicateId, IndexerStorageArgumentException | from .exc import DuplicateId, IndexerStorageArgumentException | ||||
Show All 10 Lines | |||||
) | ) | ||||
INDEXER_CFG_KEY = "indexer_storage" | INDEXER_CFG_KEY = "indexer_storage" | ||||
MAPPING_NAMES = ["codemeta", "gemspec", "maven", "npm", "pkg-info"] | MAPPING_NAMES = ["codemeta", "gemspec", "maven", "npm", "pkg-info"] | ||||
def get_indexer_storage(cls, args): | SERVER_IMPLEMENTATIONS: Dict[str, str] = { | ||||
"""Get an indexer storage object of class `storage_class` with | "local": ".IndexerStorage", | ||||
arguments `storage_args`. | "remote": ".api.client.RemoteStorage", | ||||
"memory": ".in_memory.IndexerStorage", | |||||
} | |||||
def get_indexer_storage(cls: str, **kwargs) -> IndexerStorageInterface: | |||||
"""Instantiate an indexer storage implementation of class `cls` with arguments | |||||
`kwargs`. | |||||
Args: | Args: | ||||
cls (str): storage's class, either 'local' or 'remote' | cls: indexer storage class (local, remote or memory) | ||||
args (dict): dictionary of arguments passed to the | kwargs: dictionary of arguments passed to the | ||||
storage class constructor | indexer storage class constructor | ||||
Returns: | Returns: | ||||
an instance of swh.indexer's storage (either local or remote) | an instance of swh.indexer.storage | ||||
Raises: | Raises: | ||||
ValueError if passed an unknown storage class. | ValueError if passed an unknown storage class. | ||||
""" | """ | ||||
if cls == "remote": | if "args" in kwargs: | ||||
from .api.client import RemoteStorage as IndexerStorage | warnings.warn( | ||||
elif cls == "local": | 'Explicit "args" key is deprecated, use keys directly instead.', | ||||
from . import IndexerStorage | DeprecationWarning, | ||||
elif cls == "memory": | ) | ||||
from .in_memory import IndexerStorage | kwargs = kwargs["args"] | ||||
else: | |||||
raise ValueError("Unknown indexer storage class `%s`" % cls) | class_path = SERVER_IMPLEMENTATIONS.get(cls) | ||||
if class_path is None: | |||||
return IndexerStorage(**args) | raise ValueError( | ||||
f"Unknown indexer storage class `{cls}`. " | |||||
f"Supported: {', '.join(SERVER_IMPLEMENTATIONS)}" | |||||
) | |||||
(module_path, class_name) = class_path.rsplit(".", 1) | |||||
module = import_module(module_path if module_path else ".", package=__package__) | |||||
BackendClass = getattr(module, class_name) | |||||
check_config = kwargs.pop("check_config", {}) | |||||
idx_storage = BackendClass(**kwargs) | |||||
if check_config: | |||||
if not idx_storage.check_config(**check_config): | |||||
raise EnvironmentError("Indexer storage check config failed") | |||||
return idx_storage | |||||
def check_id_duplicates(data): | def check_id_duplicates(data): | ||||
""" | """ | ||||
If any two row models in `data` have the same unique key, raises | If any two row models in `data` have the same unique key, raises | ||||
a `ValueError`. | a `ValueError`. | ||||
Values associated to the key must be hashable. | Values associated to the key must be hashable. | ||||
▲ Show 20 Lines • Show All 649 Lines • Show Last 20 Lines |