Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/pattern.py
- This file was added.
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Any, Dict, Optional | |||||
from swh.core.config import load_from_envvar | |||||
from swh.storage import get_storage | |||||
from swh.storage.interface import StorageInterface | |||||
# Default configuration handed to ``load_from_envvar`` by
# ``Loader.from_configfile``.
# 100 * 2 ** 20 == 100 * 1024 * 1024 == 104857600 (presumably a size in bytes,
# i.e. 100 MiB -- confirm against the loaders consuming this value).
DEFAULT_CONFIG: Dict[str, Any] = {"max_content_size": 100 * 2 ** 20}
class Loader:
    """Base class shared by the Software Heritage loaders.

    A loader fetches information about an origin (a git/mercurial/svn repository,
    a pypi/npm/... package artifact) and ingests the resulting
    contents/directories/revisions/releases/snapshot into the storage backend.

    At this stage the class only offers two alternate constructors, the class
    methods ``from_config`` and ``from_configfile``, which centralize and ease
    loader instantiation.

    Args:
        storage: the Storage instance used to register the origin information
        max_content_size: optional value, converted with ``int()`` when truthy
            and stored as ``None`` otherwise

    """

    def __init__(
        self,
        storage: StorageInterface,
        max_content_size: Optional[int] = None,
    ):
        self.storage = storage
        # Every falsy value (None, 0, empty string...) is normalized to None;
        # anything else is coerced to an integer.
        if max_content_size:
            self.max_content_size = int(max_content_size)
        else:
            self.max_content_size = None

    @classmethod
    def from_config(cls, storage: Dict[str, Any], **config: Any):
        """Build a loader out of a configuration dict.

        This mostly exists as a backwards-compatibility shim for the CLI.

        Args:
            storage: keyword arguments given to ``get_storage`` to instantiate
                the storage
            config: remaining configuration entries. The legacy ``storage`` and
                ``celery`` keys are discarded; every other key is forwarded as
                a keyword argument to the loader constructor (e.g. an optional
                ``credentials`` list for loaders accepting one).

        Returns:
            the instantiated loader

        """
        # Legacy keys are not used by this generation of loader: filter them out
        # before forwarding the rest to the constructor.
        loader_kwargs = {
            key: value
            for key, value in config.items()
            if key not in ("storage", "celery")
        }
        return cls(storage=get_storage(**storage), **loader_kwargs)

    @classmethod
    def from_configfile(cls, **kwargs: Any):
        """Build a loader out of the configuration read through the
        SWH_CONFIG_FILENAME environment variable.

        Keyword arguments whose value is not None are layered on top of the
        loaded configuration before it is handed over to ``from_config``.

        Args:
            kwargs: extra keyword arguments for the loader instantiation;
                entries set to None are ignored

        """
        settings = dict(load_from_envvar(DEFAULT_CONFIG))
        # Only explicit (non-None) values may override what the file provides.
        for name, value in kwargs.items():
            if value is not None:
                settings[name] = value
        return cls.from_config(**settings)