diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,4 +1,4 @@ -swh.core >= 0.0.75 +swh.core >= 0.3.0 swh.model >= 0.5.0 swh.scheduler >= 0.4.0 swh.storage >= 0.13.1 diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py --- a/swh/loader/core/loader.py +++ b/swh/loader/core/loader.py @@ -10,7 +10,7 @@ import os from typing import Any, Dict, Iterable, Optional, Tuple -from swh.core import config +from swh.core.config import load_from_envvar from swh.model.model import ( BaseContent, Content, @@ -27,8 +27,14 @@ from swh.storage import get_storage from swh.storage.utils import now +DEFAULT_CONFIG: Dict[str, Any] = { + "max_content_size": 100 * 1024 * 1024, + "save_data": False, + "save_data_path": "", +} -class BaseLoader(config.SWHConfig, metaclass=ABCMeta): + +class BaseLoader(metaclass=ABCMeta): """Mixin base class for loader. To use this class, you must: @@ -62,27 +68,8 @@ """ - CONFIG_BASE_FILENAME = None # type: Optional[str] - - DEFAULT_CONFIG = { - "storage": ("dict", {"cls": "remote", "url": "http://localhost:5002/",}), - "max_content_size": ("int", 100 * 1024 * 1024), - "save_data": ("bool", False), - "save_data_path": ("str", ""), - } # type: Dict[str, Tuple[str, Any]] - - ADDITIONAL_CONFIG = {} # type: Dict[str, Tuple[str, Any]] - - def __init__( - self, logging_class: Optional[str] = None, config: Dict[str, Any] = {} - ): - if config: - self.config = config - else: - self.config = self.parse_config_file( - additional_configs=[self.ADDITIONAL_CONFIG] - ) - + def __init__(self, logging_class: Optional[str] = None): + self.config = load_from_envvar(DEFAULT_CONFIG) self.storage = get_storage(**self.config["storage"]) if logging_class is None: diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py --- a/swh/loader/core/tests/test_loader.py +++ b/swh/loader/core/tests/test_loader.py @@ -40,19 +40,6 @@ class DummyDVCSLoader(DummyLoader, DVCSLoader): - """Unbuffered loader will send directly to storage new data - - """ - - def parse_config_file(self, *args, **kwargs): - return { - "max_content_size": 100 * 1024 * 1024, - "storage": { - "cls": "pipeline", - "steps": [{"cls": "retry",}, {"cls": "filter",}, {"cls": "memory",},], - }, - } - def get_contents(self): return [] @@ -73,51 +60,24 @@ class DummyBaseLoader(DummyLoader, BaseLoader): - """Buffered loader will send new data when threshold is reached - - """ - - def parse_config_file(self, *args, **kwargs): - return { - "max_content_size": 100 * 1024 * 1024, - "storage": { - "cls": "pipeline", - "steps": [ - {"cls": "retry",}, - {"cls": "filter",}, - { - "cls": "buffer", - "min_batch_size": { - "content": 2, - "content_bytes": 8, - "directory": 2, - "revision": 2, - "release": 2, - }, - }, - {"cls": "memory",}, - ], - }, - } - def store_data(self): pass -def test_base_loader(): +def test_base_loader(swh_config): loader = DummyBaseLoader() result = loader.load() assert result == {"status": "eventful"} -def test_dvcs_loader(): +def test_dvcs_loader(swh_config): loader = DummyDVCSLoader() result = loader.load() assert result == {"status": "eventful"} -def test_loader_logger_default_name(): +def test_loader_logger_default_name(swh_config): loader = DummyBaseLoader() assert isinstance(loader.log, logging.Logger) assert loader.log.name == "swh.loader.core.tests.test_loader.DummyBaseLoader" @@ -127,13 +87,13 @@ assert loader.log.name == "swh.loader.core.tests.test_loader.DummyDVCSLoader" -def test_loader_logger_with_name(): +def test_loader_logger_with_name(swh_config): loader = DummyBaseLoader("some.logger.name") assert isinstance(loader.log, logging.Logger) assert loader.log.name == "some.logger.name" -def test_loader_save_data_path(tmp_path): +def test_loader_save_data_path(tmp_path, swh_config): loader = DummyBaseLoader("some.logger.name.1") url = "http://bitbucket.org/something" loader.origin = Origin(url=url) @@ -179,7 +139,7 @@ raise RuntimeError("Failed to get contents!") -def test_dvcs_loader_exc_partial_visit(caplog): +def test_dvcs_loader_exc_partial_visit(swh_config, caplog): logger_name = "dvcsloaderexc" caplog.set_level(logging.ERROR, logger=logger_name) @@ -210,7 +170,7 @@ self.storage = BrokenStorageProxy(self.storage) -def test_dvcs_loader_storage_exc_partial_visit(caplog): +def test_dvcs_loader_storage_exc_partial_visit(swh_config, caplog): logger_name = "dvcsloaderexc" caplog.set_level(logging.ERROR, logger=logger_name) diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -27,7 +27,7 @@ import attr import sentry_sdk -from swh.core.config import SWHConfig +from swh.core.config import load_from_envvar from swh.core.tarball import uncompress from swh.loader.package.utils import download from swh.model import from_disk @@ -117,15 +117,17 @@ TPackageInfo = TypeVar("TPackageInfo", bound=BasePackageInfo) +DEFAULT_CONFIG = { + "create_authorities": True, + "create_fetchers": True, + "max_content_size": 100 * 1024 * 1024, +} + + class PackageLoader(Generic[TPackageInfo]): # Origin visit type (str) set by the loader visit_type = "" - DEFAULT_CONFIG = { - "create_authorities": ("bool", True), - "create_fetchers": ("bool", True), - } - def __init__(self, url): """Loader's constructor. This raises exception if the minimal required configuration is missing (cf. fn:`check` method). @@ -134,8 +136,7 @@ url (str): Origin url to load data from """ - # This expects to use the environment variable SWH_CONFIG_FILENAME - self.config = SWHConfig.parse_config_file() + self.config = load_from_envvar(DEFAULT_CONFIG) self._check_configuration() self.storage: StorageInterface = get_storage(**self.config["storage"]) self.url = url diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py --- a/swh/loader/package/pypi/tests/test_pypi.py +++ b/swh/loader/package/pypi/tests/test_pypi.py @@ -3,11 +3,13 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import copy import os from os import path from unittest.mock import patch import pytest +import yaml from swh.core.pytest_plugin import requests_mock_datadir_factory from swh.core.tarball import uncompress @@ -143,14 +145,26 @@ # configuration error # -def test_badly_configured_loader_raise(monkeypatch): +@pytest.fixture +def swh_config_missing_storage_key(swh_loader_config, monkeypatch, tmp_path): + """Explicitly override the swh_loader_config to make it missing the storage key""" + config = copy.deepcopy(swh_loader_config) + config.pop("storage") # misconfigure explicitly so the storage key is missing + # then flush to disk + conf_path = os.path.join(str(tmp_path), "loader.yml") + with open(conf_path, "w") as f: + f.write(yaml.dump(config)) + monkeypatch.setenv("SWH_CONFIG_FILENAME", conf_path) + return conf_path + + +def test_badly_configured_loader_raise(swh_config_missing_storage_key): """Badly configured loader should raise""" - monkeypatch.delenv("SWH_CONFIG_FILENAME", raising=False) - with pytest.raises(ValueError) as e: + with pytest.raises( + ValueError, match="Misconfiguration, at least the storage key should be set" + ): PyPILoader(url="some-url") - assert "Misconfiguration" in e.value.args[0] - def test_pypi_api_url(): """Compute pypi api url from the pypi project url should be ok"""