diff --git a/conftest.py b/conftest.py index 50822c3..d860c9f 100644 --- a/conftest.py +++ b/conftest.py @@ -1,75 +1,76 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import pytest import yaml from typing import Any, Dict from swh.storage.tests.conftest import * # noqa from swh.scheduler.tests.conftest import * # noqa @pytest.fixture def swh_loader_config(swh_storage_postgresql) -> Dict[str, Any]: return { 'storage': { 'cls': 'pipeline', 'steps': [ {'cls': 'retry'}, {'cls': 'filter'}, {'cls': 'buffer'}, + {'cls': 'validate'}, { 'cls': 'local', 'args': { 'db': swh_storage_postgresql.dsn, 'objstorage': { 'cls': 'memory', 'args': {} }, } } ] }, 'deposit': { 'url': 'https://deposit.softwareheritage.org/1/private', 'auth': { 'username': 'user', 'password': 'pass', } }, } @pytest.fixture def swh_config(swh_loader_config, monkeypatch, tmp_path): conffile = os.path.join(str(tmp_path), 'loader.yml') with open(conffile, 'w') as f: f.write(yaml.dump(swh_loader_config)) monkeypatch.setenv('SWH_CONFIG_FILENAME', conffile) return conffile @pytest.fixture(autouse=True, scope='session') def swh_proxy(): """Automatically inject this fixture in all tests to ensure no outside connection takes place. """ os.environ['http_proxy'] = 'http://localhost:999' os.environ['https_proxy'] = 'http://localhost:999' @pytest.fixture(scope='session') # type: ignore # expected redefinition def celery_includes(): return [ 'swh.loader.package.archive.tasks', 'swh.loader.package.cran.tasks', 'swh.loader.package.debian.tasks', 'swh.loader.package.deposit.tasks', 'swh.loader.package.npm.tasks', 'swh.loader.package.pypi.tasks', ] diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py index 7e53aa0..ae9ca77 100644 --- a/swh/loader/core/tests/test_loader.py +++ b/swh/loader/core/tests/test_loader.py @@ -1,145 +1,151 @@ # Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import hashlib import logging import pytest from swh.loader.core.loader import BaseLoader, DVCSLoader class DummyLoader: def cleanup(self): pass def prepare(self, *args, **kwargs): pass def fetch_data(self): pass def store_data(self): pass def prepare_origin_visit(self, *args, **kwargs): origin = {'url': 'some-url'} self.origin = origin self.origin_url = origin['url'] self.visit_date = datetime.datetime.utcnow() self.visit_type = 'git' self.storage.origin_visit_add(self.origin_url, self.visit_date, self.visit_type) class DummyDVCSLoader(DummyLoader, DVCSLoader): """Unbuffered loader will send directly to storage new data """ def parse_config_file(self, *args, **kwargs): return { 'max_content_size': 100 * 1024 * 1024, 'storage': { 'cls': 'pipeline', 'steps': [ { 'cls': 'retry', }, { 'cls': 'filter', }, + { + 'cls': 'validate', + }, { 'cls': 'memory', }, ] }, } class DummyBaseLoader(DummyLoader, BaseLoader): """Buffered loader will send new data when threshold is reached """ def parse_config_file(self, *args, **kwargs): return { 'max_content_size': 100 * 1024 * 1024, 'storage': { 'cls': 'pipeline', 'steps': [ { 'cls': 'retry', }, { 'cls': 'filter', }, { 'cls': 'buffer', 'min_batch_size': { 'content': 2, 'content_bytes': 8, 'directory': 2, 'revision': 2, 'release': 2, }, }, + { + 'cls': 'validate', + }, { 'cls': 'memory', }, ] }, } def test_base_loader(): loader = DummyBaseLoader() result = loader.load() assert result == {'status': 'eventful'} def test_dvcs_loader(): loader = DummyDVCSLoader() result = loader.load() assert result == {'status': 'eventful'} def test_loader_logger_default_name(): loader = DummyBaseLoader() assert isinstance(loader.log, logging.Logger) assert loader.log.name == \ 'swh.loader.core.tests.test_loader.DummyBaseLoader' loader = DummyDVCSLoader() assert isinstance(loader.log, logging.Logger) assert loader.log.name == \ 'swh.loader.core.tests.test_loader.DummyDVCSLoader' def test_loader_logger_with_name(): loader = DummyBaseLoader('some.logger.name') assert isinstance(loader.log, logging.Logger) assert loader.log.name == \ 'some.logger.name' @pytest.mark.fs def test_loader_save_data_path(tmp_path): loader = DummyBaseLoader('some.logger.name.1') url = 'http://bitbucket.org/something' loader.origin = { 'url': url, } loader.visit_date = datetime.datetime(year=2019, month=10, day=1) loader.config = { 'save_data_path': tmp_path, } hash_url = hashlib.sha1(url.encode('utf-8')).hexdigest() expected_save_path = '%s/sha1:%s/%s/2019' % ( str(tmp_path), hash_url[0:2], hash_url ) save_path = loader.get_save_data_path() assert save_path == expected_save_path diff --git a/swh/loader/package/tests/test_common.py b/swh/loader/package/tests/test_common.py index 2855f07..c430bf9 100644 --- a/swh/loader/package/tests/test_common.py +++ b/swh/loader/package/tests/test_common.py @@ -1,175 +1,188 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from swh.model.hashutil import hash_to_bytes from swh.loader.package.tests.common import ( decode_target, check_snapshot, check_metadata, check_metadata_paths ) from swh.storage import get_storage hash_hex = '43e45d56f88993aae6a0198013efa80716fd8920' +storage_config = { + 'cls': 'pipeline', + 'steps': [ + { + 'cls': 'validate', + }, + { + 'cls': 'memory', + } + ] +} + + def test_decode_target_edge(): assert not decode_target(None) def test_decode_target(): actual_alias_decode_target = decode_target({ 'target_type': 'alias', 'target': b'something', }) assert actual_alias_decode_target == { 'target_type': 'alias', 'target': 'something', } actual_decode_target = decode_target({ 'target_type': 'revision', 'target': hash_to_bytes(hash_hex), }) assert actual_decode_target == { 'target_type': 'revision', 'target': hash_hex, } def test_check_snapshot(): - storage = get_storage(cls='memory') + storage = get_storage(**storage_config) snap_id = '2498dbf535f882bc7f9a18fb16c9ad27fda7bab7' snapshot = { 'id': hash_to_bytes(snap_id), 'branches': { b'master': { 'target': hash_to_bytes(hash_hex), 'target_type': 'revision', }, }, } s = storage.snapshot_add([snapshot]) assert s == { 'snapshot:add': 1, } expected_snapshot = { 'id': snap_id, 'branches': { 'master': { 'target': hash_hex, 'target_type': 'revision', } } } check_snapshot(expected_snapshot, storage) def test_check_snapshot_failure(): - storage = get_storage(cls='memory') + storage = get_storage(**storage_config) snapshot = { 'id': hash_to_bytes('2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'), 'branches': { b'master': { 'target': hash_to_bytes(hash_hex), 'target_type': 'revision', }, }, } s = storage.snapshot_add([snapshot]) assert s == { 'snapshot:add': 1, } unexpected_snapshot = { 'id': '2498dbf535f882bc7f9a18fb16c9ad27fda7bab7', 'branches': { 'master': { 'target': hash_hex, 'target_type': 'release', # wrong value } } } with pytest.raises(AssertionError): check_snapshot(unexpected_snapshot, storage) def test_check_metadata(): metadata = { 'a': { 'raw': { 'time': 'something', }, }, 'b': [], 'c': 1, } for raw_path, raw_type in [ ('a.raw', dict), ('a.raw.time', str), ('b', list), ('c', int), ]: check_metadata(metadata, raw_path, raw_type) def test_check_metadata_ko(): metadata = { 'a': { 'raw': 'hello', }, 'b': [], 'c': 1, } for raw_path, raw_type in [ ('a.b', dict), ('a.raw.time', str), ]: with pytest.raises(AssertionError): check_metadata(metadata, raw_path, raw_type) def test_check_metadata_paths(): metadata = { 'a': { 'raw': { 'time': 'something', }, }, 'b': [], 'c': 1, } check_metadata_paths(metadata, [ ('a.raw', dict), ('a.raw.time', str), ('b', list), ('c', int), ]) def test_check_metadata_paths_ko(): metadata = { 'a': { 'raw': 'hello', }, 'b': [], 'c': 1, } with pytest.raises(AssertionError): check_metadata_paths(metadata, [ ('a.b', dict), ('a.raw.time', str), ]) diff --git a/swh/loader/tests/conftest.py b/swh/loader/tests/conftest.py index 650ef58..385ef07 100644 --- a/swh/loader/tests/conftest.py +++ b/swh/loader/tests/conftest.py @@ -1,24 +1,32 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from typing import Any, Dict @pytest.fixture def swh_loader_config() -> Dict[str, Any]: return { 'storage': { - 'cls': 'memory', + 'cls': 'pipeline', + 'steps': [ + { + 'cls': 'validate', + }, + { + 'cls': 'memory', + }, + ], }, 'deposit': { 'url': 'https://deposit.softwareheritage.org/1/private', 'auth': { 'username': 'user', 'password': 'pass', } }, }