Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/core/tests/test_loader.py
Show All 12 Lines | |||||
import pytest | import pytest | ||||
from swh.loader.core.loader import ( | from swh.loader.core.loader import ( | ||||
SENTRY_ORIGIN_URL_TAG_NAME, | SENTRY_ORIGIN_URL_TAG_NAME, | ||||
SENTRY_VISIT_TYPE_TAG_NAME, | SENTRY_VISIT_TYPE_TAG_NAME, | ||||
BaseLoader, | BaseLoader, | ||||
ContentLoader, | ContentLoader, | ||||
DirectoryLoader, | DirectoryLoader, | ||||
DVCSLoader, | |||||
) | ) | ||||
from swh.loader.core.metadata_fetchers import MetadataFetcherProtocol | from swh.loader.core.metadata_fetchers import MetadataFetcherProtocol | ||||
from swh.loader.exception import NotFound, UnsupportedChecksumComputation | from swh.loader.exception import NotFound, UnsupportedChecksumComputation | ||||
from swh.loader.tests import assert_last_visit_matches | from swh.loader.tests import assert_last_visit_matches | ||||
from swh.model.hashutil import hash_to_bytes | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
MetadataFetcher, | MetadataFetcher, | ||||
Origin, | Origin, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
Snapshot, | |||||
) | ) | ||||
import swh.storage.exc | import swh.storage.exc | ||||
from .conftest import compute_hashes, compute_nar_hashes, nix_store_missing | from .conftest import compute_hashes, compute_nar_hashes, nix_store_missing | ||||
ORIGIN = Origin(url="some-url") | ORIGIN = Origin(url="some-url") | ||||
PARENT_ORIGIN = Origin(url="base-origin-url") | PARENT_ORIGIN = Origin(url="base-origin-url") | ||||
Show All 30 Lines | class DummyLoader: | ||||
def fetch_data(self): | def fetch_data(self): | ||||
pass | pass | ||||
def get_snapshot_id(self): | def get_snapshot_id(self): | ||||
return None | return None | ||||
class DummyDVCSLoader(DummyLoader, DVCSLoader): | |||||
"""DVCS Loader that does nothing in regards to DAG objects.""" | |||||
def get_contents(self): | |||||
return [] | |||||
def get_directories(self): | |||||
return [] | |||||
def get_revisions(self): | |||||
return [] | |||||
def get_releases(self): | |||||
return [] | |||||
def get_snapshot(self): | |||||
return Snapshot(branches={}) | |||||
def eventful(self): | |||||
return False | |||||
class DummyBaseLoader(DummyLoader, BaseLoader): | class DummyBaseLoader(DummyLoader, BaseLoader): | ||||
"""Buffered loader will send new data when threshold is reached""" | """Buffered loader will send new data when threshold is reached""" | ||||
def store_data(self): | def store_data(self): | ||||
pass | pass | ||||
class DummyMetadataFetcher: | class DummyMetadataFetcher: | ||||
▲ Show 20 Lines • Show All 155 Lines • ▼ Show 20 Lines | def test_base_loader_post_load_raise(swh_storage, mocker): | ||||
result = loader.load() | result = loader.load() | ||||
assert result == {"status": "failed"} | assert result == {"status": "failed"} | ||||
# ensure post_load has been called twice, once with success to True and | # ensure post_load has been called twice, once with success to True and | ||||
# once with success to False as the first post_load call raised exception | # once with success to False as the first post_load call raised exception | ||||
assert post_load.call_args_list == [mocker.call(), mocker.call(success=False)] | assert post_load.call_args_list == [mocker.call(), mocker.call(success=False)] | ||||
def test_dvcs_loader(swh_storage): | |||||
loader = DummyDVCSLoader(swh_storage) | |||||
result = loader.load() | |||||
assert result == {"status": "eventful"} | |||||
def test_dvcs_loader_with_config(swh_storage): | |||||
loader = DummyDVCSLoader(swh_storage, "another-logger") | |||||
result = loader.load() | |||||
assert result == {"status": "eventful"} | |||||
def test_loader_logger_default_name(swh_storage): | def test_loader_logger_default_name(swh_storage): | ||||
loader = DummyBaseLoader(swh_storage) | loader = DummyBaseLoader(swh_storage) | ||||
assert isinstance(loader.log, logging.Logger) | assert isinstance(loader.log, logging.Logger) | ||||
assert loader.log.name == "swh.loader.core.tests.test_loader.DummyBaseLoader" | assert loader.log.name == "swh.loader.core.tests.test_loader.DummyBaseLoader" | ||||
loader = DummyDVCSLoader(swh_storage) | |||||
assert isinstance(loader.log, logging.Logger) | |||||
assert loader.log.name == "swh.loader.core.tests.test_loader.DummyDVCSLoader" | |||||
def test_loader_logger_with_name(swh_storage): | def test_loader_logger_with_name(swh_storage): | ||||
loader = DummyBaseLoader(swh_storage, "some.logger.name") | loader = DummyBaseLoader(swh_storage, "some.logger.name") | ||||
assert isinstance(loader.log, logging.Logger) | assert isinstance(loader.log, logging.Logger) | ||||
assert loader.log.name == "some.logger.name" | assert loader.log.name == "some.logger.name" | ||||
def test_loader_save_data_path(swh_storage, tmp_path): | def test_loader_save_data_path(swh_storage, tmp_path): | ||||
Show All 9 Lines | def test_loader_save_data_path(swh_storage, tmp_path): | ||||
assert save_path == expected_save_path | assert save_path == expected_save_path | ||||
def _check_load_failure( | def _check_load_failure( | ||||
caplog, loader, exc_class, exc_text, status="partial", origin=ORIGIN | caplog, loader, exc_class, exc_text, status="partial", origin=ORIGIN | ||||
): | ): | ||||
"""Check whether a failed load properly logged its exception, and that the | """Check whether a failed load properly logged its exception, and that the | ||||
snapshot didn't get referenced in storage""" | snapshot didn't get referenced in storage""" | ||||
assert isinstance(loader, (DVCSLoader, ContentLoader, DirectoryLoader)) | assert isinstance(loader, (ContentLoader, DirectoryLoader)) | ||||
for record in caplog.records: | for record in caplog.records: | ||||
if record.levelname != "ERROR": | if record.levelname != "ERROR": | ||||
continue | continue | ||||
assert "Loading failure" in record.message | assert "Loading failure" in record.message | ||||
assert record.exc_info | assert record.exc_info | ||||
exc = record.exc_info[1] | exc = record.exc_info[1] | ||||
assert isinstance(exc, exc_class) | assert isinstance(exc, exc_class) | ||||
assert exc_text in exc.args[0] | assert exc_text in exc.args[0] | ||||
if isinstance(loader, DVCSLoader): | |||||
# Check that the get_snapshot operation would have succeeded | |||||
assert loader.get_snapshot() is not None | |||||
# And confirm that the visit doesn't reference a snapshot | # And confirm that the visit doesn't reference a snapshot | ||||
visit = assert_last_visit_matches(loader.storage, origin.url, status) | visit = assert_last_visit_matches(loader.storage, origin.url, status) | ||||
if status != "partial": | if status != "partial": | ||||
assert visit.snapshot is None | assert visit.snapshot is None | ||||
# But that the snapshot didn't get loaded | # But that the snapshot didn't get loaded | ||||
assert loader.loaded_snapshot_id is None | assert loader.loaded_snapshot_id is None | ||||
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | assert statsd_report.mock_calls == [ | ||||
1, | 1, | ||||
) | ) | ||||
for (key, value) in runtimes.items() | for (key, value) in runtimes.items() | ||||
] | ] | ||||
assert loader.statsd.namespace == "swh_loader" | assert loader.statsd.namespace == "swh_loader" | ||||
assert loader.statsd.constant_tags == {"visit_type": "my-visit-type"} | assert loader.statsd.constant_tags == {"visit_type": "my-visit-type"} | ||||
class DummyDVCSLoaderExc(DummyDVCSLoader): | |||||
"""A loader which raises an exception when loading some contents""" | |||||
def get_contents(self): | |||||
raise RuntimeError("Failed to get contents!") | |||||
def test_dvcs_loader_exc_partial_visit(swh_storage, caplog): | |||||
logger_name = "dvcsloaderexc" | |||||
caplog.set_level(logging.ERROR, logger=logger_name) | |||||
loader = DummyDVCSLoaderExc(swh_storage, logging_class=logger_name) | |||||
# fake the loading ending up in a snapshot | |||||
loader.loaded_snapshot_id = hash_to_bytes( | |||||
"9e4dd2b40d1b46b70917c0949aa2195c823a648e" | |||||
) | |||||
result = loader.load() | |||||
# loading failed | |||||
assert result == {"status": "failed"} | |||||
# still resulted in a partial visit with a snapshot (somehow) | |||||
_check_load_failure( | |||||
caplog, | |||||
loader, | |||||
RuntimeError, | |||||
"Failed to get contents!", | |||||
) | |||||
class BrokenStorageProxy: | |||||
def __init__(self, storage): | |||||
self.storage = storage | |||||
def __getattr__(self, attr): | |||||
return getattr(self.storage, attr) | |||||
def snapshot_add(self, snapshots): | |||||
raise RuntimeError("Failed to add snapshot!") | |||||
class DummyDVCSLoaderStorageExc(DummyDVCSLoader): | |||||
"""A loader which raises an exception when loading some contents""" | |||||
def __init__(self, *args, **kwargs): | |||||
super().__init__(*args, **kwargs) | |||||
self.storage = BrokenStorageProxy(self.storage) | |||||
def test_dvcs_loader_storage_exc_failed_visit(swh_storage, caplog): | |||||
logger_name = "dvcsloaderexc" | |||||
caplog.set_level(logging.ERROR, logger=logger_name) | |||||
loader = DummyDVCSLoaderStorageExc(swh_storage, logging_class=logger_name) | |||||
result = loader.load() | |||||
assert result == {"status": "failed"} | |||||
_check_load_failure( | |||||
caplog, loader, RuntimeError, "Failed to add snapshot!", status="failed" | |||||
) | |||||
class DummyDVCSLoaderNotFound(DummyDVCSLoader, BaseLoader): | |||||
"""A loader which raises a not_found exception during the prepare method call""" | |||||
def prepare(*args, **kwargs): | |||||
raise NotFound("Unknown origin!") | |||||
def load_status(self): | |||||
return { | |||||
"status": "uneventful", | |||||
} | |||||
def test_loader_not_found(swh_storage, caplog): | |||||
loader = DummyDVCSLoaderNotFound(swh_storage) | |||||
result = loader.load() | |||||
assert result == {"status": "uneventful"} | |||||
_check_load_failure(caplog, loader, NotFound, "Unknown origin!", status="not_found") | |||||
class DummyLoaderWithError(DummyBaseLoader): | class DummyLoaderWithError(DummyBaseLoader): | ||||
def prepare(self, *args, **kwargs): | def prepare(self, *args, **kwargs): | ||||
raise Exception("error") | raise Exception("error") | ||||
class DummyDVCSLoaderWithError(DummyDVCSLoader, BaseLoader): | def test_loader_sentry_tags_on_error(swh_storage, sentry_events): | ||||
def prepare(self, *args, **kwargs): | loader = DummyLoaderWithError(swh_storage) | ||||
raise Exception("error") | |||||
@pytest.mark.parametrize("loader_cls", [DummyLoaderWithError, DummyDVCSLoaderWithError]) | |||||
def test_loader_sentry_tags_on_error(swh_storage, sentry_events, loader_cls): | |||||
loader = loader_cls(swh_storage) | |||||
loader.load() | loader.load() | ||||
sentry_tags = sentry_events[0]["tags"] | sentry_tags = sentry_events[0]["tags"] | ||||
assert sentry_tags.get(SENTRY_ORIGIN_URL_TAG_NAME) == ORIGIN.url | assert sentry_tags.get(SENTRY_ORIGIN_URL_TAG_NAME) == ORIGIN.url | ||||
assert sentry_tags.get(SENTRY_VISIT_TYPE_TAG_NAME) == DummyLoader.visit_type | assert sentry_tags.get(SENTRY_VISIT_TYPE_TAG_NAME) == DummyLoader.visit_type | ||||
CONTENT_MIRROR = "https://common-lisp.net" | CONTENT_MIRROR = "https://common-lisp.net" | ||||
CONTENT_URL = f"{CONTENT_MIRROR}/project/asdf/archives/asdf-3.3.5.lisp" | CONTENT_URL = f"{CONTENT_MIRROR}/project/asdf/archives/asdf-3.3.5.lisp" | ||||
▲ Show 20 Lines • Show All 310 Lines • Show Last 20 Lines |