Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/core/tests/test_loader.py
| Show All 12 Lines | |||||
| import pytest | import pytest | ||||
| from swh.loader.core.loader import ( | from swh.loader.core.loader import ( | ||||
| SENTRY_ORIGIN_URL_TAG_NAME, | SENTRY_ORIGIN_URL_TAG_NAME, | ||||
| SENTRY_VISIT_TYPE_TAG_NAME, | SENTRY_VISIT_TYPE_TAG_NAME, | ||||
| BaseLoader, | BaseLoader, | ||||
| ContentLoader, | ContentLoader, | ||||
| DirectoryLoader, | DirectoryLoader, | ||||
| DVCSLoader, | |||||
| ) | ) | ||||
| from swh.loader.core.metadata_fetchers import MetadataFetcherProtocol | from swh.loader.core.metadata_fetchers import MetadataFetcherProtocol | ||||
| from swh.loader.exception import NotFound, UnsupportedChecksumComputation | from swh.loader.exception import NotFound, UnsupportedChecksumComputation | ||||
| from swh.loader.tests import assert_last_visit_matches | from swh.loader.tests import assert_last_visit_matches | ||||
| from swh.model.hashutil import hash_to_bytes | |||||
| from swh.model.model import ( | from swh.model.model import ( | ||||
| MetadataAuthority, | MetadataAuthority, | ||||
| MetadataAuthorityType, | MetadataAuthorityType, | ||||
| MetadataFetcher, | MetadataFetcher, | ||||
| Origin, | Origin, | ||||
| RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
| Snapshot, | |||||
| ) | ) | ||||
| import swh.storage.exc | import swh.storage.exc | ||||
| from .conftest import compute_hashes, compute_nar_hashes, nix_store_missing | from .conftest import compute_hashes, compute_nar_hashes, nix_store_missing | ||||
| ORIGIN = Origin(url="some-url") | ORIGIN = Origin(url="some-url") | ||||
| PARENT_ORIGIN = Origin(url="base-origin-url") | PARENT_ORIGIN = Origin(url="base-origin-url") | ||||
| Show All 30 Lines | class DummyLoader: | ||||
| def fetch_data(self): | def fetch_data(self): | ||||
| pass | pass | ||||
| def get_snapshot_id(self): | def get_snapshot_id(self): | ||||
| return None | return None | ||||
| class DummyDVCSLoader(DummyLoader, DVCSLoader): | |||||
| """DVCS Loader that does nothing in regards to DAG objects.""" | |||||
| def get_contents(self): | |||||
| return [] | |||||
| def get_directories(self): | |||||
| return [] | |||||
| def get_revisions(self): | |||||
| return [] | |||||
| def get_releases(self): | |||||
| return [] | |||||
| def get_snapshot(self): | |||||
| return Snapshot(branches={}) | |||||
| def eventful(self): | |||||
| return False | |||||
| class DummyBaseLoader(DummyLoader, BaseLoader): | class DummyBaseLoader(DummyLoader, BaseLoader): | ||||
| """Buffered loader will send new data when threshold is reached""" | """Buffered loader will send new data when threshold is reached""" | ||||
| def store_data(self): | def store_data(self): | ||||
| pass | pass | ||||
| class DummyMetadataFetcher: | class DummyMetadataFetcher: | ||||
| ▲ Show 20 Lines • Show All 155 Lines • ▼ Show 20 Lines | def test_base_loader_post_load_raise(swh_storage, mocker): | ||||
| result = loader.load() | result = loader.load() | ||||
| assert result == {"status": "failed"} | assert result == {"status": "failed"} | ||||
| # ensure post_load has been called twice, once with success to True and | # ensure post_load has been called twice, once with success to True and | ||||
| # once with success to False as the first post_load call raised exception | # once with success to False as the first post_load call raised exception | ||||
| assert post_load.call_args_list == [mocker.call(), mocker.call(success=False)] | assert post_load.call_args_list == [mocker.call(), mocker.call(success=False)] | ||||
| def test_dvcs_loader(swh_storage): | |||||
| loader = DummyDVCSLoader(swh_storage) | |||||
| result = loader.load() | |||||
| assert result == {"status": "eventful"} | |||||
| def test_dvcs_loader_with_config(swh_storage): | |||||
| loader = DummyDVCSLoader(swh_storage, "another-logger") | |||||
| result = loader.load() | |||||
| assert result == {"status": "eventful"} | |||||
| def test_loader_logger_default_name(swh_storage): | def test_loader_logger_default_name(swh_storage): | ||||
| loader = DummyBaseLoader(swh_storage) | loader = DummyBaseLoader(swh_storage) | ||||
| assert isinstance(loader.log, logging.Logger) | assert isinstance(loader.log, logging.Logger) | ||||
| assert loader.log.name == "swh.loader.core.tests.test_loader.DummyBaseLoader" | assert loader.log.name == "swh.loader.core.tests.test_loader.DummyBaseLoader" | ||||
| loader = DummyDVCSLoader(swh_storage) | |||||
| assert isinstance(loader.log, logging.Logger) | |||||
| assert loader.log.name == "swh.loader.core.tests.test_loader.DummyDVCSLoader" | |||||
| def test_loader_logger_with_name(swh_storage): | def test_loader_logger_with_name(swh_storage): | ||||
| loader = DummyBaseLoader(swh_storage, "some.logger.name") | loader = DummyBaseLoader(swh_storage, "some.logger.name") | ||||
| assert isinstance(loader.log, logging.Logger) | assert isinstance(loader.log, logging.Logger) | ||||
| assert loader.log.name == "some.logger.name" | assert loader.log.name == "some.logger.name" | ||||
| def test_loader_save_data_path(swh_storage, tmp_path): | def test_loader_save_data_path(swh_storage, tmp_path): | ||||
| Show All 9 Lines | def test_loader_save_data_path(swh_storage, tmp_path): | ||||
| assert save_path == expected_save_path | assert save_path == expected_save_path | ||||
| def _check_load_failure( | def _check_load_failure( | ||||
| caplog, loader, exc_class, exc_text, status="partial", origin=ORIGIN | caplog, loader, exc_class, exc_text, status="partial", origin=ORIGIN | ||||
| ): | ): | ||||
| """Check whether a failed load properly logged its exception, and that the | """Check whether a failed load properly logged its exception, and that the | ||||
| snapshot didn't get referenced in storage""" | snapshot didn't get referenced in storage""" | ||||
| assert isinstance(loader, (DVCSLoader, ContentLoader, DirectoryLoader)) | assert isinstance(loader, (ContentLoader, DirectoryLoader)) | ||||
| for record in caplog.records: | for record in caplog.records: | ||||
| if record.levelname != "ERROR": | if record.levelname != "ERROR": | ||||
| continue | continue | ||||
| assert "Loading failure" in record.message | assert "Loading failure" in record.message | ||||
| assert record.exc_info | assert record.exc_info | ||||
| exc = record.exc_info[1] | exc = record.exc_info[1] | ||||
| assert isinstance(exc, exc_class) | assert isinstance(exc, exc_class) | ||||
| assert exc_text in exc.args[0] | assert exc_text in exc.args[0] | ||||
| if isinstance(loader, DVCSLoader): | |||||
| # Check that the get_snapshot operation would have succeeded | |||||
| assert loader.get_snapshot() is not None | |||||
| # And confirm that the visit doesn't reference a snapshot | # And confirm that the visit doesn't reference a snapshot | ||||
| visit = assert_last_visit_matches(loader.storage, origin.url, status) | visit = assert_last_visit_matches(loader.storage, origin.url, status) | ||||
| if status != "partial": | if status != "partial": | ||||
| assert visit.snapshot is None | assert visit.snapshot is None | ||||
| # But that the snapshot didn't get loaded | # But that the snapshot didn't get loaded | ||||
| assert loader.loaded_snapshot_id is None | assert loader.loaded_snapshot_id is None | ||||
| ▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | assert statsd_report.mock_calls == [ | ||||
| 1, | 1, | ||||
| ) | ) | ||||
| for (key, value) in runtimes.items() | for (key, value) in runtimes.items() | ||||
| ] | ] | ||||
| assert loader.statsd.namespace == "swh_loader" | assert loader.statsd.namespace == "swh_loader" | ||||
| assert loader.statsd.constant_tags == {"visit_type": "my-visit-type"} | assert loader.statsd.constant_tags == {"visit_type": "my-visit-type"} | ||||
| class DummyDVCSLoaderExc(DummyDVCSLoader): | |||||
| """A loader which raises an exception when loading some contents""" | |||||
| def get_contents(self): | |||||
| raise RuntimeError("Failed to get contents!") | |||||
| def test_dvcs_loader_exc_partial_visit(swh_storage, caplog): | |||||
| logger_name = "dvcsloaderexc" | |||||
| caplog.set_level(logging.ERROR, logger=logger_name) | |||||
| loader = DummyDVCSLoaderExc(swh_storage, logging_class=logger_name) | |||||
| # fake the loading ending up in a snapshot | |||||
| loader.loaded_snapshot_id = hash_to_bytes( | |||||
| "9e4dd2b40d1b46b70917c0949aa2195c823a648e" | |||||
| ) | |||||
| result = loader.load() | |||||
| # loading failed | |||||
| assert result == {"status": "failed"} | |||||
| # still resulted in a partial visit with a snapshot (somehow) | |||||
| _check_load_failure( | |||||
| caplog, | |||||
| loader, | |||||
| RuntimeError, | |||||
| "Failed to get contents!", | |||||
| ) | |||||
| class BrokenStorageProxy: | |||||
| def __init__(self, storage): | |||||
| self.storage = storage | |||||
| def __getattr__(self, attr): | |||||
| return getattr(self.storage, attr) | |||||
| def snapshot_add(self, snapshots): | |||||
| raise RuntimeError("Failed to add snapshot!") | |||||
| class DummyDVCSLoaderStorageExc(DummyDVCSLoader): | |||||
| """A loader which raises an exception when loading some contents""" | |||||
| def __init__(self, *args, **kwargs): | |||||
| super().__init__(*args, **kwargs) | |||||
| self.storage = BrokenStorageProxy(self.storage) | |||||
| def test_dvcs_loader_storage_exc_failed_visit(swh_storage, caplog): | |||||
| logger_name = "dvcsloaderexc" | |||||
| caplog.set_level(logging.ERROR, logger=logger_name) | |||||
| loader = DummyDVCSLoaderStorageExc(swh_storage, logging_class=logger_name) | |||||
| result = loader.load() | |||||
| assert result == {"status": "failed"} | |||||
| _check_load_failure( | |||||
| caplog, loader, RuntimeError, "Failed to add snapshot!", status="failed" | |||||
| ) | |||||
| class DummyDVCSLoaderNotFound(DummyDVCSLoader, BaseLoader): | |||||
| """A loader which raises a not_found exception during the prepare method call""" | |||||
| def prepare(*args, **kwargs): | |||||
| raise NotFound("Unknown origin!") | |||||
| def load_status(self): | |||||
| return { | |||||
| "status": "uneventful", | |||||
| } | |||||
| def test_loader_not_found(swh_storage, caplog): | |||||
| loader = DummyDVCSLoaderNotFound(swh_storage) | |||||
| result = loader.load() | |||||
| assert result == {"status": "uneventful"} | |||||
| _check_load_failure(caplog, loader, NotFound, "Unknown origin!", status="not_found") | |||||
| class DummyLoaderWithError(DummyBaseLoader): | class DummyLoaderWithError(DummyBaseLoader): | ||||
| def prepare(self, *args, **kwargs): | def prepare(self, *args, **kwargs): | ||||
| raise Exception("error") | raise Exception("error") | ||||
| class DummyDVCSLoaderWithError(DummyDVCSLoader, BaseLoader): | def test_loader_sentry_tags_on_error(swh_storage, sentry_events): | ||||
| def prepare(self, *args, **kwargs): | loader = DummyLoaderWithError(swh_storage) | ||||
| raise Exception("error") | |||||
| @pytest.mark.parametrize("loader_cls", [DummyLoaderWithError, DummyDVCSLoaderWithError]) | |||||
| def test_loader_sentry_tags_on_error(swh_storage, sentry_events, loader_cls): | |||||
| loader = loader_cls(swh_storage) | |||||
| loader.load() | loader.load() | ||||
| sentry_tags = sentry_events[0]["tags"] | sentry_tags = sentry_events[0]["tags"] | ||||
| assert sentry_tags.get(SENTRY_ORIGIN_URL_TAG_NAME) == ORIGIN.url | assert sentry_tags.get(SENTRY_ORIGIN_URL_TAG_NAME) == ORIGIN.url | ||||
| assert sentry_tags.get(SENTRY_VISIT_TYPE_TAG_NAME) == DummyLoader.visit_type | assert sentry_tags.get(SENTRY_VISIT_TYPE_TAG_NAME) == DummyLoader.visit_type | ||||
| CONTENT_MIRROR = "https://common-lisp.net" | CONTENT_MIRROR = "https://common-lisp.net" | ||||
| CONTENT_URL = f"{CONTENT_MIRROR}/project/asdf/archives/asdf-3.3.5.lisp" | CONTENT_URL = f"{CONTENT_MIRROR}/project/asdf/archives/asdf-3.3.5.lisp" | ||||
| ▲ Show 20 Lines • Show All 310 Lines • Show Last 20 Lines | |||||