Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/core/tests/test_loader.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import hashlib | import hashlib | ||||
import logging | import logging | ||||
from swh.loader.core.loader import DEFAULT_CONFIG, BaseLoader, DVCSLoader | from swh.loader.core.loader import DEFAULT_CONFIG, BaseLoader, DVCSLoader | ||||
from swh.loader.tests import assert_last_visit_matches | from swh.loader.tests import assert_last_visit_matches | ||||
from swh.model.hashutil import hash_to_bytes | |||||
from swh.model.model import Origin, OriginVisit, Snapshot | from swh.model.model import Origin, OriginVisit, Snapshot | ||||
# Origin whose last visit is inspected by the load-failure checks in this module.
ORIGIN = Origin(url="some-url")
class DummyLoader: | class DummyLoader: | ||||
"""Base Loader to overload and simplify the base class (technical: to avoid repetition | """Base Loader to overload and simplify the base class (technical: to avoid repetition | ||||
in other *Loader classes)""" | in other *Loader classes)""" | ||||
▲ Show 20 Lines • Show All 106 Lines • ▼ Show 20 Lines | def test_loader_save_data_path(swh_config, tmp_path): | ||||
hash_url = hashlib.sha1(url.encode("utf-8")).hexdigest() | hash_url = hashlib.sha1(url.encode("utf-8")).hexdigest() | ||||
expected_save_path = "%s/sha1:%s/%s/2019" % (str(tmp_path), hash_url[0:2], hash_url) | expected_save_path = "%s/sha1:%s/%s/2019" % (str(tmp_path), hash_url[0:2], hash_url) | ||||
save_path = loader.get_save_data_path() | save_path = loader.get_save_data_path() | ||||
assert save_path == expected_save_path | assert save_path == expected_save_path | ||||
def _check_load_failure(caplog, loader, exc_class, exc_text, status="partial"):
    """Check that a failed load logged its exception and left the expected visit.

    Args:
        caplog: log-capture fixture whose ``records`` are inspected.
        loader: loader instance on which ``load()`` has already been called.
        exc_class: exception type expected in each ERROR record's ``exc_info``.
        exc_text: substring expected in the first argument of that exception.
        status: status the last visit of ``ORIGIN`` must have; when it is
            anything other than ``"partial"``, the visit and the loader must
            additionally not reference any snapshot.
    """
    # NOTE(review): if no ERROR record was captured this loop asserts nothing,
    # so the logging part of the check passes vacuously.
    for record in caplog.records:
        if record.levelname != "ERROR":
            continue
        assert "Loading failure" in record.message
        assert record.exc_info
        exc = record.exc_info[1]
        assert isinstance(exc, exc_class)
        assert exc_text in exc.args[0]

    # Check that the get_snapshot operation would have succeeded
    assert loader.get_snapshot() is not None

    # Confirm the status recorded for the last visit of the origin
    visit = assert_last_visit_matches(loader.storage, ORIGIN.url, status)
    if status != "partial":
        # The visit must not reference a snapshot...
        assert visit.snapshot is None
        # ...and the snapshot must not have been loaded
        assert loader.loaded_snapshot_id is None
class DummyDVCSLoaderExc(DummyDVCSLoader):
    """A loader which raises an exception when loading some contents"""

    def get_contents(self):
        """Always fail with a ``RuntimeError`` instead of returning contents."""
        raise RuntimeError("Failed to get contents!")
def test_dvcs_loader_exc_partial_visit(swh_config, caplog):
    """A failure in ``get_contents`` yields a failed load and a partial visit."""
    logger_name = "dvcsloaderexc"
    caplog.set_level(logging.ERROR, logger=logger_name)

    loader = DummyDVCSLoaderExc(logging_class=logger_name)
    # fake the loading ending up in a snapshot
    loader.loaded_snapshot_id = hash_to_bytes(
        "9e4dd2b40d1b46b70917c0949aa2195c823a648e"
    )
    result = loader.load()

    # loading failed
    assert result == {"status": "failed"}

    # still resulted in a partial visit with a snapshot (somehow)
    _check_load_failure(
        caplog, loader, RuntimeError, "Failed to get contents!",
    )
class BrokenStorageProxy:
    """Storage wrapper whose ``snapshot_add`` always fails.

    Every other attribute access is forwarded to the wrapped storage.
    """

    def __init__(self, storage):
        self.storage = storage

    def __getattr__(self, attr):
        # Only reached for names not found on the proxy itself, so
        # ``snapshot_add`` below is never forwarded.
        return getattr(self.storage, attr)

    def snapshot_add(self, snapshots):
        """Raise ``RuntimeError`` unconditionally; ``snapshots`` is ignored."""
        raise RuntimeError("Failed to add snapshot!")
class DummyDVCSLoaderStorageExc(DummyDVCSLoader):
    """A loader whose storage raises an exception when adding a snapshot"""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Swap in a proxy whose snapshot_add always raises
        self.storage = BrokenStorageProxy(self.storage)
def test_dvcs_loader_storage_exc_failed_visit(swh_config, caplog):
    """A failure in ``snapshot_add`` yields a failed load and a failed visit."""
    logger_name = "dvcsloaderexc"
    caplog.set_level(logging.ERROR, logger=logger_name)

    loader = DummyDVCSLoaderStorageExc(logging_class=logger_name)
    result = loader.load()

    assert result == {"status": "failed"}

    _check_load_failure(
        caplog, loader, RuntimeError, "Failed to add snapshot!", status="failed"
    )