diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py --- a/swh/loader/core/loader.py +++ b/swh/loader/core/loader.py @@ -105,6 +105,8 @@ self.origin_metadata: Dict[str, Any] = {} + self.loaded_snapshot_id: Optional[Sha1Git] = None + # Make sure the config is sane save_data = self.config.get("save_data") if save_data: @@ -256,10 +258,6 @@ """ return "full" - def get_snapshot_id(self) -> Optional[Sha1Git]: - """Get the snapshot id that needs to be loaded""" - raise NotImplementedError - def pre_cleanup(self) -> None: """As a first step, will try and check for dangling data to cleanup. This should do its best to avoid raising issues. @@ -310,7 +308,7 @@ self.origin.url, self.visit.visit, self.visit_status(), - snapshot=self.get_snapshot_id(), + snapshot=self.loaded_snapshot_id, ) self.post_load() except Exception: @@ -322,7 +320,7 @@ self.origin.url, self.visit.visit, "partial", - snapshot=self.get_snapshot_id(), + snapshot=self.loaded_snapshot_id, ) self.post_load(success=False) return {"status": "failed"} @@ -386,10 +384,6 @@ """Get the snapshot that needs to be loaded""" raise NotImplementedError - def get_snapshot_id(self) -> Optional[Sha1Git]: - snapshot = self.get_snapshot() - return snapshot.id if snapshot else None - def eventful(self) -> bool: """Whether the load was eventful""" raise NotImplementedError @@ -417,7 +411,7 @@ self.storage.revision_add(self.get_revisions()) if self.has_releases(): self.storage.release_add(self.get_releases()) - self.flush() # to ensure the snapshot targets existing objects snapshot = self.get_snapshot() self.storage.snapshot_add([snapshot]) self.flush() + self.loaded_snapshot_id = snapshot.id diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py --- a/swh/loader/core/tests/test_loader.py +++ b/swh/loader/core/tests/test_loader.py @@ -146,3 +146,77 @@ save_path = loader.get_save_data_path() assert save_path == expected_save_path + + +def _check_load_failure(caplog, loader, exc_class, exc_text): + """Check whether a failed load properly logged its exception, and that the + snapshot didn't get referenced in storage""" + for record in caplog.records: + if record.levelname != "ERROR": + continue + assert "Loading failure" in record.message + assert record.exc_info + exc = record.exc_info[1] + assert isinstance(exc, exc_class) + assert exc_text in exc.args[0] + + # Check that the get_snapshot operation would have succeeded + assert loader.get_snapshot() is not None + + # But that the snapshot didn't get loaded + assert loader.loaded_snapshot_id is None + + # And confirm that the visit doesn't reference a snapshot + visit = loader.storage.origin_visit_get_latest(ORIGIN.url) + assert visit["status"] == "partial" + assert visit["snapshot"] is None + + +class DummyDVCSLoaderExc(DummyDVCSLoader): + """A loader which raises an exception when loading some contents""" + + def get_contents(self): + raise RuntimeError("Failed to get contents!") + + +def test_dvcs_loader_exc_partial_visit(caplog): + logger_name = "dvcsloaderexc" + caplog.set_level(logging.ERROR, logger=logger_name) + + loader = DummyDVCSLoaderExc(logging_class=logger_name) + result = loader.load() + + assert result == {"status": "failed"} + + _check_load_failure(caplog, loader, RuntimeError, "Failed to get contents!") + + +class BrokenStorageProxy: + def __init__(self, storage): + self.storage = storage + + def __getattr__(self, attr): + return getattr(self.storage, attr) + + def snapshot_add(self, snapshots): + raise RuntimeError("Failed to add snapshot!") + + +class DummyDVCSLoaderStorageExc(DummyDVCSLoader): + """A loader which raises an exception when loading some contents""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.storage = BrokenStorageProxy(self.storage) + + +def test_dvcs_loader_storage_exc_partial_visit(caplog): + logger_name = "dvcsloaderexc" + caplog.set_level(logging.ERROR, logger=logger_name) + + loader = DummyDVCSLoaderStorageExc(logging_class=logger_name) + result = loader.load() + + assert result == {"status": "failed"} + + _check_load_failure(caplog, loader, RuntimeError, "Failed to add snapshot!")