diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py --- a/swh/loader/core/loader.py +++ b/swh/loader/core/loader.py @@ -105,6 +105,8 @@ self.origin_metadata: Dict[str, Any] = {} + self.loaded_snapshot: Optional[Sha1Git] = None + # Make sure the config is sane save_data = self.config.get("save_data") if save_data: @@ -256,10 +258,6 @@ """ return "full" - def get_snapshot_id(self) -> Optional[Sha1Git]: - """Get the snapshot id that needs to be loaded""" - raise NotImplementedError - def pre_cleanup(self) -> None: """As a first step, will try and check for dangling data to cleanup. This should do its best to avoid raising issues. @@ -310,7 +308,7 @@ self.origin.url, self.visit.visit, self.visit_status(), - snapshot=self.get_snapshot_id(), + snapshot=self.loaded_snapshot, ) self.post_load() except Exception: @@ -322,7 +320,7 @@ self.origin.url, self.visit.visit, "partial", - snapshot=self.get_snapshot_id(), + snapshot=self.loaded_snapshot, ) self.post_load(success=False) return {"status": "failed"} @@ -382,14 +380,10 @@ """Get the releases that need to be loaded""" raise NotImplementedError - def get_snapshot(self) -> Snapshot: + def get_snapshot(self) -> Optional[Snapshot]: """Get the snapshot that needs to be loaded""" raise NotImplementedError - def get_snapshot_id(self) -> Optional[Sha1Git]: - snapshot = self.get_snapshot() - return snapshot.id if snapshot else None - def eventful(self) -> bool: """Whether the load was eventful""" raise NotImplementedError @@ -417,7 +411,9 @@ self.storage.revision_add(self.get_revisions()) if self.has_releases(): self.storage.release_add(self.get_releases()) - self.flush() # to ensure the snapshot targets existing objects snapshot = self.get_snapshot() + if snapshot is None: + raise ValueError("Unexpected null snapshot!") self.storage.snapshot_add([snapshot]) self.flush() + self.loaded_snapshot = snapshot.id diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py --- a/swh/loader/core/tests/test_loader.py +++ b/swh/loader/core/tests/test_loader.py @@ -103,6 +103,20 @@ pass +class DummyDVCSLoaderExc(DummyDVCSLoader): + """A loader which raises an exception when loading some contents""" + + def get_contents(self): + raise RuntimeError("Failed to get contents!") + + +class DummyDVCSLoaderNoSnapshot(DummyDVCSLoader): + """A loader which fails to provide a snapshot""" + + def get_snapshot(self): + return None + + def test_base_loader(): loader = DummyBaseLoader() result = loader.load() @@ -146,3 +160,52 @@ save_path = loader.get_save_data_path() assert save_path == expected_save_path + + +def test_dvcs_loader_exc_partial_visit(caplog): + logger_name = "dvcsloaderexc" + caplog.set_level(logging.ERROR, logger=logger_name) + + loader = DummyDVCSLoaderExc(logging_class=logger_name) + result = loader.load() + + assert result == {"status": "failed"} + + for record in caplog.records: + if record.levelname != "ERROR": + continue + assert "Loading failure" in record.message + assert record.exc_info + exc = record.exc_info[1] + assert isinstance(exc, RuntimeError) + assert "Failed to get contents!" in exc.args[0] + + # Check that the get_snapshot operation would have succeeded + assert loader.get_snapshot() is not None + + # But that the snapshot didn't get loaded + assert loader.loaded_snapshot is None + + # And confirm that the visit doesn't reference a snapshot + visit = loader.storage.origin_visit_get_latest(ORIGIN.url) + assert visit["status"] == "partial" + assert visit["snapshot"] is None + + +def test_dvcs_loader_exc_no_snapshot(caplog): + logger_name = "dvcsloadernosnapshot" + caplog.set_level(logging.ERROR, logger=logger_name) + + loader = DummyDVCSLoaderNoSnapshot(logging_class=logger_name) + result = loader.load() + + assert result == {"status": "failed"} + + for record in caplog.records: + if record.levelname != "ERROR": + continue + assert "Loading failure" in record.message + assert record.exc_info + exc = record.exc_info[1] + assert isinstance(exc, ValueError) + assert "null snapshot" in exc.args[0]