diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py --- a/swh/loader/svn/loader.py +++ b/swh/loader/svn/loader.py @@ -542,6 +542,20 @@ def visit_status(self): return self._visit_status + def post_load(self, success: bool = True) -> None: + if success and self._last_revision is not None: + # force revision divergence check + self.check_revision = 1 + # check if the reconstructed filesystem for the last loaded revision is + # consistent with the one obtained with an svn export operation; if it is + # not, an exception will be raised to report the issue and mark the visit + # as partial + self._check_revision_divergence( + self.check_revision, + int(dict(self._last_revision.extra_headers)[b"svn_revision"]), + self._last_revision.directory, + ) + class SvnLoaderFromDumpArchive(SvnLoader): """Uncompress an archive containing an svn dump, mount the svn dump as diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -130,6 +130,7 @@ archive_dump = os.path.join(datadir, "penguinsdbtools2018.dump.gz") loading_path = str(tmp_path / "loading") + os.mkdir(loading_path) # Prepare the dump as a local svn repository for test purposes temp_dir, repo_path = init_svn_repo_from_dump( @@ -761,7 +762,9 @@ loader = SvnLoader(swh_storage, repo_url, destination_path=tmp_path) assert loader.load() == {"status": "eventful"} - assert loader.visit_status() == "full" + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) def test_loader_svn_loader_from_dump_archive(swh_storage, datadir, tmp_path): @@ -911,7 +914,10 @@ ) assert loader.load() == {"status": "eventful"} - assert loader.visit_status() == "full" + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) + assert get_stats(loader.storage) == { "content": 2, "directory": 5, @@ -974,7 +980,9 @@ ) assert loader.load() == {"status": "eventful"} - 
assert loader.visit_status() == "full" + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) # check loaded objects are those expected assert get_stats(loader.storage) == { @@ -1064,7 +1072,9 @@ ) assert loader.load() == {"status": "eventful"} - assert loader.visit_status() == "full" + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) # check loaded objects are those expected assert get_stats(loader.storage) == { @@ -1127,7 +1137,9 @@ ) assert loader.load() == {"status": "eventful"} - assert loader.visit_status() == "full" + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) paths = get_head_revision_paths_info(loader) # end of lines should not have been processed @@ -1166,8 +1178,12 @@ loader = SvnLoader(swh_storage, repo_url, destination_path=tmp_path) - assert loader.load() == {"status": "eventful"} - assert loader.visit_status() == "full" + # post loading will detect an issue and make a partial visit with a snapshot + assert loader.load() == {"status": "failed"} + + assert_last_visit_matches( + loader.storage, repo_url, status="partial", type="svn", + ) assert get_stats(loader.storage) == { "content": 2, @@ -1233,4 +1249,30 @@ ) assert loader.load() == {"status": "eventful"} - assert loader.visit_status() == "full" + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) + + +def test_loader_last_revision_divergence(swh_storage, datadir, tmp_path): + archive_name = "pkg-gourmet" + archive_path = os.path.join(datadir, f"{archive_name}.tgz") + repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) + + class SvnLoaderRevisionDivergence(SvnLoader): + def _check_revision_divergence(self, count, rev, dir_id): + raise ValueError("revision divergence detected") + + loader = SvnLoaderRevisionDivergence( + swh_storage, repo_url, destination_path=tmp_path + ) + + assert loader.load() == {"status": "failed"} + + 
assert_last_visit_matches( + loader.storage, + repo_url, + status="partial", + type="svn", + snapshot=GOURMET_SNAPSHOT.id, + )