diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py --- a/swh/loader/svn/loader.py +++ b/swh/loader/svn/loader.py @@ -273,11 +273,15 @@ if not self.check_history_not_altered( self.svnrepo, revision_start, self.latest_revision ): - msg = "History of svn %s@%s altered. Skipping..." % ( + self.log.debug( + ( + "History of svn %s@%s altered. " + "A complete reloading of the repository will be performed." + ), self.svnrepo.remote_url, revision_start, ) - raise SvnLoaderHistoryAltered(msg) + revision_start = self.svnrepo.initial_revision() # now we know history is ok, we start at next revision revision_start = revision_start + 1 diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -254,17 +254,17 @@ commit log [1]. This results in a hash divergence which is detected at startup after a new run for the same origin. - In effect, that stops the loading and do nothing. + In effect, this will perform a complete reloading of the repository. - [1] Tampering with revision 6 log message following: + [1] Tampering with revision 6 log message following: - ``` + ``` tar xvf pkg-gourmet.tgz # initial repository ingested cd pkg-gourmet/ echo "Tampering with commit log message for fun and profit" > log.txt svnadmin setlog . -r 6 log.txt --bypass-hooks tar cvf pkg-gourmet-tampered-rev6-log.tgz pkg-gourmet/ - ``` + ``` """ archive_name = "pkg-gourmet" archive_path = os.path.join(datadir, f"{archive_name}.tgz") @@ -274,22 +274,34 @@ assert loader.load() == {"status": "eventful"} check_snapshot(GOURMET_SNAPSHOT, loader.storage) + assert_last_visit_matches( + loader.storage, + repo_url, + status="full", + type="svn", + snapshot=GOURMET_SNAPSHOT.id, + ) + archive_path2 = os.path.join(datadir, "pkg-gourmet-tampered-rev6-log.tgz") repo_tampered_url = prepare_repository_from_archive( archive_path2, archive_name, tmp_path ) loader2 = SvnLoader(swh_storage, repo_tampered_url, origin_url=repo_url) - assert loader2.load() == {"status": "failed"} + assert loader2.load() == {"status": "eventful"} assert_last_visit_matches( - loader2.storage, repo_url, status="failed", type="svn", snapshot=None, + loader2.storage, + repo_url, + status="full", + type="svn", + snapshot=hash_to_bytes("c499eebc1e201024d47d24053ac0080049305897"), ) stats = get_stats(loader.storage) assert stats["origin"] == 1 assert stats["origin_visit"] == 2 - assert stats["snapshot"] == 1 + assert stats["snapshot"] == 2 def test_loader_svn_visit_with_changes(swh_storage, datadir, tmp_path):