diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py --- a/swh/loader/svn/loader.py +++ b/swh/loader/svn/loader.py @@ -8,6 +8,7 @@ """ from datetime import datetime +import difflib import os import pty import re @@ -41,6 +42,7 @@ OutputStream, init_svn_repo_from_archive_dump, init_svn_repo_from_dump, + svn_urljoin, ) DEFAULT_BRANCH = b"HEAD" @@ -331,6 +333,44 @@ obj.object_type, # type: ignore path, ) + if obj.object_type == "content": # type: ignore + self.log.debug( + "expected sha1: %s, actual sha1: %s", + hashutil.hash_to_hex(checked_dir[path].data["sha1"]), + hashutil.hash_to_hex(dir[path].data["sha1"]), + ) + # compute and display diff between contents + file_path = ( + checked_dir[path] + .data["path"] + .replace(checked_dir.data["path"], b"") + ).decode() + with tempfile.TemporaryDirectory() as tmpdir: + export_path = os.path.join( + tmpdir, os.path.basename(file_path) + ) + assert self.svnrepo is not None + self.svnrepo.export( + url=svn_urljoin(self.svnrepo.remote_url, file_path), + to=export_path, + rev=rev, + peg_rev=rev, + ignore_keywords=True, + overwrite=True, + ) + with open(export_path, "rb") as exported_file, open( + dir[path].data["path"], "rb" + ) as checkout_file: + diff_lines = difflib.diff_bytes( + difflib.unified_diff, + exported_file.read().split(b"\n"), + checkout_file.read().split(b"\n"), + ) + self.log.debug( + "below is diff between files:\n" + + os.fsdecode(b"\n".join(list(diff_lines)[2:])) + ) + err = ( "Hash tree computation divergence detected at revision %s " "(%s != %s), stopping!" diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -2349,5 +2349,9 @@ "directory with path b'trunk/data' has different hash in reconstructed repository filesystem", # noqa "content with path b'trunk/data/foo' has different hash in reconstructed repository filesystem", # noqa "directory with path b'trunk/data/baz' is missing in reconstructed repository filesystem", # noqa + "below is diff between files:", + "@@ -1 +1 @@", + "-foo", + "+baz", ): assert debug_log in caplog.text