diff --git a/swh/loader/svn/ra.py b/swh/loader/svn/ra.py --- a/swh/loader/svn/ra.py +++ b/swh/loader/svn/ra.py @@ -11,7 +11,7 @@ import os import shutil import tempfile -from typing import List, Tuple +from typing import Dict, List, Tuple import click from subvertpy import delta, properties @@ -133,6 +133,9 @@ __slots__ = ["directory", "path", "fullpath", "executable", "link"] + # keep track of non link file content with svn:special property set + svn_special_path_non_link_data: Dict[str, bytes] = {} + def __init__(self, directory, rootpath, path): self.directory = directory self.path = path @@ -225,12 +228,32 @@ is_link, src = is_file_an_svnlink_p(self.fullpath) if is_link: self.__make_symlink(src) - else: # not a real link... + else: # not a real link ... self.link = False + # when a file with the svn:special property set is not a svn link, + # the svn export operation will extract a truncated version of that file + # if it contains a null byte (see create_special_file_from_stream + # implementation in libsvn_subr/subst.c), so ensure to produce the + # same file as the export operation. + with open(self.fullpath, "rb") as f: + content = f.read() + with open(self.fullpath, "wb") as f: + exported_data = content.split(b"\x00")[0] + if exported_data != content: + # keep track of original file content in order to restore + # it if the svn:special property gets unset in another revision + self.svn_special_path_non_link_data[self.fullpath] = content + f.write(exported_data) elif os.path.islink(self.fullpath): # path was a symbolic link in previous revision but got the property # svn:special unset in current one, revert its content to svn link format self.__make_svnlink() + elif self.fullpath in self.svn_special_path_non_link_data: + # path was a non link file with the svn:special property previously set + # and got truncated on export, restore its original content + with open(self.fullpath, "wb") as f: + f.write(self.svn_special_path_non_link_data[self.fullpath]) + del self.svn_special_path_non_link_data[self.fullpath] if not is_link: # if a link, do nothing regarding flag if self.executable == EXEC_FLAG: diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -1076,3 +1076,58 @@ loader.storage.content_get_data(paths[b"external_link.txt"]["sha1"]) == b"link /home/user/data.txt" ) + + +def test_loader_svn_special_property_on_binary_file_with_null_byte( + swh_storage, tmp_path +): + """When a file has the svn:special property set but is not a svn link, + it will be truncated when performing an export operation if it contains + a null byte. Indeed, subversion will treat the file content as text but + it might be a binary file containing null bytes.""" + + # create a repository + repo_path = os.path.join(tmp_path, "tmprepo") + repos.create(repo_path) + repo_url = f"file://{repo_path}" + + data = ( + b"!\xff\xfea\x00p\x00t\x00-\x00c\x00y\x00g\x00.\x00s\x00h\x00\x00\x00" + ) + + # first commit + add_commit( + repo_url, + "Add a non svn link binary file and set the svn:special property on it", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="binary_file", + properties={"svn:special": "*"}, + data=data, + ), + ], + ) + + # second commit + add_commit( + repo_url, + "Remove the svn:special property on the previously added file", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="binary_file", + properties={"svn:special": None}, + ), + ], + ) + + # instantiate a svn loader checking after each processed revision that + # the repository filesystem it reconstructed does not differ from a subversion + # export of that revision + loader = SvnLoader( + swh_storage, repo_url, destination_path=tmp_path, check_revision=1 + ) + + assert loader.load() == {"status": "eventful"} + assert loader.visit_status() == "full"