diff --git a/swh/loader/svn/ra.py b/swh/loader/svn/ra.py --- a/swh/loader/svn/ra.py +++ b/swh/loader/svn/ra.py @@ -125,6 +125,9 @@ # EOL state check mess EOL_STYLE = {} +# keep track of non link file content with svn:special property set +SVN_SPECIAL_PATH_NON_LINK_DATA = {} + class FileEditor: """File Editor in charge of updating file on disk and memory objects. @@ -225,12 +228,32 @@ is_link, src = is_file_an_svnlink_p(self.fullpath) if is_link: self.__make_symlink(src) - else: # not a real link... + else: # not a real link ... self.link = False + # when a file with the svn:special property set is not a svn link, + # the svn export operation will extract a truncated version of that file + # if it contains a null byte (see create_special_file_from_stream + # implementation in libsvn_subr/subst.c), so ensure to produce the + # same file as the export operation. + with open(self.fullpath, "rb") as f: + content = f.read() + with open(self.fullpath, "wb") as f: + exported_data = content.split(b"\x00")[0] + if exported_data != content: + # keep track of original file content in order to restore + # it if the svn:special property gets unset in another revision + SVN_SPECIAL_PATH_NON_LINK_DATA[self.fullpath] = content + f.write(exported_data) elif os.path.islink(self.fullpath): # path was a symbolic link in previous revision but got the property # svn:special unset in current one, revert its content to svn link format self.__make_svnlink() + elif self.fullpath in SVN_SPECIAL_PATH_NON_LINK_DATA: + # path was a non link file with the svn:special property previously set + # and got truncated on export, restore its original content + with open(self.fullpath, "wb") as f: + f.write(SVN_SPECIAL_PATH_NON_LINK_DATA[self.fullpath]) + del SVN_SPECIAL_PATH_NON_LINK_DATA[self.fullpath] if not is_link: # if a link, do nothing regarding flag if self.executable == EXEC_FLAG: diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- 
a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -1076,3 +1076,60 @@
         loader.storage.content_get_data(paths[b"external_link.txt"]["sha1"])
         == b"link /home/user/data.txt"
     )
+
+
+def test_loader_svn_special_property_on_binary_file_with_null_bytes(
+    swh_storage, tmp_path
+):
+    """Check loading of a non link file with svn:special set and null bytes.
+
+    A binary file containing null bytes is first committed with the
+    svn:special property set, then a second commit sets that property to
+    None. The loader is run with check_revision=1 so that the filesystem
+    it reconstructs is compared to a subversion export after each revision.
+    """
+    # create a repository
+    repo_path = os.path.join(tmp_path, "tmprepo")
+    repos.create(repo_path)
+    repo_url = f"file://{repo_path}"
+
+    data = (
+        b"!\xff\xfea\x00p\x00t\x00-\x00c\x00y\x00g\x00.\x00s\x00h\x00\x00\x00"
+    )
+
+    # first commit: add the binary file with the svn:special property set
+    add_commit(
+        repo_url,
+        "commit 1",
+        [
+            CommitChange(
+                change_type=CommitChangeType.AddOrUpdate,
+                path="binary_file",
+                properties={"svn:special": "*"},
+                data=data,
+            ),
+        ],
+    )
+
+    # second commit: set the svn:special property of that file to None
+    add_commit(
+        repo_url,
+        "commit 2",
+        [
+            CommitChange(
+                change_type=CommitChangeType.AddOrUpdate,
+                path="binary_file",
+                properties={"svn:special": None},
+            ),
+        ],
+    )
+
+    # instantiate a svn loader checking after each processed revision that
+    # the repository filesystem it reconstructed does not differ from a subversion
+    # export of that revision
+    loader = SvnLoader(
+        swh_storage, repo_url, destination_path=tmp_path, check_revision=1
+    )
+
+    assert loader.load() == {"status": "eventful"}
+    assert loader.visit_status() == "full"