Page MenuHomeSoftware Heritage

D6601.id24043.diff
No OneTemporary

D6601.id24043.diff

diff --git a/swh/loader/svn/ra.py b/swh/loader/svn/ra.py
--- a/swh/loader/svn/ra.py
+++ b/swh/loader/svn/ra.py
@@ -11,7 +11,7 @@
import os
import shutil
import tempfile
-from typing import List, Tuple
+from typing import Dict, List, Tuple
import click
from subvertpy import delta, properties
@@ -134,6 +134,9 @@
__slots__ = ["directory", "path", "fullpath", "executable", "link"]
+ # map full path to original content of truncated svn:special non link files
+ svn_special_path_non_link_data: Dict[str, bytes] = {}
+
def __init__(self, directory, rootpath, path):
self.directory = directory
self.path = path
@@ -226,12 +229,32 @@
is_link, src = is_file_an_svnlink_p(self.fullpath)
if is_link:
self.__make_symlink(src)
- else: # not a real link...
+ else: # not a real link ...
self.link = False
+ # when a file with the svn:special property set is not a svn link,
+ # the svn export operation extracts a truncated version of that file
+ # if it contains a null byte (see the create_special_file_from_stream
+ # implementation in libsvn_subr/subst.c), so truncate it here as well
+ # to produce the same file as the export operation.
+ with open(self.fullpath, "rb") as f:
+ content = f.read()
+ with open(self.fullpath, "wb") as f:
+ exported_data = content.split(b"\x00")[0]
+ if exported_data != content:
+ # keep track of original file content in order to restore
+ # it if the svn:special property gets unset in another revision
+ self.svn_special_path_non_link_data[self.fullpath] = content
+ f.write(exported_data)
elif os.path.islink(self.fullpath):
# path was a symbolic link in previous revision but got the property
# svn:special unset in current one, revert its content to svn link format
self.__make_svnlink()
+ elif self.fullpath in self.svn_special_path_non_link_data:
+ # path was a non link file with the svn:special property previously set
+ # and got truncated on export, restore its original content
+ with open(self.fullpath, "wb") as f:
+ f.write(self.svn_special_path_non_link_data[self.fullpath])
+ del self.svn_special_path_non_link_data[self.fullpath]
if not is_link: # if a link, do nothing regarding flag
if self.executable == EXEC_FLAG:
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -1179,3 +1179,58 @@
"skipped_content": 0,
"snapshot": 1,
}
+
+
+def test_loader_svn_special_property_on_binary_file_with_null_byte(
+ swh_storage, tmp_path
+):
+ """When a file has the svn:special property set but is not a svn link,
+ it will be truncated when performing an export operation if it contains
+ a null byte. Indeed, subversion will treat the file content as text but
+ it might be a binary file containing null bytes."""
+
+ # create a repository
+ repo_path = os.path.join(tmp_path, "tmprepo")
+ repos.create(repo_path)
+ repo_url = f"file://{repo_path}"
+
+ data = (
+ b"!<symlink>\xff\xfea\x00p\x00t\x00-\x00c\x00y\x00g\x00.\x00s\x00h\x00\x00\x00"
+ )
+
+ # first commit
+ add_commit(
+ repo_url,
+ "Add a non svn link binary file and set the svn:special property on it",
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="binary_file",
+ properties={"svn:special": "*"},
+ data=data,
+ ),
+ ],
+ )
+
+ # second commit
+ add_commit(
+ repo_url,
+ "Remove the svn:special property on the previously added file",
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="binary_file",
+ properties={"svn:special": None},
+ ),
+ ],
+ )
+
+ # instantiate a svn loader that checks, after each processed revision, that
+ # the repository filesystem it reconstructed does not differ from a subversion
+ # export of that revision
+ loader = SvnLoader(
+ swh_storage, repo_url, destination_path=tmp_path, check_revision=1
+ )
+
+ assert loader.load() == {"status": "eventful"}
+ assert loader.visit_status() == "full"

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:29 PM (1 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224730

Event Timeline