Page Menu | Home | Software Heritage

D8793.id31700.diff
No One | Temporary

D8793.id31700.diff

diff --git a/swh/loader/svn/replay.py b/swh/loader/svn/replay.py
--- a/swh/loader/svn/replay.py
+++ b/swh/loader/svn/replay.py
@@ -443,14 +443,30 @@
svnrepo=self.svnrepo,
)
- def add_directory(self, path: str, *args) -> DirEditor:
+ def add_directory(
+ self, path: str, copyfrom_path: Optional[str] = None, copyfrom_rev: int = -1
+ ) -> DirEditor:
"""Adding a new directory."""
path_bytes = os.fsencode(path)
+ fullpath = os.path.join(self.rootpath, path_bytes)
- os.makedirs(os.path.join(self.rootpath, path_bytes), exist_ok=True)
- if path_bytes and path_bytes not in self.directory:
- self.dir_states[path_bytes] = DirState()
- self.directory[path_bytes] = from_disk.Directory()
+ os.makedirs(fullpath, exist_ok=True)
+ if copyfrom_rev == -1:
+ if path_bytes and path_bytes not in self.directory:
+ self.dir_states[path_bytes] = DirState()
+ self.directory[path_bytes] = from_disk.Directory()
+ else:
+ url = svn_urljoin(self.svnrepo.remote_url, copyfrom_path)
+ self.remove_child(path_bytes)
+ self.svnrepo.export(
+ url,
+ to=fullpath,
+ peg_rev=copyfrom_rev,
+ ignore_keywords=True,
+ overwrite=True,
+ ignore_externals=True,
+ )
+ self.directory[path_bytes] = from_disk.Directory.from_disk(path=fullpath)
return DirEditor(
self.directory,
@@ -474,12 +490,28 @@
svnrepo=self.svnrepo,
)
- def add_file(self, path: str, *args) -> FileEditor:
+ def add_file(
+ self, path: str, copyfrom_path: Optional[str] = None, copyfrom_rev: int = -1
+ ) -> FileEditor:
"""Creating a new file."""
path_bytes = os.fsencode(path)
- self.directory[path_bytes] = from_disk.Content()
fullpath = os.path.join(self.rootpath, path_bytes)
+
self.file_states[fullpath] = FileState()
+ if copyfrom_rev == -1:
+ self.directory[path_bytes] = from_disk.Content()
+ else:
+ url = svn_urljoin(self.svnrepo.remote_url, copyfrom_path)
+ self.remove_child(path_bytes)
+ self.svnrepo.export(
+ url,
+ to=fullpath,
+ peg_rev=copyfrom_rev,
+ ignore_keywords=True,
+ overwrite=True,
+ )
+ self.directory[path_bytes] = from_disk.Content.from_file(path=fullpath)
+
return FileEditor(
self.directory,
self.rootpath,
@@ -931,7 +963,7 @@
rootpath=rootpath, directory=directory, svnrepo=svnrepo, temp_dir=temp_dir
)
- def replay(self, rev: int) -> from_disk.Directory:
+ def replay(self, rev: int, low_water_mark: int) -> from_disk.Directory:
"""Replay svn actions between rev and rev+1.
This method updates in place the self.editor.directory, as well as the
@@ -942,12 +974,12 @@
"""
codecs.register_error("strict", _ra_codecs_error_handler)
- self.conn.replay(rev, rev + 1, self.editor)
+ self.conn.replay(rev, low_water_mark, self.editor)
codecs.register_error("strict", codecs.strict_errors)
return self.editor.directory
def compute_objects(
- self, rev: int
+ self, rev: int, low_water_mark: int
) -> Tuple[List[Content], List[SkippedContent], List[Directory]]:
"""Compute objects added or modified at revisions rev.
Expects the state to be at previous revision's objects.
@@ -960,7 +992,7 @@
mutates the filesystem at rootpath accordingly.
"""
- self.replay(rev)
+ self.replay(rev, low_water_mark)
contents: List[Content] = []
skipped_contents: List[SkippedContent] = []
diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py
--- a/swh/loader/svn/svn.py
+++ b/swh/loader/svn/svn.py
@@ -220,6 +220,7 @@
"author_name": author,
"message": message,
"has_changes": has_changes,
+ "changed_paths": changed_paths,
}
def logs(self, revision_start: int, revision_end: int) -> Iterator[Dict]:
@@ -249,7 +250,7 @@
paths=None,
start=revision_start,
end=revision_end,
- discover_changed_paths=self.from_dump,
+ discover_changed_paths=True,
):
yield self.__to_entry(log_entry)
@@ -539,7 +540,23 @@
first_revision = 1 if start_revision else 0 # handle empty repository edge case
for commit in self.logs(first_revision, end_revision):
rev = commit["rev"]
- objects = self.swhreplay.compute_objects(rev)
+ copyfrom_revs = (
+ [
+ copyfrom_rev
+ for (_, _, copyfrom_rev, _) in commit["changed_paths"].values()
+ if copyfrom_rev != -1
+ ]
+ if commit["changed_paths"]
+ else None
+ )
+ low_water_mark = rev + 1
+ if copyfrom_revs:
+ # when files or directories in the revision to replay have been copied from
+ # ancestor revisions, we need to adjust the low water mark revision used by
+ # svn replay API to handle the copies in our commit editor and to ensure
+ # replace operations after copy will be replayed
+ low_water_mark = min(copyfrom_revs)
+ objects = self.swhreplay.compute_objects(rev, low_water_mark)
if rev >= start_revision:
# start yielding new data to archive once we reached the revision to
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -2197,3 +2197,92 @@
with open(dest_path, "rb") as f:
assert f.read() == content
+
+
+@pytest.mark.parametrize("svn_loader_cls", [SvnLoader, SvnLoaderFromRemoteDump])
+def test_loader_repo_with_copyfrom_and_replace_operations(
+ swh_storage, repo_url, tmp_path, svn_loader_cls
+):
+ add_commit(
+ repo_url,
+ "Create trunk/data folder",
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="trunk/data/foo",
+ data=b"foo",
+ ),
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="trunk/data/bar",
+ data=b"bar",
+ ),
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="trunk/data/baz/",
+ ),
+ ],
+ )
+
+ add_commit(
+ repo_url,
+ "Create trunk/project folder",
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="trunk/project/",
+ ),
+ ],
+ )
+
+ add_commit(
+ repo_url,
+ "Create trunk/project/bar as copy of trunk/data/bar from revision 1",
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="trunk/project/bar",
+ copyfrom_path=repo_url + "/trunk/data/bar",
+ copyfrom_rev=1,
+ ),
+ ],
+ )
+
+ add_commit(
+ repo_url,
+ (
+ "Create trunk/project/data/ folder as a copy of /trunk/data from revision 1"
+ " and replace the trunk/project/data/baz/ folder by a trunk/project/data/baz file"
+ ),
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="trunk/project/data/",
+ copyfrom_path=repo_url + "/trunk/data/",
+ copyfrom_rev=1,
+ ),
+ CommitChange(
+ change_type=CommitChangeType.Delete,
+ path="trunk/project/data/baz/",
+ ),
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="trunk/project/data/baz",
+ data=b"baz",
+ ),
+ ],
+ )
+
+ loader = svn_loader_cls(
+ swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
+ )
+
+ assert loader.load() == {"status": "eventful"}
+
+ assert_last_visit_matches(
+ loader.storage,
+ repo_url,
+ status="full",
+ type="svn",
+ )
+ check_snapshot(loader.snapshot, loader.storage)
diff --git a/swh/loader/svn/tests/utils.py b/swh/loader/svn/tests/utils.py
--- a/swh/loader/svn/tests/utils.py
+++ b/swh/loader/svn/tests/utils.py
@@ -23,6 +23,8 @@
path: str
properties: Dict[str, str]
data: bytes
+ copyfrom_path: str
+ copyfrom_rev: int
def add_commit(repo_url: str, message: str, changes: List[CommitChange]) -> None:
@@ -35,17 +37,19 @@
else:
dir_change = change["path"].endswith("/")
split_path = change["path"].rstrip("/").split("/")
+ copyfrom_path = change.get("copyfrom_path")
+ copyfrom_rev = change.get("copyfrom_rev", -1)
for i in range(len(split_path)):
path = "/".join(split_path[0 : i + 1])
if i < len(split_path) - 1:
try:
- root.add_directory(path).close()
+ root.add_directory(path, copyfrom_path, copyfrom_rev).close()
except SubversionException:
pass
else:
if dir_change:
try:
- dir = root.add_directory(path)
+ dir = root.add_directory(path, copyfrom_path, copyfrom_rev)
except SubversionException:
dir = root.open_directory(path)
if "properties" in change:
@@ -54,7 +58,7 @@
dir.close()
else:
try:
- file = root.add_file(path)
+ file = root.add_file(path, copyfrom_path, copyfrom_rev)
except SubversionException:
file = root.open_file(path)
if "properties" in change:

File Metadata

Mime Type
text/plain
Expires
Wed, Sep 17, 4:47 PM (2 h, 4 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227470

Event Timeline