diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py
--- a/swh/loader/svn/svn.py
+++ b/swh/loader/svn/svn.py
@@ -250,7 +250,16 @@
             - complete Directory representation
 
         """
-        for commit in self.logs(start_revision, end_revision):
+        # Even in incremental loading mode, the whole set of path modifications
+        # has to be replayed from the first revision: this restores the file
+        # states induced by svn properties set on those files (end of line
+        # style for instance).
+        if start_revision:
+            first_revision = 1
+        else:
+            # handle empty repository edge case
+            first_revision = 0
+        for commit in self.logs(first_revision, end_revision):
             rev = commit["rev"]
             objects = self.swhreplay.compute_objects(rev)
 
@@ -259,7 +268,11 @@
             else:
                 nextrev = rev + 1
 
-            yield rev, nextrev, commit, objects, self.swhreplay.directory
+            if rev < start_revision:
+                # replayed only to rebuild file states: new data to archive is
+                # yielded once the revision to resume the loading from is reached
+                continue
+            yield rev, nextrev, commit, objects, self.swhreplay.directory
 
     def swh_hash_data_at_revision(
         self, revision: int
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -1456,3 +1456,81 @@
     assert_last_visit_matches(
         loader.storage, repo_url, status="full", type="svn",
     )
+
+
+def test_svn_loader_incremental(swh_storage, tmp_path):
+    """Check successive loads of a repository whose file has svn:eol-style set.
+
+    A load is performed after each of the three commits: the first one adds a
+    file with CRLF end of line and svn:eol-style set to native, the second one
+    modifies that file and the third one unsets the property.
+    """
+    file_path = "file_with_crlf_eol.txt"
+
+    # create a repository
+    repo_path = os.path.join(tmp_path, "tmprepo")
+    repos.create(repo_path)
+    repo_url = f"file://{repo_path}"
+
+    def load_and_check_visit():
+        # every load goes through a newly created loader
+        loader = SvnLoader(
+            swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
+        )
+        assert loader.load() == {"status": "eventful"}
+        assert_last_visit_matches(
+            loader.storage, repo_url, status="full", type="svn",
+        )
+
+    # first commit
+    add_commit(
+        repo_url,
+        (
+            "Add a directory containing a file with CRLF end of line "
+            "and set svn:eol-style property to native so CRLF will be "
+            "replaced by LF in the file when exporting the revision"
+        ),
+        [
+            CommitChange(
+                change_type=CommitChangeType.AddOrUpdate,
+                path=file_path,
+                properties={"svn:eol-style": "native"},
+                data=b"Hello world!\r\n",
+            )
+        ],
+    )
+
+    # first load
+    load_and_check_visit()
+
+    # second commit
+    add_commit(
+        repo_url,
+        "Modify previously added file",
+        [
+            CommitChange(
+                change_type=CommitChangeType.AddOrUpdate,
+                path=file_path,
+                data=b"Hello World!\r\n",
+            )
+        ],
+    )
+
+    # second load, incremental
+    load_and_check_visit()
+
+    # third commit
+    add_commit(
+        repo_url,
+        "Unset svn:eol-style property on file",
+        [
+            CommitChange(
+                change_type=CommitChangeType.AddOrUpdate,
+                path=file_path,
+                properties={"svn:eol-style": None},
+            )
+        ],
+    )
+
+    # third load, incremental
+    load_and_check_visit()