Hash a temporary export when checking a single svn revision.

swh_hash_data_at_revision no longer exports into self.local_url nor rebuilds
self.swhreplay: it exports the revision to a temporary location, hashes that
export and removes it. The removal happens in a `finally` clause so that a
failure while hashing or while reading the log does not leave the temporary
export behind. The `export` method is removed, and a test checks that no file
is present under local_url when an incremental load starts replaying revisions.

diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py
--- a/swh/loader/svn/svn.py
+++ b/swh/loader/svn/svn.py
@@ -186,17 +186,6 @@
         ):
             yield self.__to_entry(log_entry)
 
-    def export(self, revision: int) -> None:
-        """Export the repository to a given version.
-
-        """
-        self.client.export(
-            self.remote_url,
-            to=self.local_url.decode("utf-8"),
-            rev=revision,
-            ignore_keywords=True,
-        )
-
     def export_temporary(self, revision: int) -> Tuple[str, bytes]:
         """Export the repository to a given revision in a temporary location. This is up
         to the caller of this function to clean up the temporary location when done (cf.
@@ -272,27 +261,26 @@
         self, revision: int
     ) -> Iterator[Tuple[Dict, DirectoryFromDisk]]:
         """Compute the information at a given svn revision. This is expected to be used
-        for update only.
+        for checks only.
 
         Yields:
             The tuple (commit dictionary, targeted directory object).
 
         """
-        # Update disk representation of the repository at revision id
-        self.export(revision)
-        # Compute the current hashes on disk
-        directory = DirectoryFromDisk.from_disk(
-            path=os.fsencode(self.local_url), max_content_length=self.max_content_length
-        )
-
-        # Update the replay collaborator with the right state
-        self.swhreplay = ra.Replay(
-            conn=self.conn, rootpath=self.local_url, directory=directory
-        )
-
-        # Retrieve the commit information for revision
-        commit = list(self.logs(revision, revision))[0]
+        # Export the repository at revision id to a temporary location
+        local_dirname, local_url = self.export_temporary(revision)
+        try:
+            # Compute the current hashes on disk
+            directory = DirectoryFromDisk.from_disk(
+                path=local_url, max_content_length=self.max_content_length
+            )
+
+            # Retrieve the commit information for revision
+            commit = list(self.logs(revision, revision))[0]
+        finally:
+            # Clean export directory, even when hashing or log retrieval fails
+            self.clean_fs(local_dirname)
 
         yield commit, directory
 
     def clean_fs(self, local_dirname: Optional[str] = None) -> None:
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -1558,3 +1558,64 @@
     assert_last_visit_matches(
         loader.storage, repo_url, status="full", type="svn",
     )
+
+
+def test_svn_loader_incremental_replay_start_with_empty_directory(
+    swh_storage, mocker, tmp_path
+):
+    # create a repository
+    repo_path = os.path.join(tmp_path, "tmprepo")
+    repos.create(repo_path)
+    repo_url = f"file://{repo_path}"
+
+    # first commit
+    add_commit(
+        repo_url,
+        ("Add a file"),
+        [
+            CommitChange(
+                change_type=CommitChangeType.AddOrUpdate, path="foo.txt", data=b"foo\n",
+            )
+        ],
+    )
+
+    # first load
+    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)
+    assert loader.load() == {"status": "eventful"}
+    assert_last_visit_matches(
+        loader.storage, repo_url, status="full", type="svn",
+    )
+
+    # second commit
+    add_commit(
+        repo_url,
+        "Modify previously added file",
+        [
+            CommitChange(
+                change_type=CommitChangeType.AddOrUpdate, path="foo.txt", data=b"bar\n",
+            )
+        ],
+    )
+
+    class SvnRepoCheckReplayStartWithEmptyDirectory(SvnRepo):
+        def swh_hash_data_per_revision(self, start_revision: int, end_revision: int):
+            """Overrides swh_hash_data_per_revision method to grab the content
+            of the directory where the svn revisions will be replayed before that
+            process starts."""
+            self.replay_dir_content_before_start = [
+                os.path.join(root, name)
+                for root, _, files in os.walk(self.local_url)
+                for name in files
+            ]
+            yield from super().swh_hash_data_per_revision(start_revision, end_revision)
+
+    from swh.loader.svn import loader
+
+    mocker.patch.object(loader, "SvnRepo", SvnRepoCheckReplayStartWithEmptyDirectory)
+
+    # second load, incremental
+    loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)
+    loader.load()
+
+    # check work directory was empty before replaying revisions
+    assert loader.svnrepo.replay_dir_content_before_start == []