(swh) ✔ ~/swh/swh-environment/swh-loader-svn [master|✚ 2⚑ 10] 16:43 $ git diff
diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
index eac96a3..698babe 100644
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -223,15 +223,15 @@ Local repository not cleaned up for investigation: %s""",
             rev, commit, self.svnrepo.uuid, dir_id, parents
         )
 
-    def check_history_not_altered(
-        self, svnrepo: SvnRepo, revision_start: int, swh_rev: Revision
-    ) -> bool:
+    def check_history_not_altered(self, revision_start: int, swh_rev: Revision) -> bool:
         """Given a svn repository, check if the history was modified in between visits.
 
         """
         revision_id = swh_rev.id
         parents = swh_rev.parents
-        hash_data_per_revs = svnrepo.swh_hash_data_at_revision(revision_start)
+
+        assert self.svnrepo is not None
+        hash_data_per_revs = self.svnrepo.swh_hash_data_at_revision(revision_start)
 
         rev = revision_start
         commit, root_dir = list(hash_data_per_revs)[0]
@@ -279,9 +279,7 @@ Local repository not cleaned up for investigation: %s""",
                 revision_start,
             )
 
-            if not self.check_history_not_altered(
-                self.svnrepo, revision_start, self.latest_revision
-            ):
+            if not self.check_history_not_altered(revision_start, self.latest_revision):
                 self.log.debug(
                     (
                         "History of svn %s@%s altered. "
@@ -782,6 +780,22 @@ class SvnLoaderFromRemoteDump(SvnLoader):
             # subversion origin and get the number of the last one
             last_loaded_svn_rev = self.get_last_loaded_svn_rev(self.svn_url)
 
+            # Then check if the last loaded revision in the archive is different
+            # from the last revision on the remote subversion server.
+            # Skip the dump of all revisions and the loading process if they are identical
+            # to save some disk space and processing time.
+            last_loaded_snp_and_rev = self._latest_snapshot_revision(self.origin_url)
+            if last_loaded_snp_and_rev is not None:
+                last_loaded_snp, last_loaded_rev = last_loaded_snp_and_rev
+                self.svnrepo = SvnRepo(
+                    self.origin_url, self.origin_url, self.temp_dir, self.max_content_size
+                )
+                if self.check_history_not_altered(last_loaded_svn_rev, last_loaded_rev):
+                    self._snapshot = last_loaded_snp
+                    self._last_revision = last_loaded_rev
+                    self.done = True
+                    return
+
             # Then try to generate a dump file containing relevant svn revisions
             # to load, an exception will be thrown if something wrong happened
             dump_path = self.dump_svn_revisions(self.svn_url, last_loaded_svn_rev)