Page MenuHomeSoftware Heritage

D6658.id24197.diff
No OneTemporary

D6658.id24197.diff

diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -223,15 +223,15 @@
rev, commit, self.svnrepo.uuid, dir_id, parents
)
- def check_history_not_altered(
- self, svnrepo: SvnRepo, revision_start: int, swh_rev: Revision
- ) -> bool:
+ def check_history_not_altered(self, revision_start: int, swh_rev: Revision) -> bool:
"""Given a svn repository, check if the history was modified in between visits.
"""
revision_id = swh_rev.id
parents = swh_rev.parents
- hash_data_per_revs = svnrepo.swh_hash_data_at_revision(revision_start)
+
+ assert self.svnrepo is not None
+ hash_data_per_revs = self.svnrepo.swh_hash_data_at_revision(revision_start)
rev = revision_start
commit, root_dir = list(hash_data_per_revs)[0]
@@ -279,9 +279,7 @@
revision_start,
)
- if not self.check_history_not_altered(
- self.svnrepo, revision_start, self.latest_revision
- ):
+ if not self.check_history_not_altered(revision_start, self.latest_revision):
self.log.debug(
(
"History of svn %s@%s altered. "
@@ -782,6 +780,22 @@
# subversion origin and get the number of the last one
last_loaded_svn_rev = self.get_last_loaded_svn_rev(self.svn_url)
+ # Then check if the last loaded revision in the archive is different
+ # from the last revision on the remote subversion server.
+ # Skip the dump of all revisions and the loading process if they are identical
+ # to save some disk space and processing time.
+ last_loaded_snp_and_rev = self._latest_snapshot_revision(self.origin_url)
+ if last_loaded_snp_and_rev is not None:
+ last_loaded_snp, last_loaded_rev = last_loaded_snp_and_rev
+ self.svnrepo = SvnRepo(
+ self.origin_url, self.origin_url, self.temp_dir, self.max_content_size
+ )
+ if self.check_history_not_altered(last_loaded_svn_rev, last_loaded_rev):
+ self._snapshot = last_loaded_snp
+ self._last_revision = last_loaded_rev
+ self.done = True
+ return
+
# Then try to generate a dump file containing relevant svn revisions
# to load, an exception will be thrown if something wrong happened
dump_path = self.dump_svn_revisions(self.svn_url, last_loaded_svn_rev)
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -742,14 +742,7 @@
assert not os.path.exists(loader.temp_dir)
-def test_loader_svn_loader_from_remote_dump(swh_storage, datadir, tmp_path):
- """Repository with wrong symlinks should be ingested ok nonetheless
-
- Edge case:
- - wrong symbolic link
- - wrong symbolic link with empty space names
-
- """
+def test_svn_loader_from_remote_dump(swh_storage, datadir, tmp_path):
archive_name = "pkg-gourmet"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
@@ -810,6 +803,53 @@
assert loaderFromDump.load() == {"status": "uneventful"}
+def test_svn_loader_from_remote_dump_multiple_load_on_stale_repo(
+ swh_storage, datadir, tmp_path, mocker
+):
+ archive_name = "pkg-gourmet"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
+
+ # first load: a dump file will be created, mounted to a local repository
+ # and the latter will be loaded into the archive
+ loaderFromDump = SvnLoaderFromRemoteDump(
+ swh_storage, repo_url, temp_directory=tmp_path
+ )
+ assert loaderFromDump.load() == {"status": "eventful"}
+ assert_last_visit_matches(
+ loaderFromDump.storage,
+ repo_url,
+ status="full",
+ type="svn",
+ snapshot=GOURMET_SNAPSHOT.id,
+ )
+
+ # second load on same repository: the loader will detect there are no changes
+ # since last load and will skip the dump, mount and load phases
+ loaderFromDump = SvnLoaderFromRemoteDump(
+ swh_storage, repo_url, temp_directory=tmp_path
+ )
+
+ loaderFromDump.dump_svn_revisions = mocker.MagicMock()
+ init_svn_repo_from_dump = mocker.patch(
+ "swh.loader.svn.loader.init_svn_repo_from_dump"
+ )
+ loaderFromDump.process_svn_revisions = mocker.MagicMock()
+
+ assert loaderFromDump.load() == {"status": "uneventful"}
+ assert_last_visit_matches(
+ loaderFromDump.storage,
+ repo_url,
+ status="full",
+ type="svn",
+ snapshot=GOURMET_SNAPSHOT.id,
+ )
+
+ loaderFromDump.dump_svn_revisions.assert_not_called()
+ init_svn_repo_from_dump.assert_not_called()
+ loaderFromDump.process_svn_revisions.assert_not_called()
+
+
def test_loader_user_defined_svn_properties(swh_storage, datadir, tmp_path):
"""Edge cases: The repository held some user defined svn-properties with special
encodings, this prevented the repository from being loaded even though we do not

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 3:24 PM (12 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3228733

Event Timeline