Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066579
D6658.id24197.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D6658.id24197.diff
View Options
diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -223,15 +223,15 @@
rev, commit, self.svnrepo.uuid, dir_id, parents
)
- def check_history_not_altered(
- self, svnrepo: SvnRepo, revision_start: int, swh_rev: Revision
- ) -> bool:
+ def check_history_not_altered(self, revision_start: int, swh_rev: Revision) -> bool:
"""Given a svn repository, check if the history was modified in between visits.
"""
revision_id = swh_rev.id
parents = swh_rev.parents
- hash_data_per_revs = svnrepo.swh_hash_data_at_revision(revision_start)
+
+ assert self.svnrepo is not None
+ hash_data_per_revs = self.svnrepo.swh_hash_data_at_revision(revision_start)
rev = revision_start
commit, root_dir = list(hash_data_per_revs)[0]
@@ -279,9 +279,7 @@
revision_start,
)
- if not self.check_history_not_altered(
- self.svnrepo, revision_start, self.latest_revision
- ):
+ if not self.check_history_not_altered(revision_start, self.latest_revision):
self.log.debug(
(
"History of svn %s@%s altered. "
@@ -782,6 +780,22 @@
# subversion origin and get the number of the last one
last_loaded_svn_rev = self.get_last_loaded_svn_rev(self.svn_url)
+ # Then check if the last loaded revision in the archive is different
+ # from the last revision on the remote subversion server.
+ # Skip the dump of all revisions and the loading process if they are identical
+ # to save some disk space and processing time.
+ last_loaded_snp_and_rev = self._latest_snapshot_revision(self.origin_url)
+ if last_loaded_snp_and_rev is not None:
+ last_loaded_snp, last_loaded_rev = last_loaded_snp_and_rev
+ self.svnrepo = SvnRepo(
+ self.origin_url, self.origin_url, self.temp_dir, self.max_content_size
+ )
+ if self.check_history_not_altered(last_loaded_svn_rev, last_loaded_rev):
+ self._snapshot = last_loaded_snp
+ self._last_revision = last_loaded_rev
+ self.done = True
+ return
+
# Then try to generate a dump file containing relevant svn revisions
# to load, an exception will be thrown if something wrong happened
dump_path = self.dump_svn_revisions(self.svn_url, last_loaded_svn_rev)
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -742,14 +742,7 @@
assert not os.path.exists(loader.temp_dir)
-def test_loader_svn_loader_from_remote_dump(swh_storage, datadir, tmp_path):
- """Repository with wrong symlinks should be ingested ok nonetheless
-
- Edge case:
- - wrong symbolic link
- - wrong symbolic link with empty space names
-
- """
+def test_svn_loader_from_remote_dump(swh_storage, datadir, tmp_path):
archive_name = "pkg-gourmet"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
@@ -810,6 +803,53 @@
assert loaderFromDump.load() == {"status": "uneventful"}
+def test_svn_loader_from_remote_dump_multiple_load_on_stale_repo(
+ swh_storage, datadir, tmp_path, mocker
+):
+ archive_name = "pkg-gourmet"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
+
+ # first load: a dump file will be created, mounted to a local repository
+ # and the latter will be loaded into the archive
+ loaderFromDump = SvnLoaderFromRemoteDump(
+ swh_storage, repo_url, temp_directory=tmp_path
+ )
+ assert loaderFromDump.load() == {"status": "eventful"}
+ assert_last_visit_matches(
+ loaderFromDump.storage,
+ repo_url,
+ status="full",
+ type="svn",
+ snapshot=GOURMET_SNAPSHOT.id,
+ )
+
+ # second load on same repository: the loader will detect there are no changes
+ # since last load and will skip the dump, mount and load phases
+ loaderFromDump = SvnLoaderFromRemoteDump(
+ swh_storage, repo_url, temp_directory=tmp_path
+ )
+
+ loaderFromDump.dump_svn_revisions = mocker.MagicMock()
+ init_svn_repo_from_dump = mocker.patch(
+ "swh.loader.svn.loader.init_svn_repo_from_dump"
+ )
+ loaderFromDump.process_svn_revisions = mocker.MagicMock()
+
+ assert loaderFromDump.load() == {"status": "uneventful"}
+ assert_last_visit_matches(
+ loaderFromDump.storage,
+ repo_url,
+ status="full",
+ type="svn",
+ snapshot=GOURMET_SNAPSHOT.id,
+ )
+
+ loaderFromDump.dump_svn_revisions.assert_not_called()
+ init_svn_repo_from_dump.assert_not_called()
+ loaderFromDump.process_svn_revisions.assert_not_called()
+
+
def test_loader_user_defined_svn_properties(swh_storage, datadir, tmp_path):
"""Edge cases: The repository held some user defined svn-properties with special
encodings, this prevented the repository from being loaded even though we do not
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 3:24 PM (12 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3228733
Attached To
D6658: loader: Optimize SvnLoaderFromRemoteDump use on stale repository
Event Timeline
Log In to Comment