D7163: loader: Fix repo sub-tree loading when using dump loaders
File: D7163.diff (text/plain, 10 KB)
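Overview (as described by the diff below): a dump produced by svnrdump always covers the whole repository, even when the requested origin URL points at a sub-tree such as one project of a multi-project repository. The changes therefore thread a `from_dump` flag from the dump-based loaders into `SvnRepo`, derive the sub-tree path (`root_directory`) by stripping the repository root URL from the origin URL, skip revisions that do not touch that sub-tree, and adjust exported filesystem paths accordingly.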
diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -121,6 +121,7 @@
         # state from previous visit
         self.latest_snapshot = None
         self.latest_revision: Optional[Revision] = None
+        self.from_dump = False
 
     def pre_cleanup(self):
         """Cleanup potential dangling files from prior runs (e.g. OOM killed
@@ -160,9 +161,9 @@
         """
         assert self.svnrepo is not None
         local_dirname, local_url = self.svnrepo.export_temporary(revision)
-        h = from_disk.Directory.from_disk(path=local_url).hash
+        root_dir = from_disk.Directory.from_disk(path=local_url)
        self.svnrepo.clean_fs(local_dirname)
-        return h
+        return root_dir.hash
 
     def _latest_snapshot_revision(
         self, origin_url: str,
@@ -392,7 +393,11 @@
         try:
             self.svnrepo = SvnRepo(
-                self.svn_url, self.origin_url, local_dirname, self.max_content_size
+                self.svn_url,
+                self.origin_url,
+                local_dirname,
+                self.max_content_size,
+                self.from_dump,
             )
         except SubversionException as e:
             error_msgs = [
@@ -577,6 +582,7 @@
         self.archive_path = archive_path
         self.temp_dir = None
         self.repo_path = None
+        self.from_dump = True
 
     def prepare(self):
         self.log.info("Archive to mount and load %s", self.archive_path)
@@ -630,6 +636,7 @@
             check_revision=check_revision,
             max_content_size=max_content_size,
         )
+        self.from_dump = True
         self.temp_dir = self._create_tmp_dir(self.temp_directory)
         self.repo_path = None
         self.truncated_dump = False
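A note on the hunks above: the base `SvnLoader` initializes `from_dump` to `False`, and the two dump-based subclasses (`SvnLoaderFromDumpArchive`, `SvnLoaderFromRemoteDump`) override it with `True` after calling `super().__init__()`. This ordering is safe because the flag is only read later, when the loader instantiates `SvnRepo`.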
diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py
--- a/swh/loader/svn/svn.py
+++ b/swh/loader/svn/svn.py
@@ -53,9 +53,11 @@
         origin_url: str,
         local_dirname: str,
         max_content_length: int,
+        from_dump: bool = False,
     ):
         self.remote_url = remote_url.rstrip("/")
         self.origin_url = origin_url
+        self.from_dump = from_dump
 
         auth = Auth([get_username_provider()])
         # one connection for log iteration
@@ -81,6 +83,12 @@
         self.has_recursive_externals = False
         self.replay_started = False
 
+        # compute root directory path from the remote repository URL, required to
+        # properly load the sub-tree of a repository mounted from a dump file
+        info = self.client.info(origin_url.rstrip("/"))
+        repos_root_url = next(iter(info.values())).repos_root_url
+        self.root_directory = origin_url.replace(repos_root_url, "", 1)
+
     def __str__(self):
         return str(
             {
@@ -157,11 +165,21 @@
             revprops.get(properties.PROP_REVISION_LOG, DEFAULT_AUTHOR_MESSAGE)
         )
 
+        has_changes = (
+            not self.from_dump
+            or changed_paths is not None
+            and any(
+                changed_path.startswith(self.root_directory)
+                for changed_path in changed_paths.keys()
+            )
+        )
+
         return {
             "rev": rev,
             "author_date": author_date,
             "author_name": author,
             "message": message,
+            "has_changes": has_changes,
         }
 
     def logs(self, revision_start: int, revision_end: int) -> Iterator[Dict]:
@@ -191,7 +209,7 @@
             paths=None,
             start=revision_start,
             end=revision_end,
-            discover_changed_paths=False,
+            discover_changed_paths=self.from_dump,
         ):
             yield self.__to_entry(log_entry)
 
@@ -293,6 +311,14 @@
                 pass
             else:
                 raise
+
+        if self.from_dump:
+            # when exporting a subpath of a subversion repository mounted from
+            # a dump file generated by svnrdump, exported paths are relative to
+            # the repository root path while they are relative to the subpath
+            # otherwise, so we need to adjust the URL of the exported filesystem
+            local_url = os.path.join(local_url, self.root_directory.strip("/"))
+
         return local_dirname, os.fsencode(local_url)
 
     def swh_hash_data_per_revision(
@@ -335,7 +361,10 @@
         if rev >= start_revision:
             # start yielding new data to archive once we reached the revision to
             # resume the loading from
-            yield rev, commit, objects, self.swhreplay.directory
+            if commit["has_changes"] or start_revision == 0:
+                # yield data only if commit has changes or if repository is empty
+                root_dir = self.swhreplay.directory[self.root_directory.encode()]
+                yield rev, commit, objects, root_dir
 
     def swh_hash_data_at_revision(
         self, revision: int
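To make the new bookkeeping concrete, here is a minimal, self-contained sketch (with made-up URLs and paths; no Subversion connection involved) of what the `root_directory` and `has_changes` expressions above evaluate to for a sub-project origin:

    # Made-up values standing in for what subvertpy's client.info() reports.
    origin_url = "svn://example.org/repos/project1"
    repos_root_url = "svn://example.org/repos"

    # Same expression as in SvnRepo.__init__: the sub-tree path inside the dump.
    root_directory = origin_url.replace(repos_root_url, "", 1)
    assert root_directory == "/project1"

    # Same shape as in __to_entry(); Python operator precedence makes this parse
    # as (not from_dump) or ((changed_paths is not None) and any(...)), so
    # revisions of a non-dump load always count as having changes.
    from_dump = True
    changed_paths = {"/project2/bar.sh": None}  # revision touching another project
    has_changes = (
        not from_dump
        or changed_paths is not None
        and any(p.startswith(root_directory) for p in changed_paths)
    )
    assert has_changes is False  # filtered out when loading project1

Note also that `discover_changed_paths` is only enabled for dump-based loads, so in the regular remote case `changed_paths` stays `None` and `not self.from_dump` short-circuits the whole expression.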
diff --git a/swh/loader/svn/tests/test_externals.py b/swh/loader/svn/tests/test_externals.py
--- a/swh/loader/svn/tests/test_externals.py
+++ b/swh/loader/svn/tests/test_externals.py
@@ -1175,6 +1175,11 @@
     mock_client = mocker.MagicMock()
     mocker.patch.object(client, "Client", mock_client)
 
+    class Info:
+        repos_root_url = repo_url
+
+    mock_client().info.return_value = {"repo": Info()}
+
     loader = SvnLoaderFromRemoteDump(swh_storage, repo_url, temp_directory=tmp_path)
     loader.load()
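The `Info` stub above follows from the constructor change in svn.py: `SvnRepo.__init__` now calls `self.client.info()` unconditionally, so a test that replaces the client class with a `MagicMock` must stub a plausible `info()` return value. Setting the stub's `repos_root_url` to the origin URL makes `root_directory` come out empty, i.e. the whole mocked repository is treated as the sub-tree.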
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -709,9 +709,10 @@
     assert not os.path.exists(loader.temp_dir)
 
 
-def test_svn_loader_from_remote_dump(swh_storage, datadir, tmp_path):
+def test_svn_loader_from_remote_dump(swh_storage, datadir, tmpdir_factory):
     archive_name = "pkg-gourmet"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+    tmp_path = tmpdir_factory.mktemp("repo1")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
     loaderFromDump = SvnLoaderFromRemoteDump(
@@ -726,7 +727,10 @@
         snapshot=GOURMET_SNAPSHOT.id,
     )
 
-    origin_url = repo_url + "2"  # rename to another origin
+    # rename to another origin
+    tmp_path = tmpdir_factory.mktemp("repo2")
+    origin_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
+
     loader = SvnLoader(
         swh_storage, repo_url, origin_url=origin_url, temp_directory=tmp_path
     )
@@ -911,7 +915,6 @@
     archive_name = "pkg-gourmet"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
-    origin_url = f"svn://{archive_name}"
 
     dump_filename = f"{archive_name}.dump"
     with open(os.path.join(tmp_path, dump_filename), "wb") as dump_file:
@@ -922,7 +925,7 @@
     # load svn repo from that compressed dump file
     loader = SvnLoaderFromDumpArchive(
         swh_storage,
-        url=origin_url,
+        url=repo_url,
         archive_path=os.path.join(tmp_path, f"{dump_filename}.gz"),
         temp_directory=tmp_path,
     )
@@ -931,7 +934,7 @@
 
     assert_last_visit_matches(
         loader.storage,
-        origin_url,
+        repo_url,
         status="full",
         type="svn",
         snapshot=GOURMET_SNAPSHOT.id,
@@ -1778,3 +1781,99 @@
         loader.storage, repo_url, status="full", type="svn",
     )
     check_snapshot(loader.snapshot, loader.storage)
+
+
+@pytest.mark.parametrize(
+    "svn_loader_cls", [SvnLoader, SvnLoaderFromDumpArchive, SvnLoaderFromRemoteDump]
+)
+def test_loader_with_subprojects(swh_storage, repo_url, tmp_path, svn_loader_cls):
+
+    # first commit
+    add_commit(
+        repo_url,
+        "Add first project in repository",
+        [
+            CommitChange(
+                change_type=CommitChangeType.AddOrUpdate,
+                path="project1/foo.sh",
+                data=b"#!/bin/bash\necho foo",
+            ),
+        ],
+    )
+
+    # second commit
+    add_commit(
+        repo_url,
+        "Add second project in repository",
+        [
+            CommitChange(
+                change_type=CommitChangeType.AddOrUpdate,
+                path="project2/bar.sh",
+                data=b"#!/bin/bash\necho bar",
+            ),
+        ],
+    )
+
+    # third commit
+    add_commit(
+        repo_url,
+        "Add third project in repository",
+        [
+            CommitChange(
+                change_type=CommitChangeType.AddOrUpdate,
+                path="project3/baz.sh",
+                data=b"#!/bin/bash\necho baz",
+            ),
+        ],
+    )
+
+    def dump_project(origin_url):
+        svnrdump_cmd = ["svnrdump", "dump", origin_url]
+        dump_path = f"{tmp_path}/repo.dump"
+        with open(dump_path, "wb") as dump_file:
+            subprocess.run(svnrdump_cmd, stdout=dump_file)
+        subprocess.run(["gzip", dump_path])
+        return dump_path + ".gz"
+
+    for i in range(1, 4):
+        # load each project in the repository separately
+        origin_url = f"{repo_url}/project{i}"
+
+        loader_params = {
+            "storage": swh_storage,
+            "url": origin_url,
+            "origin_url": origin_url,
+            "temp_directory": tmp_path,
+            "incremental": True,
+            "check_revision": 1,
+        }
+
+        if svn_loader_cls == SvnLoaderFromDumpArchive:
+            loader_params["archive_path"] = dump_project(origin_url)
+
+        loader = svn_loader_cls(**loader_params)
+
+        assert loader.load() == {"status": "eventful"}
+        assert_last_visit_matches(
+            loader.storage, origin_url, status="full", type="svn",
+        )
+        check_snapshot(loader.snapshot, loader.storage)
+
+        if svn_loader_cls == SvnLoaderFromDumpArchive:
+            loader_params["archive_path"] = dump_project(origin_url)
+
+        loader = svn_loader_cls(**loader_params)
+
+        assert loader.load() == {"status": "uneventful"}
+
+        # check the expected objects got loaded for each project origin
+        assert get_stats(loader.storage) == {
+            "content": i,  # one content per project
+            "directory": 2 * i,  # two directories per project
+            "origin": i,
+            "origin_visit": 2 * i,  # two visits per origin
+            "release": 0,
+            "revision": i,  # one revision per project
+            "skipped_content": 0,
+            "snapshot": i,  # one snapshot per project
+        }
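Finally, a minimal usage sketch of the three loaders exercised by the new test, outside the test harness. The URLs and the archive path are hypothetical, and an in-memory storage instance is assumed; the constructor arguments mirror the ones used in the tests above:

    from swh.loader.svn.loader import (
        SvnLoader,
        SvnLoaderFromDumpArchive,
        SvnLoaderFromRemoteDump,
    )
    from swh.storage import get_storage

    storage = get_storage("memory")  # assumed in-memory storage backend
    origin_url = "svn://example.org/repos/project1"  # hypothetical sub-project

    # Regular remote loading: from_dump stays False, no revision filtering.
    SvnLoader(storage, origin_url, temp_directory="/tmp").load()

    # Remote loading through an svnrdump dump: from_dump is True, so only
    # revisions touching /project1 are loaded.
    SvnLoaderFromRemoteDump(storage, origin_url, temp_directory="/tmp").load()

    # Loading from a local gzip-compressed dump archive of the repository.
    SvnLoaderFromDumpArchive(
        storage,
        url=origin_url,
        archive_path="/tmp/project1.dump.gz",  # hypothetical dump archive
        temp_directory="/tmp",
    ).load()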