diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py --- a/swh/loader/svn/svn.py +++ b/swh/loader/svn/svn.py @@ -314,10 +314,13 @@ if self.from_dump: # when exporting a subpath of a subversion repository mounted from - # a dump file gnerated by svnrdump, exported paths are relative to + # a dump file generated by svnrdump, exported paths are relative to # the repository root path while they are relative to the subpath # otherwise, so we need to adjust the URL of the exported filesystem - local_url = os.path.join(local_url, self.root_directory.strip("/")) + root_dir_local_url = os.path.join(local_url, self.root_directory.strip("/")) + # check that root directory of a subproject did not get removed in revision + if os.path.exists(root_dir_local_url): + local_url = root_dir_local_url return local_dirname, os.fsencode(local_url) @@ -363,7 +366,13 @@ # resume the loading from if commit["has_changes"] or start_revision == 0: # yield data only if commit has changes or if repository is empty - root_dir = self.swhreplay.directory[self.root_directory.encode()] + root_dir_path = self.root_directory.encode()[1:] + if not root_dir_path or root_dir_path in self.swhreplay.directory: + root_dir = self.swhreplay.directory[root_dir_path] + else: + # root directory of subproject got removed in revision, return + # empty directory for that edge case + root_dir = DirectoryFromDisk() yield rev, commit, objects, root_dir def swh_hash_data_at_revision( diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -1736,6 +1736,15 @@ check_snapshot(GOURMET_SNAPSHOT, loader.storage) +def _dump_project(tmp_path, origin_url): + svnrdump_cmd = ["svnrdump", "dump", origin_url] + dump_path = f"{tmp_path}/repo.dump" + with open(dump_path, "wb") as dump_file: + subprocess.run(svnrdump_cmd, stdout=dump_file) + subprocess.run(["gzip", dump_path]) + return dump_path + ".gz" + + def test_loader_svn_add_property_on_directory_link(swh_storage, repo_url, tmp_path): # first commit @@ -1827,14 +1836,6 @@ ], ) - def dump_project(origin_url): - svnrdump_cmd = ["svnrdump", "dump", origin_url] - dump_path = f"{tmp_path}/repo.dump" - with open(dump_path, "wb") as dump_file: - subprocess.run(svnrdump_cmd, stdout=dump_file) - subprocess.run(["gzip", dump_path]) - return dump_path + ".gz" - for i in range(1, 4): # load each project in the repository separately origin_url = f"{repo_url}/project{i}" @@ -1849,7 +1850,7 @@ } if svn_loader_cls == SvnLoaderFromDumpArchive: - loader_params["archive_path"] = dump_project(origin_url) + loader_params["archive_path"] = _dump_project(tmp_path, origin_url) loader = svn_loader_cls(**loader_params) @@ -1860,7 +1861,7 @@ check_snapshot(loader.snapshot, loader.storage) if svn_loader_cls == SvnLoaderFromDumpArchive: - loader_params["archive_path"] = dump_project(origin_url) + loader_params["archive_path"] = _dump_project(tmp_path, origin_url) loader = svn_loader_cls(**loader_params) @@ -1877,3 +1878,73 @@ "skipped_content": 0, "snapshot": i, # one snapshot } + + +@pytest.mark.parametrize( + "svn_loader_cls", [SvnLoader, SvnLoaderFromDumpArchive, SvnLoaderFromRemoteDump] +) +def test_loader_subproject_root_dir_removal( + swh_storage, repo_url, tmp_path, svn_loader_cls +): + + # first commit + add_commit( + repo_url, + "Add project in repository", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="project/foo.sh", + data=b"#!/bin/bash\necho foo", + ), + ], + ) + + # second commit + add_commit( + repo_url, + "Remove project root directory", + [CommitChange(change_type=CommitChangeType.Delete, path="project/")], + ) + + # third commit + add_commit( + repo_url, + "Re-add project in repository", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="project/foo.sh", + data=b"#!/bin/bash\necho foo", + ), + ], + ) + + origin_url = f"{repo_url}/project" + + loader_params = { + "storage": swh_storage, + "url": origin_url, + "origin_url": origin_url, + "temp_directory": tmp_path, + "incremental": True, + "check_revision": 1, + } + + if svn_loader_cls == SvnLoaderFromDumpArchive: + loader_params["archive_path"] = _dump_project(tmp_path, origin_url) + + loader = svn_loader_cls(**loader_params) + + assert loader.load() == {"status": "eventful"} + assert_last_visit_matches( + loader.storage, origin_url, status="full", type="svn", + ) + check_snapshot(loader.snapshot, loader.storage) + + if svn_loader_cls == SvnLoaderFromDumpArchive: + loader_params["archive_path"] = _dump_project(tmp_path, origin_url) + + loader = svn_loader_cls(**loader_params) + + assert loader.load() == {"status": "uneventful"}