diff --git a/swh/loader/svn/ra.py b/swh/loader/svn/ra.py --- a/swh/loader/svn/ra.py +++ b/swh/loader/svn/ra.py @@ -36,6 +36,7 @@ from subvertpy.ra import Auth, RemoteAccess, get_username_provider from swh.model import from_disk, hashutil +from swh.model.from_disk import DiskBackedContent from swh.model.model import Content, Directory, SkippedContent if TYPE_CHECKING: @@ -205,6 +206,8 @@ self.svnrepo = svnrepo self.editor = svnrepo.swhreplay.editor + self.editor.modified_paths.add(path) + def change_prop(self, key: str, value: str) -> None: if key == properties.PROP_EXECUTABLE: if value is None: # bit flip off @@ -392,6 +395,10 @@ self.editor = svnrepo.swhreplay.editor self.externals: Dict[str, Tuple[str, Optional[int], bool]] = {} + # repository root dir has empty path + if path: + self.editor.modified_paths.add(path) + def remove_child(self, path: bytes) -> None: """Remove a path from the current objects. @@ -424,6 +431,8 @@ if state_path.startswith(fullpath + b"/"): del self.file_states[state_path] + self.editor.modified_paths.discard(path) + def open_directory(self, path: str, *args) -> DirEditor: """Updating existing directory. 
@@ -617,6 +626,9 @@ self.remove_child(dest_fullpath) # copy exported path to reconstructed filesystem fullpath = os.path.join(self.rootpath, dest_fullpath) + + # record the external path as modified so its objects get computed for this revision + self.editor.modified_paths.add(dest_fullpath) # update from_disk model and store external paths self.editor.external_paths.add(dest_fullpath) if os.path.isfile(temp_path): @@ -629,13 +641,16 @@ self.directory[dest_fullpath] = from_disk.Directory.from_disk( path=fullpath ) + external_paths = set() for root, dirs, files in os.walk(fullpath): - self.editor.external_paths.update( + external_paths.update( [ os.path.join(root.replace(self.rootpath + b"/", b""), p) for p in chain(dirs, files) ] ) + self.editor.external_paths.update(external_paths) + self.editor.modified_paths.update(external_paths) # ensure hash update for the directory with externals set self.directory[self.path].update_hash(force=True) @@ -713,6 +728,8 @@ self.externals_cache: Dict[Tuple[str, Optional[int]], str] = {} self.svnrepo = svnrepo self.revnum = None + # to store the set of paths added or modified when replaying a revision + self.modified_paths: Set[bytes] = set() def set_target_revision(self, revnum) -> None: self.revnum = revnum @@ -724,6 +741,8 @@ pass def open_root(self, base_revnum: int) -> DirEditor: + # a new revision is being replayed so clear the modified_paths set + self.modified_paths.clear() return DirEditor( self.directory, rootpath=self.rootpath, @@ -773,7 +792,7 @@ def compute_objects( self, rev: int ) -> Tuple[List[Content], List[SkippedContent], List[Directory]]: - """Compute objects at revisions rev. + """Compute objects added or modified at revision rev. Expects the state to be at previous revision's objects. 
Args: @@ -785,7 +804,23 @@ """ self.replay(rev) - return from_disk.iter_directory(self.directory) + + contents: List[Content] = [] + skipped_contents: List[SkippedContent] = [] + directories: List[Directory] = [] + + directories.append(self.editor.directory.to_model()) + for path in self.editor.modified_paths: + obj = self.directory[path].to_model() + obj_type = obj.object_type + if obj_type in (Content.object_type, DiskBackedContent.object_type): + contents.append(obj.with_data()) + elif obj_type == SkippedContent.object_type: + skipped_contents.append(obj) + elif obj_type == Directory.object_type: + directories.append(obj) + + return contents, skipped_contents, directories @click.command() diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -149,6 +149,7 @@ actual_visit = assert_last_visit_matches( swh_storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) loader2 = SvnLoaderFromRemoteDump( swh_storage, repo_url, temp_directory=loading_path @@ -181,6 +182,7 @@ type="svn", snapshot=GOURMET_SNAPSHOT.id, ) + check_snapshot(loader.snapshot, loader.storage) stats = get_stats(loader.storage) assert stats == { @@ -215,6 +217,7 @@ type="svn", snapshot=GOURMET_SNAPSHOT.id, ) + check_snapshot(loader.snapshot, loader.storage) assert loader.load() == {"status": "uneventful"} visit_status2 = assert_last_visit_matches( @@ -287,6 +290,7 @@ type="svn", snapshot=GOURMET_SNAPSHOT.id, ) + check_snapshot(loader.snapshot, loader.storage) archive_path2 = os.path.join(datadir, "pkg-gourmet-tampered-rev6-log.tgz") repo_tampered_url = prepare_repository_from_archive( @@ -305,6 +309,7 @@ type="svn", snapshot=hash_to_bytes("5aa61959e788e281fd6e187053d0f46c68e8d8bb"), ) + check_snapshot(loader.snapshot, loader.storage) stats = get_stats(loader.storage) assert stats["origin"] == 1 @@ -335,6 +340,7 @@ type="svn", 
snapshot=GOURMET_SNAPSHOT.id, ) + check_snapshot(GOURMET_SNAPSHOT, loader.storage) archive_path = os.path.join(datadir, "pkg-gourmet-with-updates.tgz") repo_updated_url = prepare_repository_from_archive( @@ -422,6 +428,7 @@ type="svn", snapshot=GOURMET_SNAPSHOT.id, ) + check_snapshot(GOURMET_SNAPSHOT, loader.storage) start_revision = loader.storage.revision_get( [hash_to_bytes("95edacc8848369d6fb1608e887d6d2474fd5224f")] @@ -899,6 +906,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_svn_loader_from_dump_archive(swh_storage, datadir, tmp_path): @@ -1068,6 +1076,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) assert get_stats(loader.storage) == { "content": 2, @@ -1128,6 +1137,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) # check loaded objects are those expected assert get_stats(loader.storage) == { @@ -1214,6 +1224,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) # check loaded objects are those expected assert get_stats(loader.storage) == { @@ -1273,6 +1284,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) paths = get_head_revision_paths_info(loader) # end of lines should not have been processed @@ -1315,6 +1327,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="partial", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) assert get_stats(loader.storage) == { "content": 2, @@ -1402,6 +1415,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_last_revision_divergence(swh_storage, datadir, 
tmp_path): @@ -1424,6 +1438,7 @@ type="svn", snapshot=GOURMET_SNAPSHOT.id, ) + check_snapshot(GOURMET_SNAPSHOT, loader.storage) def test_loader_delete_directory_while_file_has_same_prefix( @@ -1471,6 +1486,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_svn_loader_incremental(swh_storage, repo_url, tmp_path): @@ -1499,6 +1515,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) # second commit add_commit( @@ -1519,6 +1536,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) # third commit add_commit( @@ -1539,6 +1557,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_svn_loader_incremental_replay_start_with_empty_directory( @@ -1562,6 +1581,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) # second commit add_commit( @@ -1653,6 +1673,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_svn_add_property_on_link(swh_storage, repo_url, tmp_path): @@ -1699,6 +1720,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_svn_link_parsing(swh_storage, repo_url, tmp_path): @@ -1745,6 +1767,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_svn_empty_local_dir_before_post_load(swh_storage, datadir, tmp_path): @@ -1777,6 +1800,7 @@ type="svn", snapshot=GOURMET_SNAPSHOT.id, ) + check_snapshot(GOURMET_SNAPSHOT, loader.storage) def 
test_loader_svn_add_property_on_directory_link(swh_storage, repo_url, tmp_path): @@ -1823,6 +1847,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) @pytest.fixture @@ -1892,6 +1917,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) # third commit add_commit( @@ -1912,6 +1938,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_with_invalid_svn_externals(swh_storage, repo_url, tmp_path): @@ -1953,6 +1980,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_with_valid_externals_modification( @@ -2027,6 +2055,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_with_valid_externals_and_versioned_path( @@ -2093,6 +2122,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_with_invalid_externals_and_versioned_path( @@ -2134,6 +2164,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_set_externals_then_remove_and_add_as_local( @@ -2190,6 +2221,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_set_invalid_externals_then_remove(swh_storage, repo_url, tmp_path): @@ -2227,6 +2259,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_set_externals_with_versioned_file_overlap( @@ -2293,6 +2326,7 @@ 
assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_dump_loader_relative_externals_detection( @@ -2343,6 +2377,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) assert loader.svnrepo.has_relative_externals add_commit( @@ -2364,6 +2399,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) assert not loader.svnrepo.has_relative_externals @@ -2421,5 +2457,6 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) assert (external_url, None) in loader.svnrepo.swhreplay.editor.externals_cache