diff --git a/swh/loader/svn/ra.py b/swh/loader/svn/ra.py --- a/swh/loader/svn/ra.py +++ b/swh/loader/svn/ra.py @@ -36,6 +36,7 @@ from subvertpy.ra import Auth, RemoteAccess, get_username_provider from swh.model import from_disk, hashutil +from swh.model.from_disk import DiskBackedContent from swh.model.model import Content, Directory, SkippedContent if TYPE_CHECKING: @@ -205,6 +206,8 @@ self.svnrepo = svnrepo self.editor = svnrepo.swhreplay.editor + self.editor.modified_paths.add(path) + def change_prop(self, key: str, value: str) -> None: if key == properties.PROP_EXECUTABLE: if value is None: # bit flip off @@ -392,6 +395,9 @@ self.editor = svnrepo.swhreplay.editor self.externals: Dict[str, Tuple[str, Optional[int], bool]] = {} + if path: + self.editor.modified_paths.add(path) + def remove_child(self, path: bytes) -> None: """Remove a path from the current objects. @@ -424,6 +430,8 @@ if state_path.startswith(fullpath + b"/"): del self.file_states[state_path] + self.editor.modified_paths.discard(path) + def open_directory(self, path: str, *args) -> DirEditor: """Updating existing directory. 
@@ -616,6 +624,9 @@ self.remove_child(dest_fullpath) # copy exported path to reconstructed filesystem fullpath = os.path.join(self.rootpath, dest_fullpath) + + self.editor.external_paths.add(dest_fullpath) + self.editor.modified_paths.add(dest_fullpath) # update from_disk model and store external paths self.editor.external_paths.add(dest_fullpath) if os.path.isfile(temp_path): @@ -628,13 +639,16 @@ self.directory[dest_fullpath] = from_disk.Directory.from_disk( path=fullpath ) + external_paths = set() for root, dirs, files in os.walk(fullpath): - self.editor.external_paths.update( + external_paths.update( [ os.path.join(root.replace(self.rootpath + b"/", b""), p) for p in chain(dirs, files) ] ) + self.editor.external_paths.update(external_paths) + self.editor.modified_paths.update(external_paths) # ensure hash update for the directory with externals set self.directory[self.path].update_hash(force=True) @@ -712,6 +726,8 @@ self.externals_cache: Dict[Tuple[str, Optional[int]], str] = {} self.svnrepo = svnrepo self.revnum = None + # to store the set of paths added or modified when replaying a revision + self.modified_paths: Set[bytes] = set() def set_target_revision(self, revnum) -> None: self.revnum = revnum @@ -723,6 +739,8 @@ pass def open_root(self, base_revnum: int) -> DirEditor: + # a new revision is being replayed so clear the modified_paths set + self.modified_paths.clear() return DirEditor( self.directory, rootpath=self.rootpath, @@ -772,7 +790,7 @@ def compute_objects( self, rev: int ) -> Tuple[List[Content], List[SkippedContent], List[Directory]]: - """Compute objects at revisions rev. + """Compute objects added or modified at revision rev. Expects the state to be at previous revision's objects. 
Args: @@ -784,7 +802,23 @@ """ self.replay(rev) - return from_disk.iter_directory(self.directory) + + contents: List[Content] = [] + skipped_contents: List[SkippedContent] = [] + directories: List[Directory] = [] + + directories.append(self.editor.directory.to_model()) + for path in self.editor.modified_paths: + obj = self.directory[path].to_model() + obj_type = obj.object_type + if obj_type in (Content.object_type, DiskBackedContent.object_type): + contents.append(obj.with_data()) + elif obj_type == SkippedContent.object_type: + skipped_contents.append(obj) + elif obj_type == Directory.object_type: + directories.append(obj) + + return contents, skipped_contents, directories @click.command() diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -148,6 +148,7 @@ actual_visit = assert_last_visit_matches( swh_storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) loader2 = SvnLoaderFromRemoteDump( swh_storage, repo_url, temp_directory=loading_path @@ -180,6 +181,7 @@ type="svn", snapshot=GOURMET_SNAPSHOT.id, ) + check_snapshot(loader.snapshot, loader.storage) stats = get_stats(loader.storage) assert stats == { @@ -214,6 +216,7 @@ type="svn", snapshot=GOURMET_SNAPSHOT.id, ) + check_snapshot(loader.snapshot, loader.storage) assert loader.load() == {"status": "uneventful"} visit_status2 = assert_last_visit_matches( @@ -286,6 +289,7 @@ type="svn", snapshot=GOURMET_SNAPSHOT.id, ) + check_snapshot(loader.snapshot, loader.storage) archive_path2 = os.path.join(datadir, "pkg-gourmet-tampered-rev6-log.tgz") repo_tampered_url = prepare_repository_from_archive( @@ -304,6 +308,7 @@ type="svn", snapshot=hash_to_bytes("5aa61959e788e281fd6e187053d0f46c68e8d8bb"), ) + check_snapshot(loader.snapshot, loader.storage) stats = get_stats(loader.storage) assert stats["origin"] == 1 @@ -334,6 +339,7 @@ type="svn", 
snapshot=GOURMET_SNAPSHOT.id, ) + check_snapshot(GOURMET_SNAPSHOT, loader.storage) archive_path = os.path.join(datadir, "pkg-gourmet-with-updates.tgz") repo_updated_url = prepare_repository_from_archive( @@ -421,6 +427,7 @@ type="svn", snapshot=GOURMET_SNAPSHOT.id, ) + check_snapshot(GOURMET_SNAPSHOT, loader.storage) start_revision = loader.storage.revision_get( [hash_to_bytes("95edacc8848369d6fb1608e887d6d2474fd5224f")] @@ -898,6 +905,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_svn_loader_from_dump_archive(swh_storage, datadir, tmp_path): @@ -1067,6 +1075,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) assert get_stats(loader.storage) == { "content": 2, @@ -1127,6 +1136,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) # check loaded objects are those expected assert get_stats(loader.storage) == { @@ -1213,6 +1223,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) # check loaded objects are those expected assert get_stats(loader.storage) == { @@ -1272,6 +1283,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) paths = get_head_revision_paths_info(loader) # end of lines should not have been processed @@ -1314,6 +1326,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="partial", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) assert get_stats(loader.storage) == { "content": 2, @@ -1401,6 +1414,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_last_revision_divergence(swh_storage, datadir, 
tmp_path): @@ -1423,6 +1437,7 @@ type="svn", snapshot=GOURMET_SNAPSHOT.id, ) + check_snapshot(GOURMET_SNAPSHOT, loader.storage) def test_loader_delete_directory_while_file_has_same_prefix( @@ -1470,6 +1485,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_svn_loader_incremental(swh_storage, repo_url, tmp_path): @@ -1498,6 +1514,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) # second commit add_commit( @@ -1518,6 +1535,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) # third commit add_commit( @@ -1538,6 +1556,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_svn_loader_incremental_replay_start_with_empty_directory( @@ -1561,6 +1580,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) # second commit add_commit( @@ -1652,6 +1672,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_svn_add_property_on_link(swh_storage, repo_url, tmp_path): @@ -1698,6 +1719,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_svn_link_parsing(swh_storage, repo_url, tmp_path): @@ -1744,6 +1766,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_svn_empty_local_dir_before_post_load(swh_storage, datadir, tmp_path): @@ -1776,6 +1799,7 @@ type="svn", snapshot=GOURMET_SNAPSHOT.id, ) + check_snapshot(GOURMET_SNAPSHOT, loader.storage) def 
test_loader_svn_add_property_on_directory_link(swh_storage, repo_url, tmp_path): @@ -1822,6 +1846,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) @pytest.fixture @@ -1891,6 +1916,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) # third commit add_commit( @@ -1911,6 +1937,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_with_invalid_svn_externals(swh_storage, repo_url, tmp_path): @@ -1952,6 +1979,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_with_valid_externals_modification( @@ -2026,6 +2054,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_with_valid_externals_and_versioned_path( @@ -2092,6 +2121,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_with_invalid_externals_and_versioned_path( @@ -2133,6 +2163,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_set_externals_then_remove_and_add_as_local( @@ -2189,6 +2220,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_set_invalid_externals_then_remove(swh_storage, repo_url, tmp_path): @@ -2226,6 +2258,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_loader_set_externals_with_versioned_file_overlap( @@ -2292,6 +2325,7 @@ 
assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) def test_dump_loader_relative_externals_detection( @@ -2342,6 +2376,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) assert loader.svnrepo.has_relative_externals add_commit( @@ -2363,6 +2398,7 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) assert not loader.svnrepo.has_relative_externals @@ -2420,5 +2456,6 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + check_snapshot(loader.snapshot, loader.storage) assert (external_url, None) in loader.svnrepo.swhreplay.editor.externals_cache