diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py --- a/swh/loader/svn/loader.py +++ b/swh/loader/svn/loader.py @@ -317,7 +317,7 @@ # before the last revision to load) if self.debug and dir_id == dir.hash: for obj in checked_dir.iter_tree(): - path = obj.data["path"].replace(checked_dir.data["path"], b"")[1:] + path = obj.data["path"].replace(checked_dir.data["path"], b"") if not path: # ignore root directory continue diff --git a/swh/loader/svn/replay.py b/swh/loader/svn/replay.py --- a/swh/loader/svn/replay.py +++ b/swh/loader/svn/replay.py @@ -33,7 +33,7 @@ ) import click -from subvertpy import SubversionException, delta, properties +from subvertpy import SubversionException, properties from subvertpy.ra import Auth, RemoteAccess, get_username_provider from swh.model import from_disk, hashutil @@ -49,99 +49,9 @@ svn_urljoin, ) -_eol_style = {"native": b"\n", "CRLF": b"\r\n", "LF": b"\n", "CR": b"\r"} - logger = logging.getLogger(__name__) -def _normalize_line_endings(lines: bytes, eol_style: str = "native") -> bytes: - r"""Normalize line endings to unix (\\n), windows (\\r\\n) or mac (\\r). - - Args: - lines: The lines to normalize - - eol_style: The line ending format as defined for - svn:eol-style property. Acceptable values are 'native', - 'CRLF', 'LF' and 'CR' - - Returns: - Lines with endings normalized - """ - if eol_style in _eol_style: - lines = lines.replace(_eol_style["CRLF"], _eol_style["LF"]).replace( - _eol_style["CR"], _eol_style["LF"] - ) - if _eol_style[eol_style] != _eol_style["LF"]: - lines = lines.replace(_eol_style["LF"], _eol_style[eol_style]) - - return lines - - -def apply_txdelta_handler( - sbuf: bytes, target_stream: BinaryIO -) -> Callable[[Any, bytes, BinaryIO], None]: - """Return a function that can be called repeatedly with txdelta windows. - When done, closes the target_stream. - - Adapted from subvertpy.delta.apply_txdelta_handler to close the - stream when done. - - Args: - sbuf: Source buffer - target_stream: Target stream to write to. - - Returns: - Function to be called to apply txdelta windows - - """ - - def apply_window( - window: Any, sbuf: bytes = sbuf, target_stream: BinaryIO = target_stream - ): - if window is None: - target_stream.close() - return # Last call - patch = delta.apply_txdelta_window(sbuf, window) - target_stream.write(patch) - - return apply_window - - -def read_svn_link(data: bytes) -> Tuple[bytes, bytes]: - """Read the svn link's content. - - Args: - data: svn link's raw content - - Returns: - The tuple of (filetype, destination path) - - """ - split_byte = b" " - first_line = data.split(b"\n")[0] - filetype, *src = first_line.split(split_byte) - target = split_byte.join(src) - return filetype, target - - -def is_file_an_svnlink_p(fullpath: bytes) -> Tuple[bool, bytes]: - """Determine if a filepath is an svnlink or something else. - - Args: - fullpath: Full path to the potential symlink to check - - Returns: - Tuple containing a boolean value to determine if it's indeed a symlink - (as per svn) and the link target. - - """ - if os.path.islink(fullpath): - return False, b"" - with open(fullpath, "rb") as f: - filetype, src = read_svn_link(f.read()) - return filetype == b"link", src - - def _ra_codecs_error_handler(e: UnicodeError) -> Tuple[Union[str, bytes], int]: """Subvertpy may fail to decode to utf-8 the user svn properties. As they are not used by the loader, return an empty string instead @@ -161,25 +71,6 @@ SVN_PROPERTY_EOL = "svn:eol-style" -@dataclass -class FileState: - """Persists some file states (eg. end of lines style) across revisions while - replaying them.""" - - eol_style: Optional[str] = None - """EOL state check mess""" - - svn_special_path_non_link_data: Optional[bytes] = None - """keep track of non link file content with svn:special property set""" - - # default value: 0, 1: set the flag, 2: remove the exec flag - executable: int = DEFAULT_FLAG - """keep track if file is executable when setting svn:executable property""" - - link: bool = False - """keep track if file is a svn link when setting svn:special property""" - - class FileEditor: """File Editor in charge of updating file on disk and memory objects.""" @@ -199,163 +90,47 @@ directory: from_disk.Directory, rootpath: bytes, path: bytes, - state: FileState, svnrepo: SvnRepo, ): self.directory = directory self.path = path self.fullpath = os.path.join(rootpath, path) - self.state = state self.svnrepo = svnrepo - self.editor = svnrepo.swhreplay.editor + self.editor: Editor = svnrepo.swhreplay.editor def change_prop(self, key: str, value: str) -> None: if self.editor.debug: logger.debug( "Setting property %s to value %s on path %s", key, value, self.path ) - if key == properties.PROP_EXECUTABLE: - if value is None: # bit flip off - self.state.executable = NOEXEC_FLAG - else: - self.state.executable = EXEC_FLAG - elif key == properties.PROP_SPECIAL: - # Possibly a symbolic link. We cannot check further at - # that moment though, patch(s) not being applied yet - self.state.link = value is not None - elif key == SVN_PROPERTY_EOL: - # backup end of line style for file - self.state.eol_style = value - - def __make_symlink(self, src: bytes) -> None: - """Convert the svnlink to a symlink on disk. - - This function expects self.fullpath to be a svn link. - - Args: - src: Path to the link's source - - Return: - tuple: The svnlink's data tuple: - - - type (should be only 'link') - - - - """ - os.remove(self.fullpath) - os.symlink(src=src, dst=self.fullpath) - - def __make_svnlink(self) -> bytes: - """Convert the symlink to a svnlink on disk. - - Return: - The symlink's svnlink data (``b'type '``) - - """ - # we replace the symlink by a svnlink - # to be able to patch the file on future commits - src = os.readlink(self.fullpath) - os.remove(self.fullpath) - sbuf = b"link " + src - with open(self.fullpath, "wb") as f: - f.write(sbuf) - return sbuf def apply_textdelta(self, base_checksum) -> Callable[[Any, bytes, BinaryIO], None]: if self.editor.debug: logger.debug("Applying textdelta to file %s", self.path) - # if the filepath matches an external, do not apply local patch - if self.path in self.editor.external_paths: - return lambda *args: None - - if os.path.lexists(self.fullpath): - if os.path.islink(self.fullpath): - # svn does not deal with symlink so we transform into - # real svn symlink for potential patching in later - # commits - sbuf = self.__make_svnlink() - self.state.link = True - else: - with open(self.fullpath, "rb") as f: - sbuf = f.read() - else: - sbuf = b"" - - t = open(self.fullpath, "wb") - return apply_txdelta_handler(sbuf, target_stream=t) + # do not apply textdelta, file will be fully exported when closing the editor + return lambda *args: None def close(self) -> None: - """When done with the file, this is called. - - So the file exists and is updated, we can: - - - adapt accordingly its execution flag if any - - compute the objects' checksums - - replace the svnlink with a real symlink (for disk - computation purposes) + """When done with a file added or modified in the current replayed revision, + we export it to disk and update the from_disk model. """ if self.editor.debug: logger.debug("Closing file %s", self.path) - if self.state.link: - # can only check now that the link is a real one - # since patch has been applied - is_link, src = is_file_an_svnlink_p(self.fullpath) - if is_link: - self.__make_symlink(src) - elif not os.path.isdir(self.fullpath): # not a real link ... - # when a file with the svn:special property set is not a svn link, - # the svn export operation might extract a truncated version of it - # if it is a binary file, so ensure to produce the same file as the - # export operation. - with open(self.fullpath, "rb") as f: - content = f.read() - self.svnrepo.export( - os.path.join(self.svnrepo.remote_url, os.fsdecode(self.path)), - to=self.fullpath, - peg_rev=self.editor.revnum, - ignore_keywords=True, - overwrite=True, - ) - with open(self.fullpath, "rb") as f: - exported_data = f.read() - if exported_data != content: - # keep track of original file content in order to restore - # it if the svn:special property gets unset in another revision - self.state.svn_special_path_non_link_data = content - elif os.path.islink(self.fullpath): - # path was a symbolic link in previous revision but got the property - # svn:special unset in current one, revert its content to svn link format - self.__make_svnlink() - elif self.state.svn_special_path_non_link_data is not None: - # path was a non link file with the svn:special property previously set - # and got truncated on export, restore its original content - with open(self.fullpath, "wb") as f: - f.write(self.state.svn_special_path_non_link_data) - self.state.svn_special_path_non_link_data = None - - is_link = os.path.islink(self.fullpath) - if not is_link: # if a link, do nothing regarding flag - if self.state.executable == EXEC_FLAG: - os.chmod(self.fullpath, 0o755) - elif self.state.executable == NOEXEC_FLAG: - os.chmod(self.fullpath, 0o644) + if self.path not in self.editor.external_paths: + # export file to disk if its path does not match an external + self.svnrepo.export( + os.path.join(self.svnrepo.remote_url, os.fsdecode(self.path)), + to=self.fullpath, + rev=self.editor.revnum, + peg_rev=self.editor.revnum, + ignore_keywords=True, + overwrite=True, + ) # And now compute file's checksums - if self.state.eol_style and not is_link: - # ensure to normalize line endings as defined by svn:eol-style - # property to get the same file checksum as after an export - # or checkout operation with subversion - with open(self.fullpath, "rb") as f: - data = f.read() - data = _normalize_line_endings(data, self.state.eol_style) - mode = os.lstat(self.fullpath).st_mode - self.directory[self.path] = from_disk.Content.from_bytes( - mode=mode, data=data - ) - else: - self.directory[self.path] = from_disk.Content.from_file(path=self.fullpath) + self.directory[self.path] = from_disk.Content.from_file(path=self.fullpath) ExternalDefinition = Tuple[str, Optional[int], bool] @@ -384,7 +159,6 @@ "directory", "rootpath", "path", - "file_states", "dir_states", "svnrepo", "editor", @@ -396,7 +170,6 @@ directory: from_disk.Directory, rootpath: bytes, path: bytes, - file_states: Dict[bytes, FileState], dir_states: Dict[bytes, DirState], svnrepo: SvnRepo, ): @@ -405,7 +178,6 @@ self.path = path # build directory on init os.makedirs(rootpath, exist_ok=True) - self.file_states = file_states self.dir_states = dir_states self.svnrepo = svnrepo self.editor = svnrepo.swhreplay.editor @@ -432,14 +204,6 @@ else: os.remove(fpath) - # when deleting a directory ensure to remove any svn property for the - # file it contains as they can be added again later in another revision - # without the same property set - fullpath = os.path.join(self.rootpath, path) - for state_path in list(self.file_states): - if state_path.startswith(fullpath + b"/"): - del self.file_states[state_path] - def open_directory(self, path: str, *args) -> DirEditor: """Updating existing directory.""" if self.editor.debug: @@ -448,7 +212,6 @@ self.directory, rootpath=self.rootpath, path=os.fsencode(path), - file_states=self.file_states, dir_states=self.dir_states, svnrepo=self.svnrepo, ) @@ -489,7 +252,6 @@ self.directory, self.rootpath, path_bytes, - self.file_states, self.dir_states, svnrepo=self.svnrepo, ) @@ -501,12 +263,10 @@ path_bytes = os.fsencode(path) self.directory[path_bytes] = from_disk.Content() - fullpath = os.path.join(self.rootpath, path_bytes) return FileEditor( self.directory, rootpath=self.rootpath, path=path_bytes, - state=self.file_states[fullpath], svnrepo=self.svnrepo, ) @@ -525,7 +285,6 @@ path_bytes = os.fsencode(path) fullpath = os.path.join(self.rootpath, path_bytes) - self.file_states[fullpath] = FileState() if copyfrom_rev == -1: self.directory[path_bytes] = from_disk.Content() else: @@ -544,7 +303,6 @@ self.directory, self.rootpath, path_bytes, - state=self.file_states[fullpath], svnrepo=self.svnrepo, ) @@ -619,7 +377,6 @@ ) if path_bytes not in self.editor.external_paths: - self.file_states.pop(fullpath, None) self.remove_child(path_bytes) elif os.path.isdir(fullpath): # versioned and external paths can overlap so we need to iterate on @@ -629,7 +386,6 @@ full_repo_path = os.path.join(root, p) repo_path = full_repo_path.replace(self.rootpath + b"/", b"") if repo_path not in self.editor.external_paths: - self.file_states.pop(full_repo_path, None) self.remove_child(repo_path) def close(self): @@ -947,7 +703,6 @@ ): self.rootpath = rootpath self.directory = directory - self.file_states: Dict[bytes, FileState] = defaultdict(FileState) self.dir_states: Dict[bytes, DirState] = defaultdict(DirState) self.external_paths: Dict[bytes, int] = defaultdict(int) self.valid_externals: Dict[bytes, Tuple[str, bool]] = {} @@ -972,7 +727,6 @@ self.directory, rootpath=self.rootpath, path=b"", - file_states=self.file_states, dir_states=self.dir_states, svnrepo=self.svnrepo, ) diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py --- a/swh/loader/svn/svn.py +++ b/swh/loader/svn/svn.py @@ -113,6 +113,9 @@ # another for replay self.conn = self.remote_access(auth) + if not self.from_dump: + self.remote_url = self.info(self.remote_url).repos_root_url + self.local_dirname = local_dirname local_name = os.path.basename(self.remote_url) self.local_url = os.path.join(self.local_dirname, local_name).encode("utf-8") @@ -495,15 +498,12 @@ else: raise - if self.from_dump: - # when exporting a subpath of a subversion repository mounted from - # a dump file generated by svnrdump, exported paths are relative to - # the repository root path while they are relative to the subpath - # otherwise, so we need to adjust the URL of the exported filesystem - root_dir_local_url = os.path.join(local_url, self.root_directory.strip("/")) - # check that root directory of a subproject did not get removed in revision - if os.path.exists(root_dir_local_url): - local_url = root_dir_local_url + # exported paths are relative to the repository root path so we need to + # adjust the URL of the exported filesystem + root_dir_local_url = os.path.join(local_url, self.root_directory.strip("/")) + # check that root directory of a subproject did not get removed in revision + if os.path.exists(root_dir_local_url): + local_url = root_dir_local_url return local_dirname, os.fsencode(local_url) diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -2291,6 +2291,79 @@ check_snapshot(loader.snapshot, loader.storage) +@pytest.mark.parametrize("svn_loader_cls", [SvnLoader, SvnLoaderFromRemoteDump]) +def test_loader_repo_with_copyfrom_operations_and_eol_style( + swh_storage, repo_url, tmp_path, svn_loader_cls +): + add_commit( + repo_url, + "Create trunk/code/foo file", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/code/foo", + data=b"foo\n", + properties={"svn:eol-style": "CRLF"}, + ), + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="branches/code/", + ), + ], + ) + + add_commit( + repo_url, + "Modify svn:eol-style property for the trunk/code/foo file", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/code/foo", + properties={"svn:eol-style": "native"}, + ), + ], + ) + + add_commit( + repo_url, + "Copy trunk/code/foo folder from revision 1", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="branches/code/foo", + copyfrom_path=repo_url + "/trunk/code/foo", + copyfrom_rev=1, + ), + ], + ) + + add_commit( + repo_url, + "Modify branches/code/foo previously copied", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="branches/code/foo", + data=b"foo\r\nbar\n", + ), + ], + ) + + loader = svn_loader_cls( + swh_storage, repo_url, temp_directory=tmp_path, check_revision=1 + ) + + assert loader.load() == {"status": "eventful"} + + assert_last_visit_matches( + loader.storage, + repo_url, + status="full", + type="svn", + ) + check_snapshot(loader.snapshot, loader.storage) + + def test_loader_check_tree_divergence(swh_storage, repo_url, tmp_path, caplog): # create sample repository add_commit(