diff --git a/swh/loader/svn/__init__.py b/swh/loader/svn/__init__.py index 0204bc7..ac42897 100644 --- a/swh/loader/svn/__init__.py +++ b/swh/loader/svn/__init__.py @@ -7,9 +7,9 @@ from typing import Any, Dict def register() -> Dict[str, Any]: - from swh.loader.svn.loader import SvnLoaderFromRemoteDump + from swh.loader.svn.loader import SvnLoader return { "task_modules": ["%s.tasks" % __name__], - "loader": SvnLoaderFromRemoteDump, + "loader": SvnLoader, } diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py index ae6ff91..e6145f7 100644 --- a/swh/loader/svn/loader.py +++ b/swh/loader/svn/loader.py @@ -8,6 +8,7 @@ swh-storage. """ from datetime import datetime +import difflib import os import pty import re @@ -153,9 +154,13 @@ Local repository not cleaned up for investigation: %s""", """ assert self.svnrepo is not None - local_dirname, local_url = self.svnrepo.export_temporary(revision) - root_dir = from_disk.Directory.from_disk(path=local_url) - self.svnrepo.clean_fs(local_dirname) + local_dirname, local_url = self.svnrepo.export_temporary( + revision, checkout=True + ) + root_dir = from_disk.Directory.from_disk( + path=local_url, + dir_filter=from_disk.ignore_directories_patterns(local_url, [b"*.svn"]), + ) return root_dir def _latest_snapshot_revision( @@ -315,7 +320,7 @@ Local repository not cleaned up for investigation: %s""", # before the last revision to load) if self.debug and dir_id == dir.hash: for obj in checked_dir.iter_tree(): - path = obj.data["path"].replace(checked_dir.data["path"], b"")[1:] + path = obj.data["path"].replace(checked_dir.data["path"], b"") if not path: # ignore root directory continue @@ -331,6 +336,28 @@ Local repository not cleaned up for investigation: %s""", obj.object_type, # type: ignore path, ) + if obj.object_type == "content": # type: ignore + self.log.debug( + "expected: %s, actual: %s", + hashutil.hash_to_hex(checked_dir[path].data["sha1"]), + hashutil.hash_to_hex(dir[path].data["sha1"]), + ) + with open( + checked_dir[path].data["path"], "rb" + ) as exported_file, open( + dir[path].data["path"], "rb" + ) as checkout_file: + diff_lines = difflib.diff_bytes( + difflib.unified_diff, + exported_file.read().split(b"\n"), + checkout_file.read().split(b"\n"), + ) + self.log.debug( + "\n" + os.fsdecode(b"\n".join(list(diff_lines)[2:])) + ) + + assert self.svnrepo is not None + self.svnrepo.clean_fs(checked_dir.data["path"]) err = ( "Hash tree computation divergence detected at revision %s " "(%s != %s), stopping!" diff --git a/swh/loader/svn/replay.py b/swh/loader/svn/replay.py index fd11be3..0978f73 100644 --- a/swh/loader/svn/replay.py +++ b/swh/loader/svn/replay.py @@ -12,8 +12,7 @@ from __future__ import annotations import codecs from collections import defaultdict from dataclasses import dataclass, field -from distutils.dir_util import copy_tree -from itertools import chain +import locale import logging import os import shutil @@ -33,7 +32,7 @@ from typing import ( ) import click -from subvertpy import SubversionException, delta, properties +from subvertpy import SubversionException, properties from subvertpy.ra import Auth, RemoteAccess, get_username_provider from swh.model import from_disk, hashutil @@ -43,105 +42,11 @@ from swh.model.model import Content, Directory, SkippedContent if TYPE_CHECKING: from swh.loader.svn.svn import SvnRepo -from swh.loader.svn.utils import ( - is_recursive_external, - parse_external_definition, - svn_urljoin, -) - -_eol_style = {"native": b"\n", "CRLF": b"\r\n", "LF": b"\n", "CR": b"\r"} +from swh.loader.svn.utils import is_recursive_external, parse_external_definition logger = logging.getLogger(__name__) -def _normalize_line_endings(lines: bytes, eol_style: str = "native") -> bytes: - r"""Normalize line endings to unix (\\n), windows (\\r\\n) or mac (\\r). - - Args: - lines: The lines to normalize - - eol_style: The line ending format as defined for - svn:eol-style property. Acceptable values are 'native', - 'CRLF', 'LF' and 'CR' - - Returns: - Lines with endings normalized - """ - if eol_style in _eol_style: - lines = lines.replace(_eol_style["CRLF"], _eol_style["LF"]).replace( - _eol_style["CR"], _eol_style["LF"] - ) - if _eol_style[eol_style] != _eol_style["LF"]: - lines = lines.replace(_eol_style["LF"], _eol_style[eol_style]) - - return lines - - -def apply_txdelta_handler( - sbuf: bytes, target_stream: BinaryIO -) -> Callable[[Any, bytes, BinaryIO], None]: - """Return a function that can be called repeatedly with txdelta windows. - When done, closes the target_stream. - - Adapted from subvertpy.delta.apply_txdelta_handler to close the - stream when done. - - Args: - sbuf: Source buffer - target_stream: Target stream to write to. - - Returns: - Function to be called to apply txdelta windows - - """ - - def apply_window( - window: Any, sbuf: bytes = sbuf, target_stream: BinaryIO = target_stream - ): - if window is None: - target_stream.close() - return # Last call - patch = delta.apply_txdelta_window(sbuf, window) - target_stream.write(patch) - - return apply_window - - -def read_svn_link(data: bytes) -> Tuple[bytes, bytes]: - """Read the svn link's content. - - Args: - data: svn link's raw content - - Returns: - The tuple of (filetype, destination path) - - """ - split_byte = b" " - first_line = data.split(b"\n")[0] - filetype, *src = first_line.split(split_byte) - target = split_byte.join(src) - return filetype, target - - -def is_file_an_svnlink_p(fullpath: bytes) -> Tuple[bool, bytes]: - """Determine if a filepath is an svnlink or something else. - - Args: - fullpath: Full path to the potential symlink to check - - Returns: - Tuple containing a boolean value to determine if it's indeed a symlink - (as per svn) and the link target. - - """ - if os.path.islink(fullpath): - return False, b"" - with open(fullpath, "rb") as f: - filetype, src = read_svn_link(f.read()) - return filetype == b"link", src - - def _ra_codecs_error_handler(e: UnicodeError) -> Tuple[Union[str, bytes], int]: """Subvertpy may fail to decode to utf-8 the user svn properties. As they are not used by the loader, return an empty string instead @@ -154,32 +59,6 @@ def _ra_codecs_error_handler(e: UnicodeError) -> Tuple[Union[str, bytes], int]: return "", cast(UnicodeDecodeError, e).end -DEFAULT_FLAG = 0 -EXEC_FLAG = 1 -NOEXEC_FLAG = 2 - -SVN_PROPERTY_EOL = "svn:eol-style" - - -@dataclass -class FileState: - """Persists some file states (eg. end of lines style) across revisions while - replaying them.""" - - eol_style: Optional[str] = None - """EOL state check mess""" - - svn_special_path_non_link_data: Optional[bytes] = None - """keep track of non link file content with svn:special property set""" - - # default value: 0, 1: set the flag, 2: remove the exec flag - executable: int = DEFAULT_FLAG - """keep track if file is executable when setting svn:executable property""" - - link: bool = False - """keep track if file is a svn link when setting svn:special property""" - - class FileEditor: """File Editor in charge of updating file on disk and memory objects.""" @@ -199,13 +78,12 @@ class FileEditor: directory: from_disk.Directory, rootpath: bytes, path: bytes, - state: FileState, svnrepo: SvnRepo, ): self.directory = directory self.path = path self.fullpath = os.path.join(rootpath, path) - self.state = state + self.svnrepo = svnrepo self.editor = svnrepo.swhreplay.editor @@ -214,75 +92,12 @@ class FileEditor: logger.debug( "Setting property %s to value %s on path %s", key, value, self.path ) - if key == properties.PROP_EXECUTABLE: - if value is None: # bit flip off - self.state.executable = NOEXEC_FLAG - else: - self.state.executable = EXEC_FLAG - elif key == properties.PROP_SPECIAL: - # Possibly a symbolic link. We cannot check further at - # that moment though, patch(s) not being applied yet - self.state.link = value is not None - elif key == SVN_PROPERTY_EOL: - # backup end of line style for file - self.state.eol_style = value - - def __make_symlink(self, src: bytes) -> None: - """Convert the svnlink to a symlink on disk. - - This function expects self.fullpath to be a svn link. - - Args: - src: Path to the link's source - - Return: - tuple: The svnlink's data tuple: - - - type (should be only 'link') - - - - """ - os.remove(self.fullpath) - os.symlink(src=src, dst=self.fullpath) - - def __make_svnlink(self) -> bytes: - """Convert the symlink to a svnlink on disk. - - Return: - The symlink's svnlink data (``b'type '``) - - """ - # we replace the symlink by a svnlink - # to be able to patch the file on future commits - src = os.readlink(self.fullpath) - os.remove(self.fullpath) - sbuf = b"link " + src - with open(self.fullpath, "wb") as f: - f.write(sbuf) - return sbuf def apply_textdelta(self, base_checksum) -> Callable[[Any, bytes, BinaryIO], None]: if self.editor.debug: logger.debug("Applying textdelta to file %s", self.path) - # if the filepath matches an external, do not apply local patch - if self.path in self.editor.external_paths: - return lambda *args: None - - if os.path.lexists(self.fullpath): - if os.path.islink(self.fullpath): - # svn does not deal with symlink so we transform into - # real svn symlink for potential patching in later - # commits - sbuf = self.__make_svnlink() - self.state.link = True - else: - with open(self.fullpath, "rb") as f: - sbuf = f.read() - else: - sbuf = b"" - t = open(self.fullpath, "wb") - return apply_txdelta_handler(sbuf, target_stream=t) + return lambda *args: None def close(self) -> None: """When done with the file, this is called. @@ -298,63 +113,7 @@ class FileEditor: if self.editor.debug: logger.debug("Closing file %s", self.path) - if self.state.link: - # can only check now that the link is a real one - # since patch has been applied - is_link, src = is_file_an_svnlink_p(self.fullpath) - if is_link: - self.__make_symlink(src) - elif not os.path.isdir(self.fullpath): # not a real link ... - # when a file with the svn:special property set is not a svn link, - # the svn export operation might extract a truncated version of it - # if it is a binary file, so ensure to produce the same file as the - # export operation. - with open(self.fullpath, "rb") as f: - content = f.read() - self.svnrepo.export( - os.path.join(self.svnrepo.remote_url, os.fsdecode(self.path)), - to=self.fullpath, - peg_rev=self.editor.revnum, - ignore_keywords=True, - overwrite=True, - ) - with open(self.fullpath, "rb") as f: - exported_data = f.read() - if exported_data != content: - # keep track of original file content in order to restore - # it if the svn:special property gets unset in another revision - self.state.svn_special_path_non_link_data = content - elif os.path.islink(self.fullpath): - # path was a symbolic link in previous revision but got the property - # svn:special unset in current one, revert its content to svn link format - self.__make_svnlink() - elif self.state.svn_special_path_non_link_data is not None: - # path was a non link file with the svn:special property previously set - # and got truncated on export, restore its original content - with open(self.fullpath, "wb") as f: - f.write(self.state.svn_special_path_non_link_data) - self.state.svn_special_path_non_link_data = None - - is_link = os.path.islink(self.fullpath) - if not is_link: # if a link, do nothing regarding flag - if self.state.executable == EXEC_FLAG: - os.chmod(self.fullpath, 0o755) - elif self.state.executable == NOEXEC_FLAG: - os.chmod(self.fullpath, 0o644) - - # And now compute file's checksums - if self.state.eol_style and not is_link: - # ensure to normalize line endings as defined by svn:eol-style - # property to get the same file checksum as after an export - # or checkout operation with subversion - with open(self.fullpath, "rb") as f: - data = f.read() - data = _normalize_line_endings(data, self.state.eol_style) - mode = os.lstat(self.fullpath).st_mode - self.directory[self.path] = from_disk.Content.from_bytes( - mode=mode, data=data - ) - else: + if self.editor.update_directory_model: self.directory[self.path] = from_disk.Content.from_file(path=self.fullpath) @@ -384,7 +143,6 @@ class DirEditor: "directory", "rootpath", "path", - "file_states", "dir_states", "svnrepo", "editor", @@ -396,7 +154,6 @@ class DirEditor: directory: from_disk.Directory, rootpath: bytes, path: bytes, - file_states: Dict[bytes, FileState], dir_states: Dict[bytes, DirState], svnrepo: SvnRepo, ): @@ -405,41 +162,11 @@ class DirEditor: self.path = path # build directory on init os.makedirs(rootpath, exist_ok=True) - self.file_states = file_states self.dir_states = dir_states self.svnrepo = svnrepo self.editor = svnrepo.swhreplay.editor self.externals: Dict[str, List[ExternalDefinition]] = {} - def remove_child(self, path: bytes) -> None: - """Remove a path from the current objects. - - The path can be resolved as link, file or directory. - - This function takes also care of removing the link between the - child and the parent. - - Args: - path: to remove from the current objects. - - """ - if path in self.directory: - entry_removed = self.directory[path] - del self.directory[path] - fpath = os.path.join(self.rootpath, path) - if isinstance(entry_removed, from_disk.Directory): - shutil.rmtree(fpath) - else: - os.remove(fpath) - - # when deleting a directory ensure to remove any svn property for the - # file it contains as they can be added again later in another revision - # without the same property set - fullpath = os.path.join(self.rootpath, path) - for state_path in list(self.file_states): - if state_path.startswith(fullpath + b"/"): - del self.file_states[state_path] - def open_directory(self, path: str, *args) -> DirEditor: """Updating existing directory.""" if self.editor.debug: @@ -448,7 +175,6 @@ class DirEditor: self.directory, rootpath=self.rootpath, path=os.fsencode(path), - file_states=self.file_states, dir_states=self.dir_states, svnrepo=self.svnrepo, ) @@ -466,31 +192,23 @@ class DirEditor: ) path_bytes = os.fsencode(path) - fullpath = os.path.join(self.rootpath, path_bytes) - os.makedirs(fullpath, exist_ok=True) - if copyfrom_rev == -1: - if path_bytes and path_bytes not in self.directory: - self.dir_states[path_bytes] = DirState() + if ( + self.editor.update_directory_model + and path_bytes + and path_bytes not in self.directory + ): + if copyfrom_rev == -1: self.directory[path_bytes] = from_disk.Directory() - else: - url = svn_urljoin(self.svnrepo.remote_url, copyfrom_path) - self.remove_child(path_bytes) - self.svnrepo.export( - url, - to=fullpath, - peg_rev=copyfrom_rev, - ignore_keywords=True, - overwrite=True, - ignore_externals=True, - ) - self.directory[path_bytes] = from_disk.Directory.from_disk(path=fullpath) + else: + self.directory[path_bytes] = from_disk.Directory.from_disk( + path=os.path.join(self.editor.rootpath, path_bytes) + ) return DirEditor( self.directory, self.rootpath, path_bytes, - self.file_states, self.dir_states, svnrepo=self.svnrepo, ) @@ -501,13 +219,13 @@ class DirEditor: logger.debug("Opening file %s", path) path_bytes = os.fsencode(path) - self.directory[path_bytes] = from_disk.Content() - fullpath = os.path.join(self.rootpath, path_bytes) + if self.editor.update_directory_model: + self.directory[path_bytes] = from_disk.Content() + return FileEditor( self.directory, rootpath=self.rootpath, path=path_bytes, - state=self.file_states[fullpath], svnrepo=self.svnrepo, ) @@ -524,28 +242,13 @@ class DirEditor: ) path_bytes = os.fsencode(path) - fullpath = os.path.join(self.rootpath, path_bytes) - - self.file_states[fullpath] = FileState() - if copyfrom_rev == -1: + if self.editor.update_directory_model: self.directory[path_bytes] = from_disk.Content() - else: - url = svn_urljoin(self.svnrepo.remote_url, copyfrom_path) - self.remove_child(path_bytes) - self.svnrepo.export( - url, - to=fullpath, - peg_rev=copyfrom_rev, - ignore_keywords=True, - overwrite=True, - ) - self.directory[path_bytes] = from_disk.Content.from_file(path=fullpath) return FileEditor( self.directory, self.rootpath, path_bytes, - state=self.file_states[fullpath], svnrepo=self.svnrepo, ) @@ -558,6 +261,8 @@ class DirEditor: value, self.path, ) + + self.editor.externals_modified = True self.externals = defaultdict(list) if value is not None: try: @@ -590,15 +295,6 @@ class DirEditor: # of parsing error self.externals = {} - if not self.externals: - # externals might have been unset on that directory path, - # remove associated paths from the reconstructed filesystem - externals = self.dir_states[self.path].externals - for path in externals.keys(): - self.remove_external_path(os.fsencode(path)) - - self.dir_states[self.path].externals = {} - def delete_entry(self, path: str, revision: int) -> None: """Remove a path.""" if self.editor.debug: @@ -606,33 +302,20 @@ class DirEditor: path_bytes = os.fsencode(path) fullpath = os.path.join(self.rootpath, path_bytes) - - if os.path.isdir(fullpath): - # remove all external paths associated to the removed directory - # (we cannot simply remove a root external directory as externals - # paths associated to ancestor directories can overlap) - for external_path in self.dir_states[path_bytes].externals_paths: - self.remove_external_path( - external_path, - root_path=path_bytes, - remove_subpaths=False, - force=True, + if self.editor.update_directory_model and path_bytes in self.directory: + if not os.path.exists(fullpath): + del self.directory[path_bytes] + else: + # externals might overlap with removed versioned path so ensure + # to update directory model + fullpath = os.path.join(self.rootpath, self.path) + self.directory[self.path] = from_disk.Directory.from_disk( + path=fullpath, + dir_filter=from_disk.ignore_directories_patterns( + self.rootpath, [b"*.svn"] + ), ) - if path_bytes not in self.editor.external_paths: - self.file_states.pop(fullpath, None) - self.remove_child(path_bytes) - elif os.path.isdir(fullpath): - # versioned and external paths can overlap so we need to iterate on - # all subpaths to check which ones to remove - for root, dirs, files in os.walk(fullpath): - for p in chain(dirs, files): - full_repo_path = os.path.join(root, p) - repo_path = full_repo_path.replace(self.rootpath + b"/", b"") - if repo_path not in self.editor.external_paths: - self.file_states.pop(full_repo_path, None) - self.remove_child(repo_path) - def close(self): """Function called when we finish processing a repository. @@ -641,53 +324,51 @@ class DirEditor: if self.editor.debug: logger.debug("Closing directory %s", self.path) - prev_externals = self.dir_states[self.path].externals - - if self.externals: - # externals definition list might have changed in the current replayed - # revision, we need to determine if some were removed and delete the - # associated paths - externals = self.externals - prev_externals_set = { - (path, url, rev) - for path in prev_externals.keys() - for (url, rev, _) in prev_externals[path] - } - externals_set = { - (path, url, rev) - for path in externals.keys() - for (url, rev, _) in externals[path] - } - old_externals = prev_externals_set - externals_set - for path, _, _ in old_externals: - self.remove_external_path(os.fsencode(path)) - else: - # some external paths might have been removed in the current replayed - # revision by a delete operation on an overlapping versioned path so we - # need to restore them - externals = prev_externals - - # For each external, try to export it in reconstructed filesystem - for path, externals_def in externals.items(): - for i, external in enumerate(externals_def): - external_url, revision, relative_url = external - self.process_external( - path, - external_url, - revision, - relative_url, - remove_target_path=i == 0, - ) + for path, externals in self.dir_states[self.path].externals.items(): + for external_url, _, _ in externals: + if external_url not in self.editor.valid_external: + try: + self.svnrepo.info(external_url) + self.editor.valid_external[external_url] = True + except SubversionException: + self.editor.valid_external[external_url] = False + + if not self.editor.valid_external[external_url]: + # external could not be exported, ensure to remove its path if it exists + fullpath = os.path.join(self.rootpath, self.path, os.fsencode(path)) + if os.path.exists(fullpath) and os.path.isdir(fullpath): + shutil.rmtree(fullpath) + # update revision filesystem in case versioned files got removed + self.svnrepo.client.update( + path=self.editor.rootpath, + revision=self.editor.revnum, + ignore_externals=self.svnrepo.has_recursive_externals, + ) - # backup externals in directory state - if self.externals: + if self.editor.update_directory_model and ( + self.externals or self.dir_states[self.path].externals + ): + dir = from_disk.Directory.from_disk( + path=os.path.join(self.rootpath, self.path), + dir_filter=from_disk.ignore_directories_patterns( + self.rootpath, [b"*.svn"] + ), + ) + if self.path: + self.directory[self.path] = dir + else: + self.directory = dir + elif not self.editor.update_directory_model: self.dir_states[self.path].externals = self.externals # do operations below only when closing the root directory if self.path == b"": + self.svnrepo.has_relative_externals = any( relative_url - for (_, relative_url) in self.editor.valid_externals.values() + for path, dir_state in self.dir_states.items() + for external_path in dir_state.externals.keys() + for (_, _, relative_url) in dir_state.externals[external_path] ) self.svnrepo.has_recursive_externals = any( @@ -701,232 +382,6 @@ class DirEditor: for external_path in dir_state.externals.keys() for (external_url, _, _) in dir_state.externals[external_path] ) - if self.svnrepo.has_recursive_externals: - # If the repository has recursive externals, we stop processing - # externals and remove those already exported, - # We will then ignore externals when exporting the revision to - # check for divergence with the reconstructed filesystem. - for external_path in list(self.editor.external_paths): - self.remove_external_path(external_path, force=True) - - def process_external( - self, - path: str, - external_url: str, - revision: Optional[int], - relative_url: bool, - remove_target_path: bool = True, - ) -> None: - external = (external_url, revision, relative_url) - dest_path = os.fsencode(path) - dest_fullpath = os.path.join(self.path, dest_path) - prev_externals = self.dir_states[self.path].externals - if ( - path in prev_externals - and external in prev_externals[path] - and dest_fullpath in self.directory - ): - # external already exported, nothing to do - return - - if is_recursive_external( - self.svnrepo.origin_url, os.fsdecode(self.path), path, external_url - ): - # recursive external, skip it - return - - logger.debug( - "Exporting external %s%s to path %s", - external_url, - f"@{revision}" if revision else "", - dest_fullpath, - ) - - if external not in self.editor.externals_cache: - - try: - # try to export external in a temporary path, destination path could - # be versioned and must be overridden only if the external URL is - # still valid - temp_dir = os.fsencode( - tempfile.mkdtemp(dir=self.editor.externals_cache_dir) - ) - temp_path = os.path.join(temp_dir, dest_path) - os.makedirs(b"/".join(temp_path.split(b"/")[:-1]), exist_ok=True) - if external_url not in self.editor.dead_externals: - url = external_url.rstrip("/") - origin_url = self.svnrepo.origin_url.rstrip("/") - if ( - url.startswith(origin_url + "/") - and not self.svnrepo.has_relative_externals - ): - url = url.replace(origin_url, self.svnrepo.remote_url) - self.svnrepo.export( - url, - to=temp_path, - peg_rev=revision, - ignore_keywords=True, - ) - self.editor.externals_cache[external] = temp_path - - except SubversionException as se: - # external no longer available (404) - logger.debug(se) - self.editor.dead_externals.add(external_url) - - else: - temp_path = self.editor.externals_cache[external] - - # subversion export will always create the subdirectories of the external - # path regardless the validity of the remote URL - dest_path_split = dest_path.split(b"/") - current_path = self.path - self.add_directory(os.fsdecode(current_path)) - for subpath in dest_path_split[:-1]: - current_path = os.path.join(current_path, subpath) - self.add_directory(os.fsdecode(current_path)) - - if os.path.exists(temp_path): - # external successfully exported - - if remove_target_path: - # remove previous path in from_disk model - self.remove_external_path(dest_path, remove_subpaths=False) - - # mark external as valid - self.editor.valid_externals[dest_fullpath] = ( - external_url, - relative_url, - ) - - # copy exported path to reconstructed filesystem - fullpath = os.path.join(self.rootpath, dest_fullpath) - - if os.path.isfile(temp_path): - if os.path.islink(fullpath): - # remove destination file if it is a link - os.remove(fullpath) - shutil.copy(os.fsdecode(temp_path), os.fsdecode(fullpath)) - self.directory[dest_fullpath] = from_disk.Content.from_file( - path=fullpath - ) - else: - self.add_directory(os.fsdecode(dest_fullpath)) - - # copy_tree needs sub-directories to exist in destination - for root, dirs, files in os.walk(temp_path): - for dir in dirs: - temp_dir_fullpath = os.path.join(root, dir) - if os.path.islink(temp_dir_fullpath): - # do not create folder if it's a link or copy_tree will fail - continue - subdir = temp_dir_fullpath.replace(temp_path + b"/", b"") - self.add_directory( - os.fsdecode(os.path.join(dest_fullpath, subdir)) - ) - - copy_tree( - os.fsdecode(temp_path), - os.fsdecode(fullpath), - preserve_symlinks=True, - ) - - # TODO: replace code above by the line below once we use Python >= 3.8 in production # noqa - # shutil.copytree(temp_path, fullpath, symlinks=True, dirs_exist_ok=True) # noqa - - self.directory[dest_fullpath] = from_disk.Directory.from_disk( - path=fullpath - ) - - # update set of external paths reachable from the directory - external_paths = set() - dest_path_part = dest_path.split(b"/") - for i in range(1, len(dest_path_part) + 1): - external_paths.add(b"/".join(dest_path_part[:i])) - - for root, dirs, files in os.walk(temp_path): - external_paths.update( - [ - os.path.join( - dest_path, - os.path.join(root, p).replace(temp_path, b"").strip(b"/"), - ) - for p in chain(dirs, files) - ] - ) - - self.dir_states[self.path].externals_paths.update(external_paths) - - for external_path in external_paths: - self.editor.external_paths[os.path.join(self.path, external_path)] += 1 - - # ensure hash update for the directory with externals set - self.directory[self.path].update_hash(force=True) - - def remove_external_path( - self, - external_path: bytes, - remove_subpaths: bool = True, - force: bool = False, - root_path: Optional[bytes] = None, - ) -> None: - """Remove a previously exported SVN external path from - the reconstructed filesystem. - """ - path = root_path if root_path else self.path - fullpath = os.path.join(path, external_path) - - # decrement number of references for external path when we really remove it - # (when remove_subpaths is False, we just cleanup the external path before - # copying exported paths in it) - if force or (fullpath in self.editor.external_paths and remove_subpaths): - self.editor.external_paths[fullpath] -= 1 - - if ( - fullpath in self.editor.external_paths - and self.editor.external_paths[fullpath] == 0 - ): - self.remove_child(fullpath) - self.editor.external_paths.pop(fullpath, None) - self.editor.valid_externals.pop(fullpath, None) - for path in list(self.editor.external_paths): - if path.startswith(fullpath + b"/"): - self.editor.external_paths[path] -= 1 - if self.editor.external_paths[path] == 0: - self.editor.external_paths.pop(path) - - if remove_subpaths: - subpath_split = fullpath.split(b"/")[:-1] - for i in reversed(range(1, len(subpath_split) + 1)): - # delete external sub-directory only if it is not versioned - subpath = b"/".join(subpath_split[0:i]) - try: - self.svnrepo.client.info( - svn_urljoin(self.svnrepo.remote_url, os.fsdecode(subpath)), - peg_revision=self.editor.revnum, - revision=self.editor.revnum, - ) - except SubversionException: - self.remove_child(subpath) - else: - break - - try: - # externals can overlap with versioned files so we must restore - # them after removing the path above - dest_path = os.path.join(self.rootpath, fullpath) - self.svnrepo.client.export( - svn_urljoin(self.svnrepo.remote_url, os.fsdecode(fullpath)), - to=dest_path, - peg_rev=self.editor.revnum, - ignore_keywords=True, - ) - if os.path.isfile(dest_path) or os.path.islink(dest_path): - self.directory[fullpath] = from_disk.Content.from_file(path=dest_path) - else: - self.directory[fullpath] = from_disk.Directory.from_disk(path=dest_path) - except SubversionException: - pass class Editor: @@ -943,21 +398,17 @@ class Editor: rootpath: bytes, directory: from_disk.Directory, svnrepo: SvnRepo, - temp_dir: str, debug: bool = False, ): self.rootpath = rootpath self.directory = directory - self.file_states: Dict[bytes, FileState] = defaultdict(FileState) self.dir_states: Dict[bytes, DirState] = defaultdict(DirState) - self.external_paths: Dict[bytes, int] = defaultdict(int) - self.valid_externals: Dict[bytes, Tuple[str, bool]] = {} - self.dead_externals: Set[str] = set() - self.externals_cache_dir = tempfile.mkdtemp(dir=temp_dir) - self.externals_cache: Dict[ExternalDefinition, bytes] = {} + self.valid_external: Dict[str, bool] = {} self.svnrepo = svnrepo self.revnum = None self.debug = debug + self.update_directory_model = False + self.externals_modified = False def set_target_revision(self, revnum) -> None: self.revnum = revnum @@ -973,7 +424,6 @@ class Editor: self.directory, rootpath=self.rootpath, path=b"", - file_states=self.file_states, dir_states=self.dir_states, svnrepo=self.svnrepo, ) @@ -987,7 +437,6 @@ class Replay: conn: RemoteAccess, rootpath: bytes, svnrepo: SvnRepo, - temp_dir: str, directory: Optional[from_disk.Directory] = None, debug: bool = False, ): @@ -996,15 +445,19 @@ class Replay: if directory is None: directory = from_disk.Directory() self.directory = directory + self.svnrepo = svnrepo self.editor = Editor( rootpath=rootpath, directory=directory, svnrepo=svnrepo, - temp_dir=temp_dir, debug=debug, ) - def replay(self, rev: int, low_water_mark: int) -> from_disk.Directory: + def replay( + self, + rev: int, + low_water_mark: int, + ) -> from_disk.Directory: """Replay svn actions between rev and rev+1. This method updates in place the self.editor.directory, as well as the @@ -1014,8 +467,59 @@ class Replay: The updated root directory """ + + locale.setlocale(locale.LC_ALL, "C") + codecs.register_error("strict", _ra_codecs_error_handler) + + self.editor.update_directory_model = False + self.editor.externals_modified = False + self.conn.replay(rev, low_water_mark, self.editor) + + first_checkout = not os.path.exists(os.path.join(self.editor.rootpath, b".svn")) + + try: + + for _ in range(2 if self.editor.externals_modified else 1): + self.svnrepo.checkout( + url=self.svnrepo.remote_url + if not self.svnrepo.has_relative_externals + else self.svnrepo.origin_url, + path=os.fsdecode(self.editor.rootpath), + rev=rev, + peg_rev=rev, + ignore_externals=( + not self.editor.externals_modified + or self.svnrepo.has_recursive_externals + ), + ) + + if first_checkout: + self.directory = self.editor.directory = from_disk.Directory.from_disk( + path=self.editor.rootpath, + dir_filter=from_disk.ignore_directories_patterns( + self.editor.rootpath, [b"*.svn"] + ), + ) + + except SubversionException as se: + if se.args[0].startswith( + ( + "Error parsing svn:externals property", + "Unrecognized format for the relative external URL", + ) + ): + pass + else: + raise + + self.editor.update_directory_model = True + debug = self.editor.debug + # self.editor.debug = False + self.conn.replay(rev, low_water_mark, self.editor, False) + self.editor.debug = debug + codecs.register_error("strict", codecs.strict_errors) return self.editor.directory @@ -1051,6 +555,9 @@ class Replay: else: assert False, obj_type + logger.debug("%s contents collected", len(contents)) + logger.debug("%s directories collected", len(directories)) + return contents, skipped_contents, directories diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py index f88d523..3cad47d 100644 --- a/swh/loader/svn/svn.py +++ b/swh/loader/svn/svn.py @@ -113,16 +113,22 @@ class SvnRepo: # another for replay self.conn = self.remote_access(auth) + self.remote_url = self.info(self.remote_url).repos_root_url + self.local_dirname = local_dirname local_name = os.path.basename(self.remote_url) self.local_url = os.path.join(self.local_dirname, local_name).encode("utf-8") + # compute root directory path from the remote repository URL, required to + # properly load the sub-tree of a repository mounted from a dump file + repos_root_url = self.info(self.origin_url).repos_root_url + self.root_directory = self.origin_url.rstrip("/").replace(repos_root_url, "", 1) + self.uuid = self.conn.get_uuid().encode("utf-8") self.swhreplay = replay.Replay( conn=self.conn, rootpath=self.local_url, svnrepo=self, - temp_dir=local_dirname, debug=debug, ) self.max_content_length = max_content_length @@ -130,11 +136,6 @@ class SvnRepo: self.has_recursive_externals = False self.replay_started = False - # compute root directory path from the remote repository URL, required to - # properly load the sub-tree of a repository mounted from a dump file - repos_root_url = self.info(self.origin_url).repos_root_url - self.root_directory = self.origin_url.rstrip("/").replace(repos_root_url, "", 1) - def __str__(self): return str( { @@ -399,7 +400,9 @@ class SvnRepo: """ return self.client.propget(name, target, peg_rev, rev, recurse) - def export_temporary(self, revision: int) -> Tuple[str, bytes]: + def export_temporary( + self, revision: int, checkout: bool = False + ) -> Tuple[str, bytes]: """Export the repository to a given revision in a temporary location. This is up to the caller of this function to clean up the temporary location when done (cf. self.clean_fs method) @@ -431,59 +434,71 @@ class SvnRepo: # properties are set from a checkout of the revision and if some # external URLs are relative to pick the right export URL, # recursive externals are also checked - with tempfile.TemporaryDirectory( - dir=self.local_dirname, prefix=f"checkout-revision-{revision}." - ) as co_dirname: - self.checkout( - self.remote_url, co_dirname, revision, ignore_externals=True - ) - # get all svn:externals properties recursively - externals = self.propget("svn:externals", co_dirname, None, None, True) - self.has_relative_externals = False - self.has_recursive_externals = False - for path, external_defs in externals.items(): - if self.has_relative_externals or self.has_recursive_externals: + self.checkout(self.remote_url, local_url, revision, ignore_externals=True) + # get all svn:externals properties recursively + externals = self.propget("svn:externals", local_url, None, None, True) + self.has_relative_externals = False + self.has_recursive_externals = False + for path, external_defs in externals.items(): + if self.has_relative_externals or self.has_recursive_externals: + break + path = path.replace(self.remote_url.rstrip("/") + "/", "") + for external_def in os.fsdecode(external_defs).split("\n"): + # skip empty line or comment + if not external_def or external_def.startswith("#"): + continue + ( + external_path, + external_url, + _, + relative_url, + ) = parse_external_definition( + external_def.rstrip("\r"), path, self.origin_url + ) + + if is_recursive_external( + self.origin_url, + path, + external_path, + external_url, + ): + self.has_recursive_externals = True + url = self.remote_url + break + + if relative_url: + self.has_relative_externals = True + url = self.origin_url break - path = path.replace(self.remote_url.rstrip("/") + "/", "") - for external_def in os.fsdecode(external_defs).split("\n"): - # skip empty line or comment - if not external_def or external_def.startswith("#"): - continue - ( - external_path, - external_url, - _, - relative_url, - ) = parse_external_definition( - external_def.rstrip("\r"), path, self.origin_url - ) - - if is_recursive_external( - self.origin_url, - path, - external_path, - external_url, - ): - self.has_recursive_externals = True - url = self.remote_url - break - - if relative_url: - self.has_relative_externals = True - url = self.origin_url - break try: url = url.rstrip("/") - self.export( - url, - to=local_url, - rev=revision, - ignore_keywords=True, - ignore_externals=self.has_recursive_externals, - ) + if checkout: + if os.path.exists(local_url): + # TODO: relocate if relative externals + self.client.update( + local_url, + revision, + ignore_externals=self.has_recursive_externals, + ) + else: + self.checkout( + url, + local_url, + revision, + ignore_externals=self.has_recursive_externals, + ) + else: + if os.path.exists(local_url): + shutil.rmtree(local_url) + self.export( + url, + to=local_url, + rev=revision, + ignore_externals=self.has_recursive_externals, + ) except SubversionException as se: if se.args[0].startswith( ( @@ -495,15 +510,15 @@ class SvnRepo: else: raise - if self.from_dump: - # when exporting a subpath of a subversion repository mounted from - # a dump file generated by svnrdump, exported paths are relative to - # the repository root path while they are relative to the subpath - # otherwise, so we need to adjust the URL of the exported filesystem - root_dir_local_url = os.path.join(local_url, self.root_directory.strip("/")) - # check that root directory of a subproject did not get removed in revision - if os.path.exists(root_dir_local_url): - local_url = root_dir_local_url + # if self.from_dump: + # when exporting a subpath of a subversion repository mounted from + # a dump file generated by svnrdump, exported paths are relative to + # the repository root path while they are relative to the subpath + # otherwise, so we need to adjust the URL of the exported filesystem + root_dir_local_url = os.path.join(local_url, self.root_directory.strip("/")) + # check that root directory of a subproject did not get removed in revision + if os.path.exists(root_dir_local_url): + local_url = root_dir_local_url return local_dirname, os.fsencode(local_url) @@ -539,7 +554,9 @@ class SvnRepo: # path modifications from first revision to restore possible file states induced # by setting svn properties on those files (end of line style for instance) self.replay_started = True - first_revision = 1 if start_revision else 0 # handle empty repository edge case + first_revision = ( + start_revision if start_revision else 0 + ) # handle empty repository edge case for commit in self.logs(first_revision, end_revision): rev = commit["rev"] copyfrom_revs = ( @@ -560,19 +577,16 @@ class SvnRepo: low_water_mark = min(copyfrom_revs) objects = self.swhreplay.compute_objects(rev, low_water_mark) - if rev >= start_revision: - # start yielding new data to archive once we reached the revision to - # resume the loading from - if commit["has_changes"] or start_revision == 0: - # yield data only if commit has changes or if repository is empty - root_dir_path = self.root_directory.encode()[1:] - if not root_dir_path or root_dir_path in self.swhreplay.directory: - root_dir = self.swhreplay.directory[root_dir_path] - else: - # root directory of subproject got removed in revision, return - # empty directory for that edge case - root_dir = DirectoryFromDisk() - yield rev, commit, objects, root_dir + if commit["has_changes"] or start_revision == 0: + # yield data only if commit has changes or if repository is empty + root_dir_path = self.root_directory.encode()[1:] + if not root_dir_path or root_dir_path in self.swhreplay.directory: + root_dir = self.swhreplay.directory[root_dir_path] + else: + # root directory of subproject got removed in revision, return + # empty directory for that edge case + root_dir = DirectoryFromDisk() + yield rev, commit, objects, root_dir def swh_hash_data_at_revision( self, revision: int diff --git a/swh/loader/svn/tests/test_externals.py b/swh/loader/svn/tests/test_externals.py index 4610588..6f61650 100644 --- a/swh/loader/svn/tests/test_externals.py +++ b/swh/loader/svn/tests/test_externals.py @@ -81,7 +81,6 @@ def test_loader_with_valid_svn_externals( properties={ "svn:externals": ( f"{svn_urljoin(external_repo_url, 'code/hello')} hello\n" - f"{svn_urljoin(external_repo_url, 'foo.sh')} foo.sh\n" f"{svn_urljoin(repo_url, 'trunk/bar.sh')} bar.sh" ) }, @@ -199,12 +198,6 @@ def test_loader_with_valid_externals_modification( properties={"svn:executable": "*"}, data=b"#!/bin/bash\necho bar", ), - CommitChange( - change_type=CommitChangeType.AddOrUpdate, - path="foo.sh", - properties={"svn:executable": "*"}, - data=b"#!/bin/bash\necho foo", - ), ], ) @@ -219,7 +212,6 @@ def test_loader_with_valid_externals_modification( properties={ "svn:externals": ( f"{svn_urljoin(external_repo_url, 'code/hello')} src/code/hello\n" # noqa - f"{svn_urljoin(external_repo_url, 'foo.sh')} src/foo.sh\n" ) }, ), @@ -239,7 +231,6 @@ def test_loader_with_valid_externals_modification( properties={ "svn:externals": ( f"{svn_urljoin(external_repo_url, 'code/bar')} src/code/bar\n" # noqa - f"{svn_urljoin(external_repo_url, 'foo.sh')} src/foo.sh\n" ) }, ), @@ -258,11 +249,11 @@ def test_loader_with_valid_externals_modification( def test_loader_with_valid_externals_and_versioned_path( - swh_storage, repo_url, external_repo_url, tmp_path + swh_storage, repo_url, tmp_path ): - # first commit on external + # first commit add_commit( - external_repo_url, + repo_url, "Create a file in an external repository", [ CommitChange( @@ -273,7 +264,7 @@ def test_loader_with_valid_externals_and_versioned_path( ], ) - # first commit + # second commit add_commit( repo_url, "Add file with same name but different content in main repository", @@ -286,7 +277,7 @@ def test_loader_with_valid_externals_and_versioned_path( ], ) - # second commit + # third commit add_commit( repo_url, "Add externals targeting the versioned file", @@ -296,14 +287,14 @@ def test_loader_with_valid_externals_and_versioned_path( path="trunk/", properties={ "svn:externals": ( - f"{svn_urljoin(external_repo_url, 'code/script.sh')} script.sh" # noqa + f"{svn_urljoin(repo_url, 'code/script.sh')} script.sh" # noqa ) }, ), ], ) - # third commit + # fourth commit add_commit( repo_url, "Modify the versioned file", @@ -571,15 +562,15 @@ def test_dump_loader_relative_externals_detection( ], ) - external_url = f"{external_repo_url.replace('file://', '//')}/project2/bar.sh" + external_url = f"{external_repo_url.replace('file://', '//')}/project2" add_commit( repo_url, "Set external relative to URL scheme in repository to load", [ CommitChange( change_type=CommitChangeType.AddOrUpdate, - path="project1/", - properties={"svn:externals": (f"{external_url} bar.sh")}, + path="trunk/", + properties={"svn:externals": (f"{external_url} project2")}, ), ], ) @@ -603,7 +594,7 @@ def test_dump_loader_relative_externals_detection( [ CommitChange( change_type=CommitChangeType.AddOrUpdate, - path="project1/", + path="trunk/", properties={"svn:externals": None}, ), ], @@ -623,78 +614,6 @@ def test_dump_loader_relative_externals_detection( assert not loader.svnrepo.has_relative_externals -def test_loader_externals_cache(swh_storage, repo_url, external_repo_url, tmp_path): - - # first commit on external - add_commit( - external_repo_url, - "Create some directories and files in an external repository", - [ - CommitChange( - change_type=CommitChangeType.AddOrUpdate, - path="code/hello/hello-world", - properties={"svn:executable": "*"}, - data=b"#!/bin/bash\necho Hello World !", - ), - ], - ) - - # first commit - add_commit( - repo_url, - "Create repository structure.", - [ - CommitChange( - change_type=CommitChangeType.AddOrUpdate, - path="project1/", - ), - CommitChange( - change_type=CommitChangeType.AddOrUpdate, - path="project2/", - ), - ], - ) - - external_url = svn_urljoin(external_repo_url, "code/hello") - - # second commit - add_commit( - repo_url, - ( - "Set svn:externals property on trunk/externals path of repository to load." - "One external targets a remote directory and another one a remote file." - ), - [ - CommitChange( - change_type=CommitChangeType.AddOrUpdate, - path="project1/externals/", - properties={"svn:externals": (f"{external_url} hello\n")}, - ), - CommitChange( - change_type=CommitChangeType.AddOrUpdate, - path="project2/externals/", - properties={"svn:externals": (f"{external_url} hello\n")}, - ), - ], - ) - - loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1) - assert loader.load() == {"status": "eventful"} - assert_last_visit_matches( - loader.storage, - repo_url, - status="full", - type="svn", - ) - check_snapshot(loader.snapshot, loader.storage) - - assert ( - external_url, - None, - False, - ) in loader.svnrepo.swhreplay.editor.externals_cache - - def test_loader_remove_versioned_path_with_external_overlap( swh_storage, repo_url, external_repo_url, tmp_path ): @@ -753,12 +672,7 @@ def test_loader_remove_versioned_path_with_external_overlap( ], ) - loader = SvnLoader( - swh_storage, - repo_url, - temp_directory=tmp_path, - check_revision=1, - ) + loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1) assert loader.load() == {"status": "eventful"} assert_last_visit_matches( loader.storage, @@ -918,7 +832,6 @@ def test_loader_remove_external_overlapping_versioned_path( path="", # repo root dir properties={ "svn:externals": ( - f"{svn_urljoin(external_repo_url, 'code/foo.sh')} trunk/code/foo.sh\n" # noqa f"{svn_urljoin(external_repo_url, 'code/link')} trunk/link" ) }, @@ -939,12 +852,7 @@ def test_loader_remove_external_overlapping_versioned_path( ], ) - loader = SvnLoader( - swh_storage, - repo_url, - temp_directory=tmp_path, - check_revision=1, - ) + loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1) assert loader.load() == {"status": "eventful"} assert_last_visit_matches( loader.storage, @@ -1006,12 +914,7 @@ def test_loader_modify_external_same_path( ], ) - loader = SvnLoader( - swh_storage, - repo_url, - temp_directory=tmp_path, - check_revision=1, - ) + loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1) assert loader.load() == {"status": "eventful"} assert_last_visit_matches( loader.storage, @@ -1257,76 +1160,6 @@ def test_loader_external_in_versioned_path( check_snapshot(loader.snapshot, loader.storage) -def test_dump_loader_externals_in_loaded_repository(swh_storage, tmp_path, mocker): - repo_url = create_repo(tmp_path, repo_name="foo") - externa_url = create_repo(tmp_path, repo_name="foobar") - - # first commit on external - add_commit( - externa_url, - "Create a file in an external repository", - [ - CommitChange( - change_type=CommitChangeType.AddOrUpdate, - path="trunk/src/foo.sh", - data=b"#!/bin/bash\necho foo", - ), - ], - ) - - add_commit( - repo_url, - ( - "Add a file and set externals on trunk/externals:" - "one external located in this repository, the other in a remote one" - ), - [ - CommitChange( - change_type=CommitChangeType.AddOrUpdate, - path="trunk/src/bar.sh", - data=b"#!/bin/bash\necho bar", - ), - CommitChange( - change_type=CommitChangeType.AddOrUpdate, - path="trunk/externals/", - properties={ - "svn:externals": ( - f"{svn_urljoin(repo_url, 'trunk/src/bar.sh')} bar.sh\n" - f"{svn_urljoin(externa_url, 'trunk/src/foo.sh')} foo.sh" - ) - }, - ), - ], - ) - - from swh.loader.svn.svn import client - - mock_client = mocker.MagicMock() - mocker.patch.object(client, "Client", mock_client) - - class Info: - repos_root_url = repo_url - - mock_client().info.return_value = {"repo": Info()} - - loader = SvnLoaderFromRemoteDump(swh_storage, repo_url, temp_directory=tmp_path) - loader.load() - - export_call_args = mock_client().export.call_args_list - - # first external export should use the base URL of the local repository - # mounted from the remote dump as it is located in loaded repository - assert export_call_args[0][0][0] != svn_urljoin( - loader.svnrepo.origin_url, "trunk/src/bar.sh" - ) - assert export_call_args[0][0][0] == svn_urljoin( - loader.svnrepo.remote_url, "trunk/src/bar.sh" - ) - - # second external export should use the remote URL of the external repository - assert export_call_args[1][0][0] == svn_urljoin(externa_url, "trunk/src/foo.sh") - - def test_loader_externals_add_remove_readd_on_subpath( swh_storage, repo_url, external_repo_url, tmp_path ): @@ -1567,9 +1400,11 @@ def test_loader_with_externals_parsing_error( check_snapshot(loader.snapshot, loader.storage) -@pytest.mark.parametrize("remote_external_path", ["src/main/project", "src/main"]) def test_loader_overlapping_external_paths_removal( - swh_storage, repo_url, external_repo_url, tmp_path, remote_external_path + swh_storage, + repo_url, + external_repo_url, + tmp_path, ): add_commit( external_repo_url, @@ -1602,7 +1437,7 @@ def test_loader_overlapping_external_paths_removal( change_type=CommitChangeType.AddOrUpdate, path="trunk/src/main/", properties={ - "svn:externals": f"{svn_urljoin(external_repo_url, remote_external_path)} project" # noqa + "svn:externals": f"{svn_urljoin(external_repo_url, 'src/main/project')} project" # noqa }, ), CommitChange( @@ -1626,12 +1461,7 @@ def test_loader_overlapping_external_paths_removal( ], ) - loader = SvnLoader( - swh_storage, - repo_url, - temp_directory=tmp_path, - check_revision=1, - ) + loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1) assert loader.load() == {"status": "eventful"} assert_last_visit_matches( loader.storage, diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py index fb1401d..aef9ba8 100644 --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -512,14 +512,15 @@ def test_loader_svn_visit_with_eol_style(swh_storage, datadir, tmp_path): assert loader.load() == {"status": "eventful"} mediawiki_snapshot = Snapshot( - id=hash_to_bytes("d6d6e9703f157c5702d9a4a5dec878926ed4ab76"), + id=hash_to_bytes("161180207e7c2b3ff7ee8312428b0f8e3e1bd594"), branches={ b"HEAD": SnapshotBranch( - target=hash_to_bytes("7da4975c363101b819756d33459f30a866d01b1b"), + target=hash_to_bytes("c2652c87b7c27cafa840524cf9841feefb13908b"), target_type=TargetType.REVISION, ) }, ) + check_snapshot(mediawiki_snapshot, loader.storage) assert_last_visit_matches( @@ -1295,18 +1296,18 @@ def test_loader_first_revision_is_not_number_one( loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path) # post loading will detect an issue and make a partial visit with a snapshot - assert loader.load() == {"status": "failed"} + assert loader.load() == {"status": "eventful"} assert_last_visit_matches( loader.storage, repo_url, - status="partial", + status="full", type="svn", ) check_snapshot(loader.snapshot, loader.storage) assert get_stats(loader.storage) == { - "content": 2, + "content": 3, "directory": 2, "origin": 1, "origin_visit": 1, @@ -2315,13 +2316,7 @@ def test_loader_check_tree_divergence(swh_storage, repo_url, tmp_path, caplog): ) # load it - loader = SvnLoader( - swh_storage, - repo_url, - temp_directory=tmp_path, - debug=True, - check_revision=1, - ) + loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1) assert loader.load() == {"status": "eventful"} # export it to a temporary directory