Page Menu · Home · Software Heritage
Paste P1535

subversion loader based on incremental checkout operations
Active · Public

Authored by anlambert on Dec 5 2022, 10:47 AM.
diff --git a/swh/loader/svn/__init__.py b/swh/loader/svn/__init__.py
index 0204bc7..ac42897 100644
--- a/swh/loader/svn/__init__.py
+++ b/swh/loader/svn/__init__.py
@@ -7,9 +7,9 @@ from typing import Any, Dict
def register() -> Dict[str, Any]:
- from swh.loader.svn.loader import SvnLoaderFromRemoteDump
+ from swh.loader.svn.loader import SvnLoader
return {
"task_modules": ["%s.tasks" % __name__],
- "loader": SvnLoaderFromRemoteDump,
+ "loader": SvnLoader,
}
diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
index ae6ff91..e6145f7 100644
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -8,6 +8,7 @@ swh-storage.
"""
from datetime import datetime
+import difflib
import os
import pty
import re
@@ -153,9 +154,13 @@ Local repository not cleaned up for investigation: %s""",
"""
assert self.svnrepo is not None
- local_dirname, local_url = self.svnrepo.export_temporary(revision)
- root_dir = from_disk.Directory.from_disk(path=local_url)
- self.svnrepo.clean_fs(local_dirname)
+ local_dirname, local_url = self.svnrepo.export_temporary(
+ revision, checkout=True
+ )
+ root_dir = from_disk.Directory.from_disk(
+ path=local_url,
+ dir_filter=from_disk.ignore_directories_patterns(local_url, [b"*.svn"]),
+ )
return root_dir
def _latest_snapshot_revision(
@@ -315,7 +320,7 @@ Local repository not cleaned up for investigation: %s""",
# before the last revision to load)
if self.debug and dir_id == dir.hash:
for obj in checked_dir.iter_tree():
- path = obj.data["path"].replace(checked_dir.data["path"], b"")[1:]
+ path = obj.data["path"].replace(checked_dir.data["path"], b"")
if not path:
# ignore root directory
continue
@@ -331,6 +336,28 @@ Local repository not cleaned up for investigation: %s""",
obj.object_type, # type: ignore
path,
)
+ if obj.object_type == "content": # type: ignore
+ self.log.debug(
+ "expected: %s, actual: %s",
+ hashutil.hash_to_hex(checked_dir[path].data["sha1"]),
+ hashutil.hash_to_hex(dir[path].data["sha1"]),
+ )
+ with open(
+ checked_dir[path].data["path"], "rb"
+ ) as exported_file, open(
+ dir[path].data["path"], "rb"
+ ) as checkout_file:
+ diff_lines = difflib.diff_bytes(
+ difflib.unified_diff,
+ exported_file.read().split(b"\n"),
+ checkout_file.read().split(b"\n"),
+ )
+ self.log.debug(
+ "\n" + os.fsdecode(b"\n".join(list(diff_lines)[2:]))
+ )
+
+ assert self.svnrepo is not None
+ self.svnrepo.clean_fs(checked_dir.data["path"])
err = (
"Hash tree computation divergence detected at revision %s "
"(%s != %s), stopping!"
diff --git a/swh/loader/svn/replay.py b/swh/loader/svn/replay.py
index fd11be3..0978f73 100644
--- a/swh/loader/svn/replay.py
+++ b/swh/loader/svn/replay.py
@@ -12,8 +12,7 @@ from __future__ import annotations
import codecs
from collections import defaultdict
from dataclasses import dataclass, field
-from distutils.dir_util import copy_tree
-from itertools import chain
+import locale
import logging
import os
import shutil
@@ -33,7 +32,7 @@ from typing import (
)
import click
-from subvertpy import SubversionException, delta, properties
+from subvertpy import SubversionException, properties
from subvertpy.ra import Auth, RemoteAccess, get_username_provider
from swh.model import from_disk, hashutil
@@ -43,105 +42,11 @@ from swh.model.model import Content, Directory, SkippedContent
if TYPE_CHECKING:
from swh.loader.svn.svn import SvnRepo
-from swh.loader.svn.utils import (
- is_recursive_external,
- parse_external_definition,
- svn_urljoin,
-)
-
-_eol_style = {"native": b"\n", "CRLF": b"\r\n", "LF": b"\n", "CR": b"\r"}
+from swh.loader.svn.utils import is_recursive_external, parse_external_definition
logger = logging.getLogger(__name__)
-def _normalize_line_endings(lines: bytes, eol_style: str = "native") -> bytes:
- r"""Normalize line endings to unix (\\n), windows (\\r\\n) or mac (\\r).
-
- Args:
- lines: The lines to normalize
-
- eol_style: The line ending format as defined for
- svn:eol-style property. Acceptable values are 'native',
- 'CRLF', 'LF' and 'CR'
-
- Returns:
- Lines with endings normalized
- """
- if eol_style in _eol_style:
- lines = lines.replace(_eol_style["CRLF"], _eol_style["LF"]).replace(
- _eol_style["CR"], _eol_style["LF"]
- )
- if _eol_style[eol_style] != _eol_style["LF"]:
- lines = lines.replace(_eol_style["LF"], _eol_style[eol_style])
-
- return lines
-
-
-def apply_txdelta_handler(
- sbuf: bytes, target_stream: BinaryIO
-) -> Callable[[Any, bytes, BinaryIO], None]:
- """Return a function that can be called repeatedly with txdelta windows.
- When done, closes the target_stream.
-
- Adapted from subvertpy.delta.apply_txdelta_handler to close the
- stream when done.
-
- Args:
- sbuf: Source buffer
- target_stream: Target stream to write to.
-
- Returns:
- Function to be called to apply txdelta windows
-
- """
-
- def apply_window(
- window: Any, sbuf: bytes = sbuf, target_stream: BinaryIO = target_stream
- ):
- if window is None:
- target_stream.close()
- return # Last call
- patch = delta.apply_txdelta_window(sbuf, window)
- target_stream.write(patch)
-
- return apply_window
-
-
-def read_svn_link(data: bytes) -> Tuple[bytes, bytes]:
- """Read the svn link's content.
-
- Args:
- data: svn link's raw content
-
- Returns:
- The tuple of (filetype, destination path)
-
- """
- split_byte = b" "
- first_line = data.split(b"\n")[0]
- filetype, *src = first_line.split(split_byte)
- target = split_byte.join(src)
- return filetype, target
-
-
-def is_file_an_svnlink_p(fullpath: bytes) -> Tuple[bool, bytes]:
- """Determine if a filepath is an svnlink or something else.
-
- Args:
- fullpath: Full path to the potential symlink to check
-
- Returns:
- Tuple containing a boolean value to determine if it's indeed a symlink
- (as per svn) and the link target.
-
- """
- if os.path.islink(fullpath):
- return False, b""
- with open(fullpath, "rb") as f:
- filetype, src = read_svn_link(f.read())
- return filetype == b"link", src
-
-
def _ra_codecs_error_handler(e: UnicodeError) -> Tuple[Union[str, bytes], int]:
"""Subvertpy may fail to decode to utf-8 the user svn properties. As
they are not used by the loader, return an empty string instead
@@ -154,32 +59,6 @@ def _ra_codecs_error_handler(e: UnicodeError) -> Tuple[Union[str, bytes], int]:
return "", cast(UnicodeDecodeError, e).end
-DEFAULT_FLAG = 0
-EXEC_FLAG = 1
-NOEXEC_FLAG = 2
-
-SVN_PROPERTY_EOL = "svn:eol-style"
-
-
-@dataclass
-class FileState:
- """Persists some file states (eg. end of lines style) across revisions while
- replaying them."""
-
- eol_style: Optional[str] = None
- """EOL state check mess"""
-
- svn_special_path_non_link_data: Optional[bytes] = None
- """keep track of non link file content with svn:special property set"""
-
- # default value: 0, 1: set the flag, 2: remove the exec flag
- executable: int = DEFAULT_FLAG
- """keep track if file is executable when setting svn:executable property"""
-
- link: bool = False
- """keep track if file is a svn link when setting svn:special property"""
-
-
class FileEditor:
"""File Editor in charge of updating file on disk and memory objects."""
@@ -199,13 +78,12 @@ class FileEditor:
directory: from_disk.Directory,
rootpath: bytes,
path: bytes,
- state: FileState,
svnrepo: SvnRepo,
):
self.directory = directory
self.path = path
self.fullpath = os.path.join(rootpath, path)
- self.state = state
+
self.svnrepo = svnrepo
self.editor = svnrepo.swhreplay.editor
@@ -214,75 +92,12 @@ class FileEditor:
logger.debug(
"Setting property %s to value %s on path %s", key, value, self.path
)
- if key == properties.PROP_EXECUTABLE:
- if value is None: # bit flip off
- self.state.executable = NOEXEC_FLAG
- else:
- self.state.executable = EXEC_FLAG
- elif key == properties.PROP_SPECIAL:
- # Possibly a symbolic link. We cannot check further at
- # that moment though, patch(s) not being applied yet
- self.state.link = value is not None
- elif key == SVN_PROPERTY_EOL:
- # backup end of line style for file
- self.state.eol_style = value
-
- def __make_symlink(self, src: bytes) -> None:
- """Convert the svnlink to a symlink on disk.
-
- This function expects self.fullpath to be a svn link.
-
- Args:
- src: Path to the link's source
-
- Return:
- tuple: The svnlink's data tuple:
-
- - type (should be only 'link')
- - <path-to-src>
-
- """
- os.remove(self.fullpath)
- os.symlink(src=src, dst=self.fullpath)
-
- def __make_svnlink(self) -> bytes:
- """Convert the symlink to a svnlink on disk.
-
- Return:
- The symlink's svnlink data (``b'type <path-to-src>'``)
-
- """
- # we replace the symlink by a svnlink
- # to be able to patch the file on future commits
- src = os.readlink(self.fullpath)
- os.remove(self.fullpath)
- sbuf = b"link " + src
- with open(self.fullpath, "wb") as f:
- f.write(sbuf)
- return sbuf
def apply_textdelta(self, base_checksum) -> Callable[[Any, bytes, BinaryIO], None]:
if self.editor.debug:
logger.debug("Applying textdelta to file %s", self.path)
- # if the filepath matches an external, do not apply local patch
- if self.path in self.editor.external_paths:
- return lambda *args: None
-
- if os.path.lexists(self.fullpath):
- if os.path.islink(self.fullpath):
- # svn does not deal with symlink so we transform into
- # real svn symlink for potential patching in later
- # commits
- sbuf = self.__make_svnlink()
- self.state.link = True
- else:
- with open(self.fullpath, "rb") as f:
- sbuf = f.read()
- else:
- sbuf = b""
- t = open(self.fullpath, "wb")
- return apply_txdelta_handler(sbuf, target_stream=t)
+ return lambda *args: None
def close(self) -> None:
"""When done with the file, this is called.
@@ -298,63 +113,7 @@ class FileEditor:
if self.editor.debug:
logger.debug("Closing file %s", self.path)
- if self.state.link:
- # can only check now that the link is a real one
- # since patch has been applied
- is_link, src = is_file_an_svnlink_p(self.fullpath)
- if is_link:
- self.__make_symlink(src)
- elif not os.path.isdir(self.fullpath): # not a real link ...
- # when a file with the svn:special property set is not a svn link,
- # the svn export operation might extract a truncated version of it
- # if it is a binary file, so ensure to produce the same file as the
- # export operation.
- with open(self.fullpath, "rb") as f:
- content = f.read()
- self.svnrepo.export(
- os.path.join(self.svnrepo.remote_url, os.fsdecode(self.path)),
- to=self.fullpath,
- peg_rev=self.editor.revnum,
- ignore_keywords=True,
- overwrite=True,
- )
- with open(self.fullpath, "rb") as f:
- exported_data = f.read()
- if exported_data != content:
- # keep track of original file content in order to restore
- # it if the svn:special property gets unset in another revision
- self.state.svn_special_path_non_link_data = content
- elif os.path.islink(self.fullpath):
- # path was a symbolic link in previous revision but got the property
- # svn:special unset in current one, revert its content to svn link format
- self.__make_svnlink()
- elif self.state.svn_special_path_non_link_data is not None:
- # path was a non link file with the svn:special property previously set
- # and got truncated on export, restore its original content
- with open(self.fullpath, "wb") as f:
- f.write(self.state.svn_special_path_non_link_data)
- self.state.svn_special_path_non_link_data = None
-
- is_link = os.path.islink(self.fullpath)
- if not is_link: # if a link, do nothing regarding flag
- if self.state.executable == EXEC_FLAG:
- os.chmod(self.fullpath, 0o755)
- elif self.state.executable == NOEXEC_FLAG:
- os.chmod(self.fullpath, 0o644)
-
- # And now compute file's checksums
- if self.state.eol_style and not is_link:
- # ensure to normalize line endings as defined by svn:eol-style
- # property to get the same file checksum as after an export
- # or checkout operation with subversion
- with open(self.fullpath, "rb") as f:
- data = f.read()
- data = _normalize_line_endings(data, self.state.eol_style)
- mode = os.lstat(self.fullpath).st_mode
- self.directory[self.path] = from_disk.Content.from_bytes(
- mode=mode, data=data
- )
- else:
+ if self.editor.update_directory_model:
self.directory[self.path] = from_disk.Content.from_file(path=self.fullpath)
@@ -384,7 +143,6 @@ class DirEditor:
"directory",
"rootpath",
"path",
- "file_states",
"dir_states",
"svnrepo",
"editor",
@@ -396,7 +154,6 @@ class DirEditor:
directory: from_disk.Directory,
rootpath: bytes,
path: bytes,
- file_states: Dict[bytes, FileState],
dir_states: Dict[bytes, DirState],
svnrepo: SvnRepo,
):
@@ -405,41 +162,11 @@ class DirEditor:
self.path = path
# build directory on init
os.makedirs(rootpath, exist_ok=True)
- self.file_states = file_states
self.dir_states = dir_states
self.svnrepo = svnrepo
self.editor = svnrepo.swhreplay.editor
self.externals: Dict[str, List[ExternalDefinition]] = {}
- def remove_child(self, path: bytes) -> None:
- """Remove a path from the current objects.
-
- The path can be resolved as link, file or directory.
-
- This function takes also care of removing the link between the
- child and the parent.
-
- Args:
- path: to remove from the current objects.
-
- """
- if path in self.directory:
- entry_removed = self.directory[path]
- del self.directory[path]
- fpath = os.path.join(self.rootpath, path)
- if isinstance(entry_removed, from_disk.Directory):
- shutil.rmtree(fpath)
- else:
- os.remove(fpath)
-
- # when deleting a directory ensure to remove any svn property for the
- # file it contains as they can be added again later in another revision
- # without the same property set
- fullpath = os.path.join(self.rootpath, path)
- for state_path in list(self.file_states):
- if state_path.startswith(fullpath + b"/"):
- del self.file_states[state_path]
-
def open_directory(self, path: str, *args) -> DirEditor:
"""Updating existing directory."""
if self.editor.debug:
@@ -448,7 +175,6 @@ class DirEditor:
self.directory,
rootpath=self.rootpath,
path=os.fsencode(path),
- file_states=self.file_states,
dir_states=self.dir_states,
svnrepo=self.svnrepo,
)
@@ -466,31 +192,23 @@ class DirEditor:
)
path_bytes = os.fsencode(path)
- fullpath = os.path.join(self.rootpath, path_bytes)
- os.makedirs(fullpath, exist_ok=True)
- if copyfrom_rev == -1:
- if path_bytes and path_bytes not in self.directory:
- self.dir_states[path_bytes] = DirState()
+ if (
+ self.editor.update_directory_model
+ and path_bytes
+ and path_bytes not in self.directory
+ ):
+ if copyfrom_rev == -1:
self.directory[path_bytes] = from_disk.Directory()
- else:
- url = svn_urljoin(self.svnrepo.remote_url, copyfrom_path)
- self.remove_child(path_bytes)
- self.svnrepo.export(
- url,
- to=fullpath,
- peg_rev=copyfrom_rev,
- ignore_keywords=True,
- overwrite=True,
- ignore_externals=True,
- )
- self.directory[path_bytes] = from_disk.Directory.from_disk(path=fullpath)
+ else:
+ self.directory[path_bytes] = from_disk.Directory.from_disk(
+ path=os.path.join(self.editor.rootpath, path_bytes)
+ )
return DirEditor(
self.directory,
self.rootpath,
path_bytes,
- self.file_states,
self.dir_states,
svnrepo=self.svnrepo,
)
@@ -501,13 +219,13 @@ class DirEditor:
logger.debug("Opening file %s", path)
path_bytes = os.fsencode(path)
- self.directory[path_bytes] = from_disk.Content()
- fullpath = os.path.join(self.rootpath, path_bytes)
+ if self.editor.update_directory_model:
+ self.directory[path_bytes] = from_disk.Content()
+
return FileEditor(
self.directory,
rootpath=self.rootpath,
path=path_bytes,
- state=self.file_states[fullpath],
svnrepo=self.svnrepo,
)
@@ -524,28 +242,13 @@ class DirEditor:
)
path_bytes = os.fsencode(path)
- fullpath = os.path.join(self.rootpath, path_bytes)
-
- self.file_states[fullpath] = FileState()
- if copyfrom_rev == -1:
+ if self.editor.update_directory_model:
self.directory[path_bytes] = from_disk.Content()
- else:
- url = svn_urljoin(self.svnrepo.remote_url, copyfrom_path)
- self.remove_child(path_bytes)
- self.svnrepo.export(
- url,
- to=fullpath,
- peg_rev=copyfrom_rev,
- ignore_keywords=True,
- overwrite=True,
- )
- self.directory[path_bytes] = from_disk.Content.from_file(path=fullpath)
return FileEditor(
self.directory,
self.rootpath,
path_bytes,
- state=self.file_states[fullpath],
svnrepo=self.svnrepo,
)
@@ -558,6 +261,8 @@ class DirEditor:
value,
self.path,
)
+
+ self.editor.externals_modified = True
self.externals = defaultdict(list)
if value is not None:
try:
@@ -590,15 +295,6 @@ class DirEditor:
# of parsing error
self.externals = {}
- if not self.externals:
- # externals might have been unset on that directory path,
- # remove associated paths from the reconstructed filesystem
- externals = self.dir_states[self.path].externals
- for path in externals.keys():
- self.remove_external_path(os.fsencode(path))
-
- self.dir_states[self.path].externals = {}
-
def delete_entry(self, path: str, revision: int) -> None:
"""Remove a path."""
if self.editor.debug:
@@ -606,33 +302,20 @@ class DirEditor:
path_bytes = os.fsencode(path)
fullpath = os.path.join(self.rootpath, path_bytes)
-
- if os.path.isdir(fullpath):
- # remove all external paths associated to the removed directory
- # (we cannot simply remove a root external directory as externals
- # paths associated to ancestor directories can overlap)
- for external_path in self.dir_states[path_bytes].externals_paths:
- self.remove_external_path(
- external_path,
- root_path=path_bytes,
- remove_subpaths=False,
- force=True,
+ if self.editor.update_directory_model and path_bytes in self.directory:
+ if not os.path.exists(fullpath):
+ del self.directory[path_bytes]
+ else:
+ # externals might overlap with removed versioned path so ensure
+ # to update directory model
+ fullpath = os.path.join(self.rootpath, self.path)
+ self.directory[self.path] = from_disk.Directory.from_disk(
+ path=fullpath,
+ dir_filter=from_disk.ignore_directories_patterns(
+ self.rootpath, [b"*.svn"]
+ ),
)
- if path_bytes not in self.editor.external_paths:
- self.file_states.pop(fullpath, None)
- self.remove_child(path_bytes)
- elif os.path.isdir(fullpath):
- # versioned and external paths can overlap so we need to iterate on
- # all subpaths to check which ones to remove
- for root, dirs, files in os.walk(fullpath):
- for p in chain(dirs, files):
- full_repo_path = os.path.join(root, p)
- repo_path = full_repo_path.replace(self.rootpath + b"/", b"")
- if repo_path not in self.editor.external_paths:
- self.file_states.pop(full_repo_path, None)
- self.remove_child(repo_path)
-
def close(self):
"""Function called when we finish processing a repository.
@@ -641,53 +324,51 @@ class DirEditor:
if self.editor.debug:
logger.debug("Closing directory %s", self.path)
- prev_externals = self.dir_states[self.path].externals
-
- if self.externals:
- # externals definition list might have changed in the current replayed
- # revision, we need to determine if some were removed and delete the
- # associated paths
- externals = self.externals
- prev_externals_set = {
- (path, url, rev)
- for path in prev_externals.keys()
- for (url, rev, _) in prev_externals[path]
- }
- externals_set = {
- (path, url, rev)
- for path in externals.keys()
- for (url, rev, _) in externals[path]
- }
- old_externals = prev_externals_set - externals_set
- for path, _, _ in old_externals:
- self.remove_external_path(os.fsencode(path))
- else:
- # some external paths might have been removed in the current replayed
- # revision by a delete operation on an overlapping versioned path so we
- # need to restore them
- externals = prev_externals
-
- # For each external, try to export it in reconstructed filesystem
- for path, externals_def in externals.items():
- for i, external in enumerate(externals_def):
- external_url, revision, relative_url = external
- self.process_external(
- path,
- external_url,
- revision,
- relative_url,
- remove_target_path=i == 0,
- )
+ for path, externals in self.dir_states[self.path].externals.items():
+ for external_url, _, _ in externals:
+ if external_url not in self.editor.valid_external:
+ try:
+ self.svnrepo.info(external_url)
+ self.editor.valid_external[external_url] = True
+ except SubversionException:
+ self.editor.valid_external[external_url] = False
+
+ if not self.editor.valid_external[external_url]:
+ # external could not be exported, ensure to remove its path if it exists
+ fullpath = os.path.join(self.rootpath, self.path, os.fsencode(path))
+ if os.path.exists(fullpath) and os.path.isdir(fullpath):
+ shutil.rmtree(fullpath)
+ # update revision filesystem in case versioned files got removed
+ self.svnrepo.client.update(
+ path=self.editor.rootpath,
+ revision=self.editor.revnum,
+ ignore_externals=self.svnrepo.has_recursive_externals,
+ )
- # backup externals in directory state
- if self.externals:
+ if self.editor.update_directory_model and (
+ self.externals or self.dir_states[self.path].externals
+ ):
+ dir = from_disk.Directory.from_disk(
+ path=os.path.join(self.rootpath, self.path),
+ dir_filter=from_disk.ignore_directories_patterns(
+ self.rootpath, [b"*.svn"]
+ ),
+ )
+ if self.path:
+ self.directory[self.path] = dir
+ else:
+ self.directory = dir
+ elif not self.editor.update_directory_model:
self.dir_states[self.path].externals = self.externals
# do operations below only when closing the root directory
if self.path == b"":
+
self.svnrepo.has_relative_externals = any(
relative_url
- for (_, relative_url) in self.editor.valid_externals.values()
+ for path, dir_state in self.dir_states.items()
+ for external_path in dir_state.externals.keys()
+ for (_, _, relative_url) in dir_state.externals[external_path]
)
self.svnrepo.has_recursive_externals = any(
@@ -701,232 +382,6 @@ class DirEditor:
for external_path in dir_state.externals.keys()
for (external_url, _, _) in dir_state.externals[external_path]
)
- if self.svnrepo.has_recursive_externals:
- # If the repository has recursive externals, we stop processing
- # externals and remove those already exported,
- # We will then ignore externals when exporting the revision to
- # check for divergence with the reconstructed filesystem.
- for external_path in list(self.editor.external_paths):
- self.remove_external_path(external_path, force=True)
-
- def process_external(
- self,
- path: str,
- external_url: str,
- revision: Optional[int],
- relative_url: bool,
- remove_target_path: bool = True,
- ) -> None:
- external = (external_url, revision, relative_url)
- dest_path = os.fsencode(path)
- dest_fullpath = os.path.join(self.path, dest_path)
- prev_externals = self.dir_states[self.path].externals
- if (
- path in prev_externals
- and external in prev_externals[path]
- and dest_fullpath in self.directory
- ):
- # external already exported, nothing to do
- return
-
- if is_recursive_external(
- self.svnrepo.origin_url, os.fsdecode(self.path), path, external_url
- ):
- # recursive external, skip it
- return
-
- logger.debug(
- "Exporting external %s%s to path %s",
- external_url,
- f"@{revision}" if revision else "",
- dest_fullpath,
- )
-
- if external not in self.editor.externals_cache:
-
- try:
- # try to export external in a temporary path, destination path could
- # be versioned and must be overridden only if the external URL is
- # still valid
- temp_dir = os.fsencode(
- tempfile.mkdtemp(dir=self.editor.externals_cache_dir)
- )
- temp_path = os.path.join(temp_dir, dest_path)
- os.makedirs(b"/".join(temp_path.split(b"/")[:-1]), exist_ok=True)
- if external_url not in self.editor.dead_externals:
- url = external_url.rstrip("/")
- origin_url = self.svnrepo.origin_url.rstrip("/")
- if (
- url.startswith(origin_url + "/")
- and not self.svnrepo.has_relative_externals
- ):
- url = url.replace(origin_url, self.svnrepo.remote_url)
- self.svnrepo.export(
- url,
- to=temp_path,
- peg_rev=revision,
- ignore_keywords=True,
- )
- self.editor.externals_cache[external] = temp_path
-
- except SubversionException as se:
- # external no longer available (404)
- logger.debug(se)
- self.editor.dead_externals.add(external_url)
-
- else:
- temp_path = self.editor.externals_cache[external]
-
- # subversion export will always create the subdirectories of the external
- # path regardless the validity of the remote URL
- dest_path_split = dest_path.split(b"/")
- current_path = self.path
- self.add_directory(os.fsdecode(current_path))
- for subpath in dest_path_split[:-1]:
- current_path = os.path.join(current_path, subpath)
- self.add_directory(os.fsdecode(current_path))
-
- if os.path.exists(temp_path):
- # external successfully exported
-
- if remove_target_path:
- # remove previous path in from_disk model
- self.remove_external_path(dest_path, remove_subpaths=False)
-
- # mark external as valid
- self.editor.valid_externals[dest_fullpath] = (
- external_url,
- relative_url,
- )
-
- # copy exported path to reconstructed filesystem
- fullpath = os.path.join(self.rootpath, dest_fullpath)
-
- if os.path.isfile(temp_path):
- if os.path.islink(fullpath):
- # remove destination file if it is a link
- os.remove(fullpath)
- shutil.copy(os.fsdecode(temp_path), os.fsdecode(fullpath))
- self.directory[dest_fullpath] = from_disk.Content.from_file(
- path=fullpath
- )
- else:
- self.add_directory(os.fsdecode(dest_fullpath))
-
- # copy_tree needs sub-directories to exist in destination
- for root, dirs, files in os.walk(temp_path):
- for dir in dirs:
- temp_dir_fullpath = os.path.join(root, dir)
- if os.path.islink(temp_dir_fullpath):
- # do not create folder if it's a link or copy_tree will fail
- continue
- subdir = temp_dir_fullpath.replace(temp_path + b"/", b"")
- self.add_directory(
- os.fsdecode(os.path.join(dest_fullpath, subdir))
- )
-
- copy_tree(
- os.fsdecode(temp_path),
- os.fsdecode(fullpath),
- preserve_symlinks=True,
- )
-
- # TODO: replace code above by the line below once we use Python >= 3.8 in production # noqa
- # shutil.copytree(temp_path, fullpath, symlinks=True, dirs_exist_ok=True) # noqa
-
- self.directory[dest_fullpath] = from_disk.Directory.from_disk(
- path=fullpath
- )
-
- # update set of external paths reachable from the directory
- external_paths = set()
- dest_path_part = dest_path.split(b"/")
- for i in range(1, len(dest_path_part) + 1):
- external_paths.add(b"/".join(dest_path_part[:i]))
-
- for root, dirs, files in os.walk(temp_path):
- external_paths.update(
- [
- os.path.join(
- dest_path,
- os.path.join(root, p).replace(temp_path, b"").strip(b"/"),
- )
- for p in chain(dirs, files)
- ]
- )
-
- self.dir_states[self.path].externals_paths.update(external_paths)
-
- for external_path in external_paths:
- self.editor.external_paths[os.path.join(self.path, external_path)] += 1
-
- # ensure hash update for the directory with externals set
- self.directory[self.path].update_hash(force=True)
-
- def remove_external_path(
- self,
- external_path: bytes,
- remove_subpaths: bool = True,
- force: bool = False,
- root_path: Optional[bytes] = None,
- ) -> None:
- """Remove a previously exported SVN external path from
- the reconstructed filesystem.
- """
- path = root_path if root_path else self.path
- fullpath = os.path.join(path, external_path)
-
- # decrement number of references for external path when we really remove it
- # (when remove_subpaths is False, we just cleanup the external path before
- # copying exported paths in it)
- if force or (fullpath in self.editor.external_paths and remove_subpaths):
- self.editor.external_paths[fullpath] -= 1
-
- if (
- fullpath in self.editor.external_paths
- and self.editor.external_paths[fullpath] == 0
- ):
- self.remove_child(fullpath)
- self.editor.external_paths.pop(fullpath, None)
- self.editor.valid_externals.pop(fullpath, None)
- for path in list(self.editor.external_paths):
- if path.startswith(fullpath + b"/"):
- self.editor.external_paths[path] -= 1
- if self.editor.external_paths[path] == 0:
- self.editor.external_paths.pop(path)
-
- if remove_subpaths:
- subpath_split = fullpath.split(b"/")[:-1]
- for i in reversed(range(1, len(subpath_split) + 1)):
- # delete external sub-directory only if it is not versioned
- subpath = b"/".join(subpath_split[0:i])
- try:
- self.svnrepo.client.info(
- svn_urljoin(self.svnrepo.remote_url, os.fsdecode(subpath)),
- peg_revision=self.editor.revnum,
- revision=self.editor.revnum,
- )
- except SubversionException:
- self.remove_child(subpath)
- else:
- break
-
- try:
- # externals can overlap with versioned files so we must restore
- # them after removing the path above
- dest_path = os.path.join(self.rootpath, fullpath)
- self.svnrepo.client.export(
- svn_urljoin(self.svnrepo.remote_url, os.fsdecode(fullpath)),
- to=dest_path,
- peg_rev=self.editor.revnum,
- ignore_keywords=True,
- )
- if os.path.isfile(dest_path) or os.path.islink(dest_path):
- self.directory[fullpath] = from_disk.Content.from_file(path=dest_path)
- else:
- self.directory[fullpath] = from_disk.Directory.from_disk(path=dest_path)
- except SubversionException:
- pass
class Editor:
@@ -943,21 +398,17 @@ class Editor:
rootpath: bytes,
directory: from_disk.Directory,
svnrepo: SvnRepo,
- temp_dir: str,
debug: bool = False,
):
self.rootpath = rootpath
self.directory = directory
- self.file_states: Dict[bytes, FileState] = defaultdict(FileState)
self.dir_states: Dict[bytes, DirState] = defaultdict(DirState)
- self.external_paths: Dict[bytes, int] = defaultdict(int)
- self.valid_externals: Dict[bytes, Tuple[str, bool]] = {}
- self.dead_externals: Set[str] = set()
- self.externals_cache_dir = tempfile.mkdtemp(dir=temp_dir)
- self.externals_cache: Dict[ExternalDefinition, bytes] = {}
+ self.valid_external: Dict[str, bool] = {}
self.svnrepo = svnrepo
self.revnum = None
self.debug = debug
+ self.update_directory_model = False
+ self.externals_modified = False
def set_target_revision(self, revnum) -> None:
self.revnum = revnum
@@ -973,7 +424,6 @@ class Editor:
self.directory,
rootpath=self.rootpath,
path=b"",
- file_states=self.file_states,
dir_states=self.dir_states,
svnrepo=self.svnrepo,
)
@@ -987,7 +437,6 @@ class Replay:
conn: RemoteAccess,
rootpath: bytes,
svnrepo: SvnRepo,
- temp_dir: str,
directory: Optional[from_disk.Directory] = None,
debug: bool = False,
):
@@ -996,15 +445,19 @@ class Replay:
if directory is None:
directory = from_disk.Directory()
self.directory = directory
+ self.svnrepo = svnrepo
self.editor = Editor(
rootpath=rootpath,
directory=directory,
svnrepo=svnrepo,
- temp_dir=temp_dir,
debug=debug,
)
- def replay(self, rev: int, low_water_mark: int) -> from_disk.Directory:
+ def replay(
+ self,
+ rev: int,
+ low_water_mark: int,
+ ) -> from_disk.Directory:
"""Replay svn actions between rev and rev+1.
This method updates in place the self.editor.directory, as well as the
@@ -1014,8 +467,59 @@ class Replay:
The updated root directory
"""
+
+ locale.setlocale(locale.LC_ALL, "C")
+
codecs.register_error("strict", _ra_codecs_error_handler)
+
+ self.editor.update_directory_model = False
+ self.editor.externals_modified = False
+
self.conn.replay(rev, low_water_mark, self.editor)
+
+ first_checkout = not os.path.exists(os.path.join(self.editor.rootpath, b".svn"))
+
+ try:
+
+ for _ in range(2 if self.editor.externals_modified else 1):
+ self.svnrepo.checkout(
+ url=self.svnrepo.remote_url
+ if not self.svnrepo.has_relative_externals
+ else self.svnrepo.origin_url,
+ path=os.fsdecode(self.editor.rootpath),
+ rev=rev,
+ peg_rev=rev,
+ ignore_externals=(
+ not self.editor.externals_modified
+ or self.svnrepo.has_recursive_externals
+ ),
+ )
+
+ if first_checkout:
+ self.directory = self.editor.directory = from_disk.Directory.from_disk(
+ path=self.editor.rootpath,
+ dir_filter=from_disk.ignore_directories_patterns(
+ self.editor.rootpath, [b"*.svn"]
+ ),
+ )
+
+ except SubversionException as se:
+ if se.args[0].startswith(
+ (
+ "Error parsing svn:externals property",
+ "Unrecognized format for the relative external URL",
+ )
+ ):
+ pass
+ else:
+ raise
+
+ self.editor.update_directory_model = True
+ debug = self.editor.debug
+ # self.editor.debug = False
+ self.conn.replay(rev, low_water_mark, self.editor, False)
+ self.editor.debug = debug
+
codecs.register_error("strict", codecs.strict_errors)
return self.editor.directory
@@ -1051,6 +555,9 @@ class Replay:
else:
assert False, obj_type
+ logger.debug("%s contents collected", len(contents))
+ logger.debug("%s directories collected", len(directories))
+
return contents, skipped_contents, directories
diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py
index f88d523..3cad47d 100644
--- a/swh/loader/svn/svn.py
+++ b/swh/loader/svn/svn.py
@@ -113,16 +113,22 @@ class SvnRepo:
# another for replay
self.conn = self.remote_access(auth)
+ self.remote_url = self.info(self.remote_url).repos_root_url
+
self.local_dirname = local_dirname
local_name = os.path.basename(self.remote_url)
self.local_url = os.path.join(self.local_dirname, local_name).encode("utf-8")
+ # compute root directory path from the remote repository URL, required to
+ # properly load the sub-tree of a repository mounted from a dump file
+ repos_root_url = self.info(self.origin_url).repos_root_url
+ self.root_directory = self.origin_url.rstrip("/").replace(repos_root_url, "", 1)
+
self.uuid = self.conn.get_uuid().encode("utf-8")
self.swhreplay = replay.Replay(
conn=self.conn,
rootpath=self.local_url,
svnrepo=self,
- temp_dir=local_dirname,
debug=debug,
)
self.max_content_length = max_content_length
@@ -130,11 +136,6 @@ class SvnRepo:
self.has_recursive_externals = False
self.replay_started = False
- # compute root directory path from the remote repository URL, required to
- # properly load the sub-tree of a repository mounted from a dump file
- repos_root_url = self.info(self.origin_url).repos_root_url
- self.root_directory = self.origin_url.rstrip("/").replace(repos_root_url, "", 1)
-
def __str__(self):
return str(
{
@@ -399,7 +400,9 @@ class SvnRepo:
"""
return self.client.propget(name, target, peg_rev, rev, recurse)
- def export_temporary(self, revision: int) -> Tuple[str, bytes]:
+ def export_temporary(
+ self, revision: int, checkout: bool = False
+ ) -> Tuple[str, bytes]:
"""Export the repository to a given revision in a temporary location. This is up
to the caller of this function to clean up the temporary location when done (cf.
self.clean_fs method)
@@ -431,59 +434,71 @@ class SvnRepo:
# properties are set from a checkout of the revision and if some
# external URLs are relative to pick the right export URL,
# recursive externals are also checked
- with tempfile.TemporaryDirectory(
- dir=self.local_dirname, prefix=f"checkout-revision-{revision}."
- ) as co_dirname:
- self.checkout(
- self.remote_url, co_dirname, revision, ignore_externals=True
- )
- # get all svn:externals properties recursively
- externals = self.propget("svn:externals", co_dirname, None, None, True)
- self.has_relative_externals = False
- self.has_recursive_externals = False
- for path, external_defs in externals.items():
- if self.has_relative_externals or self.has_recursive_externals:
+ self.checkout(self.remote_url, local_url, revision, ignore_externals=True)
+ # get all svn:externals properties recursively
+ externals = self.propget("svn:externals", local_url, None, None, True)
+ self.has_relative_externals = False
+ self.has_recursive_externals = False
+ for path, external_defs in externals.items():
+ if self.has_relative_externals or self.has_recursive_externals:
+ break
+ path = path.replace(self.remote_url.rstrip("/") + "/", "")
+ for external_def in os.fsdecode(external_defs).split("\n"):
+ # skip empty line or comment
+ if not external_def or external_def.startswith("#"):
+ continue
+ (
+ external_path,
+ external_url,
+ _,
+ relative_url,
+ ) = parse_external_definition(
+ external_def.rstrip("\r"), path, self.origin_url
+ )
+
+ if is_recursive_external(
+ self.origin_url,
+ path,
+ external_path,
+ external_url,
+ ):
+ self.has_recursive_externals = True
+ url = self.remote_url
+ break
+
+ if relative_url:
+ self.has_relative_externals = True
+ url = self.origin_url
break
- path = path.replace(self.remote_url.rstrip("/") + "/", "")
- for external_def in os.fsdecode(external_defs).split("\n"):
- # skip empty line or comment
- if not external_def or external_def.startswith("#"):
- continue
- (
- external_path,
- external_url,
- _,
- relative_url,
- ) = parse_external_definition(
- external_def.rstrip("\r"), path, self.origin_url
- )
-
- if is_recursive_external(
- self.origin_url,
- path,
- external_path,
- external_url,
- ):
- self.has_recursive_externals = True
- url = self.remote_url
- break
-
- if relative_url:
- self.has_relative_externals = True
- url = self.origin_url
- break
try:
url = url.rstrip("/")
- self.export(
- url,
- to=local_url,
- rev=revision,
- ignore_keywords=True,
- ignore_externals=self.has_recursive_externals,
- )
+ if checkout:
+ if os.path.exists(local_url):
+ # TODO: relocate if relative externals
+ self.client.update(
+ local_url,
+ revision,
+ ignore_externals=self.has_recursive_externals,
+ )
+ else:
+ self.checkout(
+ url,
+ local_url,
+ revision,
+ ignore_externals=self.has_recursive_externals,
+ )
+ else:
+ if os.path.exists(local_url):
+ shutil.rmtree(local_url)
+ self.export(
+ url,
+ to=local_url,
+ rev=revision,
+ ignore_externals=self.has_recursive_externals,
+ )
except SubversionException as se:
if se.args[0].startswith(
(
@@ -495,15 +510,15 @@ class SvnRepo:
else:
raise
- if self.from_dump:
- # when exporting a subpath of a subversion repository mounted from
- # a dump file generated by svnrdump, exported paths are relative to
- # the repository root path while they are relative to the subpath
- # otherwise, so we need to adjust the URL of the exported filesystem
- root_dir_local_url = os.path.join(local_url, self.root_directory.strip("/"))
- # check that root directory of a subproject did not get removed in revision
- if os.path.exists(root_dir_local_url):
- local_url = root_dir_local_url
+ # NOTE(review): former "if self.from_dump:" guard intentionally removed — the root-directory path adjustment below now runs unconditionally
+ # when exporting a subpath of a subversion repository mounted from
+ # a dump file generated by svnrdump, exported paths are relative to
+ # the repository root path while they are relative to the subpath
+ # otherwise, so we need to adjust the URL of the exported filesystem
+ root_dir_local_url = os.path.join(local_url, self.root_directory.strip("/"))
+ # check that root directory of a subproject did not get removed in revision
+ if os.path.exists(root_dir_local_url):
+ local_url = root_dir_local_url
return local_dirname, os.fsencode(local_url)
@@ -539,7 +554,9 @@ class SvnRepo:
# path modifications from first revision to restore possible file states induced
# by setting svn properties on those files (end of line style for instance)
self.replay_started = True
- first_revision = 1 if start_revision else 0 # handle empty repository edge case
+ first_revision = (
+ start_revision if start_revision else 0
+ ) # handle empty repository edge case
for commit in self.logs(first_revision, end_revision):
rev = commit["rev"]
copyfrom_revs = (
@@ -560,19 +577,16 @@ class SvnRepo:
low_water_mark = min(copyfrom_revs)
objects = self.swhreplay.compute_objects(rev, low_water_mark)
- if rev >= start_revision:
- # start yielding new data to archive once we reached the revision to
- # resume the loading from
- if commit["has_changes"] or start_revision == 0:
- # yield data only if commit has changes or if repository is empty
- root_dir_path = self.root_directory.encode()[1:]
- if not root_dir_path or root_dir_path in self.swhreplay.directory:
- root_dir = self.swhreplay.directory[root_dir_path]
- else:
- # root directory of subproject got removed in revision, return
- # empty directory for that edge case
- root_dir = DirectoryFromDisk()
- yield rev, commit, objects, root_dir
+ if commit["has_changes"] or start_revision == 0:
+ # yield data only if commit has changes or if repository is empty
+ root_dir_path = self.root_directory.encode()[1:]
+ if not root_dir_path or root_dir_path in self.swhreplay.directory:
+ root_dir = self.swhreplay.directory[root_dir_path]
+ else:
+ # root directory of subproject got removed in revision, return
+ # empty directory for that edge case
+ root_dir = DirectoryFromDisk()
+ yield rev, commit, objects, root_dir
def swh_hash_data_at_revision(
self, revision: int
diff --git a/swh/loader/svn/tests/test_externals.py b/swh/loader/svn/tests/test_externals.py
index 4610588..6f61650 100644
--- a/swh/loader/svn/tests/test_externals.py
+++ b/swh/loader/svn/tests/test_externals.py
@@ -81,7 +81,6 @@ def test_loader_with_valid_svn_externals(
properties={
"svn:externals": (
f"{svn_urljoin(external_repo_url, 'code/hello')} hello\n"
- f"{svn_urljoin(external_repo_url, 'foo.sh')} foo.sh\n"
f"{svn_urljoin(repo_url, 'trunk/bar.sh')} bar.sh"
)
},
@@ -199,12 +198,6 @@ def test_loader_with_valid_externals_modification(
properties={"svn:executable": "*"},
data=b"#!/bin/bash\necho bar",
),
- CommitChange(
- change_type=CommitChangeType.AddOrUpdate,
- path="foo.sh",
- properties={"svn:executable": "*"},
- data=b"#!/bin/bash\necho foo",
- ),
],
)
@@ -219,7 +212,6 @@ def test_loader_with_valid_externals_modification(
properties={
"svn:externals": (
f"{svn_urljoin(external_repo_url, 'code/hello')} src/code/hello\n" # noqa
- f"{svn_urljoin(external_repo_url, 'foo.sh')} src/foo.sh\n"
)
},
),
@@ -239,7 +231,6 @@ def test_loader_with_valid_externals_modification(
properties={
"svn:externals": (
f"{svn_urljoin(external_repo_url, 'code/bar')} src/code/bar\n" # noqa
- f"{svn_urljoin(external_repo_url, 'foo.sh')} src/foo.sh\n"
)
},
),
@@ -258,11 +249,11 @@ def test_loader_with_valid_externals_modification(
def test_loader_with_valid_externals_and_versioned_path(
- swh_storage, repo_url, external_repo_url, tmp_path
+ swh_storage, repo_url, tmp_path
):
- # first commit on external
+ # first commit
add_commit(
- external_repo_url,
+ repo_url,
"Create a file in an external repository",
[
CommitChange(
@@ -273,7 +264,7 @@ def test_loader_with_valid_externals_and_versioned_path(
],
)
- # first commit
+ # second commit
add_commit(
repo_url,
"Add file with same name but different content in main repository",
@@ -286,7 +277,7 @@ def test_loader_with_valid_externals_and_versioned_path(
],
)
- # second commit
+ # third commit
add_commit(
repo_url,
"Add externals targeting the versioned file",
@@ -296,14 +287,14 @@ def test_loader_with_valid_externals_and_versioned_path(
path="trunk/",
properties={
"svn:externals": (
- f"{svn_urljoin(external_repo_url, 'code/script.sh')} script.sh" # noqa
+ f"{svn_urljoin(repo_url, 'code/script.sh')} script.sh" # noqa
)
},
),
],
)
- # third commit
+ # fourth commit
add_commit(
repo_url,
"Modify the versioned file",
@@ -571,15 +562,15 @@ def test_dump_loader_relative_externals_detection(
],
)
- external_url = f"{external_repo_url.replace('file://', '//')}/project2/bar.sh"
+ external_url = f"{external_repo_url.replace('file://', '//')}/project2"
add_commit(
repo_url,
"Set external relative to URL scheme in repository to load",
[
CommitChange(
change_type=CommitChangeType.AddOrUpdate,
- path="project1/",
- properties={"svn:externals": (f"{external_url} bar.sh")},
+ path="trunk/",
+ properties={"svn:externals": (f"{external_url} project2")},
),
],
)
@@ -603,7 +594,7 @@ def test_dump_loader_relative_externals_detection(
[
CommitChange(
change_type=CommitChangeType.AddOrUpdate,
- path="project1/",
+ path="trunk/",
properties={"svn:externals": None},
),
],
@@ -623,78 +614,6 @@ def test_dump_loader_relative_externals_detection(
assert not loader.svnrepo.has_relative_externals
-def test_loader_externals_cache(swh_storage, repo_url, external_repo_url, tmp_path):
-
- # first commit on external
- add_commit(
- external_repo_url,
- "Create some directories and files in an external repository",
- [
- CommitChange(
- change_type=CommitChangeType.AddOrUpdate,
- path="code/hello/hello-world",
- properties={"svn:executable": "*"},
- data=b"#!/bin/bash\necho Hello World !",
- ),
- ],
- )
-
- # first commit
- add_commit(
- repo_url,
- "Create repository structure.",
- [
- CommitChange(
- change_type=CommitChangeType.AddOrUpdate,
- path="project1/",
- ),
- CommitChange(
- change_type=CommitChangeType.AddOrUpdate,
- path="project2/",
- ),
- ],
- )
-
- external_url = svn_urljoin(external_repo_url, "code/hello")
-
- # second commit
- add_commit(
- repo_url,
- (
- "Set svn:externals property on trunk/externals path of repository to load."
- "One external targets a remote directory and another one a remote file."
- ),
- [
- CommitChange(
- change_type=CommitChangeType.AddOrUpdate,
- path="project1/externals/",
- properties={"svn:externals": (f"{external_url} hello\n")},
- ),
- CommitChange(
- change_type=CommitChangeType.AddOrUpdate,
- path="project2/externals/",
- properties={"svn:externals": (f"{external_url} hello\n")},
- ),
- ],
- )
-
- loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1)
- assert loader.load() == {"status": "eventful"}
- assert_last_visit_matches(
- loader.storage,
- repo_url,
- status="full",
- type="svn",
- )
- check_snapshot(loader.snapshot, loader.storage)
-
- assert (
- external_url,
- None,
- False,
- ) in loader.svnrepo.swhreplay.editor.externals_cache
-
-
def test_loader_remove_versioned_path_with_external_overlap(
swh_storage, repo_url, external_repo_url, tmp_path
):
@@ -753,12 +672,7 @@ def test_loader_remove_versioned_path_with_external_overlap(
],
)
- loader = SvnLoader(
- swh_storage,
- repo_url,
- temp_directory=tmp_path,
- check_revision=1,
- )
+ loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
@@ -918,7 +832,6 @@ def test_loader_remove_external_overlapping_versioned_path(
path="", # repo root dir
properties={
"svn:externals": (
- f"{svn_urljoin(external_repo_url, 'code/foo.sh')} trunk/code/foo.sh\n" # noqa
f"{svn_urljoin(external_repo_url, 'code/link')} trunk/link"
)
},
@@ -939,12 +852,7 @@ def test_loader_remove_external_overlapping_versioned_path(
],
)
- loader = SvnLoader(
- swh_storage,
- repo_url,
- temp_directory=tmp_path,
- check_revision=1,
- )
+ loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
@@ -1006,12 +914,7 @@ def test_loader_modify_external_same_path(
],
)
- loader = SvnLoader(
- swh_storage,
- repo_url,
- temp_directory=tmp_path,
- check_revision=1,
- )
+ loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
@@ -1257,76 +1160,6 @@ def test_loader_external_in_versioned_path(
check_snapshot(loader.snapshot, loader.storage)
-def test_dump_loader_externals_in_loaded_repository(swh_storage, tmp_path, mocker):
- repo_url = create_repo(tmp_path, repo_name="foo")
- externa_url = create_repo(tmp_path, repo_name="foobar")
-
- # first commit on external
- add_commit(
- externa_url,
- "Create a file in an external repository",
- [
- CommitChange(
- change_type=CommitChangeType.AddOrUpdate,
- path="trunk/src/foo.sh",
- data=b"#!/bin/bash\necho foo",
- ),
- ],
- )
-
- add_commit(
- repo_url,
- (
- "Add a file and set externals on trunk/externals:"
- "one external located in this repository, the other in a remote one"
- ),
- [
- CommitChange(
- change_type=CommitChangeType.AddOrUpdate,
- path="trunk/src/bar.sh",
- data=b"#!/bin/bash\necho bar",
- ),
- CommitChange(
- change_type=CommitChangeType.AddOrUpdate,
- path="trunk/externals/",
- properties={
- "svn:externals": (
- f"{svn_urljoin(repo_url, 'trunk/src/bar.sh')} bar.sh\n"
- f"{svn_urljoin(externa_url, 'trunk/src/foo.sh')} foo.sh"
- )
- },
- ),
- ],
- )
-
- from swh.loader.svn.svn import client
-
- mock_client = mocker.MagicMock()
- mocker.patch.object(client, "Client", mock_client)
-
- class Info:
- repos_root_url = repo_url
-
- mock_client().info.return_value = {"repo": Info()}
-
- loader = SvnLoaderFromRemoteDump(swh_storage, repo_url, temp_directory=tmp_path)
- loader.load()
-
- export_call_args = mock_client().export.call_args_list
-
- # first external export should use the base URL of the local repository
- # mounted from the remote dump as it is located in loaded repository
- assert export_call_args[0][0][0] != svn_urljoin(
- loader.svnrepo.origin_url, "trunk/src/bar.sh"
- )
- assert export_call_args[0][0][0] == svn_urljoin(
- loader.svnrepo.remote_url, "trunk/src/bar.sh"
- )
-
- # second external export should use the remote URL of the external repository
- assert export_call_args[1][0][0] == svn_urljoin(externa_url, "trunk/src/foo.sh")
-
-
def test_loader_externals_add_remove_readd_on_subpath(
swh_storage, repo_url, external_repo_url, tmp_path
):
@@ -1567,9 +1400,11 @@ def test_loader_with_externals_parsing_error(
check_snapshot(loader.snapshot, loader.storage)
-@pytest.mark.parametrize("remote_external_path", ["src/main/project", "src/main"])
def test_loader_overlapping_external_paths_removal(
- swh_storage, repo_url, external_repo_url, tmp_path, remote_external_path
+ swh_storage,
+ repo_url,
+ external_repo_url,
+ tmp_path,
):
add_commit(
external_repo_url,
@@ -1602,7 +1437,7 @@ def test_loader_overlapping_external_paths_removal(
change_type=CommitChangeType.AddOrUpdate,
path="trunk/src/main/",
properties={
- "svn:externals": f"{svn_urljoin(external_repo_url, remote_external_path)} project" # noqa
+ "svn:externals": f"{svn_urljoin(external_repo_url, 'src/main/project')} project" # noqa
},
),
CommitChange(
@@ -1626,12 +1461,7 @@ def test_loader_overlapping_external_paths_removal(
],
)
- loader = SvnLoader(
- swh_storage,
- repo_url,
- temp_directory=tmp_path,
- check_revision=1,
- )
+ loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1)
assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
index fb1401d..aef9ba8 100644
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -512,14 +512,15 @@ def test_loader_svn_visit_with_eol_style(swh_storage, datadir, tmp_path):
assert loader.load() == {"status": "eventful"}
mediawiki_snapshot = Snapshot(
- id=hash_to_bytes("d6d6e9703f157c5702d9a4a5dec878926ed4ab76"),
+ id=hash_to_bytes("161180207e7c2b3ff7ee8312428b0f8e3e1bd594"),
branches={
b"HEAD": SnapshotBranch(
- target=hash_to_bytes("7da4975c363101b819756d33459f30a866d01b1b"),
+ target=hash_to_bytes("c2652c87b7c27cafa840524cf9841feefb13908b"),
target_type=TargetType.REVISION,
)
},
)
+
check_snapshot(mediawiki_snapshot, loader.storage)
assert_last_visit_matches(
@@ -1295,18 +1296,18 @@ def test_loader_first_revision_is_not_number_one(
loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path)
# post loading will detect an issue and make a partial visit with a snapshot
- assert loader.load() == {"status": "failed"}
+ assert loader.load() == {"status": "eventful"}
assert_last_visit_matches(
loader.storage,
repo_url,
- status="partial",
+ status="full",
type="svn",
)
check_snapshot(loader.snapshot, loader.storage)
assert get_stats(loader.storage) == {
- "content": 2,
+ "content": 3,
"directory": 2,
"origin": 1,
"origin_visit": 1,
@@ -2315,13 +2316,7 @@ def test_loader_check_tree_divergence(swh_storage, repo_url, tmp_path, caplog):
)
# load it
- loader = SvnLoader(
- swh_storage,
- repo_url,
- temp_directory=tmp_path,
- debug=True,
- check_revision=1,
- )
+ loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1)
assert loader.load() == {"status": "eventful"}
# export it to a temporary directory