diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ click iso8601 subvertpy >= 0.9.4 +tenacity >= 6.2 typing-extensions diff --git a/swh/loader/svn/replay.py b/swh/loader/svn/replay.py --- a/swh/loader/svn/replay.py +++ b/swh/loader/svn/replay.py @@ -307,7 +307,7 @@ # export operation. with open(self.fullpath, "rb") as f: content = f.read() - self.svnrepo.client.export( + self.svnrepo.export( os.path.join(self.svnrepo.remote_url.encode(), self.path), to=self.fullpath, peg_rev=self.editor.revnum, @@ -682,12 +682,7 @@ and not self.svnrepo.has_relative_externals ): url = url.replace(origin_url, self.svnrepo.remote_url) - logger.debug( - "svn export --ignore-keywords %s%s", - url, - f"@{revision}" if revision else "", - ) - self.svnrepo.client.export( + self.svnrepo.export( url, to=temp_path, peg_rev=revision, ignore_keywords=True, ) self.editor.externals_cache[external] = temp_path diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py --- a/swh/loader/svn/svn.py +++ b/swh/loader/svn/svn.py @@ -8,7 +8,6 @@ commit. """ - import logging import os import shutil @@ -28,12 +27,12 @@ ) from . import converters, replay +from .svn_retry import svn_retry from .utils import is_recursive_external, parse_external_definition # When log message contains empty data DEFAULT_AUTHOR_MESSAGE = "" - logger = logging.getLogger(__name__) @@ -61,9 +60,9 @@ auth = Auth([get_username_provider()]) # one connection for log iteration - self.conn_log = RemoteAccess(self.remote_url, auth=auth) + self.conn_log = self.remote_access(auth) # another for replay - self.conn = RemoteAccess(self.remote_url, auth=auth) + self.conn = self.remote_access(auth) # one client for update operation self.client = client.Client(auth=auth) @@ -213,6 +212,125 @@ ): yield self.__to_entry(log_entry) + @svn_retry() + def remote_access(self, auth: Auth) -> RemoteAccess: + """Simple wrapper around subvertpy.ra.RemoteAccess creation + enabling to retry the operation if a network error occurs.""" + return RemoteAccess(self.remote_url, auth=auth) + + @svn_retry() + def export( + self, + url: str, + to: str, + rev: Optional[int] = None, + peg_rev: Optional[int] = None, + recurse: bool = True, + ignore_externals: bool = False, + overwrite: bool = False, + ignore_keywords: bool = False, + ) -> int: + """Simple wrapper around subvertpy.client.Client.export enabling to retry + the command if a network error occurs. + + See documentation of svn_client_export5 function from subversion C API + to get details about parameters. + """ + # remove export path as command can be retried + if os.path.isfile(to) or os.path.islink(to): + os.remove(to) + elif os.path.isdir(to): + shutil.rmtree(to) + options = [] + if rev is not None: + options.append(f"-r {rev}") + if recurse: + options.append("--depth infinity") + if ignore_externals: + options.append("--ignore-externals") + if overwrite: + options.append("--force") + if ignore_keywords: + options.append("--ignore-keywords") + logger.debug( + "svn export %s %s%s %s", + " ".join(options), + url, + f"@{peg_rev}" if peg_rev else "", + to, + ) + return self.client.export( + url, + to=to, + rev=rev, + peg_rev=peg_rev, + recurse=recurse, + ignore_externals=ignore_externals, + overwrite=overwrite, + ignore_keywords=ignore_keywords, + ) + + @svn_retry() + def checkout( + self, + url: str, + path: str, + rev: Optional[int] = None, + peg_rev: Optional[int] = None, + recurse: bool = True, + ignore_externals: bool = False, + allow_unver_obstructions: bool = False, + ) -> int: + """Simple wrapper around subvertpy.client.Client.checkout enabling to retry + the command if a network error occurs. + + See documentation of svn_client_checkout3 function from subversion C API + to get details about parameters. + """ + # remove checkout path as command can be retried + if os.path.isdir(path): + shutil.rmtree(path) + options = [] + if rev is not None: + options.append(f"-r {rev}") + if recurse: + options.append("--depth infinity") + if ignore_externals: + options.append("--ignore-externals") + logger.debug( + "svn checkout %s %s%s %s", + " ".join(options), + self.remote_url, + f"@{peg_rev}" if peg_rev else "", + path, + ) + return self.client.checkout( + url, + path=path, + rev=rev, + peg_rev=peg_rev, + recurse=recurse, + ignore_externals=ignore_externals, + allow_unver_obstructions=allow_unver_obstructions, + ) + + @svn_retry() + def propget( + self, + name: str, + target: str, + peg_rev: Optional[int], + rev: Optional[int] = None, + recurse: bool = False, + ): + """Simple wrapper around subvertpy.client.Client.propget enabling to retry + the command if a network error occurs. + + See documentation of svn_client_propget5 function from subversion C API + to get details about parameters. + """ + return self.client.propget(name, target, peg_rev, rev, recurse) + def export_temporary(self, revision: int) -> Tuple[str, bytes]: """Export the repository to a given revision in a temporary location. This is up to the caller of this function to clean up the temporary location when done (cf. @@ -248,16 +366,12 @@ with tempfile.TemporaryDirectory( dir=self.local_dirname, prefix=f"checkout-revision-{revision}." ) as co_dirname: - logger.debug( - "svn checkout --ignore-externals %s@%s", self.remote_url, revision, - ) - self.client.checkout( + + self.checkout( self.remote_url, co_dirname, revision, ignore_externals=True ) # get all svn:externals properties recursively - externals = self.client.propget( - "svn:externals", co_dirname, None, None, True - ) + externals = self.propget("svn:externals", co_dirname, None, None, True) self.has_relative_externals = False self.has_recursive_externals = False for path, external_defs in externals.items(): @@ -291,10 +405,8 @@ try: url = url.rstrip("/") - logger.debug( - "svn export --ignore-keywords %s@%s", url, revision, - ) - self.client.export( + + self.export( url, to=local_url, rev=revision, diff --git a/swh/loader/svn/svn_retry.py b/swh/loader/svn/svn_retry.py new file mode 100644 --- /dev/null +++ b/swh/loader/svn/svn_retry.py @@ -0,0 +1,41 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +import logging + +from subvertpy import SubversionException +from tenacity import retry +from tenacity.before_sleep import before_sleep_log +from tenacity.retry import retry_if_exception +from tenacity.stop import stop_after_attempt +from tenacity.wait import wait_exponential + +logger = logging.getLogger(__name__) + +SVN_RETRY_WAIT_EXP_BASE = 10 +SVN_RETRY_MAX_ATTEMPTS = 5 + + +def is_retryable_svn_exception(exception): + if isinstance(exception, SubversionException): + return exception.args[0].startswith( + ( + "Connection timed out", + "Unable to connect to a repository at URL", + "Error running context: The server unexpectedly closed the connection", + ) + ) + return isinstance(exception, (ConnectionResetError, TimeoutError)) + + +def svn_retry(): + return retry( + retry=retry_if_exception(is_retryable_svn_exception), + wait=wait_exponential(exp_base=SVN_RETRY_WAIT_EXP_BASE), + stop=stop_after_attempt(max_attempt_number=SVN_RETRY_MAX_ATTEMPTS), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) diff --git a/swh/loader/svn/tests/test_externals.py b/swh/loader/svn/tests/test_externals.py --- a/swh/loader/svn/tests/test_externals.py +++ b/swh/loader/svn/tests/test_externals.py @@ -5,13 +5,21 @@ import pytest -from swh.loader.svn.loader import SvnLoader, SvnLoaderFromRemoteDump +from swh.loader.svn.loader import SvnLoader, SvnLoaderFromRemoteDump, SvnRepo from swh.loader.svn.utils import svn_urljoin from swh.loader.tests import assert_last_visit_matches, check_snapshot from .utils import CommitChange, CommitChangeType, add_commit, create_repo +@pytest.fixture(autouse=True) +def svn_retry_sleep_mocker(mocker): + mocker.patch.object(SvnRepo.export.retry, "sleep") + mocker.patch.object(SvnRepo.checkout.retry, "sleep") + mocker.patch.object(SvnRepo.propget.retry, "sleep") + mocker.patch.object(SvnRepo.remote_access.retry, "sleep") + + @pytest.fixture def external_repo_url(tmpdir_factory): # create a repository @@ -110,7 +118,7 @@ check_snapshot(loader.snapshot, loader.storage) -def test_loader_with_invalid_svn_externals(swh_storage, repo_url, tmp_path): +def test_loader_with_invalid_svn_externals(swh_storage, repo_url, tmp_path, mocker): # first commit add_commit( diff --git a/swh/loader/svn/tests/test_svn_retry.py b/swh/loader/svn/tests/test_svn_retry.py new file mode 100644 --- /dev/null +++ b/swh/loader/svn/tests/test_svn_retry.py @@ -0,0 +1,285 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import os + +import pytest +from subvertpy import SubversionException +from subvertpy.ra import Auth, RemoteAccess, get_username_provider + +from swh.loader.svn.svn import SvnRepo +from swh.loader.svn.svn_retry import SVN_RETRY_MAX_ATTEMPTS, SVN_RETRY_WAIT_EXP_BASE +from swh.loader.tests import prepare_repository_from_archive + + +def _get_repo_url(archive_name, datadir, tmp_path): + archive_path = os.path.join(datadir, f"{archive_name}.tgz") + return prepare_repository_from_archive(archive_path, "pkg-gourmet", tmp_path) + + +@pytest.fixture() +def sample_repo_url(datadir, tmp_path): + return _get_repo_url("pkg-gourmet", datadir, tmp_path) + + +@pytest.fixture() +def sample_repo_with_externals_url(datadir, tmp_path): + return _get_repo_url("pkg-gourmet-with-external-id", datadir, tmp_path) + + +class SVNClientWrapper: + """Methods of subvertpy.client.Client cannot be patched by mocker fixture + as they are read only attributes due to subvertpy.client module being + a C extension module. So we use that wrapper class instead to simulate + mocking behavior. + """ + + def __init__(self, client, exception, nb_failed_calls): + self.client = client + self.exception = exception + self.nb_failed_calls = nb_failed_calls + self.nb_calls = 0 + + def _wrapped_svn_cmd(self, svn_cmd, *args, **kwargs): + self.nb_calls = self.nb_calls + 1 + if self.nb_calls <= self.nb_failed_calls: + raise self.exception + else: + return svn_cmd(*args, **kwargs) + + def export(self, *args, **kwargs): + return self._wrapped_svn_cmd(self.client.export, *args, **kwargs) + + def checkout(self, *args, **kwargs): + return self._wrapped_svn_cmd(self.client.checkout, *args, **kwargs) + + def propget(self, *args, **kwargs): + return self._wrapped_svn_cmd(self.client.propget, *args, **kwargs) + + +def assert_sleep_calls(mock_sleep, mocker, nb_failures): + mock_sleep.assert_has_calls( + [ + mocker.call(param) + for param in [SVN_RETRY_WAIT_EXP_BASE ** i for i in range(nb_failures)] + ] + ) + + +RETRYABLE_EXCEPTIONS = [ + SubversionException( + "Error running context: The server unexpectedly closed the connection.", 120108, + ), + SubversionException("Connection timed out", 175012), + SubversionException("Unable to connect to a repository at URL", 170013), + ConnectionResetError(), + TimeoutError(), +] + + +@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS) +def test_svn_export_retry_success( + mocker, tmp_path, sample_repo_url, exception_to_retry +): + svnrepo = SvnRepo( + sample_repo_url, sample_repo_url, tmp_path, max_content_length=100000 + ) + + mock_sleep = mocker.patch.object(svnrepo.export.retry, "sleep") + + nb_failed_calls = 2 + svnrepo.client = SVNClientWrapper( + svnrepo.client, exception_to_retry, nb_failed_calls + ) + + export_path = os.path.join(tmp_path, "export") + svnrepo.export(sample_repo_url, export_path) + assert os.path.exists(export_path) + + assert_sleep_calls(mock_sleep, mocker, nb_failed_calls) + + +@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS) +def test_svn_export_retry_failure( + mocker, tmp_path, sample_repo_url, exception_to_retry +): + svnrepo = SvnRepo( + sample_repo_url, sample_repo_url, tmp_path, max_content_length=100000 + ) + + mock_sleep = mocker.patch.object(svnrepo.export.retry, "sleep") + + nb_failed_calls = SVN_RETRY_MAX_ATTEMPTS + svnrepo.client = SVNClientWrapper( + svnrepo.client, exception_to_retry, nb_failed_calls + ) + + with pytest.raises(type(exception_to_retry)): + export_path = os.path.join(tmp_path, "export") + svnrepo.export(sample_repo_url, export_path) + + assert not os.path.exists(export_path) + + assert_sleep_calls(mock_sleep, mocker, nb_failed_calls - 1) + + +@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS) +def test_svn_checkout_retry_success( + mocker, tmp_path, sample_repo_url, exception_to_retry +): + svnrepo = SvnRepo( + sample_repo_url, sample_repo_url, tmp_path, max_content_length=100000 + ) + + mock_sleep = mocker.patch.object(svnrepo.checkout.retry, "sleep") + + nb_failed_calls = 2 + svnrepo.client = SVNClientWrapper( + svnrepo.client, exception_to_retry, nb_failed_calls + ) + + checkout_path = os.path.join(tmp_path, "checkout") + svnrepo.checkout(sample_repo_url, checkout_path, svnrepo.head_revision()) + assert os.path.exists(checkout_path) + + assert_sleep_calls(mock_sleep, mocker, nb_failed_calls) + + +@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS) +def test_svn_checkout_retry_failure( + mocker, tmp_path, sample_repo_url, exception_to_retry +): + svnrepo = SvnRepo( + sample_repo_url, sample_repo_url, tmp_path, max_content_length=100000 + ) + + mock_sleep = mocker.patch.object(svnrepo.checkout.retry, "sleep") + + nb_failed_calls = SVN_RETRY_MAX_ATTEMPTS + svnrepo.client = SVNClientWrapper( + svnrepo.client, exception_to_retry, nb_failed_calls + ) + + checkout_path = os.path.join(tmp_path, "checkout") + with pytest.raises(type(exception_to_retry)): + svnrepo.checkout(sample_repo_url, checkout_path, svnrepo.head_revision()) + + assert not os.path.exists(checkout_path) + + assert_sleep_calls(mock_sleep, mocker, nb_failed_calls - 1) + + +@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS) +def test_svn_propget_retry_success( + mocker, tmp_path, sample_repo_with_externals_url, exception_to_retry +): + svnrepo = SvnRepo( + sample_repo_with_externals_url, + sample_repo_with_externals_url, + tmp_path, + max_content_length=100000, + ) + + checkout_path = os.path.join(tmp_path, "checkout") + svnrepo.checkout( + sample_repo_with_externals_url, + checkout_path, + svnrepo.head_revision(), + ignore_externals=True, + ) + + mock_sleep = mocker.patch.object(svnrepo.propget.retry, "sleep") + + nb_failed_calls = 2 + svnrepo.client = SVNClientWrapper( + svnrepo.client, exception_to_retry, nb_failed_calls + ) + + externals = svnrepo.propget("svn:externals", checkout_path, None, None, True) + + assert externals + + assert_sleep_calls(mock_sleep, mocker, nb_failed_calls) + + +@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS) +def test_svn_propget_retry_failure( + mocker, tmp_path, sample_repo_with_externals_url, exception_to_retry +): + svnrepo = SvnRepo( + sample_repo_with_externals_url, + sample_repo_with_externals_url, + tmp_path, + max_content_length=100000, + ) + + checkout_path = os.path.join(tmp_path, "checkout") + svnrepo.checkout( + sample_repo_with_externals_url, + checkout_path, + svnrepo.head_revision(), + ignore_externals=True, + ) + + mock_sleep = mocker.patch.object(svnrepo.propget.retry, "sleep") + + nb_failed_calls = SVN_RETRY_MAX_ATTEMPTS + svnrepo.client = SVNClientWrapper( + svnrepo.client, exception_to_retry, nb_failed_calls + ) + + with pytest.raises(type(exception_to_retry)): + svnrepo.propget("svn:externals", checkout_path, None, None, True) + + assert_sleep_calls(mock_sleep, mocker, nb_failed_calls - 1) + + +@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS) +def test_remote_access_retry_success( + mocker, tmp_path, sample_repo_url, exception_to_retry +): + + nb_failed_calls = 2 + mock_ra = mocker.patch("swh.loader.svn.svn.RemoteAccess") + remote_access = RemoteAccess(sample_repo_url, auth=Auth([get_username_provider()])) + mock_ra.side_effect = ( + [exception_to_retry] * nb_failed_calls + + [remote_access] + + [exception_to_retry] * nb_failed_calls + + [remote_access] + ) + + mock_sleep = mocker.patch.object(SvnRepo.remote_access.retry, "sleep") + + SvnRepo( + sample_repo_url, sample_repo_url, tmp_path, max_content_length=100000, + ) + + assert_sleep_calls(mock_sleep, mocker, nb_failed_calls) + + +@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS) +def test_remote_access_retry_failure( + mocker, tmp_path, sample_repo_url, exception_to_retry +): + + nb_failed_calls = SVN_RETRY_MAX_ATTEMPTS + mock_ra = mocker.patch("swh.loader.svn.svn.RemoteAccess") + remote_access = RemoteAccess(sample_repo_url, auth=Auth([get_username_provider()])) + mock_ra.side_effect = ( + [exception_to_retry] * nb_failed_calls + + [remote_access] + + [exception_to_retry] * nb_failed_calls + + [remote_access] + ) + + mock_sleep = mocker.patch.object(SvnRepo.remote_access.retry, "sleep") + + with pytest.raises(type(exception_to_retry)): + SvnRepo( + sample_repo_url, sample_repo_url, tmp_path, max_content_length=100000, + ) + + assert_sleep_calls(mock_sleep, mocker, nb_failed_calls - 1)