diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@
 # should match https://pypi.python.org/pypi names. For the full spec or
 # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html
 
+tenacity
diff --git a/swh/loader/cvs/loader.py b/swh/loader/cvs/loader.py
--- a/swh/loader/cvs/loader.py
+++ b/swh/loader/cvs/loader.py
@@ -15,6 +15,9 @@
 import time
 from typing import Any, BinaryIO, Dict, Iterator, List, Optional, Sequence, Tuple, cast
 
+from tenacity import retry
+from tenacity.retry import retry_if_exception_type
+from tenacity.stop import stop_after_attempt
 from urllib3.util import parse_url
 
 from swh.loader.core.loader import BaseLoader
@@ -54,6 +57,20 @@
 TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.cvs."
 
 
+def rsync_retry():
+    """Build a tenacity decorator retrying a failing rsync invocation.
+
+    The decorated callable is attempted up to 4 times in total when it raises
+    subprocess.CalledProcessError; once attempts are exhausted, the last
+    exception is re-raised as is instead of being wrapped in a RetryError.
+    """
+    return retry(
+        retry=retry_if_exception_type(subprocess.CalledProcessError),
+        stop=stop_after_attempt(max_attempt_number=4),
+        reraise=True,
+    )
+
+
 class BadPathException(Exception):
     pass
 
@@ -337,11 +354,33 @@
                 for k in excluded_keywords:
                     self.excluded_keywords.append(k.strip())
 
+    @rsync_retry()
+    def execute_rsync(
+        self, rsync_cmd: List[str], **run_opts
+    ) -> subprocess.CompletedProcess:
+        """Run an rsync command, retrying it when it exits with an error.
+
+        Args:
+            rsync_cmd: rsync command line, as a list of arguments
+            run_opts: extra keyword arguments forwarded to subprocess.run
+
+        Returns:
+            the completed rsync process, whose exit status is zero
+
+        Raises:
+            subprocess.CalledProcessError: when rsync still exits with a
+                non-zero status after the last attempt
+        """
+        rsync = subprocess.run(rsync_cmd, **run_opts)
+        rsync.check_returncode()
+        return rsync
+
     def fetch_cvs_repo_with_rsync(self, host: str, path: str) -> None:
         # URL *must* end with a trailing slash in order to get CVSROOT listed
         url = "rsync://%s%s/" % (host, os.path.dirname(path))
-        rsync = subprocess.run(["rsync", url], capture_output=True, encoding="ascii")
-        rsync.check_returncode()
+        rsync = self.execute_rsync(
+            ["rsync", url], capture_output=True, encoding="ascii"
+        )
         have_cvsroot = False
         have_module = False
         for line in rsync.stdout.split("\n"):
@@ -362,12 +401,12 @@
         for d in ("CVSROOT", self.cvs_module_name):
             target_dir = os.path.join(self.cvsroot_path, d)
             os.makedirs(target_dir, exist_ok=True)
-            subprocess.run(
-                # Append trailing path separators ("/" in the URL and os.path.sep in the
-                # local target directory path) to ensure that rsync will place files
-                # directly within our target directory .
-                ["rsync", "-a", url + d + "/", target_dir + os.path.sep]
-            ).check_returncode()
+            # Append trailing path separators ("/" in the URL and os.path.sep in the
+            # local target directory path) to ensure that rsync will place files
+            # directly within our target directory.
+            self.execute_rsync(
+                ["rsync", "-az", url + d + "/", target_dir + os.path.sep]
+            )
 
     def prepare(self) -> None:
         self._last_revision = None
diff --git a/swh/loader/cvs/tests/test_loader.py b/swh/loader/cvs/tests/test_loader.py
--- a/swh/loader/cvs/tests/test_loader.py
+++ b/swh/loader/cvs/tests/test_loader.py
@@ -4,6 +4,7 @@
 # See top-level LICENSE file for more information
 
 import os
+import subprocess
 import tempfile
 from typing import Any, Dict
 
@@ -1178,3 +1179,71 @@
 
     rlog_file_override.close()
     os.unlink(rlog_file_path)
+
+
+def test_loader_rsync_retry(swh_storage, mocker, tmp_path):
+    """Check that failed rsync invocations are retried until they succeed."""
+    module_name = "module"
+    host = "example.org"
+    path = f"/cvsroot/{module_name}"
+    repo_url = f"rsync://{host}{path}/"
+    # The loader runs rsync against the parent directory of the module path
+    rsync_url = f"rsync://{host}{os.path.dirname(path)}/"
+
+    rsync_first_call = ["rsync", rsync_url]
+    rsync_second_call = [
+        "rsync",
+        "-az",
+        f"{rsync_url}CVSROOT/",
+        os.path.join(tmp_path, "CVSROOT", ""),
+    ]
+    rsync_third_call = [
+        "rsync",
+        "-az",
+        f"{rsync_url}{module_name}/",
+        os.path.join(tmp_path, module_name, ""),
+    ]
+
+    mock_subprocess = mocker.patch("swh.loader.cvs.loader.subprocess")
+    mock_subprocess.run.side_effect = [
+        subprocess.CompletedProcess(args=rsync_first_call, returncode=23),
+        subprocess.CompletedProcess(
+            args=rsync_first_call,
+            returncode=0,
+            stdout=f"""
+            drwxr-xr-x 21 2012/11/04 06:58:58 .
+            drwxr-xr-x 39 2021/01/22 10:21:05 CVSROOT
+            drwxr-xr-x 15 2020/12/28 00:50:21 {module_name}""",
+        ),
+        subprocess.CompletedProcess(
+            args=rsync_second_call,
+            returncode=23,
+        ),
+        subprocess.CompletedProcess(
+            args=rsync_second_call,
+            returncode=23,
+        ),
+        subprocess.CompletedProcess(args=rsync_second_call, returncode=0),
+        subprocess.CompletedProcess(
+            args=rsync_third_call,
+            returncode=23,
+        ),
+        subprocess.CompletedProcess(
+            args=rsync_third_call,
+            returncode=23,
+        ),
+        subprocess.CompletedProcess(args=rsync_third_call, returncode=0),
+    ]
+
+    loader = CvsLoader(swh_storage, repo_url)
+    loader.cvs_module_name = module_name
+    loader.cvsroot_path = tmp_path
+    loader.fetch_cvs_repo_with_rsync(host, path)
+
+    # Each command must have been re-run with identical arguments until it
+    # succeeded: 2 listing attempts, then 3 attempts per fetched directory.
+    assert mock_subprocess.run.call_args_list == (
+        [mocker.call(rsync_first_call, capture_output=True, encoding="ascii")] * 2
+        + [mocker.call(rsync_second_call)] * 3
+        + [mocker.call(rsync_third_call)] * 3
+    )