Page MenuHomeSoftware Heritage

D7586.id27467.diff
No OneTemporary

D7586.id27467.diff

diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@
# should match https://pypi.python.org/pypi names. For the full spec or
# dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html
+tenacity
diff --git a/swh/loader/cvs/loader.py b/swh/loader/cvs/loader.py
--- a/swh/loader/cvs/loader.py
+++ b/swh/loader/cvs/loader.py
@@ -15,6 +15,9 @@
import time
from typing import Any, BinaryIO, Dict, Iterator, List, Optional, Sequence, Tuple, cast
+from tenacity import retry
+from tenacity.retry import retry_if_exception_type
+from tenacity.stop import stop_after_attempt
from urllib3.util import parse_url
from swh.loader.core.loader import BaseLoader
@@ -54,6 +57,14 @@
TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.cvs."
+def rsync_retry():
+ return retry(
+ retry=retry_if_exception_type(subprocess.CalledProcessError),
+ stop=stop_after_attempt(max_attempt_number=4),
+ reraise=True,
+ )
+
+
class BadPathException(Exception):
pass
@@ -337,11 +348,20 @@
for k in excluded_keywords:
self.excluded_keywords.append(k.strip())
+ @rsync_retry()
+ def execute_rsync(
+ self, rsync_cmd: List[str], **run_opts
+ ) -> subprocess.CompletedProcess:
+ rsync = subprocess.run(rsync_cmd, **run_opts)
+ rsync.check_returncode()
+ return rsync
+
def fetch_cvs_repo_with_rsync(self, host: str, path: str) -> None:
# URL *must* end with a trailing slash in order to get CVSROOT listed
url = "rsync://%s%s/" % (host, os.path.dirname(path))
- rsync = subprocess.run(["rsync", url], capture_output=True, encoding="ascii")
- rsync.check_returncode()
+ rsync = self.execute_rsync(
+ ["rsync", url], capture_output=True, encoding="ascii"
+ )
have_cvsroot = False
have_module = False
for line in rsync.stdout.split("\n"):
@@ -362,12 +382,12 @@
for d in ("CVSROOT", self.cvs_module_name):
target_dir = os.path.join(self.cvsroot_path, d)
os.makedirs(target_dir, exist_ok=True)
- subprocess.run(
- # Append trailing path separators ("/" in the URL and os.path.sep in the
- # local target directory path) to ensure that rsync will place files
- # directly within our target directory .
- ["rsync", "-a", url + d + "/", target_dir + os.path.sep]
- ).check_returncode()
+ # Append trailing path separators ("/" in the URL and os.path.sep in the
+ # local target directory path) to ensure that rsync will place files
+ # directly within our target directory.
+ self.execute_rsync(
+ ["rsync", "-az", url + d + "/", target_dir + os.path.sep]
+ )
def prepare(self) -> None:
self._last_revision = None
diff --git a/swh/loader/cvs/tests/test_loader.py b/swh/loader/cvs/tests/test_loader.py
--- a/swh/loader/cvs/tests/test_loader.py
+++ b/swh/loader/cvs/tests/test_loader.py
@@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
import os
+import subprocess
import tempfile
from typing import Any, Dict
@@ -1178,3 +1179,61 @@
rlog_file_override.close()
os.unlink(rlog_file_path)
+
+
+def test_loader_rsync_retry(swh_storage, mocker, tmp_path):
+
+ module_name = "module"
+ host = "example.org"
+ path = f"/cvsroot/{module_name}"
+ repo_url = f"rsync://{host}{path}/"
+
+ rsync_first_call = ["rsync", repo_url]
+ rsync_second_call = [
+ "rsync",
+ "-az",
+ f"{repo_url}CVSROOT/",
+ os.path.join(tmp_path, "CVSROOT/"),
+ ]
+ rsync_third_call = [
+ "rsync",
+ "-az",
+ f"{repo_url}{module_name}/",
+ os.path.join(tmp_path, f"{module_name}/"),
+ ]
+
+ mock_subprocess = mocker.patch("swh.loader.cvs.loader.subprocess")
+ mock_subprocess.run.side_effect = [
+ subprocess.CompletedProcess(args=rsync_first_call, returncode=23),
+ subprocess.CompletedProcess(
+ args=rsync_first_call,
+ returncode=0,
+ stdout=f"""
+ drwxr-xr-x 21 2012/11/04 06:58:58 .
+ drwxr-xr-x 39 2021/01/22 10:21:05 CVSROOT
+ drwxr-xr-x 15 2020/12/28 00:50:21 {module_name}""",
+ ),
+ subprocess.CompletedProcess(
+ args=rsync_second_call,
+ returncode=23,
+ ),
+ subprocess.CompletedProcess(
+ args=rsync_second_call,
+ returncode=23,
+ ),
+ subprocess.CompletedProcess(args=rsync_second_call, returncode=0),
+ subprocess.CompletedProcess(
+ args=rsync_third_call,
+ returncode=23,
+ ),
+ subprocess.CompletedProcess(
+ args=rsync_third_call,
+ returncode=23,
+ ),
+ subprocess.CompletedProcess(args=rsync_third_call, returncode=0),
+ ]
+
+ loader = CvsLoader(swh_storage, repo_url)
+ loader.cvs_module_name = module_name
+ loader.cvsroot_path = tmp_path
+ loader.fetch_cvs_repo_with_rsync(host, path)

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 9:40 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225924

Event Timeline