Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/cvs/loader.py
Show All 9 Lines | |||||
from datetime import datetime | from datetime import datetime | ||||
import os | import os | ||||
import os.path | import os.path | ||||
import subprocess | import subprocess | ||||
import tempfile | import tempfile | ||||
import time | import time | ||||
from typing import Any, BinaryIO, Dict, Iterator, List, Optional, Sequence, Tuple, cast | from typing import Any, BinaryIO, Dict, Iterator, List, Optional, Sequence, Tuple, cast | ||||
from tenacity import retry | |||||
from tenacity.retry import retry_if_exception_type | |||||
from tenacity.stop import stop_after_attempt | |||||
from urllib3.util import parse_url | from urllib3.util import parse_url | ||||
from swh.loader.core.loader import BaseLoader | from swh.loader.core.loader import BaseLoader | ||||
from swh.loader.core.utils import clean_dangling_folders | from swh.loader.core.utils import clean_dangling_folders | ||||
from swh.loader.cvs.cvs2gitdump.cvs2gitdump import ( | from swh.loader.cvs.cvs2gitdump.cvs2gitdump import ( | ||||
CHANGESET_FUZZ_SEC, | CHANGESET_FUZZ_SEC, | ||||
ChangeSetKey, | ChangeSetKey, | ||||
CvsConv, | CvsConv, | ||||
Show All 23 Lines | |||||
from swh.storage.algos.snapshot import snapshot_get_latest | from swh.storage.algos.snapshot import snapshot_get_latest | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
DEFAULT_BRANCH = b"HEAD" | DEFAULT_BRANCH = b"HEAD" | ||||
TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.cvs." | TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.cvs." | ||||
def rsync_retry():
    """Build the tenacity decorator used around rsync invocations.

    The decorated callable is run again whenever it raises
    ``subprocess.CalledProcessError``, up to four attempts in total. With
    ``reraise=True`` the last ``CalledProcessError`` is propagated to the
    caller once the attempts are exhausted.
    """
    retry_on_failed_process = retry_if_exception_type(subprocess.CalledProcessError)
    give_up_after_four_attempts = stop_after_attempt(max_attempt_number=4)
    return retry(
        retry=retry_on_failed_process,
        stop=give_up_after_four_attempts,
        reraise=True,
    )
class BadPathException(Exception):
    """Exception subclass that adds no state or behaviour of its own.

    NOTE(review): judging by the name this signals an unacceptable path;
    the raise sites are outside this excerpt -- confirm before relying on it.
    """
class CvsLoader(BaseLoader): | class CvsLoader(BaseLoader): | ||||
"""Swh cvs loader. | """Swh cvs loader. | ||||
The repository is local. The loader deals with | The repository is local. The loader deals with | ||||
▲ Show 20 Lines • Show All 267 Lines • ▼ Show 20 Lines | def configure_custom_id_keyword(self, cvsconfig): | ||||
continue | continue | ||||
if kwname.strip() in ("Id", "CVSHeader"): | if kwname.strip() in ("Id", "CVSHeader"): | ||||
self.custom_id_keyword = custom_kwname.strip() | self.custom_id_keyword = custom_kwname.strip() | ||||
elif config_key == "KeywordExpand" and value.startswith("e"): | elif config_key == "KeywordExpand" and value.startswith("e"): | ||||
excluded_keywords = value[1:].split(",") | excluded_keywords = value[1:].split(",") | ||||
for k in excluded_keywords: | for k in excluded_keywords: | ||||
self.excluded_keywords.append(k.strip()) | self.excluded_keywords.append(k.strip()) | ||||
@rsync_retry()
def execute_rsync(
    self, rsync_cmd: List[str], **run_opts
) -> subprocess.CompletedProcess:
    """Run an rsync command line, retrying when the process fails.

    Args:
        rsync_cmd: full argument vector, program name included.
        **run_opts: keyword arguments forwarded verbatim to
            ``subprocess.run``.

    Returns:
        The ``CompletedProcess`` of the successful run.

    Raises:
        subprocess.CalledProcessError: rsync exited with a non-zero status;
            this is the exception type ``rsync_retry`` retries on.
    """
    completed = subprocess.run(rsync_cmd, **run_opts)
    # Same condition and same exception payload as
    # CompletedProcess.check_returncode().
    if completed.returncode:
        raise subprocess.CalledProcessError(
            completed.returncode,
            completed.args,
            completed.stdout,
            completed.stderr,
        )
    return completed
def fetch_cvs_repo_with_rsync(self, host: str, path: str) -> None:
    """Copy a remote CVS repository to local disk over the rsync protocol.

    The parent directory of ``path`` on ``host`` is listed first to verify
    that it holds both a ``CVSROOT`` directory and the requested module
    (``self.cvs_module_name``). Both directories are then fetched below
    ``self.cvsroot_path``.

    Args:
        host: rsync server, interpolated as-is after ``rsync://``.
        path: remote path of the module; its ``os.path.dirname`` is the
            directory that gets listed.

    Raises:
        NotFound: the listing lacks the module or the ``CVSROOT`` directory.
        subprocess.CalledProcessError: propagated from ``execute_rsync``
            when rsync keeps exiting with a non-zero status.
    """
    # URL *must* end with a trailing slash in order to get CVSROOT listed
    url = "rsync://%s%s/" % (host, os.path.dirname(path))
    # Decode the listing leniently: a single non-ASCII file name on the
    # server must not abort the load with UnicodeDecodeError. Undecodable
    # bytes become U+FFFD, which is harmless for the suffix checks below.
    rsync = self.execute_rsync(
        ["rsync", url], capture_output=True, encoding="utf-8", errors="replace"
    )
    have_cvsroot = False
    have_module = False
    module_suffix = " %s" % self.cvs_module_name
    for line in rsync.stdout.split("\n"):
        self.log.debug("rsync server: %s", line)
        # Two independent checks so that a module which is itself named
        # "CVSROOT" is still recognised as present.
        if line.endswith(" CVSROOT"):
            have_cvsroot = True
        if line.endswith(module_suffix):
            have_module = True
        if have_module and have_cvsroot:
            break
    if not have_module:
        raise NotFound(f"CVS module {self.cvs_module_name} not found at {url}")
    if not have_cvsroot:
        raise NotFound(f"No CVSROOT directory found at {url}")

    # Fetch the CVSROOT directory and the desired CVS module.
    assert self.cvsroot_path
    for d in ("CVSROOT", self.cvs_module_name):
        target_dir = os.path.join(self.cvsroot_path, d)
        os.makedirs(target_dir, exist_ok=True)
        # Append trailing path separators ("/" in the URL and os.path.sep in the
        # local target directory path) to ensure that rsync will place files
        # directly within our target directory.
        self.execute_rsync(
            ["rsync", "-az", url + d + "/", target_dir + os.path.sep]
        )
def prepare(self) -> None: | def prepare(self) -> None: | ||||
self._last_revision = None | self._last_revision = None | ||||
self.tempdir_path = tempfile.mkdtemp( | self.tempdir_path = tempfile.mkdtemp( | ||||
suffix="-%s" % os.getpid(), | suffix="-%s" % os.getpid(), | ||||
prefix=TEMPORARY_DIR_PREFIX_PATTERN, | prefix=TEMPORARY_DIR_PREFIX_PATTERN, | ||||
dir=self.temp_directory, | dir=self.temp_directory, | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 262 Lines • Show Last 20 Lines |