Page MenuHomeSoftware Heritage

D6299.diff
No OneTemporary

D6299.diff

diff --git a/swh/loader/cvs/loader.py b/swh/loader/cvs/loader.py
--- a/swh/loader/cvs/loader.py
+++ b/swh/loader/cvs/loader.py
@@ -12,7 +12,7 @@
import subprocess
import tempfile
import time
-from typing import Iterator, List, Optional, Sequence, Tuple
+from typing import Any, BinaryIO, Dict, Iterator, List, Optional, Sequence, Tuple
from urllib3.util import parse_url
@@ -37,6 +37,7 @@
Person,
Revision,
RevisionType,
+ Sha1Git,
SkippedContent,
Snapshot,
SnapshotBranch,
@@ -61,6 +62,16 @@
visit_type = "cvs"
+ cvs_module_name: str
+ cvsclient: cvsclient.CVSClient
+
+ # remote CVS repository access (history is parsed from CVS rlog):
+ rlog_file: BinaryIO
+
+ swh_revision_gen: Iterator[
+ Tuple[List[Content], List[SkippedContent], List[Directory], Revision]
+ ]
+
def __init__(
self,
storage: StorageInterface,
@@ -80,31 +91,31 @@
# origin url as unique identifier for origin in swh archive
self.origin_url = origin_url if origin_url else self.cvsroot_url
self.temp_directory = temp_directory
- self.done = False
-
- self.cvs_module_name = None
-
- # remote CVS repository access (history is parsed from CVS rlog):
- self.cvsclient = None
- self.rlog_file = None
# internal state used to store swh objects
self._contents: List[Content] = []
self._skipped_contents: List[SkippedContent] = []
self._directories: List[Directory] = []
self._revisions: List[Revision] = []
- self.swh_revision_gen = None
# internal state, current visit
- self._last_revision = None
+ self._last_revision: Optional[Revision] = None
self._visit_status = "full"
self.visit_date = visit_date
+
+ if not cvsroot_path:
+ cvsroot_path = tempfile.mkdtemp(
+ suffix="-%s" % os.getpid(),
+ prefix=TEMPORARY_DIR_PREFIX_PATTERN,
+ dir=self.temp_directory,
+ )
self.cvsroot_path = cvsroot_path
- self.snapshot = None
+
+ self.snapshot: Optional[Snapshot] = None
self.last_snapshot: Optional[Snapshot] = snapshot_get_latest(
self.storage, self.origin_url
)
- def compute_swh_revision(self, k, logmsg):
+ def compute_swh_revision(self, k, logmsg) -> Tuple[Revision, from_disk.Directory]:
"""Compute swh hash data per CVS changeset.
Returns:
@@ -115,6 +126,7 @@
"""
# Compute SWH revision from the on-disk state
swh_dir = from_disk.Directory.from_disk(path=os.fsencode(self.worktree_path))
+ parents: Tuple[Sha1Git, ...]
if self._last_revision:
parents = (self._last_revision.id,)
else:
@@ -234,12 +246,12 @@
)
yield contents, skipped_contents, directories, revision
- def prepare_origin_visit(self):
+ def prepare_origin_visit(self) -> None:
self.origin = Origin(
url=self.origin_url if self.origin_url else self.cvsroot_url
)
- def pre_cleanup(self):
+ def pre_cleanup(self) -> None:
"""Cleanup potential dangling files from prior runs (e.g. OOM killed
tasks)
@@ -250,10 +262,10 @@
log=self.log,
)
- def cleanup(self):
+ def cleanup(self) -> None:
self.log.info("cleanup")
- def fetch_cvs_repo_with_rsync(self, host, path):
+ def fetch_cvs_repo_with_rsync(self, host: str, path: str) -> None:
# URL *must* end with a trailing slash in order to get CVSROOT listed
url = "rsync://%s%s/" % (host, os.path.dirname(path))
rsync = subprocess.run(["rsync", url], capture_output=True, encoding="ascii")
@@ -275,18 +287,10 @@
if not have_cvsroot:
raise NotFound("No CVSROOT directory found at %s" % url)
- rsync = subprocess.run(["rsync", "-a", url, self.cvsroot_path])
- rsync.check_returncode()
+ subprocess.run(["rsync", "-a", url, self.cvsroot_path]).check_returncode()
- def prepare(self):
+ def prepare(self) -> None:
self._last_revision = None
- self.swh_revision_gen = None
- if not self.cvsroot_path:
- self.cvsroot_path = tempfile.mkdtemp(
- suffix="-%s" % os.getpid(),
- prefix=TEMPORARY_DIR_PREFIX_PATTERN,
- dir=self.temp_directory,
- )
self.worktree_path = tempfile.mkdtemp(
suffix="-%s" % os.getpid(),
prefix=TEMPORARY_DIR_PREFIX_PATTERN,
@@ -390,7 +394,7 @@
else:
raise NotFound("Invalid CVS origin URL '%s'" % self.origin_url)
- def fetch_data(self):
+ def fetch_data(self) -> bool:
"""Fetch the next CVS revision."""
try:
data = next(self.swh_revision_gen)
@@ -455,7 +459,7 @@
self.storage.snapshot_add([snap])
return snap
- def store_data(self):
+ def store_data(self) -> None:
"Add our current CVS changeset to the archive."
self.storage.skipped_content_add(self._skipped_contents)
self.storage.content_add(self._contents)
@@ -470,7 +474,7 @@
self._directories = []
self._revisions = []
- def load_status(self):
+ def load_status(self) -> Dict[str, Any]:
assert self.snapshot is not None
if self.last_snapshot == self.snapshot:
load_status = "uneventful"

File Metadata

Mime Type
text/plain
Expires
Wed, Sep 17, 4:32 AM (21 h, 13 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216536

Event Timeline