Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/cvs/loader.py
Show All 37 Lines | from swh.model.model import ( | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
SkippedContent, | SkippedContent, | ||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | SnapshotBranch, | ||||
TargetType, | TargetType, | ||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
) | ) | ||||
from swh.storage.algos.snapshot import snapshot_get_latest | |||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
DEFAULT_BRANCH = b"HEAD" | DEFAULT_BRANCH = b"HEAD" | ||||
TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.cvs." | TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.cvs." | ||||
class CvsLoader(BaseLoader): | class CvsLoader(BaseLoader): | ||||
Show All 37 Lines | ): | ||||
self._contents: List[Content] = [] | self._contents: List[Content] = [] | ||||
self._skipped_contents: List[SkippedContent] = [] | self._skipped_contents: List[SkippedContent] = [] | ||||
self._directories: List[Directory] = [] | self._directories: List[Directory] = [] | ||||
self._revisions: List[Revision] = [] | self._revisions: List[Revision] = [] | ||||
self.swh_revision_gen = None | self.swh_revision_gen = None | ||||
# internal state, current visit | # internal state, current visit | ||||
self._last_revision = None | self._last_revision = None | ||||
self._visit_status = "full" | self._visit_status = "full" | ||||
self._load_status = "uneventful" | |||||
self.visit_date = visit_date | self.visit_date = visit_date | ||||
self.cvsroot_path = cvsroot_path | self.cvsroot_path = cvsroot_path | ||||
self.snapshot = None | self.snapshot = None | ||||
self.last_snapshot: Optional[Snapshot] = snapshot_get_latest( | |||||
self.storage, self.origin_url | |||||
) | |||||
def compute_swh_revision(self, k, logmsg): | def compute_swh_revision(self, k, logmsg): | ||||
"""Compute swh hash data per CVS changeset. | """Compute swh hash data per CVS changeset. | ||||
Returns: | Returns: | ||||
tuple (rev, swh_directory) | tuple (rev, swh_directory) | ||||
- rev: current SWH revision computed from checked out work tree | - rev: current SWH revision computed from checked out work tree | ||||
- swh_directory: dictionary of path, swh hash data with type | - swh_directory: dictionary of path, swh hash data with type | ||||
""" | """ | ||||
# Compute SWH revision from the on-disk state | # Compute SWH revision from the on-disk state | ||||
swh_dir = from_disk.Directory.from_disk(path=os.fsencode(self.worktree_path)) | swh_dir = from_disk.Directory.from_disk(path=os.fsencode(self.worktree_path)) | ||||
if self._last_revision: | if self._last_revision: | ||||
parents = (self._last_revision.id,) | parents = (self._last_revision.id,) | ||||
else: | else: | ||||
parents = () | parents = () | ||||
revision = self.build_swh_revision(k, logmsg, swh_dir.hash, parents) | revision = self.build_swh_revision(k, logmsg, swh_dir.hash, parents) | ||||
self.log.debug("SWH revision ID: %s" % hashutil.hash_to_hex(revision.id)) | self.log.debug("SWH revision ID: %s" % hashutil.hash_to_hex(revision.id)) | ||||
self._last_revision = revision | self._last_revision = revision | ||||
if self._load_status == "uneventful": | |||||
# We have an eventful load if this revision is not already | |||||
# present in the archive | |||||
if not self.storage.revision_get([revision.id])[0]: | |||||
self._load_status = "eventful" | |||||
return (revision, swh_dir) | return (revision, swh_dir) | ||||
def process_cvs_changesets( | def process_cvs_changesets( | ||||
self, cvs_changesets, | self, cvs_changesets, | ||||
) -> Iterator[ | ) -> Iterator[ | ||||
Tuple[List[Content], List[SkippedContent], List[Directory], Revision] | Tuple[List[Content], List[SkippedContent], List[Directory], Revision] | ||||
]: | ]: | ||||
"""Process CVS revisions. | """Process CVS revisions. | ||||
▲ Show 20 Lines • Show All 145 Lines • ▼ Show 20 Lines | def fetch_cvs_repo_with_rsync(self, host, path): | ||||
if not have_cvsroot: | if not have_cvsroot: | ||||
raise NotFound("No CVSROOT directory found at %s" % url) | raise NotFound("No CVSROOT directory found at %s" % url) | ||||
rsync = subprocess.run(["rsync", "-a", url, self.cvsroot_path]) | rsync = subprocess.run(["rsync", "-a", url, self.cvsroot_path]) | ||||
rsync.check_returncode() | rsync.check_returncode() | ||||
def prepare(self): | def prepare(self): | ||||
self._last_revision = None | self._last_revision = None | ||||
self._load_status = "uneventful" | |||||
self.swh_revision_gen = None | self.swh_revision_gen = None | ||||
if not self.cvsroot_path: | if not self.cvsroot_path: | ||||
self.cvsroot_path = tempfile.mkdtemp( | self.cvsroot_path = tempfile.mkdtemp( | ||||
suffix="-%s" % os.getpid(), | suffix="-%s" % os.getpid(), | ||||
prefix=TEMPORARY_DIR_PREFIX_PATTERN, | prefix=TEMPORARY_DIR_PREFIX_PATTERN, | ||||
dir=self.temp_directory, | dir=self.temp_directory, | ||||
) | ) | ||||
self.worktree_path = tempfile.mkdtemp( | self.worktree_path = tempfile.mkdtemp( | ||||
▲ Show 20 Lines • Show All 174 Lines • ▼ Show 20 Lines | def store_data(self): | ||||
self.flush() | self.flush() | ||||
self.loaded_snapshot_id = self.snapshot.id | self.loaded_snapshot_id = self.snapshot.id | ||||
self._skipped_contents = [] | self._skipped_contents = [] | ||||
self._contents = [] | self._contents = [] | ||||
self._directories = [] | self._directories = [] | ||||
self._revisions = [] | self._revisions = [] | ||||
def load_status(self): | def load_status(self): | ||||
assert self.snapshot is not None | |||||
if self.last_snapshot == self.snapshot: | |||||
load_status = "uneventful" | |||||
else: | |||||
load_status = "eventful" | |||||
return { | return { | ||||
"status": self._load_status, | "status": load_status, | ||||
} | } | ||||
def visit_status(self): | def visit_status(self): | ||||
return self._visit_status | return self._visit_status |