diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,6 +1,6 @@ # Add here internal Software Heritage dependencies, one per line. swh.core[http] >= 0.3 # [http] is required by swh.core.pytest_plugin swh.storage >= 0.11.3 -swh.model >= 0.4.0 +swh.model >= 6.6.0 swh.scheduler >= 0.0.39 swh.loader.core >= 3.0.0 diff --git a/swh/loader/cvs/loader.py b/swh/loader/cvs/loader.py --- a/swh/loader/cvs/loader.py +++ b/swh/loader/cvs/loader.py @@ -128,6 +128,7 @@ self.cvsroot_path = cvsroot_path self.custom_id_keyword: Optional[str] = None self.excluded_keywords: List[str] = [] + self.swh_dir = from_disk.Directory() self.snapshot: Optional[Snapshot] = None self.last_snapshot: Optional[Snapshot] = snapshot_get_latest( @@ -146,12 +147,12 @@ """ # Compute SWH revision from the on-disk state - swh_dir = from_disk.Directory.from_disk(path=os.fsencode(self.worktree_path)) parents: Tuple[Sha1Git, ...] if self._last_revision: parents = (self._last_revision.id,) else: parents = () + swh_dir = self.swh_dir[self.cvs_module_name.encode()] revision = self.build_swh_revision(k, logmsg, swh_dir.hash, parents) self.log.debug("SWH revision ID: %s", hashutil.hash_to_hex(revision.id)) self._last_revision = revision @@ -169,6 +170,15 @@ else: return True + def add_content(self, path: bytes, wtpath: bytes): + path_parts = path.split(b"/") + current_path = b"" + for p in path_parts[:-1]: + current_path = os.path.join(current_path, p) + if current_path not in self.swh_dir: + self.swh_dir[current_path] = from_disk.Directory() + self.swh_dir[path] = from_disk.Content.from_file(path=wtpath) + def checkout_file_with_rcsparse( self, k: ChangeSetKey, f: FileRevision, rcsfile: rcsparse.rcsfile ) -> None: @@ -185,6 +195,8 @@ os.remove(wtpath) except FileNotFoundError: pass + if path in self.swh_dir: + del self.swh_dir[path] else: # create, or update, this file in the work tree if not rcsfile: @@ -226,6 +238,8 @@ outfile.write(contents) outfile.close() + self.add_content(path, wtpath) + def checkout_file_with_cvsclient( self, k: ChangeSetKey, f: FileRevision, cvsclient: CVSClient ): @@ -241,6 +255,8 @@ os.remove(wtpath) except FileNotFoundError: pass + if path in self.swh_dir: + del self.swh_dir[path] else: dirname = os.path.dirname(wtpath) os.makedirs(dirname, exist_ok=True) @@ -253,6 +269,8 @@ # Well, we have just renamed the file... pass + self.add_content(path, wtpath) + def process_cvs_changesets( self, cvs_changesets: List[ChangeSetKey], @@ -295,9 +313,24 @@ # TODO: prune empty directories? (revision, swh_dir) = self.compute_swh_revision(k, logmsg) - (contents, skipped_contents, directories) = from_disk.iter_directory( - swh_dir - ) + + contents: List[Content] = [] + skipped_contents: List[SkippedContent] = [] + directories: List[Directory] = [] + + for obj_node in swh_dir.collect(): + obj = obj_node.to_model() # type: ignore + obj_type = obj.object_type + if obj_type in ( + Content.object_type, + from_disk.DiskBackedContent.object_type, + ): + contents.append(obj.with_data()) + elif obj_type == SkippedContent.object_type: + skipped_contents.append(obj) + elif obj_type == Directory.object_type: + directories.append(obj) + yield contents, skipped_contents, directories, revision def pre_cleanup(self) -> None: