diff --git a/swh/loader/cvs/loader.py b/swh/loader/cvs/loader.py
--- a/swh/loader/cvs/loader.py
+++ b/swh/loader/cvs/loader.py
@@ -117,6 +117,7 @@
         self.cvsroot_path = cvsroot_path
         self.custom_id_keyword = None
         self.excluded_keywords: List[str] = []
+        self.swh_dir = from_disk.Directory()
 
         self.snapshot: Optional[Snapshot] = None
         self.last_snapshot: Optional[Snapshot] = snapshot_get_latest(
@@ -135,12 +136,12 @@
 
         """
         # Compute SWH revision from the on-disk state
-        swh_dir = from_disk.Directory.from_disk(path=os.fsencode(self.worktree_path))
         parents: Tuple[Sha1Git, ...]
         if self._last_revision:
             parents = (self._last_revision.id,)
         else:
             parents = ()
+        swh_dir = self.swh_dir[self.cvs_module_name.encode()]
         revision = self.build_swh_revision(k, logmsg, swh_dir.hash, parents)
         self.log.debug("SWH revision ID: %s", hashutil.hash_to_hex(revision.id))
         self._last_revision = revision
@@ -158,6 +159,24 @@
         else:
             return True
 
+    def add_content(self, path: bytes, wtpath: bytes) -> None:
+        """Record a checked-out file in the in-memory directory ``self.swh_dir``.
+
+        Parent directories of ``path`` that are missing from ``self.swh_dir``
+        are created first, then the file at ``wtpath`` is stored under ``path``.
+
+        Args:
+            path: ``/``-separated key of the file inside ``self.swh_dir``
+            wtpath: path of the checked-out file passed to ``Content.from_file``
+
+        """
+        path_parts = path.split(b"/")
+        for depth in range(1, len(path_parts)):
+            parent = b"/".join(path_parts[:depth])
+            if parent not in self.swh_dir:
+                self.swh_dir[parent] = from_disk.Directory()
+        self.swh_dir[path] = from_disk.Content.from_file(path=wtpath)
+
     def checkout_file_with_rcsparse(
         self, k: ChangeSetKey, f: FileRevision, rcsfile: rcsparse.rcsfile
     ) -> None:
@@ -174,6 +193,8 @@
                 os.remove(wtpath)
             except FileNotFoundError:
                 pass
+            if path in self.swh_dir:
+                del self.swh_dir[path]
         else:
             # create, or update, this file in the work tree
             if not rcsfile:
@@ -215,6 +236,8 @@
             outfile.write(contents)
             outfile.close()
 
+            self.add_content(path, wtpath)
+
     def checkout_file_with_cvsclient(
         self, k: ChangeSetKey, f: FileRevision, cvsclient: CVSClient
     ):
@@ -230,6 +253,8 @@
                 os.remove(wtpath)
             except FileNotFoundError:
                 pass
+            if path in self.swh_dir:
+                del self.swh_dir[path]
         else:
             dirname = os.path.dirname(wtpath)
             os.makedirs(dirname, exist_ok=True)
@@ -242,6 +267,8 @@
                 # Well, we have just renamed the file...
                 pass
 
+            self.add_content(path, wtpath)
+
     def process_cvs_changesets(
         self,
         cvs_changesets: List[ChangeSetKey],
@@ -259,6 +286,7 @@
 
         """
         for k in cvs_changesets:
+            modified_paths = set()
             tstr = time.strftime("%c", time.gmtime(k.max_time))
             self.log.debug(
                 "changeset from %s by %s on branch %s", tstr, k.author, k.branch
@@ -282,11 +310,40 @@
                         logmsg = self.rlog.getlog(self.rlog_file, f.path, k.revs[0].rev)
                     self.checkout_file_with_cvsclient(k, f, self.cvsclient)
 
+                assert self.cvsroot_path
+                path = file_path(os.fsencode(self.cvsroot_path), f.path)
+                # skip CVS module name in file path
+                path_parts = path.split(b"/")[1:]
+                # Record every ancestor directory of the file; the empty prefix
+                # (the root) is skipped because the root is always emitted below.
+                for depth in range(1, len(path_parts)):
+                    modified_paths.add(b"/".join(path_parts[:depth]))
+                if f.state != "dead":
+                    modified_paths.add(b"/".join(path_parts))
+
             # TODO: prune empty directories?
             (revision, swh_dir) = self.compute_swh_revision(k, logmsg)
-            (contents, skipped_contents, directories) = from_disk.iter_directory(
-                swh_dir
-            )
+
+            contents: List[Content] = []
+            skipped_contents: List[SkippedContent] = []
+            directories: List[Directory] = []
+
+            # The root directory is always emitted; the remaining objects are
+            # looked up from the paths touched by this changeset.
+            directories.append(swh_dir.to_model())
+            for modified_path in modified_paths:
+                obj = swh_dir[modified_path].to_model()
+                obj_type = obj.object_type
+                if obj_type in (
+                    Content.object_type,
+                    from_disk.DiskBackedContent.object_type,
+                ):
+                    contents.append(obj.with_data())
+                elif obj_type == SkippedContent.object_type:
+                    skipped_contents.append(obj)
+                elif obj_type == Directory.object_type:
+                    directories.append(obj)
+
             yield contents, skipped_contents, directories, revision
 
     def pre_cleanup(self) -> None: