diff --git a/swh/loader/mercurial/from_disk.py b/swh/loader/mercurial/from_disk.py
--- a/swh/loader/mercurial/from_disk.py
+++ b/swh/loader/mercurial/from_disk.py
@@ -79,6 +79,37 @@
         part = parts.pop(0)
         current_dir[part] = content
 
+    def update_file(self, path: bytes, content: Content) -> None:
+        """Update existing file content.
+
+        The directories leading to the file already exist, so none is created.
+        """
+        self[path] = content
+
+    def remove_file(self, path: bytes) -> None:
+        """Remove existing file and the directories it leaves empty.
+
+        Directories are pruned from the deepest one up to the root, because
+        deleting an empty directory may in turn leave its parent empty.
+        """
+        del self[path]
+
+        parts = [part for part in path.split(os.path.sep.encode()) if part]
+        parts.pop()  # remove file name
+
+        # Pair every directory on the path with the directory holding it,
+        # outermost first.
+        ancestors = []
+        current_dir = self
+        for name in parts:
+            ancestors.append((current_dir, name))
+            current_dir = current_dir[name]
+
+        for holder, name in reversed(ancestors):
+            if len(holder[name]) > 0:
+                break  # still in use, hence so are all directories above
+            del holder[name]
+
 
 class HgLoaderFromDisk(BaseLoader):
     """Load a mercurial repository from a local repository."""
@@ -116,6 +147,8 @@
 
         self._repo: Optional[hgutil.Repository] = None
         self.revision_nodeid_to_swhid: Dict[HgNodeId, Sha1Git] = {}
+        self.last_hg_nodeid = hgutil.NULLID
+        self.root = HgRootDirectory()
 
         # Cache the content hash across revisions to avoid recalculation.
         self.content_hash_cache: lrucachedict = lrucachedict(
@@ -385,13 +418,26 @@
             the swhid of the top level directory.
         """
         rev_ctx = self.repo[hg_nodeid]
+        prev_ctx = self.repo[self.last_hg_nodeid]
+
+        # TODO diff on parents
+        status = prev_ctx.status(rev_ctx)
+
+        for file_path in status.removed:
+            self.root.remove_file(file_path)
 
-        root = HgRootDirectory()
-        for file_path in rev_ctx.manifest():
+        for file_path in status.added:
             content = self.store_content(hg_nodeid, file_path)
-            root.add_file(file_path, content)
+            self.root.add_file(file_path, content)
+
+        for file_path in status.modified:
+            content = self.store_content(hg_nodeid, file_path)
+            self.root.update_file(file_path, content)
+
+        self.last_hg_nodeid = hg_nodeid
 
-        directories: List[Directory] = [root]
+        self.root.invalidate_hash()
+        directories: List[Directory] = [self.root]
         while directories:
             directory = directories.pop(0)
             self.storage.directory_add([directory.to_model()])
@@ -399,7 +445,7 @@
                 [item for item in directory.values() if isinstance(item, Directory)]
             )
 
-        return root.hash
+        return self.root.hash
 
 
 class HgArchiveLoaderFromDisk(HgLoaderFromDisk):