Page MenuHomeSoftware Heritage

D4540.id16094.diff
No OneTemporary

D4540.id16094.diff

diff --git a/swh/loader/mercurial/from_disk.py b/swh/loader/mercurial/from_disk.py
--- a/swh/loader/mercurial/from_disk.py
+++ b/swh/loader/mercurial/from_disk.py
@@ -79,6 +79,28 @@
part = parts.pop(0)
current_dir[part] = content
+ def update_file(self, path: bytes, content: Content) -> None:
+ """Update existing file content.
+
+ As the directories already exist, there is no creation involved.
+ """
+ self[path] = content
+
+ def remove_file(self, path: bytes) -> None:
+ """Remove existing file and empty directories."""
+ del self[path]
+
+ # remove empty directories
+ parts = [part for part in path.split(os.path.sep.encode()) if part]
+ parts.pop() # remove file name
+ current_dir = self
+ while parts:
+ name = parts.pop(0)
+ if len(current_dir[name]) == 0:
+ del current_dir[name]
+ break # No need to check for subdirectory
+ current_dir = current_dir[name]
+
class HgLoaderFromDisk(BaseLoader):
"""Load a mercurial repository from a local repository."""
@@ -116,6 +138,8 @@
self._repo: Optional[hgutil.Repository] = None
self.revision_nodeid_to_swhid: Dict[HgNodeId, Sha1Git] = {}
+ self.last_hg_nodeid = hgutil.NULLID
+ self.root = HgRootDirectory()
# Cache the content hash across revisions to avoid recalculation.
self.content_hash_cache: lrucachedict = lrucachedict(
@@ -385,13 +409,26 @@
the swhid of the top level directory.
"""
rev_ctx = self.repo[hg_nodeid]
+ prev_ctx = self.repo[self.last_hg_nodeid]
+
+ # TODO diff on parents
+ status = prev_ctx.status(rev_ctx)
+
+ for file_path in status.removed:
+ self.root.remove_file(file_path)
- root = HgRootDirectory()
- for file_path in rev_ctx.manifest():
+ for file_path in status.added:
content = self.store_content(hg_nodeid, file_path)
- root.add_file(file_path, content)
+ self.root.add_file(file_path, content)
+
+ for file_path in status.modified:
+ content = self.store_content(hg_nodeid, file_path)
+ self.root.update_file(file_path, content)
+
+ self.last_hg_nodeid = hg_nodeid
- directories: List[Directory] = [root]
+ self.root.invalidate_hash()
+ directories: List[Directory] = [self.root]
while directories:
directory = directories.pop(0)
self.storage.directory_add([directory.to_model()])
@@ -399,7 +436,7 @@
[item for item in directory.values() if isinstance(item, Directory)]
)
- return root.hash
+ return self.root.hash
class HgArchiveLoaderFromDisk(HgLoaderFromDisk):

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 5:10 AM (10 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223042

Event Timeline