diff --git a/swh/fuse/fs/artifact.py b/swh/fuse/fs/artifact.py --- a/swh/fuse/fs/artifact.py +++ b/swh/fuse/fs/artifact.py @@ -268,18 +268,27 @@ name: str = field(init=False, default=".status") mode: int = field(init=False, default=int(EntryMode.RDONLY_FILE)) - done: int - todo: int + history_swhid: SWHID + + def __post_init__(self): + super().__post_init__() + # This is the only case where we do not want the kernel to cache the file + self.file_info_attrs["keep_cache"] = False + self.file_info_attrs["direct_io"] = True async def get_content(self) -> bytes: - fmt = f"Done: {self.done}/{self.todo}\n" + history_full = await self.fuse.get_history(self.history_swhid) + history_cached = await self.fuse.cache.history.get_with_date_prefix( + self.history_swhid, date_prefix="" + ) + fmt = f"Done: {len(history_cached)}/{len(history_full)}\n" return fmt.encode() async def compute_entries(self) -> AsyncIterator[FuseEntry]: - history = await self.fuse.get_history(self.history_swhid) + history_full = await self.fuse.get_history(self.history_swhid) # Only check for cached revisions with the appropriate prefix, since # fetching all of them with the Web API would take too long - swhids = await self.fuse.cache.history.get_with_date_prefix( + history_cached = await self.fuse.cache.history.get_with_date_prefix( self.history_swhid, date_prefix=self.prefix ) @@ -287,7 +296,7 @@ root_path = self.get_relative_root_path() sharded_dirs = set() - for (swhid, sharded_name) in swhids: + for (swhid, sharded_name) in history_cached: if not sharded_name.startswith(self.prefix): continue @@ -310,13 +319,10 @@ history_swhid=self.history_swhid, ) - # TODO: store len(history) somewhere to avoid recompute? - self.is_status_done = len(swhids) == len(history) + self.is_status_done = len(history_cached) == len(history_full) if not self.is_status_done and depth == 0: yield self.create_child( - RevisionHistoryShardByDate.StatusFile, - done=len(swhids), - todo=len(history), + RevisionHistoryShardByDate.StatusFile, history_swhid=self.history_swhid ) diff --git a/swh/fuse/fs/entry.py b/swh/fuse/fs/entry.py --- a/swh/fuse/fs/entry.py +++ b/swh/fuse/fs/entry.py @@ -10,7 +10,7 @@ from pathlib import Path import re from stat import S_IFDIR, S_IFLNK, S_IFREG -from typing import Any, AsyncIterator, Optional, Pattern, Sequence, Union +from typing import Any, AsyncIterator, Dict, Optional, Pattern, Sequence, Union # Avoid cycling import Fuse = "Fuse" @@ -45,9 +45,12 @@ depth: int fuse: Fuse inode: int = field(init=False) + file_info_attrs: Dict[str, Any] = field(init=False, default_factory=dict) def __post_init__(self): self.inode = self.fuse._alloc_inode(self) + # By default, let the kernel cache previously accessed data + self.file_info_attrs["keep_cache"] = True async def size(self) -> int: """ Return the size (in bytes) of an entry """ diff --git a/swh/fuse/fuse.py b/swh/fuse/fuse.py --- a/swh/fuse/fuse.py +++ b/swh/fuse/fuse.py @@ -250,7 +250,8 @@ # Re-use inode as file handle self.logger.debug("open(inode=%d)", inode) - return pyfuse3.FileInfo(fh=inode, keep_cache=True) + entry = self.inode2entry(inode) + return pyfuse3.FileInfo(fh=inode, **entry.file_info_attrs) async def read(self, fh: int, offset: int, length: int) -> bytes: """ Read `length` bytes from file handle `fh` at position `offset` """