Page Menu | Home | Software Heritage

D4763.id16877.diff
No One | Temporary

D4763.id16877.diff

diff --git a/swh/fuse/fs/artifact.py b/swh/fuse/fs/artifact.py
--- a/swh/fuse/fs/artifact.py
+++ b/swh/fuse/fs/artifact.py
@@ -268,18 +268,35 @@
name: str = field(init=False, default=".status")
mode: int = field(init=False, default=int(EntryMode.RDONLY_FILE))
- done: int
- todo: int
+ history_swhid: SWHID
+
+ def __post_init__(self):
+ super().__post_init__()
+ # This is the only case where we do not want the kernel to cache the file
+ self.file_info_attrs["keep_cache"] = False
+ self.file_info_attrs["direct_io"] = True
async def get_content(self) -> bytes:
- fmt = f"Done: {self.done}/{self.todo}\n"
+ history_full = await self.fuse.get_history(self.history_swhid)
+ history_cached = await self.fuse.cache.history.get_with_date_prefix(
+ self.history_swhid, date_prefix=""
+ )
+ fmt = f"Done: {len(history_cached)}/{len(history_full)}\n"
return fmt.encode()
+ def __post_init__(self):
+ super().__post_init__()
+ # Create the status file only once so we can easily remove it when the
+ # entire history is fetched
+ self.status_file = self.create_child(
+ RevisionHistoryShardByDate.StatusFile, history_swhid=self.history_swhid
+ )
+
async def compute_entries(self) -> AsyncIterator[FuseEntry]:
- history = await self.fuse.get_history(self.history_swhid)
+ history_full = await self.fuse.get_history(self.history_swhid)
# Only check for cached revisions with the appropriate prefix, since
# fetching all of them with the Web API would take too long
- swhids = await self.fuse.cache.history.get_with_date_prefix(
+ history_cached = await self.fuse.cache.history.get_with_date_prefix(
self.history_swhid, date_prefix=self.prefix
)
@@ -287,7 +304,7 @@
root_path = self.get_relative_root_path()
sharded_dirs = set()
- for (swhid, sharded_name) in swhids:
+ for (swhid, sharded_name) in history_cached:
if not sharded_name.startswith(self.prefix):
continue
@@ -310,14 +327,11 @@
history_swhid=self.history_swhid,
)
- # TODO: store len(history) somewhere to avoid recompute?
- self.is_status_done = len(swhids) == len(history)
- if not self.is_status_done and depth == 0:
- yield self.create_child(
- RevisionHistoryShardByDate.StatusFile,
- done=len(swhids),
- todo=len(history),
- )
+ self.is_status_done = len(history_cached) == len(history_full)
+ if self.is_status_done:
+ self.fuse._remove_inode(self.status_file.inode)
+ elif not self.is_status_done and depth == 0:
+ yield self.status_file
@dataclass
diff --git a/swh/fuse/fs/entry.py b/swh/fuse/fs/entry.py
--- a/swh/fuse/fs/entry.py
+++ b/swh/fuse/fs/entry.py
@@ -10,7 +10,7 @@
from pathlib import Path
import re
from stat import S_IFDIR, S_IFLNK, S_IFREG
-from typing import Any, AsyncIterator, Optional, Pattern, Sequence, Union
+from typing import Any, AsyncIterator, Dict, Optional, Pattern, Sequence, Union
# Avoid cycling import
Fuse = "Fuse"
@@ -45,9 +45,12 @@
depth: int
fuse: Fuse
inode: int = field(init=False)
+ file_info_attrs: Dict[str, Any] = field(init=False, default_factory=dict)
def __post_init__(self):
self.inode = self.fuse._alloc_inode(self)
+ # By default, let the kernel cache previously accessed data
+ self.file_info_attrs["keep_cache"] = True
async def size(self) -> int:
""" Return the size (in bytes) of an entry """
diff --git a/swh/fuse/fuse.py b/swh/fuse/fuse.py
--- a/swh/fuse/fuse.py
+++ b/swh/fuse/fuse.py
@@ -65,6 +65,12 @@
return inode
+ def _remove_inode(self, inode: int) -> None:
+ try:
+ del self._inode2entry[inode]
+ except KeyError:
+ pass
+
def inode2entry(self, inode: int) -> FuseEntry:
""" Return the entry matching a given inode """
@@ -250,7 +256,8 @@
# Re-use inode as file handle
self.logger.debug("open(inode=%d)", inode)
- return pyfuse3.FileInfo(fh=inode, keep_cache=True)
+ entry = self.inode2entry(inode)
+ return pyfuse3.FileInfo(fh=inode, **entry.file_info_attrs)
async def read(self, fh: int, offset: int, length: int) -> bytes:
""" Read `length` bytes from file handle `fh` at position `offset` """
diff --git a/swh/fuse/tests/test_revision.py b/swh/fuse/tests/test_revision.py
--- a/swh/fuse/tests/test_revision.py
+++ b/swh/fuse/tests/test_revision.py
@@ -84,3 +84,4 @@
depth2 = str(swhid)
assert (dir_by_date / depth1).exists()
assert depth2 in (os.listdir(dir_by_date / depth1))
+ assert not (dir_by_date / ".status").exists()

File Metadata

Mime Type
text/plain
Expires
Wed, Jul 2, 10:39 AM (2 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217009

Event Timeline