Changeset View
Changeset View
Standalone View
Standalone View
swh/fuse/fs/artifact.py
Show First 20 Lines • Show All 262 Lines • ▼ Show 20 Lines | class RevisionHistoryShardByDate(FuseDirEntry): | ||||
ENTRIES_REGEXP = re.compile(r"^([0-9]{2,4})|(" + SWHID_REGEXP + ")$") | ENTRIES_REGEXP = re.compile(r"^([0-9]{2,4})|(" + SWHID_REGEXP + ")$") | ||||
@dataclass | @dataclass | ||||
class StatusFile(FuseFileEntry): | class StatusFile(FuseFileEntry): | ||||
""" Temporary file used to indicate loading progress in by-date/ """ | """ Temporary file used to indicate loading progress in by-date/ """ | ||||
name: str = field(init=False, default=".status") | name: str = field(init=False, default=".status") | ||||
mode: int = field(init=False, default=int(EntryMode.RDONLY_FILE)) | mode: int = field(init=False, default=int(EntryMode.RDONLY_FILE)) | ||||
done: int | history_swhid: SWHID | ||||
todo: int | |||||
def __post_init__(self): | |||||
super().__post_init__() | |||||
# This is the only case where we do not want the kernel to cache the file | |||||
zack: I don't like that you are //setting// file_info_attrs, potentially overwriting //everything//… | |||||
self.file_info_attrs["keep_cache"] = False | |||||
self.file_info_attrs["direct_io"] = True | |||||
async def get_content(self) -> bytes: | async def get_content(self) -> bytes: | ||||
fmt = f"Done: {self.done}/{self.todo}\n" | history_full = await self.fuse.get_history(self.history_swhid) | ||||
history_cached = await self.fuse.cache.history.get_with_date_prefix( | |||||
self.history_swhid, date_prefix="" | |||||
) | |||||
Not Done Inline Actions
will this ever be shown? zack: will this ever be shown?
IIRC the idea was to remove from the dir listing the `.status` file… | |||||
fmt = f"Done: {len(history_cached)}/{len(history_full)}\n" | |||||
return fmt.encode() | return fmt.encode() | ||||
def __post_init__(self): | |||||
super().__post_init__() | |||||
# Create the status file only once so we can easily remove it when the | |||||
# entire history is fetched | |||||
self.status_file = self.create_child( | |||||
RevisionHistoryShardByDate.StatusFile, history_swhid=self.history_swhid | |||||
) | |||||
async def compute_entries(self) -> AsyncIterator[FuseEntry]: | async def compute_entries(self) -> AsyncIterator[FuseEntry]: | ||||
history = await self.fuse.get_history(self.history_swhid) | history_full = await self.fuse.get_history(self.history_swhid) | ||||
# Only check for cached revisions with the appropriate prefix, since | # Only check for cached revisions with the appropriate prefix, since | ||||
# fetching all of them with the Web API would take too long | # fetching all of them with the Web API would take too long | ||||
swhids = await self.fuse.cache.history.get_with_date_prefix( | history_cached = await self.fuse.cache.history.get_with_date_prefix( | ||||
self.history_swhid, date_prefix=self.prefix | self.history_swhid, date_prefix=self.prefix | ||||
) | ) | ||||
depth = self.prefix.count("/") | depth = self.prefix.count("/") | ||||
root_path = self.get_relative_root_path() | root_path = self.get_relative_root_path() | ||||
sharded_dirs = set() | sharded_dirs = set() | ||||
for (swhid, sharded_name) in swhids: | for (swhid, sharded_name) in history_cached: | ||||
if not sharded_name.startswith(self.prefix): | if not sharded_name.startswith(self.prefix): | ||||
continue | continue | ||||
if depth == 3: | if depth == 3: | ||||
yield self.create_child( | yield self.create_child( | ||||
FuseSymlinkEntry, | FuseSymlinkEntry, | ||||
name=str(swhid), | name=str(swhid), | ||||
target=Path(root_path, f"archive/{swhid}"), | target=Path(root_path, f"archive/{swhid}"), | ||||
) | ) | ||||
# Create sharded directories | # Create sharded directories | ||||
else: | else: | ||||
next_prefix = sharded_name.split("/")[depth] | next_prefix = sharded_name.split("/")[depth] | ||||
if next_prefix not in sharded_dirs: | if next_prefix not in sharded_dirs: | ||||
sharded_dirs.add(next_prefix) | sharded_dirs.add(next_prefix) | ||||
yield self.create_child( | yield self.create_child( | ||||
RevisionHistoryShardByDate, | RevisionHistoryShardByDate, | ||||
name=next_prefix, | name=next_prefix, | ||||
mode=int(EntryMode.RDONLY_DIR), | mode=int(EntryMode.RDONLY_DIR), | ||||
prefix=f"{self.prefix}{next_prefix}/", | prefix=f"{self.prefix}{next_prefix}/", | ||||
history_swhid=self.history_swhid, | history_swhid=self.history_swhid, | ||||
) | ) | ||||
# TODO: store len(history) somewhere to avoid recompute? | self.is_status_done = len(history_cached) == len(history_full) | ||||
self.is_status_done = len(swhids) == len(history) | if self.is_status_done: | ||||
if not self.is_status_done and depth == 0: | self.fuse._remove_inode(self.status_file.inode) | ||||
yield self.create_child( | elif not self.is_status_done and depth == 0: | ||||
RevisionHistoryShardByDate.StatusFile, | yield self.status_file | ||||
done=len(swhids), | |||||
todo=len(history), | |||||
) | |||||
@dataclass | @dataclass | ||||
class RevisionHistoryShardByHash(FuseDirEntry): | class RevisionHistoryShardByHash(FuseDirEntry): | ||||
""" Revision virtual `history/by-hash` sharded directory """ | """ Revision virtual `history/by-hash` sharded directory """ | ||||
history_swhid: SWHID | history_swhid: SWHID | ||||
prefix: str = field(default="") | prefix: str = field(default="") | ||||
▲ Show 20 Lines • Show All 276 Lines • Show Last 20 Lines |
I don't like that you are setting file_info_attrs, potentially overwriting everything that was in there.
You should selectively set/unset what you need. I guess in this case this would mean setting direct_io and clearing keep_cache, but I'll let you verify if that's true.