diff --git a/swh/fuse/cache.py b/swh/fuse/cache.py --- a/swh/fuse/cache.py +++ b/swh/fuse/cache.py @@ -353,6 +353,10 @@ self.move_to_end(key) return value + def __delitem__(self, key: Any) -> None: + self.used_ram -= self.sizeof(self[key]) + super().__delitem__(key) + def __setitem__(self, key: Any, value: Any) -> None: if key in self: self.move_to_end(key) @@ -363,7 +367,6 @@ while self.used_ram > self.max_ram and self: oldest = next(iter(self)) - self.used_ram -= self.sizeof(oldest) del self[oldest] def __init__(self, conf: Dict[str, Any]): @@ -390,12 +393,11 @@ if isinstance(direntry, (CacheDir, CacheDir.ArtifactShardBySwhid, OriginDir)): # The `cache/` and `origin/` directories are populated on the fly pass - elif ( - isinstance(direntry, RevisionHistoryShardByDate) - and not direntry.is_status_done - ): - # The `by-date/' directory is populated in parallel so only cache it - # once it has finished fetching all data from the API - pass else: self.lru_cache[direntry.inode] = entries + + def invalidate(self, direntry: FuseDirEntry) -> None: + try: + del self.lru_cache[direntry.inode] + except KeyError: + pass diff --git a/swh/fuse/fs/artifact.py b/swh/fuse/fs/artifact.py --- a/swh/fuse/fs/artifact.py +++ b/swh/fuse/fs/artifact.py @@ -220,22 +220,36 @@ swhid: SWHID - async def prefill_caches(self) -> None: + async def prefill_by_date_cache(self, by_date_dir: FuseDirEntry) -> None: history = await self.fuse.get_history(self.swhid) + nb_api_calls = 0 for swhid in history: + cache = await self.fuse.cache.metadata.get(swhid) + if cache: + continue + await self.fuse.get_metadata(swhid) + # The by-date/ directory is cached temporarily in direntry, and + # invalidated + updated every 100 API calls + nb_api_calls += 1 + if nb_api_calls % 100 == 0: + self.fuse.cache.direntry.invalidate(by_date_dir) + # Make sure to have the latest entries once the prefilling is done + self.fuse.cache.direntry.invalidate(by_date_dir) async def compute_entries(self) -> AsyncIterator[FuseEntry]: - # Run it concurrently because of the many API calls necessary - asyncio.create_task(self.prefill_caches()) - - yield self.create_child( + by_date_dir = self.create_child( RevisionHistoryShardByDate, name="by-date", mode=int(EntryMode.RDONLY_DIR), history_swhid=self.swhid, ) + # Run it concurrently because of the many API calls necessary + asyncio.create_task(self.prefill_by_date_cache(by_date_dir)) + + yield by_date_dir + yield self.create_child( RevisionHistoryShardByHash, name="by-hash",