FUSE directory listing: let readdir() resume from an offset

* Every directory entry class now implements get_entries(offset), an async
  generator that yields its children starting at position `offset`.
  FuseDirEntry.__aiter__ is kept and delegates to get_entries(offset=0).
* Fuse.readdir() calls direntry.get_entries(offset) instead of iterating from
  the first entry and skipping `offset` entries by hand.
* Cached metadata is serialized with json.dumps(..., sort_keys=True); the tests
  compare against the same serialization.

Review fix: RevisionParents.get_entries() enumerates the sliced parents list
with start=offset. Children are named str(i + 1), so enumerating the slice
from 0 renamed the remaining parents "1", "2", ... whenever a listing resumed
at offset > 0; with start=offset each parent keeps the name of its position in
the full list.

diff --git a/swh/fuse/cache.py b/swh/fuse/cache.py
--- a/swh/fuse/cache.py
+++ b/swh/fuse/cache.py
@@ -112,7 +112,8 @@
     async def set(self, swhid: SWHID, metadata: Any) -> None:
         await self.conn.execute(
             "insert into metadata_cache values (?, ?)",
-            (str(swhid), json.dumps(metadata)),
+            # Keep the keys sorted so we can always retrieve them in the same order
+            (str(swhid), json.dumps(metadata, sort_keys=True)),
         )
         await self.conn.commit()
 
diff --git a/swh/fuse/fs/artifact.py b/swh/fuse/fs/artifact.py
--- a/swh/fuse/fs/artifact.py
+++ b/swh/fuse/fs/artifact.py
@@ -69,9 +69,9 @@
 
     swhid: SWHID
 
-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> AsyncIterator[FuseEntry]:
         metadata = await self.fuse.get_metadata(self.swhid)
-        for entry in metadata:
+        for entry in metadata[offset:]:
             name = entry["name"]
             swhid = entry["target"]
             mode = (
@@ -147,42 +147,54 @@
 
     swhid: SWHID
 
-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> AsyncIterator[FuseEntry]:
+        entries = []
         metadata = await self.fuse.get_metadata(self.swhid)
         directory = metadata["directory"]
         parents = metadata["parents"]
 
         root_path = self.get_relative_root_path()
 
-        yield self.create_child(
-            FuseSymlinkEntry,
-            name="root",
-            target=Path(root_path, f"archive/{directory}"),
+        entries.append(
+            self.create_child(
+                FuseSymlinkEntry,
+                name="root",
+                target=Path(root_path, f"archive/{directory}"),
+            )
         )
-        yield self.create_child(
-            FuseSymlinkEntry,
-            name="meta.json",
-            target=Path(root_path, f"meta/{self.swhid}.json"),
+        entries.append(
+            self.create_child(
+                FuseSymlinkEntry,
+                name="meta.json",
+                target=Path(root_path, f"meta/{self.swhid}.json"),
+            )
         )
-        yield self.create_child(
-            RevisionParents,
-            name="parents",
-            mode=int(EntryMode.RDONLY_DIR),
-            parents=[x["id"] for x in parents],
+        entries.append(
+            self.create_child(
+                RevisionParents,
+                name="parents",
+                mode=int(EntryMode.RDONLY_DIR),
+                parents=[x["id"] for x in parents],
+            )
         )
 
         if len(parents) >= 1:
-            yield self.create_child(
-                FuseSymlinkEntry, name="parent", target="parents/1/",
+            entries.append(
+                self.create_child(FuseSymlinkEntry, name="parent", target="parents/1/",)
             )
 
-        yield self.create_child(
-            RevisionHistory,
-            name="history",
-            mode=int(EntryMode.RDONLY_DIR),
-            swhid=self.swhid,
+        entries.append(
+            self.create_child(
+                RevisionHistory,
+                name="history",
+                mode=int(EntryMode.RDONLY_DIR),
+                swhid=self.swhid,
+            )
         )
 
+        for entry in entries[offset:]:
+            yield entry
+
 
 @dataclass
 class RevisionParents(FuseDirEntry):
@@ -190,9 +202,9 @@
 
     parents: List[SWHID]
 
-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> AsyncIterator[FuseEntry]:
         root_path = self.get_relative_root_path()
-        for i, parent in enumerate(self.parents):
+        for i, parent in enumerate(self.parents[offset:], start=offset):
             yield self.create_child(
                 FuseSymlinkEntry,
                 name=str(i + 1),
@@ -206,10 +218,10 @@
 
     swhid: SWHID
 
-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> AsyncIterator[FuseEntry]:
         history = await self.fuse.get_history(self.swhid)
         root_path = self.get_relative_root_path()
-        for swhid in history:
+        for swhid in history[offset:]:
             yield self.create_child(
                 FuseSymlinkEntry,
                 name=str(swhid),
@@ -249,35 +261,49 @@
         else:
             return None
 
-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> AsyncIterator[FuseEntry]:
+        entries = []
         metadata = await self.fuse.get_metadata(self.swhid)
         root_path = self.get_relative_root_path()
 
-        yield self.create_child(
-            FuseSymlinkEntry,
-            name="meta.json",
-            target=Path(root_path, f"meta/{self.swhid}.json"),
+        entries.append(
+            self.create_child(
+                FuseSymlinkEntry,
+                name="meta.json",
+                target=Path(root_path, f"meta/{self.swhid}.json"),
+            )
         )
 
         target = metadata["target"]
-        yield self.create_child(
-            FuseSymlinkEntry, name="target", target=Path(root_path, f"archive/{target}")
+        entries.append(
+            self.create_child(
+                FuseSymlinkEntry,
+                name="target",
+                target=Path(root_path, f"archive/{target}"),
+            )
         )
-        yield self.create_child(
-            ReleaseType,
-            name="target_type",
-            mode=int(EntryMode.RDONLY_FILE),
-            target_type=target.object_type,
+        entries.append(
+            self.create_child(
+                ReleaseType,
+                name="target_type",
+                mode=int(EntryMode.RDONLY_FILE),
+                target_type=target.object_type,
+            )
         )
 
         target_dir = await self.find_root_directory(target)
         if target_dir is not None:
-            yield self.create_child(
-                FuseSymlinkEntry,
-                name="root",
-                target=Path(root_path, f"archive/{target_dir}"),
+            entries.append(
+                self.create_child(
+                    FuseSymlinkEntry,
+                    name="root",
+                    target=Path(root_path, f"archive/{target_dir}"),
+                )
             )
 
+        for entry in entries[offset:]:
+            yield entry
+
 
 @dataclass
 class ReleaseType(FuseFileEntry):
@@ -306,11 +332,12 @@
 
     swhid: SWHID
 
-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> AsyncIterator[FuseEntry]:
         metadata = await self.fuse.get_metadata(self.swhid)
+        metadata = list(metadata.items())
         root_path = self.get_relative_root_path()
 
-        for branch_name, branch_meta in metadata.items():
+        for (branch_name, branch_meta) in metadata[offset:]:
             # Mangle branch name to create a valid UNIX filename
             name = urllib.parse.quote_plus(branch_name)
             yield self.create_child(
diff --git a/swh/fuse/fs/entry.py b/swh/fuse/fs/entry.py
--- a/swh/fuse/fs/entry.py
+++ b/swh/fuse/fs/entry.py
@@ -9,7 +9,7 @@
 from enum import IntEnum
 from pathlib import Path
 from stat import S_IFDIR, S_IFLNK, S_IFREG
-from typing import Any, Union
+from typing import Any, AsyncIterator, Union
 
 # Avoid cycling import
 Fuse = "Fuse"
@@ -75,10 +75,15 @@
     async def size(self) -> int:
         return 0
 
-    async def __aiter__(self):
+    async def get_entries(self, offset: int) -> AsyncIterator[FuseEntry]:
         """ Return the child entries of a directory entry """
 
         raise NotImplementedError
+        yield None
+
+    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+        async for entry in self.get_entries(offset=0):
+            yield entry
 
     async def lookup(self, name: str) -> FuseEntry:
         """ Look up a FUSE entry by name """
diff --git a/swh/fuse/fs/mountpoint.py b/swh/fuse/fs/mountpoint.py
--- a/swh/fuse/fs/mountpoint.py
+++ b/swh/fuse/fs/mountpoint.py
@@ -21,9 +21,10 @@
     mode: int = field(init=False, default=int(EntryMode.RDONLY_DIR))
     depth: int = field(init=False, default=1)
 
-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
-        yield self.create_child(ArchiveDir)
-        yield self.create_child(MetaDir)
+    async def get_entries(self, offset: int) -> AsyncIterator[FuseEntry]:
+        entries = [self.create_child(ArchiveDir), self.create_child(MetaDir)]
+        for entry in entries[offset:]:
+            yield entry
 
 
 @dataclass
@@ -46,9 +47,13 @@
             swhid=swhid,
         )
 
-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> AsyncIterator[FuseEntry]:
+        entries = []
         async for swhid in self.fuse.cache.get_cached_swhids():
-            yield self.create_child(swhid)
+            entries.append(self.create_child(swhid))
+
+        for entry in entries[offset:]:
+            yield entry
 
     async def lookup(self, name: str) -> FuseEntry:
         entry = await super().lookup(name)
@@ -76,15 +81,21 @@
     name: str = field(init=False, default="meta")
     mode: int = field(init=False, default=int(EntryMode.RDONLY_DIR))
 
-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> AsyncIterator[FuseEntry]:
+        entries = []
         async for swhid in self.fuse.cache.get_cached_swhids():
-            yield self.create_child(
-                MetaEntry,
-                name=f"{swhid}.json",
-                mode=int(EntryMode.RDONLY_FILE),
-                swhid=swhid,
+            entries.append(
+                self.create_child(
+                    MetaEntry,
+                    name=f"{swhid}.json",
+                    mode=int(EntryMode.RDONLY_FILE),
+                    swhid=swhid,
+                )
             )
 
+        for entry in entries[offset:]:
+            yield entry
+
 
 @dataclass
 class MetaEntry(FuseFileEntry):
diff --git a/swh/fuse/fuse.py b/swh/fuse/fuse.py
--- a/swh/fuse/fuse.py
+++ b/swh/fuse/fuse.py
@@ -174,13 +174,8 @@
         direntry = self.inode2entry(inode)
         assert isinstance(direntry, FuseDirEntry)
         next_id = offset + 1
-        i = 0
         try:
-            async for entry in direntry:
-                if i < offset:
-                    i += 1
-                    continue
-
+            async for entry in direntry.get_entries(offset):
                 name = os.fsencode(entry.name)
                 attrs = await self.get_attrs(entry)
                 if not pyfuse3.readdir_reply(token, name, attrs, next_id):
diff --git a/swh/fuse/tests/test_meta.py b/swh/fuse/tests/test_meta.py
--- a/swh/fuse/tests/test_meta.py
+++ b/swh/fuse/tests/test_meta.py
@@ -12,5 +12,5 @@
 
         file_path_meta = fuse_mntdir / f"meta/{swhid}.json"
         assert file_path_meta.exists()
-        expected = json.dumps(get_data_from_web_archive(swhid))
+        expected = json.dumps(get_data_from_web_archive(swhid), sort_keys=True)
         assert file_path_meta.read_text() == expected
diff --git a/swh/fuse/tests/test_release.py b/swh/fuse/tests/test_release.py
--- a/swh/fuse/tests/test_release.py
+++ b/swh/fuse/tests/test_release.py
@@ -14,7 +14,7 @@
 
 def test_access_meta(fuse_mntdir):
     file_path = fuse_mntdir / "archive" / ROOT_REL / "meta.json"
-    expected = json.dumps(get_data_from_web_archive(ROOT_REL))
+    expected = json.dumps(get_data_from_web_archive(ROOT_REL), sort_keys=True)
     assert file_path.read_text() == expected
 
 
diff --git a/swh/fuse/tests/test_revision.py b/swh/fuse/tests/test_revision.py
--- a/swh/fuse/tests/test_revision.py
+++ b/swh/fuse/tests/test_revision.py
@@ -12,7 +12,7 @@
 
 def test_access_meta(fuse_mntdir):
     file_path = fuse_mntdir / "archive" / ROOT_REV / "meta.json"
-    expected = json.dumps(get_data_from_web_archive(ROOT_REV))
+    expected = json.dumps(get_data_from_web_archive(ROOT_REV), sort_keys=True)
     assert file_path.read_text() == expected
 
 