diff --git a/swh/fuse/cache.py b/swh/fuse/cache.py
--- a/swh/fuse/cache.py
+++ b/swh/fuse/cache.py
@@ -112,7 +112,8 @@
     async def set(self, swhid: SWHID, metadata: Any) -> None:
         await self.conn.execute(
             "insert into metadata_cache values (?, ?)",
-            (str(swhid), json.dumps(metadata)),
+            # Keep the keys sorted so we can always retrieve them in the same order
+            (str(swhid), json.dumps(metadata, sort_keys=True)),
         )
         await self.conn.commit()

diff --git a/swh/fuse/fs/artifact.py b/swh/fuse/fs/artifact.py
--- a/swh/fuse/fs/artifact.py
+++ b/swh/fuse/fs/artifact.py
@@ -5,7 +5,7 @@

 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, AsyncIterator, List
+from typing import Any, List
 import urllib.parse

 from swh.fuse.fs.entry import (
@@ -69,9 +69,10 @@

     swhid: SWHID

-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> List[FuseEntry]:
+        entries = []
         metadata = await self.fuse.get_metadata(self.swhid)
-        for entry in metadata:
+        for entry in metadata[offset:]:
             name = entry["name"]
             swhid = entry["target"]
             mode = (
@@ -84,41 +85,49 @@

             # 1. Regular file
             if swhid.object_type == CONTENT:
-                yield self.create_child(
-                    Content,
-                    name=name,
-                    mode=mode,
-                    swhid=swhid,
-                    # The directory API has extra info we can use to set
-                    # attributes without additional Software Heritage API call
-                    prefetch=entry,
+                entries.append(
+                    self.create_child(
+                        Content,
+                        name=name,
+                        mode=mode,
+                        swhid=swhid,
+                        # The directory API has extra info we can use to set
+                        # attributes without additional Software Heritage API call
+                        prefetch=entry,
+                    )
                 )
             # 2. Regular directory
             elif swhid.object_type == DIRECTORY:
-                yield self.create_child(
-                    Directory, name=name, mode=mode, swhid=swhid,
+                entries.append(
+                    self.create_child(Directory, name=name, mode=mode, swhid=swhid,)
                 )
             # 3. Symlink
             elif mode == DentryPerms.symlink:
-                yield self.create_child(
-                    FuseSymlinkEntry,
-                    name=name,
-                    # Symlink target is stored in the blob content
-                    target=await self.fuse.get_blob(swhid),
+                entries.append(
+                    self.create_child(
+                        FuseSymlinkEntry,
+                        name=name,
+                        # Symlink target is stored in the blob content
+                        target=await self.fuse.get_blob(swhid),
+                    )
                 )
             # 4. Submodule
             elif swhid.object_type == REVISION:
                 # Make sure the revision metadata is fetched and create a
                 # symlink to distinguish it with regular directories
                 await self.fuse.get_metadata(swhid)
-                yield self.create_child(
-                    FuseSymlinkEntry,
-                    name=name,
-                    target=Path(self.get_relative_root_path(), f"archive/{swhid}"),
+                entries.append(
+                    self.create_child(
+                        FuseSymlinkEntry,
+                        name=name,
+                        target=Path(self.get_relative_root_path(), f"archive/{swhid}"),
+                    )
                 )
             else:
                 raise ValueError("Unknown directory entry type: {swhid.object_type}")

+        return entries
+

 @dataclass
 class Revision(FuseDirEntry):
@@ -147,42 +156,53 @@

     swhid: SWHID

-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> List[FuseEntry]:
+        entries = []
         metadata = await self.fuse.get_metadata(self.swhid)
         directory = metadata["directory"]
         parents = metadata["parents"]

         root_path = self.get_relative_root_path()

-        yield self.create_child(
-            FuseSymlinkEntry,
-            name="root",
-            target=Path(root_path, f"archive/{directory}"),
+        entries.append(
+            self.create_child(
+                FuseSymlinkEntry,
+                name="root",
+                target=Path(root_path, f"archive/{directory}"),
+            )
         )
-        yield self.create_child(
-            FuseSymlinkEntry,
-            name="meta.json",
-            target=Path(root_path, f"meta/{self.swhid}.json"),
+        entries.append(
+            self.create_child(
+                FuseSymlinkEntry,
+                name="meta.json",
+                target=Path(root_path, f"meta/{self.swhid}.json"),
+            )
         )
-        yield self.create_child(
-            RevisionParents,
-            name="parents",
-            mode=int(EntryMode.RDONLY_DIR),
-            parents=[x["id"] for x in parents],
+        entries.append(
+            self.create_child(
+                RevisionParents,
+                name="parents",
+                mode=int(EntryMode.RDONLY_DIR),
+                parents=[x["id"] for x in parents],
+            )
         )
         if len(parents) >= 1:
-            yield self.create_child(
-                FuseSymlinkEntry, name="parent", target="parents/1/",
+            entries.append(
+                self.create_child(FuseSymlinkEntry, name="parent", target="parents/1/",)
             )
-        yield self.create_child(
-            RevisionHistory,
-            name="history",
-            mode=int(EntryMode.RDONLY_DIR),
-            swhid=self.swhid,
+        entries.append(
+            self.create_child(
+                RevisionHistory,
+                name="history",
+                mode=int(EntryMode.RDONLY_DIR),
+                swhid=self.swhid,
+            )
         )

+        return entries[offset:]
+

 @dataclass
 class RevisionParents(FuseDirEntry):
@@ -190,14 +210,18 @@

     parents: List[SWHID]

-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> List[FuseEntry]:
+        entries = []
         root_path = self.get_relative_root_path()
-        for i, parent in enumerate(self.parents):
-            yield self.create_child(
-                FuseSymlinkEntry,
-                name=str(i + 1),
-                target=Path(root_path, f"archive/{parent}"),
+        for i, parent in enumerate(self.parents[offset:]):
+            entries.append(
+                self.create_child(
+                    FuseSymlinkEntry,
+                    name=str(i + 1),
+                    target=Path(root_path, f"archive/{parent}"),
+                )
             )
+        return entries


 @dataclass
@@ -206,15 +230,19 @@

     swhid: SWHID

-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> List[FuseEntry]:
+        entries = []
         history = await self.fuse.get_history(self.swhid)
         root_path = self.get_relative_root_path()
-        for swhid in history:
-            yield self.create_child(
-                FuseSymlinkEntry,
-                name=str(swhid),
-                target=Path(root_path, f"archive/{swhid}"),
+        for swhid in history[offset:]:
+            entries.append(
+                self.create_child(
+                    FuseSymlinkEntry,
+                    name=str(swhid),
+                    target=Path(root_path, f"archive/{swhid}"),
+                )
             )
+        return entries


 @dataclass
@@ -249,35 +277,48 @@
         else:
             return None

-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> List[FuseEntry]:
+        entries = []
         metadata = await self.fuse.get_metadata(self.swhid)
         root_path = self.get_relative_root_path()

-        yield self.create_child(
-            FuseSymlinkEntry,
-            name="meta.json",
-            target=Path(root_path, f"meta/{self.swhid}.json"),
+        entries.append(
+            self.create_child(
+                FuseSymlinkEntry,
+                name="meta.json",
+                target=Path(root_path, f"meta/{self.swhid}.json"),
+            )
         )

         target = metadata["target"]
-        yield self.create_child(
-            FuseSymlinkEntry, name="target", target=Path(root_path, f"archive/{target}")
+        entries.append(
+            self.create_child(
+                FuseSymlinkEntry,
+                name="target",
+                target=Path(root_path, f"archive/{target}"),
+            )
         )
-        yield self.create_child(
-            ReleaseType,
-            name="target_type",
-            mode=int(EntryMode.RDONLY_FILE),
-            target_type=target.object_type,
+        entries.append(
+            self.create_child(
+                ReleaseType,
+                name="target_type",
+                mode=int(EntryMode.RDONLY_FILE),
+                target_type=target.object_type,
+            )
         )

         target_dir = await self.find_root_directory(target)
         if target_dir is not None:
-            yield self.create_child(
-                FuseSymlinkEntry,
-                name="root",
-                target=Path(root_path, f"archive/{target_dir}"),
+            entries.append(
+                self.create_child(
+                    FuseSymlinkEntry,
+                    name="root",
+                    target=Path(root_path, f"archive/{target_dir}"),
+                )
             )

+        return entries[offset:]
+

 @dataclass
 class ReleaseType(FuseFileEntry):
@@ -306,18 +347,23 @@

     swhid: SWHID

-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> List[FuseEntry]:
+        entries = []
         metadata = await self.fuse.get_metadata(self.swhid)
+        metadata = list(metadata.items())
         root_path = self.get_relative_root_path()
-        for branch_name, branch_meta in metadata.items():
+        for (branch_name, branch_meta) in metadata[offset:]:
             # Mangle branch name to create a valid UNIX filename
             name = urllib.parse.quote_plus(branch_name)
-            yield self.create_child(
-                FuseSymlinkEntry,
-                name=name,
-                target=Path(root_path, f"archive/{branch_meta['target']}"),
+            entries.append(
+                self.create_child(
+                    FuseSymlinkEntry,
+                    name=name,
+                    target=Path(root_path, f"archive/{branch_meta['target']}"),
+                )
             )
+        return entries


 OBJTYPE_GETTERS = {
diff --git a/swh/fuse/fs/entry.py b/swh/fuse/fs/entry.py
--- a/swh/fuse/fs/entry.py
+++ b/swh/fuse/fs/entry.py
@@ -9,7 +9,7 @@
 from enum import IntEnum
 from pathlib import Path
 from stat import S_IFDIR, S_IFLNK, S_IFREG
-from typing import Any, Union
+from typing import Any, List, Union

 # Avoid cycling import
 Fuse = "Fuse"
@@ -75,7 +75,7 @@
     async def size(self) -> int:
         return 0

-    async def __aiter__(self):
+    async def get_entries(self, offset: int) -> List[FuseEntry]:
         """ Return the child entries of a directory entry """

         raise NotImplementedError
@@ -83,7 +83,7 @@
     async def lookup(self, name: str) -> FuseEntry:
         """ Look up a FUSE entry by name """

-        async for entry in self:
+        for entry in await self.get_entries(offset=0):
             if entry.name == name:
                 return entry
         return None
diff --git a/swh/fuse/fs/mountpoint.py b/swh/fuse/fs/mountpoint.py
--- a/swh/fuse/fs/mountpoint.py
+++ b/swh/fuse/fs/mountpoint.py
@@ -5,7 +5,7 @@

 from dataclasses import dataclass, field
 import json
-from typing import AsyncIterator
+from typing import Any, List

 from swh.fuse.fs.artifact import OBJTYPE_GETTERS
 from swh.fuse.fs.entry import EntryMode, FuseDirEntry, FuseEntry, FuseFileEntry
@@ -20,10 +20,11 @@
     name: str = field(init=False, default=None)
     mode: int = field(init=False, default=int(EntryMode.RDONLY_DIR))
     depth: int = field(init=False, default=1)
+    key: Any = field(default=slice(0, None))

-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
-        yield self.create_child(ArchiveDir)
-        yield self.create_child(MetaDir)
+    async def get_entries(self, offset: int) -> List[FuseEntry]:
+        entries = [self.create_child(ArchiveDir), self.create_child(MetaDir)]
+        return entries[offset:]


 @dataclass
@@ -46,9 +47,11 @@
             swhid=swhid,
         )

-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> List[FuseEntry]:
+        entries = []
         async for swhid in self.fuse.cache.get_cached_swhids():
-            yield self.create_child(swhid)
+            entries.append(self.create_child(swhid))
+        return entries[offset:]

     async def lookup(self, name: str) -> FuseEntry:
         entry = await super().lookup(name)
@@ -76,14 +79,18 @@
     name: str = field(init=False, default="meta")
     mode: int = field(init=False, default=int(EntryMode.RDONLY_DIR))

-    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+    async def get_entries(self, offset: int) -> List[FuseEntry]:
+        entries = []
         async for swhid in self.fuse.cache.get_cached_swhids():
-            yield self.create_child(
-                MetaEntry,
-                name=f"{swhid}.json",
-                mode=int(EntryMode.RDONLY_FILE),
-                swhid=swhid,
+            entries.append(
+                self.create_child(
+                    MetaEntry,
+                    name=f"{swhid}.json",
+                    mode=int(EntryMode.RDONLY_FILE),
+                    swhid=swhid,
+                )
             )
+        return entries[offset:]


 @dataclass
diff --git a/swh/fuse/fuse.py b/swh/fuse/fuse.py
--- a/swh/fuse/fuse.py
+++ b/swh/fuse/fuse.py
@@ -174,13 +174,8 @@
         direntry = self.inode2entry(inode)
         assert isinstance(direntry, FuseDirEntry)
         next_id = offset + 1
-        i = 0
         try:
-            async for entry in direntry:
-                if i < offset:
-                    i += 1
-                    continue
-
+            for entry in await direntry.get_entries(offset):
                 name = os.fsencode(entry.name)
                 attrs = await self.get_attrs(entry)
                 if not pyfuse3.readdir_reply(token, name, attrs, next_id):
diff --git a/swh/fuse/tests/test_meta.py b/swh/fuse/tests/test_meta.py
--- a/swh/fuse/tests/test_meta.py
+++ b/swh/fuse/tests/test_meta.py
@@ -12,5 +12,5 @@
     file_path_meta = fuse_mntdir / f"meta/{swhid}.json"
     assert file_path_meta.exists()
-    expected = json.dumps(get_data_from_web_archive(swhid))
+    expected = json.dumps(get_data_from_web_archive(swhid), sort_keys=True)
     assert file_path_meta.read_text() == expected
diff --git a/swh/fuse/tests/test_release.py b/swh/fuse/tests/test_release.py
--- a/swh/fuse/tests/test_release.py
+++ b/swh/fuse/tests/test_release.py
@@ -14,7 +14,7 @@

 def test_access_meta(fuse_mntdir):
     file_path = fuse_mntdir / "archive" / ROOT_REL / "meta.json"
-    expected = json.dumps(get_data_from_web_archive(ROOT_REL))
+    expected = json.dumps(get_data_from_web_archive(ROOT_REL), sort_keys=True)
     assert file_path.read_text() == expected
diff --git a/swh/fuse/tests/test_revision.py b/swh/fuse/tests/test_revision.py
--- a/swh/fuse/tests/test_revision.py
+++ b/swh/fuse/tests/test_revision.py
@@ -12,7 +12,7 @@

 def test_access_meta(fuse_mntdir):
     file_path = fuse_mntdir / "archive" / ROOT_REV / "meta.json"
-    expected = json.dumps(get_data_from_web_archive(ROOT_REV))
+    expected = json.dumps(get_data_from_web_archive(ROOT_REV), sort_keys=True)
     assert file_path.read_text() == expected
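
Not part of the patch: a minimal, self-contained sketch of the offset-based listing pattern introduced above, using plain asyncio and made-up names (Entry, DemoDir, readdir_demo, buffer_size) instead of the real swh.fuse classes. Each directory builds its full child list and returns entries[offset:], and the readdir-style consumer resumes from the offset it hands back:

import asyncio
from dataclasses import dataclass, field
from typing import List


@dataclass
class Entry:
    name: str


@dataclass
class DemoDir:
    # Toy stand-in for a FuseDirEntry subclass (hypothetical, for illustration).
    children: List[str] = field(default_factory=list)

    async def get_entries(self, offset: int) -> List[Entry]:
        # Build the complete listing, then resume at `offset`, mirroring the
        # `return entries[offset:]` pattern used throughout the patch.
        entries = [Entry(name) for name in self.children]
        return entries[offset:]


async def readdir_demo(direntry: DemoDir, offset: int, buffer_size: int) -> int:
    # Crude model of the readdir loop in fuse.py: emit entries starting at
    # `offset`, stop once the (simulated) reply buffer is full, and return
    # the offset a follow-up call would resume from.
    emitted = 0
    for entry in await direntry.get_entries(offset):
        if emitted == buffer_size:  # stand-in for readdir_reply() returning False
            break
        print(f"#{offset + emitted + 1} {entry.name}")
        emitted += 1
    return offset + emitted


async def main() -> None:
    demo = DemoDir(children=[f"file{i}" for i in range(5)])
    cookie = await readdir_demo(demo, offset=0, buffer_size=2)       # file0, file1
    cookie = await readdir_demo(demo, offset=cookie, buffer_size=2)  # file2, file3
    await readdir_demo(demo, offset=cookie, buffer_size=2)           # file4


if __name__ == "__main__":
    asyncio.run(main())

Since lookup() in entry.py always requests the full listing via get_entries(offset=0), only a resumed readdir call ever sees a non-zero offset.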