diff --git a/swh/fuse/fs/artifact.py b/swh/fuse/fs/artifact.py --- a/swh/fuse/fs/artifact.py +++ b/swh/fuse/fs/artifact.py @@ -7,27 +7,24 @@ from pathlib import Path from typing import Any, AsyncIterator, List -from swh.fuse.fs.entry import EntryMode, FuseEntry -from swh.fuse.fs.symlink import SymlinkEntry +from swh.fuse.fs.entry import ( + EntryMode, + FuseDirEntry, + FuseEntry, + FuseFileEntry, + FuseSymlinkEntry, +) from swh.model.from_disk import DentryPerms from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SWHID @dataclass -class ArtifactEntry(FuseEntry): - """ FUSE virtual entry for a Software Heritage Artifact +class Content(FuseFileEntry): + """ Software Heritage content artifact. Attributes: swhid: Software Heritage persistent identifier prefetch: optional prefetched metadata used to set entry attributes - """ - - swhid: SWHID - prefetch: Any = None - - -class Content(ArtifactEntry): - """ Software Heritage content artifact. Content leaves (AKA blobs) are represented on disks as regular files, containing the corresponding bytes, as archived. @@ -37,6 +34,9 @@ directory, the permissions of the `archive/SWHID` file will be arbitrary and not meaningful (e.g., `0x644`). """ + swhid: SWHID + prefetch: Any = None + async def get_content(self) -> bytes: data = await self.fuse.get_blob(self.swhid) if not self.prefetch: @@ -49,13 +49,14 @@ else: return len(await self.get_content()) - async def __aiter__(self): - raise ValueError("Cannot iterate over a content type artifact") - -class Directory(ArtifactEntry): +@dataclass +class Directory(FuseDirEntry): """ Software Heritage directory artifact. + Attributes: + swhid: Software Heritage persistent identifier + Directory nodes are represented as directories on the file-system, containing one entry for each entry of the archived directory. 
Entry names and other metadata, including permissions, will correspond to the archived @@ -65,6 +66,8 @@ So it is possible that, in the context of a directory, a file is presented as writable, whereas actually writing to it will fail with `EPERM`. """ + swhid: SWHID + async def __aiter__(self) -> AsyncIterator[FuseEntry]: metadata = await self.fuse.get_metadata(self.swhid) for entry in metadata: @@ -78,40 +81,51 @@ else entry["perms"] ) - # 1. Symlinks + # 1. Symlink (tested first: a symlink is also a content object) if mode == DentryPerms.symlink: yield self.create_child( - SymlinkEntry, + FuseSymlinkEntry, name=name, # Symlink target is stored in the blob content target=await self.fuse.get_blob(swhid), ) - # 2. Submodules + # 2. Regular file + elif swhid.object_type == CONTENT: + yield self.create_child( + Content, + name=name, + mode=mode, + swhid=swhid, + # The directory API has extra info we can use to set + # attributes without additional Software Heritage API call + prefetch=entry, + ) + # 3. Regular directory + elif swhid.object_type == DIRECTORY: + yield self.create_child( + Directory, name=name, mode=mode, swhid=swhid, + ) + # 4. Submodule elif swhid.object_type == REVISION: # Make sure the revision metadata is fetched and create a # symlink to distinguish it with regular directories await self.fuse.get_metadata(swhid) yield self.create_child( - SymlinkEntry, + FuseSymlinkEntry, name=name, target=Path(self.get_relative_root_path(), f"archive/{swhid}"), ) - # 3. Regular entries (directories, contents) else: - yield self.create_child( - OBJTYPE_GETTERS[swhid.object_type], - name=name, - mode=mode, - swhid=swhid, - # The directory API has extra info we can use to set - # attributes without additional Software Heritage API call - prefetch=entry, - ) + raise ValueError(f"Unknown directory entry type: {swhid.object_type}") -class Revision(ArtifactEntry): +@dataclass +class Revision(FuseDirEntry): """ Software Heritage revision artifact. 
+ Attributes: + swhid: Software Heritage persistent identifier + Revision (AKA commit) nodes are represented on the file-system as directories with the following entries: @@ -127,6 +141,8 @@ - `meta.json`: metadata for the current node, as a symlink pointing to the relevant `meta/.json` file """ + swhid: SWHID + async def __aiter__(self) -> AsyncIterator[FuseEntry]: metadata = await self.fuse.get_metadata(self.swhid) directory = metadata["directory"] @@ -140,10 +156,12 @@ root_path = self.get_relative_root_path() yield self.create_child( - SymlinkEntry, name="root", target=Path(root_path, f"archive/{directory}"), + FuseSymlinkEntry, + name="root", + target=Path(root_path, f"archive/{directory}"), ) yield self.create_child( - SymlinkEntry, + FuseSymlinkEntry, name="meta.json", target=Path(root_path, f"meta/{self.swhid}.json"), ) @@ -156,12 +174,12 @@ if len(parents) >= 1: yield self.create_child( - SymlinkEntry, name="parent", target="parents/1/", + FuseSymlinkEntry, name="parent", target="parents/1/", ) @dataclass -class RevisionParents(FuseEntry): +class RevisionParents(FuseDirEntry): """ Revision virtual `parents/` directory """ parents: List[SWHID] @@ -170,15 +188,19 @@ root_path = self.get_relative_root_path() for i, parent in enumerate(self.parents): yield self.create_child( - SymlinkEntry, + FuseSymlinkEntry, name=str(i + 1), target=Path(root_path, f"archive/{parent}"), ) -class Release(ArtifactEntry): +@dataclass +class Release(FuseDirEntry): """ Software Heritage release artifact. 
+ Attributes: + swhid: Software Heritage persistent identifier + Release nodes are represented on the file-system as directories with the following entries: @@ -190,6 +212,8 @@ - `meta.json`: metadata for the current node, as a symlink pointing to the relevant `meta/.json` file """ + swhid: SWHID + async def find_root_directory(self, swhid: SWHID) -> SWHID: if swhid.object_type == RELEASE: metadata = await self.fuse.get_metadata(swhid) @@ -207,14 +231,14 @@ root_path = self.get_relative_root_path() yield self.create_child( - SymlinkEntry, + FuseSymlinkEntry, name="meta.json", target=Path(root_path, f"meta/{self.swhid}.json"), ) target = metadata["target"] yield self.create_child( - SymlinkEntry, name="target", target=Path(root_path, f"archive/{target}") + FuseSymlinkEntry, name="target", target=Path(root_path, f"archive/{target}") ) yield self.create_child( ReleaseType, @@ -226,14 +250,14 @@ target_dir = await self.find_root_directory(target) if target_dir is not None: yield self.create_child( - SymlinkEntry, + FuseSymlinkEntry, name="root", target=Path(root_path, f"archive/{target_dir}"), ) @dataclass -class ReleaseType(FuseEntry): +class ReleaseType(FuseFileEntry): """ Release type virtual file """ target_type: str diff --git a/swh/fuse/fs/entry.py b/swh/fuse/fs/entry.py --- a/swh/fuse/fs/entry.py +++ b/swh/fuse/fs/entry.py @@ -9,7 +9,7 @@ from enum import IntEnum from pathlib import Path from stat import S_IFDIR, S_IFLNK, S_IFREG -from typing import Any, AsyncIterator, Union +from typing import Any, Union # Avoid cycling import Fuse = "Fuse" @@ -48,20 +48,34 @@ def __post_init__(self): self.inode = self.fuse._alloc_inode(self) + def get_relative_root_path(self) -> str: + return "../" * (self.depth - 1) + + def create_child(self, constructor: Any, **kwargs) -> FuseEntry: + return constructor(depth=self.depth + 1, fuse=self.fuse, **kwargs) + + +class FuseFileEntry(FuseEntry): + """ FUSE virtual file entry """ + async def get_content(self) -> bytes: """ Return the 
content of a file entry """ - return None + raise NotImplementedError async def size(self) -> int: """ Return the size of a file entry """ - return 0 + raise NotImplementedError + + +class FuseDirEntry(FuseEntry): + """ FUSE virtual directory entry """ - async def __aiter__(self) -> AsyncIterator[FuseEntry]: + async def __aiter__(self): """ Return the child entries of a directory entry """ - yield None + raise NotImplementedError async def lookup(self, name: str) -> FuseEntry: """ Look up a FUSE entry by name """ @@ -71,13 +85,28 @@ return entry return None - def get_target(self) -> Union[str, bytes, Path]: - """ Return the path target of a symlink entry """ - return None +@dataclass +class FuseSymlinkEntry(FuseEntry): + """ FUSE virtual symlink entry - def get_relative_root_path(self) -> str: - return "../" * (self.depth - 1) + Attributes: + target: path to symlink target + """ - def create_child(self, constructor: Any, **kwargs) -> FuseEntry: - return constructor(depth=self.depth + 1, fuse=self.fuse, **kwargs) + mode: int = field(init=False, default=int(EntryMode.SYMLINK)) + target: Union[str, bytes, Path] + + async def size(self) -> int: + """ Return the size of a symlink entry """ + + # Count the bytes readlink() returns: str() on a bytes target would + # measure its b'...' repr instead of the path itself + import os + + return len(os.fsencode(self.target)) + + def get_target(self) -> Union[str, bytes, Path]: + """ Return the path target of a symlink entry """ + + return self.target diff --git a/swh/fuse/fs/mountpoint.py b/swh/fuse/fs/mountpoint.py --- a/swh/fuse/fs/mountpoint.py +++ b/swh/fuse/fs/mountpoint.py @@ -8,13 +8,13 @@ from typing import AsyncIterator from swh.fuse.fs.artifact import OBJTYPE_GETTERS -from swh.fuse.fs.entry import EntryMode, FuseEntry +from swh.fuse.fs.entry import EntryMode, FuseDirEntry, FuseEntry, FuseFileEntry from swh.model.exceptions import ValidationError from swh.model.identifiers import CONTENT, SWHID, parse_swhid @dataclass -class Root(FuseEntry): +class Root(FuseDirEntry): """ The FUSE mountpoint, consisting of the archive/ and meta/ directories """ name: str = 
field(init=False, default=None) @@ -27,7 +27,7 @@ @dataclass -class ArchiveDir(FuseEntry): +class ArchiveDir(FuseDirEntry): """ The archive/ directory is lazily populated with one entry per accessed SWHID, having actual SWHIDs as names """ @@ -65,7 +65,7 @@ @dataclass -class MetaDir(FuseEntry): +class MetaDir(FuseDirEntry): """ The meta/ directory contains one SWHID.json file for each SWHID entry under archive/. The JSON file contain all available meta information about the given SWHID, as returned by the Software Heritage Web API for that @@ -87,7 +87,7 @@ @dataclass -class MetaEntry(FuseEntry): +class MetaEntry(FuseFileEntry): """ An entry from the meta/ directory, containing for each accessed SWHID a corresponding SWHID.json file with all the metadata from the Software Heritage archive. """ diff --git a/swh/fuse/fs/symlink.py b/swh/fuse/fs/symlink.py deleted file mode 100644 --- a/swh/fuse/fs/symlink.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) 2020 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from dataclasses import dataclass, field -from pathlib import Path -from typing import Union - -from swh.fuse.fs.entry import EntryMode, FuseEntry - - -@dataclass -class SymlinkEntry(FuseEntry): - """ FUSE virtual entry for symlinks - - Attributes: - target: path to symlink target - """ - - mode: int = field(init=False, default=int(EntryMode.SYMLINK)) - target: Union[str, bytes, Path] - - async def size(self) -> int: - return len(str(self.target)) - - def get_target(self) -> Union[str, bytes, Path]: - return self.target diff --git a/swh/fuse/fuse.py b/swh/fuse/fuse.py --- a/swh/fuse/fuse.py +++ b/swh/fuse/fuse.py @@ -16,7 +16,7 @@ import requests from swh.fuse.cache import FuseCache -from swh.fuse.fs.entry import FuseEntry +from swh.fuse.fs.entry import FuseDirEntry, FuseEntry, 
FuseFileEntry, FuseSymlinkEntry from swh.fuse.fs.mountpoint import Root from swh.model.identifiers import CONTENT, SWHID from swh.web.client.client import WebAPIClient @@ -119,7 +119,10 @@ attrs.st_uid = self.uid attrs.st_ino = entry.inode attrs.st_mode = entry.mode - attrs.st_size = await entry.size() + if isinstance(entry, FuseFileEntry) or isinstance(entry, FuseSymlinkEntry): + attrs.st_size = await entry.size() + else: + attrs.st_size = 0 return attrs async def getattr( @@ -144,6 +147,7 @@ # TODO: add cache on direntry list? direntry = self.inode2entry(inode) + assert isinstance(direntry, FuseDirEntry) next_id = offset + 1 i = 0 async for entry in direntry: @@ -174,6 +178,7 @@ inode = fh entry = self.inode2entry(inode) + assert isinstance(entry, FuseFileEntry) data = await entry.get_content() return data[offset : offset + length] @@ -184,6 +189,7 @@ name = os.fsdecode(name) parent_entry = self.inode2entry(parent_inode) + assert isinstance(parent_entry, FuseDirEntry) lookup_entry = await parent_entry.lookup(name) if lookup_entry: return await self.get_attrs(lookup_entry) @@ -193,6 +199,7 @@ async def readlink(self, inode: int, _ctx: pyfuse3.RequestContext) -> bytes: entry = self.inode2entry(inode) + assert isinstance(entry, FuseSymlinkEntry) return os.fsencode(entry.get_target())