diff --git a/swh/fuse/fs/artifact.py b/swh/fuse/fs/artifact.py
--- a/swh/fuse/fs/artifact.py
+++ b/swh/fuse/fs/artifact.py
@@ -3,15 +3,17 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-from typing import Any, AsyncIterator
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, AsyncIterator, List
 
 from swh.fuse.fs.entry import EntryMode, FuseEntry
-from swh.model.identifiers import CONTENT, DIRECTORY, SWHID
-
-# Avoid cycling import
-Fuse = "Fuse"
+from swh.fuse.fs.symlink import SymlinkEntry
+from swh.model.from_disk import DentryPerms
+from swh.model.identifiers import CONTENT, DIRECTORY, REVISION, SWHID
 
 
+@dataclass
 class ArtifactEntry(FuseEntry):
     """ FUSE virtual entry for a Software Heritage Artifact
 
@@ -20,21 +22,8 @@
         prefetch: optional prefetched metadata used to set entry attributes
     """
 
-    def __init__(
-        self, name: str, mode: int, fuse: Fuse, swhid: SWHID, prefetch: Any = None
-    ):
-        super().__init__(name, mode, fuse)
-        self.swhid = swhid
-        self.prefetch = prefetch
-
-
-def typify(
-    name: str, mode: int, fuse: Fuse, swhid: SWHID, prefetch: Any = None
-) -> ArtifactEntry:
-    """ Create an artifact entry corresponding to the given artifact type """
-
-    getters = {CONTENT: Content, DIRECTORY: Directory}
-    return getters[swhid.object_type](name, mode, fuse, swhid, prefetch)
+    swhid: SWHID
+    prefetch: Any = None
 
 
 class Content(ArtifactEntry):
@@ -76,18 +65,127 @@
-    async def __aiter__(self) -> AsyncIterator[ArtifactEntry]:
+    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
         metadata = await self.fuse.get_metadata(self.swhid)
         for entry in metadata:
-            yield typify(
-                name=entry["name"],
-                # Use default read-only permissions for directories, and
-                # archived permissions for contents
-                mode=(
-                    entry["perms"]
-                    if entry["target"].object_type == CONTENT
-                    else int(EntryMode.RDONLY_DIR)
-                ),
-                fuse=self.fuse,
-                swhid=entry["target"],
-                # The directory API has extra info we can use to set attributes
-                # without additional Software Heritage API call
-                prefetch=entry,
+            name = entry["name"]
+            swhid = entry["target"]
+            mode = (
+                # Archived permissions for directories are always set to
+                # 0o040000 so use a read-only permission instead
+                int(EntryMode.RDONLY_DIR)
+                if swhid.object_type == DIRECTORY
+                else entry["perms"]
+            )
+
+            # 1. Symlinks
+            if mode == DentryPerms.symlink:
+                yield self.create_child(
+                    SymlinkEntry,
+                    name=name,
+                    # Symlink target is stored in the blob content
+                    target=await self.fuse.get_blob(swhid),
+                )
+            # 2. Submodules
+            elif swhid.object_type == REVISION:
+                # Make sure the revision metadata is fetched and create a
+                # symlink to distinguish it with regular directories
+                await self.fuse.get_metadata(swhid)
+                yield self.create_child(
+                    SymlinkEntry,
+                    name=name,
+                    target=Path(self.get_root_path(), f"archive/{swhid}"),
+                )
+            # 3. Regular entries (directories, contents)
+            else:
+                yield self.create_child(
+                    OBJTYPE_GETTERS[swhid.object_type],
+                    name=name,
+                    mode=mode,
+                    swhid=swhid,
+                    # The directory API has extra info we can use to set
+                    # attributes without additional Software Heritage API call
+                    prefetch=entry,
+                )
+
+
+class Revision(ArtifactEntry):
+    """ Software Heritage revision artifact.
+
+    Revision (AKA commit) nodes are represented on the file-system as
+    directories with the following entries:
+
+    - `root`: source tree at the time of the commit, as a symlink pointing into
+      `archive/`, to a SWHID of type `dir`
+    - `parents/` (note the plural): a virtual directory containing entries named
+      `1`, `2`, `3`, etc., one for each parent commit. Each of these entry is a
+      symlink pointing into `archive/`, to the SWHID file for the given parent
+      commit
+    - `parent` (note the singular): present if and only if the current commit
+      has a single parent commit (which is the most common case). When present
+      it is a symlink pointing into `archive/` to the SWHID for the sole parent
+      commit
+    - `meta.json`: metadata for the current node, as a symlink pointing to the
+      relevant `meta/<SWHID>.json` file """
+
+    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+        metadata = await self.fuse.get_metadata(self.swhid)
+        directory = metadata["directory"]
+        parents = metadata["parents"]
+
+        # Make sure all necessary metadatas are fetched
+        await self.fuse.get_metadata(directory)
+        for parent in parents:
+            await self.fuse.get_metadata(parent["id"])
+
+        root_path = self.get_root_path()
+
+        entries = [
+            self.create_child(
+                SymlinkEntry,
+                name="root",
+                target=Path(root_path, f"archive/{directory}"),
+            ),
+            self.create_child(
+                SymlinkEntry,
+                name="meta.json",
+                target=Path(root_path, f"meta/{self.swhid}.json"),
+            ),
+        ]
+
+        if len(parents) == 1:
+            entries.append(
+                self.create_child(
+                    SymlinkEntry,
+                    name="parent",
+                    target=Path(root_path, f"archive/{parents[0]['id']}"),
+                )
+            )
+        elif len(parents) > 1:
+            entries.append(
+                self.create_child(
+                    RevisionParents,
+                    name="parents",
+                    mode=int(EntryMode.RDONLY_DIR),
+                    parents=[x["id"] for x in parents],
+                )
             )
+
+        for entry in entries:
+            yield entry
+
+
+@dataclass
+class RevisionParents(FuseEntry):
+    """ Revision virtual `parents/` directory """
+
+    parents: List[SWHID]
+
+    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+        root_path = self.get_root_path()
+        for i, parent in enumerate(self.parents):
+            yield self.create_child(
+                SymlinkEntry,
+                name=str(i + 1),
+                target=Path(root_path, f"archive/{parent}"),
+            )
+
+
+OBJTYPE_GETTERS = {CONTENT: Content, DIRECTORY: Directory, REVISION: Revision}
diff --git a/swh/fuse/fs/entry.py b/swh/fuse/fs/entry.py
--- a/swh/fuse/fs/entry.py
+++ b/swh/fuse/fs/entry.py
@@ -3,8 +3,12 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from __future__ import annotations
+
+from dataclasses import dataclass, field
 from enum import IntEnum
-from stat import S_IFDIR, S_IFREG
+from stat import S_IFDIR, S_IFLNK, S_IFREG
+from typing import Any
 
 # Avoid cycling import
 Fuse = "Fuse"
@@ -20,8 +24,10 @@
 
     RDONLY_FILE = S_IFREG | 0o444
     RDONLY_DIR = S_IFDIR | 0o555
+    SYMLINK = S_IFLNK | 0o444
 
 
+@dataclass
 class FuseEntry:
     """ Main wrapper class to manipulate virtual FUSE entries
 
@@ -32,11 +38,14 @@
         inode: unique integer identifying the entry
     """
 
-    def __init__(self, name: str, mode: int, fuse: Fuse):
-        self.name = name
-        self.mode = mode
-        self.fuse = fuse
-        self.inode = fuse._alloc_inode(self)
+    name: str
+    mode: int
+    depth: int
+    fuse: Fuse
+    inode: int = field(init=False)
+
+    def __post_init__(self):
+        self.inode = self.fuse._alloc_inode(self)
 
     async def length(self) -> int:
         return 0
@@ -46,3 +55,13 @@
 
     async def __aiter__(self):
         return None
+
+    def get_root_path(self) -> str:
+        """ Return the relative path leading from inside this entry (seen as a
+        directory) back up to the mountpoint, e.g. "../../" at depth 3 """
+        return "../" * (self.depth - 1)
+
+    def create_child(self, constructor: Any, **kwargs) -> FuseEntry:
+        """ Instantiate a child entry one level deeper, sharing the same fuse
+        object; remaining constructor fields are passed through kwargs """
+        return constructor(depth=self.depth + 1, fuse=self.fuse, **kwargs)
diff --git a/swh/fuse/fs/mountpoint.py b/swh/fuse/fs/mountpoint.py
--- a/swh/fuse/fs/mountpoint.py
+++ b/swh/fuse/fs/mountpoint.py
@@ -3,34 +3,36 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from dataclasses import dataclass, field
 import json
 from typing import AsyncIterator
 
-from swh.fuse.fs.artifact import typify
+from swh.fuse.fs.artifact import OBJTYPE_GETTERS
 from swh.fuse.fs.entry import EntryMode, FuseEntry
 from swh.model.identifiers import CONTENT, SWHID
 
-# Avoid cycling import
-Fuse = "Fuse"
-
 
+@dataclass
 class Root(FuseEntry):
     """ The FUSE mountpoint, consisting of the archive/ and meta/ directories """
 
-    def __init__(self, fuse: Fuse):
-        super().__init__(name="root", mode=int(EntryMode.RDONLY_DIR), fuse=fuse)
+    name: str = field(init=False, default="root")
+    mode: int = field(init=False, default=int(EntryMode.RDONLY_DIR))
+    depth: int = field(init=False, default=1)
 
     async def __aiter__(self) -> AsyncIterator[FuseEntry]:
-        for entry in [ArchiveDir(self.fuse), MetaDir(self.fuse)]:
+        entries = [self.create_child(ArchiveDir), self.create_child(MetaDir)]
+        for entry in entries:
             yield entry
 
 
+@dataclass
 class ArchiveDir(FuseEntry):
     """ The archive/ directory is lazily populated with one entry per accessed
     SWHID, having actual SWHIDs as names """
 
-    def __init__(self, fuse: Fuse):
-        super().__init__(name="archive", mode=int(EntryMode.RDONLY_DIR), fuse=fuse)
+    name: str = field(init=False, default="archive")
+    mode: int = field(init=False, default=int(EntryMode.RDONLY_DIR))
 
     async def __aiter__(self) -> AsyncIterator[FuseEntry]:
         async for swhid in self.fuse.cache.get_cached_swhids():
@@ -38,9 +40,15 @@
                 mode = EntryMode.RDONLY_FILE
             else:
                 mode = EntryMode.RDONLY_DIR
-            yield typify(str(swhid), int(mode), self.fuse, swhid)
+            yield self.create_child(
+                OBJTYPE_GETTERS[swhid.object_type],
+                name=str(swhid),
+                mode=int(mode),
+                swhid=swhid,
+            )
 
 
+@dataclass
 class MetaDir(FuseEntry):
     """ The meta/ directory contains one SWHID.json file for each SWHID entry
     under archive/. The JSON file contain all available meta information about
@@ -49,24 +57,26 @@
     branches) the JSON file will contain a complete version with all pages
     merged together. """
 
-    def __init__(self, fuse: Fuse):
-        super().__init__(name="meta", mode=int(EntryMode.RDONLY_DIR), fuse=fuse)
+    name: str = field(init=False, default="meta")
+    mode: int = field(init=False, default=int(EntryMode.RDONLY_DIR))
 
     async def __aiter__(self) -> AsyncIterator[FuseEntry]:
         async for swhid in self.fuse.cache.get_cached_swhids():
-            yield MetaEntry(swhid, self.fuse)
+            yield self.create_child(
+                MetaEntry,
+                name=f"{swhid}.json",
+                mode=int(EntryMode.RDONLY_FILE),
+                swhid=swhid,
+            )
 
 
+@dataclass
 class MetaEntry(FuseEntry):
     """ An entry from the meta/ directory, containing for each accessed SWHID a
     corresponding SWHID.json file with all the metadata from the Software
     Heritage archive. """
 
-    def __init__(self, swhid: SWHID, fuse: Fuse):
-        super().__init__(
-            name=str(swhid) + ".json", mode=int(EntryMode.RDONLY_FILE), fuse=fuse
-        )
-        self.swhid = swhid
+    swhid: SWHID
 
     async def content(self) -> bytes:
         # Get raw JSON metadata from API (un-typified)
diff --git a/swh/fuse/fs/symlink.py b/swh/fuse/fs/symlink.py
new file mode 100644
--- /dev/null
+++ b/swh/fuse/fs/symlink.py
@@ -0,0 +1,29 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from dataclasses import dataclass, field
+import os
+from pathlib import Path
+from typing import Union
+
+from swh.fuse.fs.entry import EntryMode, FuseEntry
+
+
+@dataclass
+class SymlinkEntry(FuseEntry):
+    """ FUSE virtual entry for symlinks
+
+    Attributes:
+        target: path to symlink target
+    """
+
+    mode: int = field(init=False, default=int(EntryMode.SYMLINK))
+    target: Union[str, bytes, Path]
+
+    async def length(self) -> int:
+        """ Return the size in bytes of the encoded symlink target """
+        # Measure the same bytes readlink() hands back; str() on a bytes
+        # target would measure its repr (e.g. "b'foo'") instead
+        return len(os.fsencode(self.target))
diff --git a/swh/fuse/fuse.py b/swh/fuse/fuse.py
--- a/swh/fuse/fuse.py
+++ b/swh/fuse/fuse.py
@@ -193,6 +193,11 @@
         logging.error(f"Unknown name during lookup: '{name}'")
         raise pyfuse3.FUSEError(errno.ENOENT)
 
+    async def readlink(self, inode: int, _ctx: pyfuse3.RequestContext) -> bytes:
+        """ Return the target path of the symlink entry with the given inode """
+        entry = self.inode2entry(inode)
+        return os.fsencode(entry.target)
+
 
 async def main(swhids: List[SWHID], root_path: Path, conf: Dict[str, Any]) -> None:
     """ swh-fuse CLI entry-point """