diff --git a/swh/fuse/fs/artifact.py b/swh/fuse/fs/artifact.py
--- a/swh/fuse/fs/artifact.py
+++ b/swh/fuse/fs/artifact.py
@@ -3,10 +3,13 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-from typing import Any, AsyncIterator
+from pathlib import Path
+from typing import Any, AsyncIterator, List
 
 from swh.fuse.fs.entry import EntryMode, FuseEntry
-from swh.model.identifiers import CONTENT, DIRECTORY, SWHID
+from swh.fuse.fs.symlink import SymlinkEntry
+from swh.model.from_disk import DentryPerms
+from swh.model.identifiers import CONTENT, DIRECTORY, REVISION, SWHID
 
 # Avoid cycling import
 Fuse = "Fuse"
@@ -21,20 +24,26 @@
     """
 
     def __init__(
-        self, name: str, mode: int, fuse: Fuse, swhid: SWHID, prefetch: Any = None
+        self,
+        name: str,
+        mode: int,
+        depth: int,
+        fuse: Fuse,
+        swhid: SWHID,
+        prefetch: Any = None,
     ):
-        super().__init__(name, mode, fuse)
+        super().__init__(name, mode, depth, fuse)
         self.swhid = swhid
         self.prefetch = prefetch
 
 
 def typify(
-    name: str, mode: int, fuse: Fuse, swhid: SWHID, prefetch: Any = None
-) -> ArtifactEntry:
+    name: str, mode: int, depth: int, fuse: Fuse, swhid: SWHID, prefetch: Any = None
+) -> FuseEntry:
     """ Create an artifact entry corresponding to the given artifact type """
 
-    getters = {CONTENT: Content, DIRECTORY: Directory}
-    return getters[swhid.object_type](name, mode, fuse, swhid, prefetch)
+    getters = {CONTENT: Content, DIRECTORY: Directory, REVISION: Revision}
+    return getters[swhid.object_type](name, mode, depth, fuse, swhid, prefetch)
 
 
 class Content(ArtifactEntry):
@@ -76,18 +85,122 @@
     async def __aiter__(self) -> AsyncIterator[ArtifactEntry]:
         metadata = await self.fuse.get_metadata(self.swhid)
         for entry in metadata:
-            yield typify(
-                name=entry["name"],
-                # Use default read-only permissions for directories, and
-                # archived permissions for contents
-                mode=(
-                    entry["perms"]
-                    if entry["target"].object_type == CONTENT
-                    else int(EntryMode.RDONLY_DIR)
-                ),
+            name = entry["name"]
+            swhid = entry["target"]
+            mode = (
+                # Archived permissions for directories are always set to
+                # 0o040000 so use a read-only permission instead
+                int(EntryMode.RDONLY_DIR)
+                if swhid.object_type == DIRECTORY
+                else entry["perms"]
+            )
+            depth = self.depth + 1
+            fuse = self.fuse
+            # The directory API has extra info we can use to set attributes
+            # without additional Software Heritage API call
+            prefetch = entry
+
+            # 1. Symlinks
+            if mode == DentryPerms.symlink:
+                # Symlink target is stored in the blob content
+                target = await fuse.get_blob(swhid)
+                yield SymlinkEntry(name, depth, fuse, target)
+            # 2. Submodules
+            elif swhid.object_type == REVISION:
+                # Create a symlink to distinguish it from regular directories
+                await fuse.get_metadata(swhid)
+                target = Path(self.get_root_path(), f"archive/{swhid}")
+                yield SymlinkEntry(name, depth, fuse, target)
+            # 3. Regular entries (directories, contents)
+            else:
+                yield typify(name, mode, depth, fuse, swhid, prefetch)
+
+
+class Revision(ArtifactEntry):
+    """ Software Heritage revision artifact.
+
+    Revision (AKA commit) nodes are represented on the file-system as
+    directories with the following entries:
+
+    - `root`: source tree at the time of the commit, as a symlink pointing into
+      `archive/`, to a SWHID of type `dir`
+    - `parents/` (note the plural): a virtual directory containing entries named
+      `1`, `2`, `3`, etc., one for each parent commit. Each of these entries is a
+      symlink pointing into `archive/`, to the SWHID file for the given parent
+      commit
+    - `parent` (note the singular): present if and only if the current commit
+      has a single parent commit (which is the most common case). When present
+      it is a symlink pointing into `archive/` to the SWHID for the sole parent
+      commit
+    - `meta.json`: metadata for the current node, as a symlink pointing to the
+      relevant `meta/<SWHID>.json` file """
+
+    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+        metadata = await self.fuse.get_metadata(self.swhid)
+        directory = metadata["directory"]
+        parents = metadata["parents"]
+
+        # Make sure all necessary metadata are fetched
+        await self.fuse.get_metadata(directory)
+        for parent in parents:
+            await self.fuse.get_metadata(parent["id"])
+
+        root_path = self.get_root_path()
+
+        entries = [
+            SymlinkEntry(
+                name="root",
+                depth=self.depth + 1,
+                fuse=self.fuse,
+                target=Path(root_path, f"archive/{directory}"),
+            ),
+            SymlinkEntry(
+                name="meta.json",
+                depth=self.depth + 1,
+                fuse=self.fuse,
+                target=Path(root_path, f"meta/{self.swhid}.json"),
+            ),
+        ]
+
+        if len(parents) == 1:
+            entries.append(
+                SymlinkEntry(
+                    name="parent",
+                    depth=self.depth + 1,
+                    fuse=self.fuse,
+                    target=Path(root_path, f"archive/{parents[0]['id']}"),
+                )
+            )
+        elif len(parents) > 1:
+            entries.append(
+                RevisionParents(
+                    name="parents",
+                    mode=int(EntryMode.RDONLY_DIR),
+                    depth=self.depth + 1,
+                    fuse=self.fuse,
+                    parents=[x["id"] for x in parents],
+                )
+            )
+
+        for entry in entries:
+            yield entry
+
+
+class RevisionParents(FuseEntry):
+    """ Revision virtual `parents/` directory """
+
+    def __init__(
+        self, name: str, mode: int, depth: int, fuse: Fuse, parents: List[SWHID]
+    ):
+        super().__init__(name, mode, depth, fuse)
+        self.parents = parents
+
+    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
+        root_path = self.get_root_path()
+        for i, parent in enumerate(self.parents):
+            yield SymlinkEntry(
+                name=str(i + 1),
+                depth=self.depth + 1,
                 fuse=self.fuse,
-                swhid=entry["target"],
-                # The directory API has extra info we can use to set attributes
-                # without additional Software Heritage API call
-                prefetch=entry,
+                target=Path(root_path, f"archive/{parent}"),
             )
diff --git a/swh/fuse/fs/entry.py b/swh/fuse/fs/entry.py
--- a/swh/fuse/fs/entry.py
+++ b/swh/fuse/fs/entry.py
@@ -4,7 +4,7 @@
 # See top-level LICENSE file for more information
 
 from enum import IntEnum
-from stat import S_IFDIR, S_IFREG
+from stat import S_IFDIR, S_IFLNK, S_IFREG
 
 # Avoid cycling import
 Fuse = "Fuse"
@@ -20,6 +20,7 @@
 
     RDONLY_FILE = S_IFREG | 0o444
     RDONLY_DIR = S_IFDIR | 0o555
+    SYMLINK = S_IFLNK | 0o444
 
 
 class FuseEntry:
@@ -32,9 +33,10 @@
         inode: unique integer identifying the entry
     """
 
-    def __init__(self, name: str, mode: int, fuse: Fuse):
+    def __init__(self, name: str, mode: int, depth: int, fuse: Fuse):
         self.name = name
         self.mode = mode
+        self.depth = depth
         self.fuse = fuse
         self.inode = fuse._alloc_inode(self)
 
@@ -46,3 +48,7 @@
 
     async def __aiter__(self):
         return None
+
+    def get_root_path(self) -> str:
+        """ Relative path (`../` repeated `depth` times) up to the mountpoint """
+        return "../" * self.depth
diff --git a/swh/fuse/fs/mountpoint.py b/swh/fuse/fs/mountpoint.py
--- a/swh/fuse/fs/mountpoint.py
+++ b/swh/fuse/fs/mountpoint.py
@@ -18,10 +18,16 @@
     """ The FUSE mountpoint, consisting of the archive/ and meta/ directories """
 
     def __init__(self, fuse: Fuse):
-        super().__init__(name="root", mode=int(EntryMode.RDONLY_DIR), fuse=fuse)
+        super().__init__(
+            name="root", mode=int(EntryMode.RDONLY_DIR), depth=0, fuse=fuse
+        )
 
     async def __aiter__(self) -> AsyncIterator[FuseEntry]:
-        for entry in [ArchiveDir(self.fuse), MetaDir(self.fuse)]:
+        entries = [
+            ArchiveDir(self.depth + 1, self.fuse),
+            MetaDir(self.depth + 1, self.fuse),
+        ]
+        for entry in entries:
             yield entry
 
 
@@ -29,8 +35,10 @@
     """ The archive/ directory is lazily populated with one entry per accessed
     SWHID, having actual SWHIDs as names """
 
-    def __init__(self, fuse: Fuse):
-        super().__init__(name="archive", mode=int(EntryMode.RDONLY_DIR), fuse=fuse)
+    def __init__(self, depth: int, fuse: Fuse):
+        super().__init__(
+            name="archive", mode=int(EntryMode.RDONLY_DIR), depth=depth, fuse=fuse
+        )
 
     async def __aiter__(self) -> AsyncIterator[FuseEntry]:
         async for swhid in self.fuse.cache.get_cached_swhids():
@@ -38,7 +46,7 @@
                 mode = EntryMode.RDONLY_FILE
             else:
                 mode = EntryMode.RDONLY_DIR
-            yield typify(str(swhid), int(mode), self.fuse, swhid)
+            yield typify(str(swhid), int(mode), self.depth + 1, self.fuse, swhid)
 
 
 class MetaDir(FuseEntry):
@@ -49,12 +57,14 @@
     branches) the JSON file will contain a complete version with all pages
     merged together. """
 
-    def __init__(self, fuse: Fuse):
-        super().__init__(name="meta", mode=int(EntryMode.RDONLY_DIR), fuse=fuse)
+    def __init__(self, depth: int, fuse: Fuse):
+        super().__init__(
+            name="meta", mode=int(EntryMode.RDONLY_DIR), depth=depth, fuse=fuse
+        )
 
     async def __aiter__(self) -> AsyncIterator[FuseEntry]:
         async for swhid in self.fuse.cache.get_cached_swhids():
-            yield MetaEntry(swhid, self.fuse)
+            yield MetaEntry(swhid, self.depth + 1, self.fuse)
 
 
 class MetaEntry(FuseEntry):
@@ -62,9 +72,12 @@
     corresponding SWHID.json file with all the metadata from the Software
     Heritage archive. """
 
-    def __init__(self, swhid: SWHID, fuse: Fuse):
+    def __init__(self, swhid: SWHID, depth: int, fuse: Fuse):
         super().__init__(
-            name=str(swhid) + ".json", mode=int(EntryMode.RDONLY_FILE), fuse=fuse
+            name=str(swhid) + ".json",
+            mode=int(EntryMode.RDONLY_FILE),
+            depth=depth,
+            fuse=fuse,
         )
         self.swhid = swhid
 
diff --git a/swh/fuse/fs/symlink.py b/swh/fuse/fs/symlink.py
new file mode 100644
--- /dev/null
+++ b/swh/fuse/fs/symlink.py
@@ -0,0 +1,31 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+from pathlib import Path
+from typing import Union
+
+from swh.fuse.fs.entry import EntryMode, FuseEntry
+
+# Avoid cycling import
+Fuse = "Fuse"
+
+
+class SymlinkEntry(FuseEntry):
+    """ FUSE virtual entry for symlinks
+
+    Attributes:
+        target: path to symlink target
+    """
+
+    def __init__(
+        self, name: str, depth: int, fuse: Fuse, target: Union[str, bytes, Path]
+    ):
+        super().__init__(name, int(EntryMode.SYMLINK), depth, fuse)
+        self.target = target
+
+    async def length(self) -> int:
+        """ Size of the link: byte length of the target, as sent by readlink """
+        return len(os.fsencode(self.target))
diff --git a/swh/fuse/fuse.py b/swh/fuse/fuse.py
--- a/swh/fuse/fuse.py
+++ b/swh/fuse/fuse.py
@@ -193,6 +193,10 @@
         logging.error(f"Unknown name during lookup: '{name}'")
         raise pyfuse3.FUSEError(errno.ENOENT)
 
+    async def readlink(self, inode: int, _ctx: pyfuse3.RequestContext) -> bytes:
+        entry = self.inode2entry(inode)
+        return os.fsencode(entry.target)
+
 
 async def main(swhids: List[SWHID], root_path: Path, conf: Dict[str, Any]) -> None:
     """ swh-fuse CLI entry-point """