Changeset View
Changeset View
Standalone View
Standalone View
swh/fuse/fs/artifact.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from dataclasses import dataclass | from dataclasses import dataclass | ||||
from pathlib import Path | from pathlib import Path | ||||
from typing import Any, AsyncIterator, List | from typing import Any, AsyncIterator, List | ||||
from swh.fuse.fs.entry import EntryMode, FuseEntry | from swh.fuse.fs.entry import ( | ||||
from swh.fuse.fs.symlink import SymlinkEntry | EntryMode, | ||||
FuseDirEntry, | |||||
FuseEntry, | |||||
FuseFileEntry, | |||||
FuseSymlinkEntry, | |||||
) | |||||
from swh.model.from_disk import DentryPerms | from swh.model.from_disk import DentryPerms | ||||
from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SWHID | from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SWHID | ||||
@dataclass
class ArtifactEntry(FuseEntry):
    """ Base FUSE entry for anything backed by a Software Heritage artifact.

    Attributes:
        swhid: persistent identifier (SWHID) of the artifact behind this entry
        prefetch: metadata obtained ahead of time, if any, from which entry
            attributes can be set; defaults to None
    """

    swhid: SWHID
    prefetch: Any = None
class Content(ArtifactEntry, FuseFileEntry):
    """ Software Heritage content artifact.

    Content leaves (AKA blobs) are represented on disk as regular files,
    containing the corresponding bytes, as archived.

    Note that permissions are associated to blobs only in the context of
    directories. Hence, when accessing blobs from the top-level `archive/`
    directory, the permissions of the `archive/SWHID` file will be arbitrary and
    not meaningful (e.g., `0o644`). """

    async def get_content(self) -> bytes:
        """ Return the bytes of the blob identified by `self.swhid`.

        When no prefetched metadata is set yet, the length of the fetched data
        is stored in `self.prefetch` so that `size()` can read it from there
        instead of calling this method again. """

        data = await self.fuse.get_blob(self.swhid)
        if not self.prefetch:
            self.prefetch = {"length": len(data)}
        return data

    async def size(self) -> int:
        """ Return the blob length in bytes.

        The value comes from `self.prefetch["length"]` when prefetched metadata
        is available; otherwise the blob is fetched and measured. """

        if self.prefetch:
            return self.prefetch["length"]
        return len(await self.get_content())
class Directory(ArtifactEntry): | class Directory(ArtifactEntry, FuseDirEntry): | ||||
""" Software Heritage directory artifact. | """ Software Heritage directory artifact. | ||||
Directory nodes are represented as directories on the file-system, | Directory nodes are represented as directories on the file-system, | ||||
containing one entry for each entry of the archived directory. Entry names | containing one entry for each entry of the archived directory. Entry names | ||||
and other metadata, including permissions, will correspond to the archived | and other metadata, including permissions, will correspond to the archived | ||||
entry metadata. | entry metadata. | ||||
Note that the FUSE mount is read-only, no matter what the permissions say. | Note that the FUSE mount is read-only, no matter what the permissions say. | ||||
Show All 11 Lines | async def __aiter__(self) -> AsyncIterator[FuseEntry]: | ||||
int(EntryMode.RDONLY_DIR) | int(EntryMode.RDONLY_DIR) | ||||
if swhid.object_type == DIRECTORY | if swhid.object_type == DIRECTORY | ||||
else entry["perms"] | else entry["perms"] | ||||
) | ) | ||||
# 1. Symlinks | # 1. Symlinks | ||||
if mode == DentryPerms.symlink: | if mode == DentryPerms.symlink: | ||||
yield self.create_child( | yield self.create_child( | ||||
SymlinkEntry, | FuseSymlinkEntry, | ||||
name=name, | name=name, | ||||
# Symlink target is stored in the blob content | # Symlink target is stored in the blob content | ||||
target=await self.fuse.get_blob(swhid), | target=await self.fuse.get_blob(swhid), | ||||
) | ) | ||||
# 2. Submodules | # 2. Submodules | ||||
elif swhid.object_type == REVISION: | elif swhid.object_type == REVISION: | ||||
# Make sure the revision metadata is fetched and create a | # Make sure the revision metadata is fetched and create a | ||||
# symlink to distinguish it with regular directories | # symlink to distinguish it with regular directories | ||||
await self.fuse.get_metadata(swhid) | await self.fuse.get_metadata(swhid) | ||||
yield self.create_child( | yield self.create_child( | ||||
SymlinkEntry, | FuseSymlinkEntry, | ||||
name=name, | name=name, | ||||
target=Path(self.get_relative_root_path(), f"archive/{swhid}"), | target=Path(self.get_relative_root_path(), f"archive/{swhid}"), | ||||
) | ) | ||||
# 3. Regular entries (directories, contents) | # 3. Regular entries (directories, contents) | ||||
else: | else: | ||||
yield self.create_child( | yield self.create_child( | ||||
OBJTYPE_GETTERS[swhid.object_type], | OBJTYPE_GETTERS[swhid.object_type], | ||||
name=name, | name=name, | ||||
mode=mode, | mode=mode, | ||||
swhid=swhid, | swhid=swhid, | ||||
# The directory API has extra info we can use to set | # The directory API has extra info we can use to set | ||||
# attributes without additional Software Heritage API call | # attributes without additional Software Heritage API call | ||||
prefetch=entry, | prefetch=entry, | ||||
) | ) | ||||
class Revision(ArtifactEntry): | class Revision(ArtifactEntry, FuseDirEntry): | ||||
""" Software Heritage revision artifact. | """ Software Heritage revision artifact. | ||||
Revision (AKA commit) nodes are represented on the file-system as | Revision (AKA commit) nodes are represented on the file-system as | ||||
directories with the following entries: | directories with the following entries: | ||||
- `root`: source tree at the time of the commit, as a symlink pointing into | - `root`: source tree at the time of the commit, as a symlink pointing into | ||||
`archive/`, to a SWHID of type `dir` | `archive/`, to a SWHID of type `dir` | ||||
- `parents/` (note the plural): a virtual directory containing entries named | - `parents/` (note the plural): a virtual directory containing entries named | ||||
Show All 14 Lines | async def __aiter__(self) -> AsyncIterator[FuseEntry]: | ||||
# Make sure all necessary metadatas are fetched | # Make sure all necessary metadatas are fetched | ||||
await self.fuse.get_metadata(directory) | await self.fuse.get_metadata(directory) | ||||
for parent in parents: | for parent in parents: | ||||
await self.fuse.get_metadata(parent["id"]) | await self.fuse.get_metadata(parent["id"]) | ||||
root_path = self.get_relative_root_path() | root_path = self.get_relative_root_path() | ||||
yield self.create_child( | yield self.create_child( | ||||
SymlinkEntry, name="root", target=Path(root_path, f"archive/{directory}"), | FuseSymlinkEntry, | ||||
name="root", | |||||
target=Path(root_path, f"archive/{directory}"), | |||||
) | ) | ||||
yield self.create_child( | yield self.create_child( | ||||
SymlinkEntry, | FuseSymlinkEntry, | ||||
name="meta.json", | name="meta.json", | ||||
target=Path(root_path, f"meta/{self.swhid}.json"), | target=Path(root_path, f"meta/{self.swhid}.json"), | ||||
) | ) | ||||
yield self.create_child( | yield self.create_child( | ||||
RevisionParents, | RevisionParents, | ||||
name="parents", | name="parents", | ||||
mode=int(EntryMode.RDONLY_DIR), | mode=int(EntryMode.RDONLY_DIR), | ||||
parents=[x["id"] for x in parents], | parents=[x["id"] for x in parents], | ||||
) | ) | ||||
if len(parents) >= 1: | if len(parents) >= 1: | ||||
yield self.create_child( | yield self.create_child( | ||||
SymlinkEntry, name="parent", target="parents/1/", | FuseSymlinkEntry, name="parent", target="parents/1/", | ||||
) | ) | ||||
@dataclass
class RevisionParents(FuseDirEntry):
    """ Virtual `parents/` directory exposed inside a revision entry.

    Attributes:
        parents: SWHIDs of the parent revisions, in the order in which they
            are numbered inside the directory
    """

    parents: List[SWHID]

    async def __aiter__(self) -> AsyncIterator[FuseEntry]:
        """ Yield one symlink per parent, named `1`, `2`, ... following the
        order of `self.parents`, each targeting `archive/<parent>` below the
        relative root path. """

        archive_base = self.get_relative_root_path()
        # Entry names are 1-based, hence start=1
        for position, parent_swhid in enumerate(self.parents, start=1):
            yield self.create_child(
                FuseSymlinkEntry,
                name=str(position),
                target=Path(archive_base, f"archive/{parent_swhid}"),
            )
class Release(ArtifactEntry): | class Release(ArtifactEntry, FuseDirEntry): | ||||
""" Software Heritage release artifact. | """ Software Heritage release artifact. | ||||
Release nodes are represented on the file-system as directories with the | Release nodes are represented on the file-system as directories with the | ||||
following entries: | following entries: | ||||
- `target`: target node, as a symlink to `archive/<SWHID>` | - `target`: target node, as a symlink to `archive/<SWHID>` | ||||
- `target_type`: regular file containing the type of the target SWHID | - `target_type`: regular file containing the type of the target SWHID | ||||
- `root`: present if and only if the release points to something that | - `root`: present if and only if the release points to something that | ||||
Show All 14 Lines | async def find_root_directory(self, swhid: SWHID) -> SWHID: | ||||
else: | else: | ||||
return None | return None | ||||
async def __aiter__(self) -> AsyncIterator[FuseEntry]:
    """ Yield the entries of the release directory, in this order:

    - `meta.json`: symlink to `meta/<SWHID>.json`
    - `target`: symlink to `archive/<target SWHID>`
    - `target_type`: read-only virtual file holding the target object type
    - `root`: symlink to `archive/<directory SWHID>`, only when
      `find_root_directory` returns a directory for the target
    """

    release_meta = await self.fuse.get_metadata(self.swhid)
    base = self.get_relative_root_path()

    yield self.create_child(
        FuseSymlinkEntry,
        name="meta.json",
        target=Path(base, f"meta/{self.swhid}.json"),
    )

    target_swhid = release_meta["target"]
    yield self.create_child(
        FuseSymlinkEntry,
        name="target",
        target=Path(base, f"archive/{target_swhid}"),
    )
    yield self.create_child(
        ReleaseType,
        name="target_type",
        mode=int(EntryMode.RDONLY_FILE),
        target_type=target_swhid.object_type,
    )

    # The root lookup is only awaited once the first three entries have
    # been consumed
    root_dir = await self.find_root_directory(target_swhid)
    if root_dir is None:
        return
    yield self.create_child(
        FuseSymlinkEntry,
        name="root",
        target=Path(base, f"archive/{root_dir}"),
    )
@dataclass | @dataclass | ||||
class ReleaseType(FuseEntry): | class ReleaseType(FuseFileEntry): | ||||
""" Release type virtual file """ | """ Release type virtual file """ | ||||
target_type: str | target_type: str | ||||
async def get_content(self) -> bytes:
    """ Return `self.target_type` followed by a newline, encoded to bytes. """
    line = self.target_type + "\n"
    return line.encode()
async def size(self) -> int: | async def size(self) -> int: | ||||
Show All 9 Lines |