Changeset View
Changeset View
Standalone View
Standalone View
swh/fuse/fs/artifact.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from dataclasses import dataclass | from dataclasses import dataclass | ||||
from pathlib import Path | from pathlib import Path | ||||
from typing import Any, AsyncIterator, List | from typing import Any, AsyncIterator, List | ||||
from swh.fuse.fs.entry import EntryMode, FuseEntry | from swh.fuse.fs.entry import ( | ||||
from swh.fuse.fs.symlink import SymlinkEntry | EntryMode, | ||||
FuseDirEntry, | |||||
FuseEntry, | |||||
FuseFileEntry, | |||||
FuseSymlinkEntry, | |||||
) | |||||
from swh.model.from_disk import DentryPerms | from swh.model.from_disk import DentryPerms | ||||
from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SWHID | from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SWHID | ||||
@dataclass | @dataclass | ||||
class ArtifactEntry(FuseEntry): | class Content(FuseFileEntry): | ||||
""" FUSE virtual entry for a Software Heritage Artifact | """ Software Heritage content artifact. | ||||
Attributes: | Attributes: | ||||
swhid: Software Heritage persistent identifier | swhid: Software Heritage persistent identifier | ||||
prefetch: optional prefetched metadata used to set entry attributes | prefetch: optional prefetched metadata used to set entry attributes | ||||
""" | |||||
swhid: SWHID | |||||
prefetch: Any = None | |||||
class Content(ArtifactEntry): | |||||
""" Software Heritage content artifact. | |||||
Content leaves (AKA blobs) are represented on disks as regular files, | Content leaves (AKA blobs) are represented on disks as regular files, | ||||
containing the corresponding bytes, as archived. | containing the corresponding bytes, as archived. | ||||
Note that permissions are associated to blobs only in the context of | Note that permissions are associated to blobs only in the context of | ||||
directories. Hence, when accessing blobs from the top-level `archive/` | directories. Hence, when accessing blobs from the top-level `archive/` | ||||
directory, the permissions of the `archive/SWHID` file will be arbitrary and | directory, the permissions of the `archive/SWHID` file will be arbitrary and | ||||
not meaningful (e.g., `0o644`). """ | not meaningful (e.g., `0o644`). """ | ||||
swhid: SWHID | |||||
prefetch: Any = None | |||||
async def get_content(self) -> bytes: | async def get_content(self) -> bytes: | ||||
data = await self.fuse.get_blob(self.swhid) | data = await self.fuse.get_blob(self.swhid) | ||||
if not self.prefetch: | if not self.prefetch: | ||||
self.prefetch = {"length": len(data)} | self.prefetch = {"length": len(data)} | ||||
return data | return data | ||||
async def size(self) -> int: | async def size(self) -> int: | ||||
if self.prefetch: | if self.prefetch: | ||||
return self.prefetch["length"] | return self.prefetch["length"] | ||||
else: | else: | ||||
return len(await self.get_content()) | return len(await self.get_content()) | ||||
async def __aiter__(self): | |||||
raise ValueError("Cannot iterate over a content type artifact") | |||||
class Directory(ArtifactEntry): | @dataclass | ||||
class Directory(FuseDirEntry): | |||||
""" Software Heritage directory artifact. | """ Software Heritage directory artifact. | ||||
Attributes: | |||||
swhid: Software Heritage persistent identifier | |||||
Directory nodes are represented as directories on the file-system, | Directory nodes are represented as directories on the file-system, | ||||
containing one entry for each entry of the archived directory. Entry names | containing one entry for each entry of the archived directory. Entry names | ||||
and other metadata, including permissions, will correspond to the archived | and other metadata, including permissions, will correspond to the archived | ||||
entry metadata. | entry metadata. | ||||
Note that the FUSE mount is read-only, no matter what the permissions say. | Note that the FUSE mount is read-only, no matter what the permissions say. | ||||
So it is possible that, in the context of a directory, a file is presented | So it is possible that, in the context of a directory, a file is presented | ||||
as writable, whereas actually writing to it will fail with `EPERM`. """ | as writable, whereas actually writing to it will fail with `EPERM`. """ | ||||
swhid: SWHID | |||||
async def __aiter__(self) -> AsyncIterator[FuseEntry]: | async def __aiter__(self) -> AsyncIterator[FuseEntry]: | ||||
metadata = await self.fuse.get_metadata(self.swhid) | metadata = await self.fuse.get_metadata(self.swhid) | ||||
for entry in metadata: | for entry in metadata: | ||||
name = entry["name"] | name = entry["name"] | ||||
swhid = entry["target"] | swhid = entry["target"] | ||||
mode = ( | mode = ( | ||||
# Archived permissions for directories are always set to | # Archived permissions for directories are always set to | ||||
# 0o040000 so use a read-only permission instead | # 0o040000 so use a read-only permission instead | ||||
int(EntryMode.RDONLY_DIR) | int(EntryMode.RDONLY_DIR) | ||||
if swhid.object_type == DIRECTORY | if swhid.object_type == DIRECTORY | ||||
else entry["perms"] | else entry["perms"] | ||||
) | ) | ||||
# 1. Symlinks | # 1. Regular file | ||||
if mode == DentryPerms.symlink: | if swhid.object_type == CONTENT: | ||||
yield self.create_child( | |||||
Content, | |||||
name=name, | |||||
mode=mode, | |||||
swhid=swhid, | |||||
# The directory API has extra info we can use to set | |||||
# attributes without additional Software Heritage API call | |||||
prefetch=entry, | |||||
) | |||||
# 2. Regular directory | |||||
elif swhid.object_type == DIRECTORY: | |||||
yield self.create_child( | |||||
Directory, name=name, mode=mode, swhid=swhid, | |||||
) | |||||
# 3. Symlink | |||||
elif mode == DentryPerms.symlink: | |||||
yield self.create_child( | yield self.create_child( | ||||
SymlinkEntry, | FuseSymlinkEntry, | ||||
name=name, | name=name, | ||||
# Symlink target is stored in the blob content | # Symlink target is stored in the blob content | ||||
target=await self.fuse.get_blob(swhid), | target=await self.fuse.get_blob(swhid), | ||||
) | ) | ||||
# 2. Submodules | # 4. Submodule | ||||
elif swhid.object_type == REVISION: | elif swhid.object_type == REVISION: | ||||
# Make sure the revision metadata is fetched and create a | # Make sure the revision metadata is fetched and create a | ||||
# symlink to distinguish it from regular directories | # symlink to distinguish it from regular directories | ||||
await self.fuse.get_metadata(swhid) | await self.fuse.get_metadata(swhid) | ||||
yield self.create_child( | yield self.create_child( | ||||
SymlinkEntry, | FuseSymlinkEntry, | ||||
name=name, | name=name, | ||||
target=Path(self.get_relative_root_path(), f"archive/{swhid}"), | target=Path(self.get_relative_root_path(), f"archive/{swhid}"), | ||||
) | ) | ||||
# 3. Regular entries (directories, contents) | |||||
else: | else: | ||||
yield self.create_child( | raise ValueError("Unknown directory entry type: {swhid.object_type}") | ||||
OBJTYPE_GETTERS[swhid.object_type], | |||||
name=name, | |||||
mode=mode, | |||||
swhid=swhid, | |||||
# The directory API has extra info we can use to set | |||||
# attributes without additional Software Heritage API call | |||||
prefetch=entry, | |||||
) | |||||
class Revision(ArtifactEntry): | @dataclass | ||||
class Revision(FuseDirEntry): | |||||
""" Software Heritage revision artifact. | """ Software Heritage revision artifact. | ||||
Attributes: | |||||
swhid: Software Heritage persistent identifier | |||||
Revision (AKA commit) nodes are represented on the file-system as | Revision (AKA commit) nodes are represented on the file-system as | ||||
directories with the following entries: | directories with the following entries: | ||||
- `root`: source tree at the time of the commit, as a symlink pointing into | - `root`: source tree at the time of the commit, as a symlink pointing into | ||||
`archive/`, to a SWHID of type `dir` | `archive/`, to a SWHID of type `dir` | ||||
- `parents/` (note the plural): a virtual directory containing entries named | - `parents/` (note the plural): a virtual directory containing entries named | ||||
`1`, `2`, `3`, etc., one for each parent commit. Each of these entries is a | `1`, `2`, `3`, etc., one for each parent commit. Each of these entries is a | ||||
symlink pointing into `archive/`, to the SWHID file for the given parent | symlink pointing into `archive/`, to the SWHID file for the given parent | ||||
commit | commit | ||||
- `parent` (note the singular): present if and only if the current commit | - `parent` (note the singular): present if and only if the current commit | ||||
has at least one parent commit (which is the most common case). When | has at least one parent commit (which is the most common case). When | ||||
present it is a symlink pointing into `parents/1/` | present it is a symlink pointing into `parents/1/` | ||||
- `meta.json`: metadata for the current node, as a symlink pointing to the | - `meta.json`: metadata for the current node, as a symlink pointing to the | ||||
relevant `meta/<SWHID>.json` file """ | relevant `meta/<SWHID>.json` file """ | ||||
swhid: SWHID | |||||
async def __aiter__(self) -> AsyncIterator[FuseEntry]: | async def __aiter__(self) -> AsyncIterator[FuseEntry]: | ||||
metadata = await self.fuse.get_metadata(self.swhid) | metadata = await self.fuse.get_metadata(self.swhid) | ||||
directory = metadata["directory"] | directory = metadata["directory"] | ||||
parents = metadata["parents"] | parents = metadata["parents"] | ||||
# Make sure all necessary metadata is fetched | # Make sure all necessary metadata is fetched | ||||
await self.fuse.get_metadata(directory) | await self.fuse.get_metadata(directory) | ||||
for parent in parents: | for parent in parents: | ||||
await self.fuse.get_metadata(parent["id"]) | await self.fuse.get_metadata(parent["id"]) | ||||
root_path = self.get_relative_root_path() | root_path = self.get_relative_root_path() | ||||
yield self.create_child( | yield self.create_child( | ||||
SymlinkEntry, name="root", target=Path(root_path, f"archive/{directory}"), | FuseSymlinkEntry, | ||||
name="root", | |||||
target=Path(root_path, f"archive/{directory}"), | |||||
) | ) | ||||
yield self.create_child( | yield self.create_child( | ||||
SymlinkEntry, | FuseSymlinkEntry, | ||||
name="meta.json", | name="meta.json", | ||||
target=Path(root_path, f"meta/{self.swhid}.json"), | target=Path(root_path, f"meta/{self.swhid}.json"), | ||||
) | ) | ||||
yield self.create_child( | yield self.create_child( | ||||
RevisionParents, | RevisionParents, | ||||
name="parents", | name="parents", | ||||
mode=int(EntryMode.RDONLY_DIR), | mode=int(EntryMode.RDONLY_DIR), | ||||
parents=[x["id"] for x in parents], | parents=[x["id"] for x in parents], | ||||
) | ) | ||||
if len(parents) >= 1: | if len(parents) >= 1: | ||||
yield self.create_child( | yield self.create_child( | ||||
SymlinkEntry, name="parent", target="parents/1/", | FuseSymlinkEntry, name="parent", target="parents/1/", | ||||
) | ) | ||||
@dataclass | @dataclass | ||||
class RevisionParents(FuseEntry): | class RevisionParents(FuseDirEntry): | ||||
""" Revision virtual `parents/` directory """ | """ Revision virtual `parents/` directory """ | ||||
parents: List[SWHID] | parents: List[SWHID] | ||||
async def __aiter__(self) -> AsyncIterator[FuseEntry]: | async def __aiter__(self) -> AsyncIterator[FuseEntry]: | ||||
root_path = self.get_relative_root_path() | root_path = self.get_relative_root_path() | ||||
for i, parent in enumerate(self.parents): | for i, parent in enumerate(self.parents): | ||||
yield self.create_child( | yield self.create_child( | ||||
SymlinkEntry, | FuseSymlinkEntry, | ||||
name=str(i + 1), | name=str(i + 1), | ||||
target=Path(root_path, f"archive/{parent}"), | target=Path(root_path, f"archive/{parent}"), | ||||
) | ) | ||||
class Release(ArtifactEntry): | @dataclass | ||||
class Release(FuseDirEntry): | |||||
""" Software Heritage release artifact. | """ Software Heritage release artifact. | ||||
Attributes: | |||||
swhid: Software Heritage persistent identifier | |||||
Release nodes are represented on the file-system as directories with the | Release nodes are represented on the file-system as directories with the | ||||
following entries: | following entries: | ||||
- `target`: target node, as a symlink to `archive/<SWHID>` | - `target`: target node, as a symlink to `archive/<SWHID>` | ||||
- `target_type`: regular file containing the type of the target SWHID | - `target_type`: regular file containing the type of the target SWHID | ||||
- `root`: present if and only if the release points to something that | - `root`: present if and only if the release points to something that | ||||
(transitively) resolves to a directory. When present it is a symlink | (transitively) resolves to a directory. When present it is a symlink | ||||
pointing into `archive/` to the SWHID of the given directory | pointing into `archive/` to the SWHID of the given directory | ||||
- `meta.json`: metadata for the current node, as a symlink pointing to the | - `meta.json`: metadata for the current node, as a symlink pointing to the | ||||
relevant `meta/<SWHID>.json` file """ | relevant `meta/<SWHID>.json` file """ | ||||
swhid: SWHID | |||||
async def find_root_directory(self, swhid: SWHID) -> SWHID: | async def find_root_directory(self, swhid: SWHID) -> SWHID: | ||||
if swhid.object_type == RELEASE: | if swhid.object_type == RELEASE: | ||||
metadata = await self.fuse.get_metadata(swhid) | metadata = await self.fuse.get_metadata(swhid) | ||||
return await self.find_root_directory(metadata["target"]) | return await self.find_root_directory(metadata["target"]) | ||||
elif swhid.object_type == REVISION: | elif swhid.object_type == REVISION: | ||||
metadata = await self.fuse.get_metadata(swhid) | metadata = await self.fuse.get_metadata(swhid) | ||||
return metadata["directory"] | return metadata["directory"] | ||||
elif swhid.object_type == DIRECTORY: | elif swhid.object_type == DIRECTORY: | ||||
return swhid | return swhid | ||||
else: | else: | ||||
return None | return None | ||||
async def __aiter__(self) -> AsyncIterator[FuseEntry]: | async def __aiter__(self) -> AsyncIterator[FuseEntry]: | ||||
metadata = await self.fuse.get_metadata(self.swhid) | metadata = await self.fuse.get_metadata(self.swhid) | ||||
root_path = self.get_relative_root_path() | root_path = self.get_relative_root_path() | ||||
yield self.create_child( | yield self.create_child( | ||||
SymlinkEntry, | FuseSymlinkEntry, | ||||
name="meta.json", | name="meta.json", | ||||
target=Path(root_path, f"meta/{self.swhid}.json"), | target=Path(root_path, f"meta/{self.swhid}.json"), | ||||
) | ) | ||||
target = metadata["target"] | target = metadata["target"] | ||||
yield self.create_child( | yield self.create_child( | ||||
SymlinkEntry, name="target", target=Path(root_path, f"archive/{target}") | FuseSymlinkEntry, name="target", target=Path(root_path, f"archive/{target}") | ||||
) | ) | ||||
yield self.create_child( | yield self.create_child( | ||||
ReleaseType, | ReleaseType, | ||||
name="target_type", | name="target_type", | ||||
mode=int(EntryMode.RDONLY_FILE), | mode=int(EntryMode.RDONLY_FILE), | ||||
target_type=target.object_type, | target_type=target.object_type, | ||||
) | ) | ||||
target_dir = await self.find_root_directory(target) | target_dir = await self.find_root_directory(target) | ||||
if target_dir is not None: | if target_dir is not None: | ||||
yield self.create_child( | yield self.create_child( | ||||
SymlinkEntry, | FuseSymlinkEntry, | ||||
name="root", | name="root", | ||||
target=Path(root_path, f"archive/{target_dir}"), | target=Path(root_path, f"archive/{target_dir}"), | ||||
) | ) | ||||
@dataclass | @dataclass | ||||
class ReleaseType(FuseEntry): | class ReleaseType(FuseFileEntry): | ||||
""" Release type virtual file """ | """ Release type virtual file """ | ||||
target_type: str | target_type: str | ||||
async def get_content(self) -> bytes: | async def get_content(self) -> bytes: | ||||
return str.encode(self.target_type + "\n") | return str.encode(self.target_type + "\n") | ||||
async def size(self) -> int: | async def size(self) -> int: | ||||
Show All 9 Lines |