Changeset View
Changeset View
Standalone View
Standalone View
swh/fuse/fs/mountpoint.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from dataclasses import dataclass, field | from dataclasses import dataclass, field | ||||
import json | import json | ||||
from pathlib import Path | |||||
import re | import re | ||||
from typing import AsyncIterator | from typing import AsyncIterator | ||||
from swh.fuse.fs.artifact import OBJTYPE_GETTERS, SWHID_REGEXP, Origin | from swh.fuse.fs.artifact import OBJTYPE_GETTERS, SWHID_REGEXP, Origin | ||||
from swh.fuse.fs.entry import EntryMode, FuseDirEntry, FuseEntry, FuseFileEntry | from swh.fuse.fs.entry import ( | ||||
EntryMode, | |||||
FuseDirEntry, | |||||
FuseEntry, | |||||
FuseFileEntry, | |||||
FuseSymlinkEntry, | |||||
) | |||||
from swh.model.exceptions import ValidationError | from swh.model.exceptions import ValidationError | ||||
from swh.model.identifiers import CONTENT, SWHID, parse_swhid | from swh.model.identifiers import CONTENT, SWHID, parse_swhid | ||||
JSON_SUFFIX = ".json" | |||||
@dataclass
class Root(FuseDirEntry):
    """ Top-level directory of the FUSE mountpoint.

    Its entries are the archive/, origin/ and cache/ virtual directories. """

    name: str = field(init=False, default=None)
    mode: int = field(init=False, default=int(EntryMode.RDONLY_DIR))
    depth: int = field(init=False, default=1)

    async def compute_entries(self) -> AsyncIterator[FuseEntry]:
        # One child per top-level virtual directory, yielded in this order.
        for child_cls in (ArchiveDir, OriginDir, CacheDir):
            yield self.create_child(child_cls)
@dataclass | @dataclass | ||||
class ArchiveDir(FuseDirEntry): | class ArchiveDir(FuseDirEntry): | ||||
""" The `archive/` virtual directory allows to mount any artifact on the fly | """ The `archive/` virtual directory allows to mount any artifact on the fly | ||||
using its SWHID as name. The associated metadata of the artifact from the | using its SWHID as name. The associated metadata of the artifact from the | ||||
Software Heritage Web API can also be accessed through the `SWHID.json` file | Software Heritage Web API can also be accessed through the `SWHID.json` file | ||||
(in case of pagination, the JSON file will contain a complete version with | (in case of pagination, the JSON file will contain a complete version with | ||||
all pages merged together). Note: the archive directory cannot be listed | all pages merged together). Note: the archive directory cannot be listed | ||||
with ls, but entries in it can be accessed (e.g., using cat or cd). """ | with ls, but entries in it can be accessed (e.g., using cat or cd). """ | ||||
name: str = field(init=False, default="archive") | name: str = field(init=False, default="archive") | ||||
mode: int = field(init=False, default=int(EntryMode.RDONLY_DIR)) | mode: int = field(init=False, default=int(EntryMode.RDONLY_DIR)) | ||||
ENTRIES_REGEXP = re.compile(r"^(" + SWHID_REGEXP + ")(.json)?$") | ENTRIES_REGEXP = re.compile(r"^(" + SWHID_REGEXP + ")(.json)?$") | ||||
JSON_SUFFIX = ".json" | |||||
async def compute_entries(self) -> AsyncIterator[FuseEntry]: | async def compute_entries(self) -> AsyncIterator[FuseEntry]: | ||||
return | return | ||||
yield | yield | ||||
async def lookup(self, name: str) -> FuseEntry: | async def lookup(self, name: str) -> FuseEntry: | ||||
# On the fly mounting of a new artifact | # On the fly mounting of a new artifact | ||||
try: | try: | ||||
if name.endswith(self.JSON_SUFFIX): | if name.endswith(JSON_SUFFIX): | ||||
swhid = parse_swhid(name[: -len(self.JSON_SUFFIX)]) | swhid = parse_swhid(name[: -len(JSON_SUFFIX)]) | ||||
return self.create_child( | return self.create_child( | ||||
MetaEntry, | MetaEntry, | ||||
name=f"{swhid}{self.JSON_SUFFIX}", | name=f"{swhid}{JSON_SUFFIX}", | ||||
mode=int(EntryMode.RDONLY_FILE), | mode=int(EntryMode.RDONLY_FILE), | ||||
swhid=swhid, | swhid=swhid, | ||||
) | ) | ||||
else: | else: | ||||
swhid = parse_swhid(name) | swhid = parse_swhid(name) | ||||
await self.fuse.get_metadata(swhid) | await self.fuse.get_metadata(swhid) | ||||
return self.create_child( | return self.create_child( | ||||
OBJTYPE_GETTERS[swhid.object_type], | OBJTYPE_GETTERS[swhid.object_type], | ||||
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | async def lookup(self, name: str) -> FuseEntry: | ||||
# On the fly mounting of new origin url | # On the fly mounting of new origin url | ||||
try: | try: | ||||
url_encoded = name | url_encoded = name | ||||
await self.fuse.get_visits(url_encoded) | await self.fuse.get_visits(url_encoded) | ||||
return self.create_child(url_encoded) | return self.create_child(url_encoded) | ||||
except ValueError: | except ValueError: | ||||
return None | return None | ||||
@dataclass
class CacheDir(FuseDirEntry):
    """ The cache/ directory is an on-disk representation of locally cached
    objects and metadata. Via this directory you can browse cached data and
    selectively remove them from the cache, freeing disk space. (See `swh fs
    clean` in the {ref}`CLI <swh-fuse-cli>` to completely empty the cache). The
    directory is populated with symlinks to: all artifacts, identified by their
    SWHIDs and sharded by the first two characters of their object id, the
    metadata identified by a `SWHID.json` entry, and the `origin/` directory.
    """

    name: str = field(init=False, default="cache")
    mode: int = field(init=False, default=int(EntryMode.RDONLY_DIR))
    # Entry names are either a two-hex-character shard or the origin directory
    # name. The alternation is wrapped in a (non-capturing) group so that both
    # anchors apply to both alternatives: without it, "^..|..$" accepts any
    # name merely starting with two hex characters or ending with the origin
    # directory name.
    ENTRIES_REGEXP = re.compile(r"^(?:([a-f0-9]{2})|(" + OriginDir.name + r"))$")

    @dataclass
    class ArtifactShardBySwhid(FuseDirEntry):
        """ One shard of the cache/ directory: symlinks to the cached artifacts
        whose object id starts with `prefix`, plus one `SWHID.json` symlink per
        artifact, all pointing into the archive/ directory. """

        # Accept both kinds of names produced by compute_entries(): a bare
        # SWHID and the same SWHID followed by the JSON suffix.
        # NOTE(review): how the base class consumes ENTRIES_REGEXP is not
        # visible here; the pattern is widened so it agrees with the entries
        # this class yields -- confirm against FuseDirEntry.
        ENTRIES_REGEXP = re.compile(
            r"^(" + SWHID_REGEXP + r")(" + re.escape(JSON_SUFFIX) + r")?$"
        )

        # Leading characters of the object ids listed in this shard.
        prefix: str = field(default="")

        async def compute_entries(self) -> AsyncIterator[FuseEntry]:
            """ Yield, for each cached SWHID in this shard, a symlink to the
            artifact and a symlink to its JSON metadata file. """

            root_path = self.get_relative_root_path()
            async for swhid in self.fuse.cache.get_cached_swhids():
                if not swhid.object_id.startswith(self.prefix):
                    continue

                yield self.create_child(
                    FuseSymlinkEntry,
                    name=str(swhid),
                    target=Path(root_path, f"archive/{swhid}"),
                )
                yield self.create_child(
                    FuseSymlinkEntry,
                    name=f"{swhid}{JSON_SUFFIX}",
                    target=Path(root_path, f"archive/{swhid}{JSON_SUFFIX}"),
                )

    async def compute_entries(self) -> AsyncIterator[FuseEntry]:
        """ Yield one shard directory per distinct two-character object id
        prefix found in the cache, then a symlink to the origin directory. """

        prefixes = {
            swhid.object_id[:2]
            async for swhid in self.fuse.cache.get_cached_swhids()
        }

        # Sorted so that the listing order does not depend on set iteration
        # order.
        for prefix in sorted(prefixes):
            yield self.create_child(
                CacheDir.ArtifactShardBySwhid,
                name=prefix,
                mode=int(EntryMode.RDONLY_DIR),
                prefix=prefix,
            )

        yield self.create_child(
            FuseSymlinkEntry,
            name=OriginDir.name,
            target=Path(self.get_relative_root_path(), OriginDir.name),
        )
write this :)