Changeset View
Changeset View
Standalone View
Standalone View
swh/fuse/fs/artifact.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import asyncio | import asyncio | ||||
from dataclasses import dataclass, field | from dataclasses import dataclass, field | ||||
import json | import json | ||||
import logging | import logging | ||||
import os | import os | ||||
from pathlib import Path | from pathlib import Path | ||||
import re | import re | ||||
from typing import Any, AsyncIterator, Dict, List | from typing import Any, AsyncIterator, Dict, List, Optional, cast | ||||
from swh.fuse.fs.entry import ( | from swh.fuse.fs.entry import ( | ||||
EntryMode, | EntryMode, | ||||
FuseDirEntry, | FuseDirEntry, | ||||
FuseEntry, | FuseEntry, | ||||
FuseFileEntry, | FuseFileEntry, | ||||
FuseSymlinkEntry, | FuseSymlinkEntry, | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines | async def compute_entries(self) -> AsyncIterator[FuseEntry]: | ||||
# 0o040000 so use a read-only permission instead | # 0o040000 so use a read-only permission instead | ||||
int(EntryMode.RDONLY_DIR) | int(EntryMode.RDONLY_DIR) | ||||
if swhid.object_type == DIRECTORY | if swhid.object_type == DIRECTORY | ||||
else entry["perms"] | else entry["perms"] | ||||
) | ) | ||||
# 1. Symlink (check symlink first because condition is less restrictive) | # 1. Symlink (check symlink first because condition is less restrictive) | ||||
if mode == DentryPerms.symlink: | if mode == DentryPerms.symlink: | ||||
target = "" | target = b"" | ||||
try: | try: | ||||
# Symlink target is stored in the blob content | # Symlink target is stored in the blob content | ||||
target = await self.fuse.get_blob(swhid) | target = await self.fuse.get_blob(swhid) | ||||
except Exception: | except Exception: | ||||
pass # Ignore error and create a (broken) symlink anyway | pass # Ignore error and create a (broken) symlink anyway | ||||
yield self.create_child( | yield self.create_child( | ||||
FuseSymlinkEntry, name=name, target=target, | FuseSymlinkEntry, name=name, target=target, | ||||
▲ Show 20 Lines • Show All 131 Lines • ▼ Show 20 Lines | async def prefill_by_date_cache(self, by_date_dir: FuseDirEntry) -> None: | ||||
# invalidated + updated every 100 API calls | # invalidated + updated every 100 API calls | ||||
nb_api_calls += 1 | nb_api_calls += 1 | ||||
if nb_api_calls % 100 == 0: | if nb_api_calls % 100 == 0: | ||||
self.fuse.cache.direntry.invalidate(by_date_dir) | self.fuse.cache.direntry.invalidate(by_date_dir) | ||||
# Make sure to have the latest entries once the prefilling is done | # Make sure to have the latest entries once the prefilling is done | ||||
self.fuse.cache.direntry.invalidate(by_date_dir) | self.fuse.cache.direntry.invalidate(by_date_dir) | ||||
async def compute_entries(self) -> AsyncIterator[FuseEntry]: | async def compute_entries(self) -> AsyncIterator[FuseEntry]: | ||||
by_date_dir = self.create_child( | by_date_dir = cast( | ||||
RevisionHistoryShardByDate, | |||||
self.create_child( | |||||
RevisionHistoryShardByDate, | RevisionHistoryShardByDate, | ||||
name="by-date", | name="by-date", | ||||
mode=int(EntryMode.RDONLY_DIR), | mode=int(EntryMode.RDONLY_DIR), | ||||
history_swhid=self.swhid, | history_swhid=self.swhid, | ||||
), | |||||
) | ) | ||||
# Run it concurrently because of the many API calls necessary | # Run it concurrently because of the many API calls necessary | ||||
asyncio.create_task(self.prefill_by_date_cache(by_date_dir)) | asyncio.create_task(self.prefill_by_date_cache(by_date_dir)) | ||||
yield by_date_dir | yield by_date_dir | ||||
yield self.create_child( | yield self.create_child( | ||||
▲ Show 20 Lines • Show All 132 Lines • ▼ Show 20 Lines | async def compute_entries(self) -> AsyncIterator[FuseEntry]: | ||||
) | ) | ||||
@dataclass | @dataclass | ||||
class RevisionHistoryShardByPage(FuseDirEntry): | class RevisionHistoryShardByPage(FuseDirEntry): | ||||
""" Revision virtual `history/by-page` sharded directory """ | """ Revision virtual `history/by-page` sharded directory """ | ||||
history_swhid: SWHID | history_swhid: SWHID | ||||
prefix: int = field(default=None) | prefix: Optional[int] = field(default=None) | ||||
PAGE_SIZE = 10_000 | PAGE_SIZE = 10_000 | ||||
PAGE_FMT = "{page_number:03d}" | PAGE_FMT = "{page_number:03d}" | ||||
ENTRIES_REGEXP = re.compile(r"^([0-9]+)|(" + SWHID_REGEXP + ")$") | ENTRIES_REGEXP = re.compile(r"^([0-9]+)|(" + SWHID_REGEXP + ")$") | ||||
async def compute_entries(self) -> AsyncIterator[FuseEntry]: | async def compute_entries(self) -> AsyncIterator[FuseEntry]: | ||||
history = await self.fuse.get_history(self.history_swhid) | history = await self.fuse.get_history(self.history_swhid) | ||||
Show All 36 Lines | class Release(FuseDirEntry): | ||||
- `root`: present if and only if the release points to something that | - `root`: present if and only if the release points to something that | ||||
(transitively) resolves to a directory. When present it is a symlink | (transitively) resolves to a directory. When present it is a symlink | ||||
pointing into `archive/` to the SWHID of the given directory | pointing into `archive/` to the SWHID of the given directory | ||||
- `meta.json`: metadata for the current node, as a symlink pointing to the | - `meta.json`: metadata for the current node, as a symlink pointing to the | ||||
relevant `archive/<SWHID>.json` file """ | relevant `archive/<SWHID>.json` file """ | ||||
swhid: SWHID | swhid: SWHID | ||||
async def find_root_directory(self, swhid: SWHID) -> SWHID: | async def find_root_directory(self, swhid: SWHID) -> Optional[SWHID]: | ||||
if swhid.object_type == RELEASE: | if swhid.object_type == RELEASE: | ||||
metadata = await self.fuse.get_metadata(swhid) | metadata = await self.fuse.get_metadata(swhid) | ||||
return await self.find_root_directory(metadata["target"]) | return await self.find_root_directory(metadata["target"]) | ||||
elif swhid.object_type == REVISION: | elif swhid.object_type == REVISION: | ||||
metadata = await self.fuse.get_metadata(swhid) | metadata = await self.fuse.get_metadata(swhid) | ||||
return metadata["directory"] | return metadata["directory"] | ||||
elif swhid.object_type == DIRECTORY: | elif swhid.object_type == DIRECTORY: | ||||
return swhid | return swhid | ||||
▲ Show 20 Lines • Show All 177 Lines • Show Last 20 Lines |