Changeset View
Changeset View
Standalone View
Standalone View
swh/fuse/fs/artifact.py
Show First 20 Lines • Show All 202 Lines • ▼ Show 20 Lines | |||||
@dataclass
class RevisionHistory(FuseDirEntry):
    """ Revision virtual `history/` directory """

    swhid: SWHID

    async def compute_entries(self) -> AsyncIterator[FuseEntry]:
        # Fetch the full revision history once; both shard views are
        # pre-populated from the same list.
        history = await self.fuse.get_history(self.swhid)

        # One sharded sub-directory per presentation scheme, in a fixed
        # order: by-hash first, then by-page.
        shard_views = (
            ("by-hash", RevisionHistoryShardByHash),
            ("by-page", RevisionHistoryShardByPage),
        )
        for shard_name, shard_cls in shard_views:
            shard = self.create_child(
                shard_cls,
                name=shard_name,
                mode=int(EntryMode.RDONLY_DIR),
                history_swhid=self.swhid,
            )
            # Warm the direntry cache now so later directory listings do
            # not need to re-fetch the history.
            shard.fill_direntry_cache(history)
            yield shard
@dataclass | @dataclass | ||||
class RevisionHistoryShardByHash(FuseDirEntry): | class RevisionHistoryShardByHash(FuseDirEntry): | ||||
""" Revision virtual `history/by-hash` sharded directory """ | """ Revision virtual `history/by-hash` sharded directory """ | ||||
history_swhid: SWHID | history_swhid: SWHID | ||||
prefix: str = field(default="") | prefix: str = field(default="") | ||||
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines | async def compute_entries(self) -> AsyncIterator[FuseEntry]: | ||||
hash_prefix = self.prefix.replace("/", "") | hash_prefix = self.prefix.replace("/", "") | ||||
swhids = [s for s in history if s.object_id.startswith(hash_prefix)] | swhids = [s for s in history if s.object_id.startswith(hash_prefix)] | ||||
for entry in self.fill_direntry_cache(swhids): | for entry in self.fill_direntry_cache(swhids): | ||||
yield entry | yield entry | ||||
@dataclass
class RevisionHistoryShardByPage(FuseDirEntry):
    """ Revision virtual `history/by-page` sharded directory """

    history_swhid: SWHID

    # Number of history entries per page directory.
    PAGE_SIZE = 10_000
    # TODO: compute the number of leading zeros from the expected number
    # of pages instead of hard-coding three digits — with more than 999
    # pages the lexicographic directory ordering breaks.
    PAGE_FMT = "{page_number:03d}"

    def fill_direntry_cache(self, swhids: List[SWHID]) -> List[FuseEntry]:
        """Populate the direntry cache with one sub-directory per page of
        ``PAGE_SIZE`` history entries, each page containing symlinks into
        the sibling ``archive/`` directory.

        Returns the list of page entries (empty when *swhids* is empty).
        """
        pages: List[FuseEntry] = []
        page: Optional[FuseEntry] = None
        page_root_path = None
        page_children: List[FuseEntry] = []

        def flush_page() -> None:
            # Persist the page being built (no-op before the first page).
            if page is not None:
                self.fuse.cache.direntry.set(page, page_children)
                pages.append(page)

        for idx, swhid in enumerate(swhids):
            if idx % self.PAGE_SIZE == 0:
                # Starting a new page: flush the previous one first.
                flush_page()
                page = self.create_child(
                    RevisionHistoryShardByPage,
                    name=self.PAGE_FMT.format(page_number=idx // self.PAGE_SIZE),
                    mode=int(EntryMode.RDONLY_DIR),
                    history_swhid=self.history_swhid,
                )
                page_root_path = page.get_relative_root_path()
                page_children = []
            page_children.append(
                page.create_child(
                    FuseSymlinkEntry,
                    name=str(swhid),
                    target=Path(page_root_path, f"archive/{swhid}"),
                )
            )
        # Flush the final (possibly partial) page.
        flush_page()

        self.fuse.cache.direntry.set(self, pages)
        return pages

    async def compute_entries(self) -> AsyncIterator[FuseEntry]:
        history = await self.fuse.get_history(self.history_swhid)
        # Pages are rebuilt (and re-cached) from the full history on demand.
        for entry in self.fill_direntry_cache(history):
            yield entry
@dataclass | |||||
class Release(FuseDirEntry): | class Release(FuseDirEntry): | ||||
""" Software Heritage release artifact. | """ Software Heritage release artifact. | ||||
Attributes: | Attributes: | ||||
swhid: Software Heritage persistent identifier | swhid: Software Heritage persistent identifier | ||||
Release nodes are represented on the file-system as directories with the | Release nodes are represented on the file-system as directories with the | ||||
following entries: | following entries: | ||||
▲ Show 20 Lines • Show All 101 Lines • Show Last 20 Lines |
Please write this with a def, PEP8 discourages named lambdas.