Changeset View
Changeset View
Standalone View
Standalone View
swh/objstorage/backends/pathslicing.py
# Copyright (C) 2015-2022 The Software Heritage developers | # Copyright (C) 2015-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from contextlib import contextmanager | from contextlib import contextmanager | ||||
from itertools import islice | from itertools import islice | ||||
import os | import os | ||||
import tempfile | import tempfile | ||||
from typing import Iterator, List, Optional | from typing import Iterator, List, Optional | ||||
from typing_extensions import Literal | |||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.objstorage.constants import DEFAULT_LIMIT, ID_HASH_ALGO, ID_HEXDIGEST_LENGTH | from swh.objstorage.constants import DEFAULT_LIMIT, ID_HASH_ALGO, ID_HEXDIGEST_LENGTH | ||||
from swh.objstorage.exc import Error, ObjNotFoundError | from swh.objstorage.exc import Error, ObjNotFoundError | ||||
from swh.objstorage.interface import CompositeObjId, ObjId | from swh.objstorage.interface import CompositeObjId, ObjId | ||||
from swh.objstorage.objstorage import ( | from swh.objstorage.objstorage import ( | ||||
ObjStorage, | ObjStorage, | ||||
compressors, | compressors, | ||||
decompressors, | decompressors, | ||||
▲ Show 20 Lines • Show All 127 Lines • ▼ Show 20 Lines | Args: | ||||
root (str): path to the root directory of the storage on | root (str): path to the root directory of the storage on | ||||
the disk. | the disk. | ||||
slicing (str): string that indicates the slicing to perform | slicing (str): string that indicates the slicing to perform | ||||
on the hash of the content to know the path where it should | on the hash of the content to know the path where it should | ||||
be stored (see the documentation of the PathSlicer class). | be stored (see the documentation of the PathSlicer class). | ||||
""" | """ | ||||
PRIMARY_HASH: Literal["sha1"] = "sha1" | |||||
def __init__(self, root, slicing, compression="gzip", **kwargs): | def __init__(self, root, slicing, compression="gzip", **kwargs): | ||||
super().__init__(**kwargs) | super().__init__(**kwargs) | ||||
self.root = root | self.root = root | ||||
self.slicer = PathSlicer(root, slicing) | self.slicer = PathSlicer(root, slicing) | ||||
self.use_fdatasync = hasattr(os, "fdatasync") | self.use_fdatasync = hasattr(os, "fdatasync") | ||||
self.compression = compression | self.compression = compression | ||||
Show All 37 Lines | def __iter__(self) -> Iterator[CompositeObjId]: | ||||
storages. You almost certainly don't want to use this method | storages. You almost certainly don't want to use this method | ||||
in production. | in production. | ||||
Return: | Return: | ||||
Iterator over object IDs | Iterator over object IDs | ||||
""" | """ | ||||
def obj_iterator(): | |||||
# XXX hackish: it does not verify that the depth of found files | # XXX hackish: it does not verify that the depth of found files | ||||
# matches the slicing depth of the storage | # matches the slicing depth of the storage | ||||
for root, _dirs, files in os.walk(self.root): | for root, _dirs, files in os.walk(self.root): | ||||
_dirs.sort() | _dirs.sort() | ||||
for f in sorted(files): | for f in sorted(files): | ||||
yield bytes.fromhex(f) | yield {self.PRIMARY_HASH: bytes.fromhex(f)} | ||||
return obj_iterator() | |||||
def __len__(self) -> int: | def __len__(self) -> int: | ||||
"""Compute the number of objects available in the storage. | """Compute the number of objects available in the storage. | ||||
Warning: this currently uses `__iter__`, its warning about bad | Warning: this currently uses `__iter__`, its warning about bad | ||||
performances applies | performances applies | ||||
Return: | Return: | ||||
▲ Show 20 Lines • Show All 99 Lines • ▼ Show 20 Lines | def iter_from(self, obj_id, n_leaf=False): | ||||
dirs.sort() | dirs.sort() | ||||
if dirs and root == os.path.join(self.root, *slices[:level]): | if dirs and root == os.path.join(self.root, *slices[:level]): | ||||
cslice = slices[level] | cslice = slices[level] | ||||
for d in dirs[:]: | for d in dirs[:]: | ||||
if d < cslice: | if d < cslice: | ||||
dirs.remove(d) | dirs.remove(d) | ||||
for f in sorted(files): | for f in sorted(files): | ||||
if f > hex_obj_id: | if f > hex_obj_id: | ||||
yield bytes.fromhex(f) | yield {self.PRIMARY_HASH: bytes.fromhex(f)} | ||||
if n_leaf: | if n_leaf: | ||||
yield i | yield i | ||||
@contextmanager | @contextmanager | ||||
def _write_obj_file(self, hex_obj_id): | def _write_obj_file(self, hex_obj_id): | ||||
"""Context manager for writing object files to the object storage. | """Context manager for writing object files to the object storage. | ||||
During writing, data are written to a temporary file, which is atomically | During writing, data are written to a temporary file, which is atomically | ||||
Show All 33 Lines |