Changeset View
Changeset View
Standalone View
Standalone View
swh/objstorage/backends/seaweedfs/objstorage.py
# Copyright (C) 2019-2021 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import io | import io | ||||
from itertools import islice | from itertools import islice | ||||
import logging | import logging | ||||
import os | import os | ||||
from typing import Iterator, Optional | from typing import Iterator, Optional | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.objstorage.exc import Error, ObjNotFoundError | from swh.objstorage.exc import Error, ObjNotFoundError | ||||
from swh.objstorage.interface import ObjId | from swh.objstorage.interface import CompositeObjId, ObjId | ||||
from swh.objstorage.objstorage import ( | from swh.objstorage.objstorage import ( | ||||
DEFAULT_LIMIT, | DEFAULT_LIMIT, | ||||
ObjStorage, | ObjStorage, | ||||
compressors, | compressors, | ||||
compute_hash, | compute_hash, | ||||
decompressors, | decompressors, | ||||
) | ) | ||||
Show All 13 Lines | def __init__(self, url, compression=None, **kwargs): | ||||
self.wf = HttpFiler(url) | self.wf = HttpFiler(url) | ||||
self.compression = compression | self.compression = compression | ||||
def check_config(self, *, check_write): | def check_config(self, *, check_write): | ||||
"""Check the configuration for this object storage""" | """Check the configuration for this object storage""" | ||||
# FIXME: hopefully this blew up during instantiation | # FIXME: hopefully this blew up during instantiation | ||||
return True | return True | ||||
def __contains__(self, obj_id): | def __contains__(self, obj_id: ObjId) -> bool: | ||||
return self.wf.exists(self._path(obj_id)) | return self.wf.exists(self._path(obj_id)) | ||||
def __iter__(self): | def __iter__(self) -> Iterator[CompositeObjId]: | ||||
"""Iterate over the objects present in the storage | """Iterate over the objects present in the storage | ||||
Warning: Iteration over the contents of a cloud-based object storage | Warning: Iteration over the contents of a cloud-based object storage | ||||
may have bad efficiency: due to the very high amount of objects in it | may have bad efficiency: due to the very high amount of objects in it | ||||
and the fact that it is remote, get all the contents of the current | and the fact that it is remote, get all the contents of the current | ||||
object storage may result in a lot of network requests. | object storage may result in a lot of network requests. | ||||
You almost certainly don't want to use this method in production. | You almost certainly don't want to use this method in production. | ||||
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines | def delete(self, obj_id: ObjId): | ||||
raise ObjNotFoundError(obj_id) | raise ObjNotFoundError(obj_id) | ||||
self.wf.delete(self._path(obj_id)) | self.wf.delete(self._path(obj_id)) | ||||
return True | return True | ||||
def list_content( | def list_content( | ||||
self, | self, | ||||
last_obj_id: Optional[ObjId] = None, | last_obj_id: Optional[ObjId] = None, | ||||
limit: int = DEFAULT_LIMIT, | limit: int = DEFAULT_LIMIT, | ||||
) -> Iterator[ObjId]: | ) -> Iterator[CompositeObjId]: | ||||
if last_obj_id: | if last_obj_id: | ||||
objid = hashutil.hash_to_hex(last_obj_id) | objid = hashutil.hash_to_hex(last_obj_id) | ||||
douardda: Shouldn't the `hashutil.hash_to_hex` call here be changed to `objid_to_default_hex`? | |||||
lastfilename = objid | lastfilename = objid | ||||
else: | else: | ||||
lastfilename = None | lastfilename = None | ||||
for fname in islice(self.wf.iterfiles(last_file_name=lastfilename), limit): | for fname in islice(self.wf.iterfiles(last_file_name=lastfilename), limit): | ||||
bytehex = fname.rsplit("/", 1)[-1] | bytehex = fname.rsplit("/", 1)[-1] | ||||
yield hashutil.bytehex_to_hash(bytehex.encode()) | yield hashutil.bytehex_to_hash(bytehex.encode()) | ||||
# internal methods | # internal methods | ||||
Show All 20 Lines |
Shouldn't the `hashutil.hash_to_hex` call in `list_content` be changed to `objid_to_default_hex`?