Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/objstorage.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from typing import Dict, Iterable, Optional | from typing import Dict, Iterable, Optional, Union, cast | ||||
from swh.model.hashutil import DEFAULT_ALGORITHMS | |||||
from swh.model.model import Content, MissingData | from swh.model.model import Content, MissingData | ||||
from swh.objstorage.exc import ObjNotFoundError | from swh.objstorage.exc import ObjNotFoundError | ||||
from swh.objstorage.factory import get_objstorage | from swh.objstorage.factory import get_objstorage | ||||
from swh.storage.interface import Sha1 | from swh.storage.interface import HashDict, Sha1 | ||||
from .exc import StorageArgumentException | from .exc import StorageArgumentException | ||||
class ObjStorage: | class ObjStorage: | ||||
"""Objstorage collaborator in charge of adding objects to | """Objstorage collaborator in charge of adding objects to | ||||
the objstorage. | the objstorage. | ||||
""" | """ | ||||
def __init__(self, objstorage_config: Dict): | def __init__(self, storage, objstorage_config: Dict): | ||||
self.storage = storage | |||||
self.objstorage = get_objstorage(**objstorage_config) | self.objstorage = get_objstorage(**objstorage_config) | ||||
def __getattr__(self, key): | def __getattr__(self, key): | ||||
if key == "objstorage": | if key == "objstorage": | ||||
raise AttributeError(key) | raise AttributeError(key) | ||||
return getattr(self.objstorage, key) | return getattr(self.objstorage, key) | ||||
def content_get(self, obj_id: Sha1) -> Optional[bytes]: | def content_get(self, obj_id: Union[Sha1, HashDict]) -> Optional[bytes]: | ||||
"""Retrieve data associated with the content from the objstorage | """Retrieve data associated with the content from the objstorage | ||||
Args: | Args: | ||||
content: content identifier | content: content identifier | ||||
Returns: | Returns: | ||||
associated content's data if any, None otherwise. | associated content's data if any, None otherwise. | ||||
""" | """ | ||||
hashes: HashDict | |||||
if isinstance(obj_id, bytes): | |||||
hashes = {"sha1": obj_id} | |||||
else: | |||||
hashes = obj_id | |||||
if set(hashes) < DEFAULT_ALGORITHMS: | |||||
# If some hashes are missing, query the database to fill blanks | |||||
candidates = self.storage.content_find(hashes) | |||||
if candidates: | |||||
# There may be more than one in case of collision; but we cannot | |||||
# do anything about it here | |||||
hashes = cast(HashDict, candidates[0].hashes()) | |||||
else: | |||||
# we will pass the partial hash dict to the objstorage, which | |||||
# will do the best it can with it. Usually, this will return None, | |||||
# as objects missing from the storage DB are unlikely to be present in the | |||||
# objstorage | |||||
pass | |||||
try: | try: | ||||
data = self.objstorage.get(obj_id) | data = self.objstorage.get(hashes) | ||||
except ObjNotFoundError: | except ObjNotFoundError: | ||||
data = None | data = None | ||||
return data | return data | ||||
def content_add(self, contents: Iterable[Content]) -> Dict: | def content_add(self, contents: Iterable[Content]) -> Dict: | ||||
"""Add contents to the objstorage. | """Add contents to the objstorage. | ||||
Show All 17 Lines |