Changeset View
Changeset View
Standalone View
Standalone View
swh/objstorage/cloud/objstorage_cloud.py
Show All 9 Lines | |||||
from swh.core import hashutil | from swh.core import hashutil | ||||
from libcloud.storage import providers | from libcloud.storage import providers | ||||
from libcloud.storage.types import Provider, ObjectDoesNotExistError | from libcloud.storage.types import Provider, ObjectDoesNotExistError | ||||
class CloudObjStorage(ObjStorage, metaclass=abc.ABCMeta): | class CloudObjStorage(ObjStorage, metaclass=abc.ABCMeta): | ||||
""" Abstract ObjStorage that allows connection to a cloud using Libcloud | """Abstract ObjStorage that connect to a cloud using Libcloud | ||||
Implementations of this class must redefine the _get_provider method to | Implementations of this class must redefine the _get_provider | ||||
make it return a driver provider (i.e. object that supports `get_driver` | method to make it return a driver provider (i.e. object that | ||||
method) which return a LibCloud driver | supports `get_driver` method) which return a LibCloud driver (see | ||||
(see https://libcloud.readthedocs.io/en/latest/storage/api.html). | https://libcloud.readthedocs.io/en/latest/storage/api.html). | ||||
""" | |||||
""" | |||||
def __init__(self, api_key, api_secret_key, container_name): | def __init__(self, api_key, api_secret_key, container_name): | ||||
self.driver = self._get_driver(api_key, api_secret_key) | self.driver = self._get_driver(api_key, api_secret_key) | ||||
self.container_name = container_name | self.container_name = container_name | ||||
self.container = self.driver.get_container( | self.container = self.driver.get_container( | ||||
container_name=container_name) | container_name=container_name) | ||||
def _get_driver(self, api_key, api_secret_key): | def _get_driver(self, api_key, api_secret_key): | ||||
""" Initialize a driver to communicate with the cloud | """Initialize a driver to communicate with the cloud | ||||
Args: | Args: | ||||
api_key: key to connect to the API. | api_key: key to connect to the API. | ||||
api_secret_key: secret key for authentification. | api_secret_key: secret key for authentification. | ||||
Returns: | Returns: | ||||
a Libcloud driver to a cloud storage. | a Libcloud driver to a cloud storage. | ||||
""" | """ | ||||
# Get the driver class from its description. | # Get the driver class from its description. | ||||
cls = providers.get_driver(self._get_provider()) | cls = providers.get_driver(self._get_provider()) | ||||
# Initialize the driver. | # Initialize the driver. | ||||
return cls(api_key, api_secret_key) | return cls(api_key, api_secret_key) | ||||
@abc.abstractmethod | @abc.abstractmethod | ||||
def _get_provider(self): | def _get_provider(self): | ||||
""" Get a libcloud driver provider | """Get a libcloud driver provider | ||||
This method must be overriden by subclasses to specify which of the | This method must be overriden by subclasses to specify which | ||||
native libcloud driver the current storage should connect to. | of the native libcloud driver the current storage should | ||||
Alternatively, provider for a custom driver may be returned, in which | connect to. Alternatively, provider for a custom driver may | ||||
case the provider will have to support `get_driver` method. | be returned, in which case the provider will have tu support | ||||
ardumont: have t`o` support | |||||
`get_driver` method. | |||||
""" | """ | ||||
raise NotImplementedError('%s must implement `get_provider` method' | raise NotImplementedError('%s must implement `get_provider` method' | ||||
% type(self)) | % type(self)) | ||||
def __contains__(self, obj_id): | def __contains__(self, obj_id): | ||||
try: | try: | ||||
self._get_object(obj_id) | self._get_object(obj_id) | ||||
except ObjectDoesNotExistError: | except ObjNotFoundError: | ||||
return False | return False | ||||
else: | else: | ||||
return True | return True | ||||
def __iter__(self): | def __iter__(self): | ||||
""" Iterate over the objects present in the storage | """ Iterate over the objects present in the storage | ||||
Warning: Iteration over the contents of a cloud-based object storage | Warning: Iteration over the contents of a cloud-based object storage | ||||
may have bad efficiency: due to the very high amount of objects in it | may have bad efficiency: due to the very high amount of objects in it | ||||
and the fact that it is remote, get all the contents of the current | and the fact that it is remote, get all the contents of the current | ||||
object storage may result in a lot of network requests. | object storage may result in a lot of network requests. | ||||
You almost certainly don't want to use this method in production. | You almost certainly don't want to use this method in production. | ||||
""" | """ | ||||
yield from map(lambda obj: obj.name, | yield from map(lambda obj: obj.name, | ||||
self.driver.iterate_container_objects(self.container)) | self.driver.iterate_container_objects(self.container)) | ||||
def __len__(self): | def __len__(self): | ||||
""" Compute the number of objects in the current object storage. | """Compute the number of objects in the current object storage. | ||||
Warning: this currently uses `__iter__`, its warning about bad | Warning: this currently uses `__iter__`, its warning about bad | ||||
performance applies. | performance applies. | ||||
Returns: | Returns: | ||||
number of objects contained in the storage. | number of objects contained in the storage. | ||||
""" | """ | ||||
return sum(1 for i in self) | return sum(1 for i in self) | ||||
def add(self, content, obj_id=None, check_presence=True): | def add(self, content, obj_id=None, check_presence=True): | ||||
if obj_id is None: | if obj_id is None: | ||||
# Checksum is missing, compute it on the fly. | # Checksum is missing, compute it on the fly. | ||||
obj_id = compute_hash(content) | obj_id = compute_hash(content) | ||||
if check_presence and obj_id in self: | if check_presence and obj_id in self: | ||||
return obj_id | return obj_id | ||||
self._put_object(content, obj_id) | self._put_object(content, obj_id) | ||||
return obj_id | return obj_id | ||||
def restore(self, content, obj_id=None): | def restore(self, content, obj_id=None): | ||||
return self.add(content, obj_id, chech_presence=False) | return self.add(content, obj_id, check_presence=False) | ||||
def get(self, obj_id): | def get(self, obj_id): | ||||
return bytes(self._get_object(obj_id).as_stream()) | return bytes(self._get_object(obj_id).as_stream()) | ||||
def check(self, obj_id): | def check(self, obj_id): | ||||
# Check that the file exists, as _get_object raises ObjNotFoundError | # Check that the file exists, as _get_object raises ObjNotFoundError | ||||
self._get_object(obj_id) | self._get_object(obj_id) | ||||
# Check the content integrity | # Check the content integrity | ||||
obj_content = self.get(obj_id) | obj_content = self.get(obj_id) | ||||
content_obj_id = compute_hash(obj_content) | content_obj_id = compute_hash(obj_content) | ||||
if content_obj_id != obj_id: | if content_obj_id != obj_id: | ||||
raise Error(obj_id) | raise Error(obj_id) | ||||
def _get_object(self, obj_id): | def _get_object(self, obj_id): | ||||
""" Get a Libcloud wrapper for an object pointer. | """Get a Libcloud wrapper for an object pointer. | ||||
This wrapper does not retrieve the content of the object directly. | This wrapper does not retrieve the content of the object | ||||
directly. | |||||
""" | """ | ||||
hex_obj_id = hashutil.hash_to_hex(obj_id) | hex_obj_id = hashutil.hash_to_hex(obj_id) | ||||
try: | try: | ||||
return self.driver.get_object(self.container_name, hex_obj_id) | return self.driver.get_object(self.container_name, hex_obj_id) | ||||
except ObjectDoesNotExistError as e: | except ObjectDoesNotExistError as e: | ||||
raise ObjNotFoundError(e.object_name) | raise ObjNotFoundError(e.object_name) | ||||
def _put_object(self, content, obj_id): | def _put_object(self, content, obj_id): | ||||
""" Create an object in the cloud storage. | """Create an object in the cloud storage. | ||||
Not Done Inline Actions: will contain — ardumont: `will contain` | |||||
Created object will contain the content and be referenced by the | Created object will contains the content and be referenced by | ||||
given id. | the given id. | ||||
""" | """ | ||||
hex_obj_id = hashutil.hash_to_hex(obj_id) | hex_obj_id = hashutil.hash_to_hex(obj_id) | ||||
self.driver.upload_object_via_stream(iter(content), self.container, | self.driver.upload_object_via_stream(iter(content), self.container, | ||||
hex_obj_id) | hex_obj_id) | ||||
class AwsCloudObjStorage(CloudObjStorage): | class AwsCloudObjStorage(CloudObjStorage): | ||||
""" Amazon's S3 Cloud-based object storage | """ Amazon's S3 Cloud-based object storage | ||||
""" | """ | ||||
def _get_provider(self): | def _get_provider(self): | ||||
return Provider.S3 | return Provider.S3 | ||||
class OpenStackCloudObjStorage(CloudObjStorage): | class OpenStackCloudObjStorage(CloudObjStorage): | ||||
""" OpenStack Swift Cloud based object storage | """ OpenStack Swift Cloud based object storage | ||||
""" | """ | ||||
def _get_provider(self): | def _get_provider(self): | ||||
return Provider.OPENSTACK_SWIFT | return Provider.OPENSTACK_SWIFT |
have t`o` support