diff --git a/swh/objstorage/cloud/__init__.py b/swh/objstorage/cloud/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/cloud/__init__.py
@@ -0,0 +1,3 @@
+from .objstorage_cloud import AwsCloudObjStorage, OpenStackCloudObjStorage
+
+__all__ = ['AwsCloudObjStorage', 'OpenStackCloudObjStorage']
diff --git a/swh/objstorage/cloud/objstorage_cloud.py b/swh/objstorage/cloud/objstorage_cloud.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/cloud/objstorage_cloud.py
@@ -0,0 +1,241 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from ..objstorage import ObjStorage, ID_HASH_ALGO
+from ..exc import ObjNotFoundError
+
+from swh.core import hashutil
+
+from libcloud.storage import providers
+from libcloud.storage.types import Provider, ObjectDoesNotExistError
+
+
+class CloudObjStorage(ObjStorage):
+    """ Abstract ObjStorage that allows connection to a cloud using Libcloud
+
+    Implementations that inherit this class should redefine the
+    _get_driver or the _get_provider method in order to return the correct
+    object needed to use the cloud storage.
+
+    Objects are stored in a single container, under the hexadecimal
+    representation of their id.
+    """
+
+    def __init__(self, api_key, api_secret_key, container_name):
+        """ Connect to the cloud and look up the storage container.
+
+        Args:
+            api_key: key used to authenticate against the cloud API.
+            api_secret_key: secret associated with api_key.
+            container_name: name of the container holding the objects.
+        """
+        self.driver = self._get_driver(api_key, api_secret_key)
+        self.container_name = container_name
+        self.container = self.driver.get_container(
+            container_name=container_name)
+
+    def _get_driver(self, api_key, api_secret_key):
+        """ Initialize a driver to communicate with the cloud
+
+        This method may be overridden if a custom driver must be provided.
+
+        Args:
+            api_key: passed as first argument to the driver class.
+            api_secret_key: passed as second argument to the driver class.
+
+        Returns:
+            an instance of the libcloud driver class matching the provider
+            returned by _get_provider.
+        """
+        # Get the driver class from its description.
+        cls = providers.get_driver(self._get_provider())
+        # Initialize the driver.
+        return cls(api_key, api_secret_key)
+
+    def _get_provider(self):
+        """ Get a driver class description
+
+        This method must be overridden by subclasses to specify which of the
+        native libcloud driver the current storage should connect to.
+        """
+        raise NotImplementedError('%s class must define a _get_provider '
+                                  'method to use a native libcloud storage'
+                                  % type(self))
+
+    def __contains__(self, obj_id):
+        """ Indicate whether an object with the given id is in the storage.
+        """
+        try:
+            self._get_object(obj_id)
+        except ObjNotFoundError:
+            # _get_object translates libcloud's ObjectDoesNotExistError into
+            # ObjNotFoundError, so that is the exception to expect here.
+            return False
+        else:
+            return True
+
+    def __iter__(self):
+        """ Iterate over the names of the objects stored in the container.
+
+        Warning: Iterate over the contents of a cloud-based object storage
+        may have bad efficiency.
+        """
+        yield from (obj.name for obj in
+                    self.driver.iterate_container_objects(self.container))
+
+    def __len__(self):
+        """ Count the stored objects by iterating over all of them.
+        """
+        return sum(1 for _ in self)
+
+    def add(self, content, obj_id=None, check_presence=True):
+        """ Add a new object to the object storage.
+
+        Args:
+            content: content of the object to be added to the storage.
+            obj_id: checksum of `content` using [ID_HASH_ALGO] algorithm.
+                When given, obj_id will be trusted to match the content. If
+                missing, obj_id will be computed on the fly.
+            check_presence: indicate if the presence of the content should be
+                verified before adding the file.
+
+        Returns:
+            the id of the object into the storage.
+        """
+        if obj_id is None:
+            # Checksum is missing, compute it on the fly.
+            h = hashutil._new_hash(ID_HASH_ALGO, len(content))
+            h.update(content)
+            obj_id = h.digest()
+
+        if check_presence and obj_id in self:
+            return obj_id
+
+        self._put_object(content, obj_id)
+        return obj_id
+
+    def restore(self, content, obj_id=None):
+        """ Restore a content that have been corrupted.
+
+        This function is identical to add but does not check if
+        the object id is already in the storage.
+
+        Args:
+            content: content of the object to be added to the storage
+            obj_id: checksum of `content` as computed by ID_HASH_ALGO. When
+                given, obj_id will be trusted to match the content. If
+                missing, obj_id will be computed on the fly.
+
+        Returns:
+            the id of the object into the storage.
+        """
+        return self.add(content, obj_id, check_presence=False)
+
+    def get(self, obj_id):
+        """ Retrieve the content of a given object.
+
+        Args:
+            obj_id: object id.
+
+        Returns:
+            the content of the requested object as bytes.
+
+        Raises:
+            ObjNotFoundError: if the requested object is missing.
+        """
+        # as_stream() yields the content as successive chunks of bytes;
+        # bytes() cannot consume such an iterator, so join the chunks.
+        return b''.join(self._get_object(obj_id).as_stream())
+
+    def check(self, obj_id):
+        """ Perform an integrity check for a given object.
+
+        Only the presence of the object is verified: the integrity of the
+        stored content is left to the cloud provider.
+
+        Args:
+            obj_id: object id.
+
+        Raises:
+            ObjNotFoundError: if the requested object is missing.
+        """
+        # Check that the file exists, as _get_object raises ObjNotFoundError
+        self._get_object(obj_id)
+        # The file integrity is guaranteed by the cloud.
+
+    def get_random(self, batch_size):
+        """ Get random ids of existing contents
+
+        This method is used in order to get random ids to perform
+        content integrity verifications on random contents.
+
+        Args:
+            batch_size (int): Number of ids that will be given
+
+        Raises:
+            NotImplementedError: always, as cloud storages do not support
+                this operation.
+        """
+        # Cloud storages do not use the get_random operation.
+        raise NotImplementedError(
+            "The current implementation of ObjStorage does not support "
+            "'get_random' operation"
+        )
+
+    def _get_object(self, obj_id):
+        """ Get the libcloud object stored under the given id.
+
+        Args:
+            obj_id: object id.
+
+        Returns:
+            the object returned by the driver for the hexadecimal form of
+            obj_id.
+
+        Raises:
+            ObjNotFoundError: if the driver reports that the object does not
+                exist.
+        """
+        hex_obj_id = hashutil.hash_to_hex(obj_id)
+        try:
+            return self.driver.get_object(self.container_name, hex_obj_id)
+        except ObjectDoesNotExistError as e:
+            raise ObjNotFoundError(e.object_name) from e
+
+    def _put_object(self, content, obj_id):
+        """ Upload content into the container under the given id.
+
+        Args:
+            content: content of the object, either as bytes or as an
+                iterable of chunks.
+            obj_id: object id; its hexadecimal form is the object name.
+        """
+        hex_obj_id = hashutil.hash_to_hex(obj_id)
+        if isinstance(content, bytes):
+            # Send the whole content as a single chunk: iterating over
+            # bytes would yield one integer per byte.
+            chunks = iter((content,))
+        else:
+            chunks = iter(content)
+        self.driver.upload_object_via_stream(chunks, self.container,
+                                             hex_obj_id)
+
+
+class AwsCloudObjStorage(CloudObjStorage):
+    """ Cloud-based object storage that works with Amazon's S3
+    """
+    def _get_provider(self):
+        """ Return the libcloud provider constant for Amazon's S3
+        """
+        return Provider.S3
+
+
+class OpenStackCloudObjStorage(CloudObjStorage):
+    """ Cloud-based object storage based on OpenStack Swift
+    """
+    def _get_provider(self):
+        """ Return the libcloud provider constant for OpenStack Swift
+        """
+        return Provider.OPENSTACK_SWIFT