diff --git a/debian/control b/debian/control index 6ba9e89..b97b2ba 100644 --- a/debian/control +++ b/debian/control @@ -1,30 +1,37 @@ Source: swh-objstorage Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python, python3-all, python3-flask, python3-nose, python3-requests, python3-setuptools, python3-swh.core (>= 0.0.20~), python3-swh.storage.archiver (>= 0.0.52~), python3-click, python3-libcloud, python3-vcversioner Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/DOBJS/ Package: python3-swh.objstorage Architecture: all Depends: python3-swh.core (>= 0.0.20~), ${misc:Depends}, ${python3:Depends} Description: Software Heritage Object Storage Package: python3-swh.objstorage.checker Architecture: all Depends: python3-swh.objstorage (= ${binary:Version}), python3-swh.storage.archiver (>= 0.0.52~), ${misc:Depends}, ${python3:Depends} Description: Software Heritage Object Storage Checker +Package: python3-swh.objstorage.cloud +Architecture: all +Depends: python3-swh.objstorage (= ${binary:Version}), python3-libcloud, + ${misc:Depends}, ${python3:Depends} +Breaks: python3-swh.objstorage (<= 0.0.7~) +Description: Software Heritage Cloud Object Storage + diff --git a/debian/rules b/debian/rules index 0ea9d41..144f9d4 100755 --- a/debian/rules +++ b/debian/rules @@ -1,17 +1,20 @@ #!/usr/bin/make -f export PYBUILD_NAME=swh.objstorage %: dh $@ --with python3 --buildsystem=pybuild override_dh_install: dh_install for pyvers in $(shell py3versions -vr); do \ mkdir -p $(CURDIR)/debian/python3-swh.objstorage.checker/usr/lib/python$$pyvers/dist-packages/swh/objstorage/ ; \ mv $(CURDIR)/debian/python3-swh.objstorage/usr/lib/python$$pyvers/dist-packages/swh/objstorage/checker.py \ $(CURDIR)/debian/python3-swh.objstorage.checker/usr/lib/python$$pyvers/dist-packages/swh/objstorage/ ; \ + mkdir -p 
# Copyright (C) 2016 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import abc

from swh.core import hashutil
from swh.objstorage.objstorage import ObjStorage, compute_hash
from swh.objstorage.exc import ObjNotFoundError, Error

from libcloud.storage import providers
from libcloud.storage.types import Provider, ObjectDoesNotExistError


class CloudObjStorage(ObjStorage, metaclass=abc.ABCMeta):
    """Abstract ObjStorage that connects to a cloud using Libcloud.

    Implementations of this class must redefine the `_get_provider`
    method to make it return a driver provider (i.e. an object that
    supports the `get_driver` method) which returns a Libcloud driver
    (see https://libcloud.readthedocs.io/en/latest/storage/api.html).

    Every object lives in a single container and is named after the
    hexadecimal form of its identifier.
    """
    def __init__(self, api_key, api_secret_key, container_name):
        """Open the driver and fetch the container used by this storage.

        Args:
            api_key: key to connect to the API.
            api_secret_key: secret key for authentication.
            container_name: name of the container, as understood by the
                driver's `get_container` method.
        """
        self.driver = self._get_driver(api_key, api_secret_key)
        self.container_name = container_name
        self.container = self.driver.get_container(
            container_name=container_name)

    def _get_driver(self, api_key, api_secret_key):
        """Initialize a driver to communicate with the cloud.

        Args:
            api_key: key to connect to the API.
            api_secret_key: secret key for authentication.

        Returns:
            a Libcloud driver to a cloud storage.
        """
        # Resolve the driver class from the provider chosen by the
        # subclass, then instantiate it with the credentials.
        driver_cls = providers.get_driver(self._get_provider())
        return driver_cls(api_key, api_secret_key)

    @abc.abstractmethod
    def _get_provider(self):
        """Get a libcloud driver provider.

        This method must be overridden by subclasses to specify which
        of the native libcloud drivers the current storage should
        connect to. Alternatively, a provider for a custom driver may
        be returned, in which case the provider will have to support
        the `get_driver` method.
        """
        raise NotImplementedError('%s must implement `_get_provider` method'
                                  % type(self))

    def __contains__(self, obj_id):
        """Tell whether an object with the given id is in the storage.

        Args:
            obj_id: identifier of the object to look for.

        Returns:
            True if the object could be fetched, False if the lookup
            raised ObjNotFoundError.
        """
        try:
            self._get_object(obj_id)
        except ObjNotFoundError:
            return False
        return True

    def __iter__(self):
        """Iterate over the objects present in the storage.

        Warning: Iteration over the contents of a cloud-based object
        storage may have bad efficiency: due to the very high amount of
        objects in it and the fact that it is remote, getting all the
        contents of the current object storage may result in a lot of
        network requests.

        You almost certainly don't want to use this method in production.

        Yields:
            the name of each object of the container.
        """
        # NOTE(review): the names yielded here are the stored object
        # names (hexadecimal strings written by _put_object), not the
        # raw obj_id values -- confirm this is what callers expect.
        yield from (
            obj.name
            for obj in self.driver.iterate_container_objects(self.container)
        )

    def __len__(self):
        """Compute the number of objects in the current object storage.

        Warning: this currently uses `__iter__`, its warning about bad
        performance applies.

        Returns:
            number of objects contained in the storage.
        """
        return sum(1 for _ in self)

    def add(self, content, obj_id=None, check_presence=True):
        """Add a new object to the storage.

        Args:
            content: content of the object to store.
            obj_id: identifier of the object; computed from the content
                with `compute_hash` when it is None.
            check_presence: when True, nothing is uploaded if an object
                with the same id is already in the storage.

        Returns:
            the id of the object.
        """
        if obj_id is None:
            # Checksum is missing, compute it on the fly.
            obj_id = compute_hash(content)

        if check_presence and obj_id in self:
            return obj_id

        self._put_object(content, obj_id)
        return obj_id

    def restore(self, content, obj_id=None):
        """Store an object without checking whether it already exists.

        Args:
            content: content of the object to store.
            obj_id: identifier of the object; computed from the content
                when it is None.

        Returns:
            the id of the object.
        """
        return self.add(content, obj_id, check_presence=False)

    def get(self, obj_id):
        """Retrieve the content of an object.

        Args:
            obj_id: identifier of the object to fetch.

        Returns:
            the content of the object, as bytes.

        Raises:
            ObjNotFoundError: if the object is not in the storage.
        """
        # as_stream() provides an iterator over chunks of bytes. The
        # chunks must be concatenated: bytes(<iterator>) only accepts an
        # iterable of integers and fails on chunks of bytes.
        return b''.join(self._get_object(obj_id).as_stream())

    def check(self, obj_id):
        """Check the presence and the integrity of an object.

        Args:
            obj_id: identifier of the object to check.

        Raises:
            ObjNotFoundError: if the object is not in the storage.
            Error: if the hash of the stored content differs from obj_id.
        """
        # get() goes through _get_object, which raises ObjNotFoundError
        # when the object is missing; no separate existence lookup (and
        # extra remote request) is needed.
        obj_content = self.get(obj_id)
        # Check the content integrity
        if compute_hash(obj_content) != obj_id:
            raise Error(obj_id)

    def _get_object(self, obj_id):
        """Get a Libcloud wrapper for an object pointer.

        This wrapper does not retrieve the content of the object
        directly.

        Args:
            obj_id: identifier of the object to look up.

        Returns:
            the object returned by the driver's `get_object` method.

        Raises:
            ObjNotFoundError: if the driver reports that the object
                does not exist.
        """
        hex_obj_id = hashutil.hash_to_hex(obj_id)

        try:
            return self.driver.get_object(self.container_name, hex_obj_id)
        except ObjectDoesNotExistError as e:
            # Keep the driver error as the cause to ease debugging.
            raise ObjNotFoundError(e.object_name) from e

    def _put_object(self, content, obj_id):
        """Create an object in the cloud storage.

        The created object will contain the content and be referenced
        by the hexadecimal form of the given id.

        Args:
            content: content of the object to upload.
            obj_id: identifier of the object.
        """
        hex_obj_id = hashutil.hash_to_hex(obj_id)
        # NOTE(review): if content is a bytes object, iter(content)
        # hands the driver one integer per byte; a single-chunk iterator
        # such as iter([content]) may be preferable -- confirm against
        # what upload_object_via_stream accepts before changing it.
        self.driver.upload_object_via_stream(iter(content), self.container,
                                             hex_obj_id)


class AwsCloudObjStorage(CloudObjStorage):
    """Amazon's S3 Cloud-based object storage."""
    def _get_provider(self):
        """Select the libcloud S3 provider."""
        return Provider.S3


class OpenStackCloudObjStorage(CloudObjStorage):
    """OpenStack Swift Cloud-based object storage."""
    def _get_provider(self):
        """Select the libcloud OpenStack Swift provider."""
        return Provider.OPENSTACK_SWIFT