diff --git a/PKG-INFO b/PKG-INFO index c5bcb1e..73486eb 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.objstorage -Version: 0.0.7 +Version: 0.0.8 Summary: Software Heritage Object Storage Home-page: https://forge.softwareheritage.org/diffusion/DOBJS Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/debian/control b/debian/control index 5264bc1..b97b2ba 100644 --- a/debian/control +++ b/debian/control @@ -1,29 +1,37 @@ Source: swh-objstorage Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python, python3-all, python3-flask, python3-nose, python3-requests, python3-setuptools, python3-swh.core (>= 0.0.20~), python3-swh.storage.archiver (>= 0.0.52~), python3-click, + python3-libcloud, python3-vcversioner Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/DOBJS/ Package: python3-swh.objstorage Architecture: all Depends: python3-swh.core (>= 0.0.20~), ${misc:Depends}, ${python3:Depends} Description: Software Heritage Object Storage Package: python3-swh.objstorage.checker Architecture: all -Depends: python3-swh.objstorage (= ${binary:Version}), python3-swh.core (>= 0.0.20~), python3-swh.storage.archiver (>= 0.0.52~), +Depends: python3-swh.objstorage (= ${binary:Version}), python3-swh.storage.archiver (>= 0.0.52~), ${misc:Depends}, ${python3:Depends} Description: Software Heritage Object Storage Checker +Package: python3-swh.objstorage.cloud +Architecture: all +Depends: python3-swh.objstorage (= ${binary:Version}), python3-libcloud, + ${misc:Depends}, ${python3:Depends} +Breaks: python3-swh.objstorage (<= 0.0.7~) +Description: Software Heritage Cloud Object Storage + diff --git a/debian/rules b/debian/rules index 0ea9d41..144f9d4 100755 --- a/debian/rules +++ b/debian/rules @@ -1,17 +1,20 @@ #!/usr/bin/make -f export PYBUILD_NAME=swh.objstorage %: dh $@ --with python3 --buildsystem=pybuild override_dh_install: dh_install for pyvers in $(shell py3versions -vr); do \ mkdir -p $(CURDIR)/debian/python3-swh.objstorage.checker/usr/lib/python$$pyvers/dist-packages/swh/objstorage/ ; \ mv $(CURDIR)/debian/python3-swh.objstorage/usr/lib/python$$pyvers/dist-packages/swh/objstorage/checker.py \ $(CURDIR)/debian/python3-swh.objstorage.checker/usr/lib/python$$pyvers/dist-packages/swh/objstorage/ ; \ + mkdir -p $(CURDIR)/debian/python3-swh.objstorage.cloud/usr/lib/python$$pyvers/dist-packages/swh/objstorage/cloud ; \ + mv $(CURDIR)/debian/python3-swh.objstorage/usr/lib/python$$pyvers/dist-packages/swh/objstorage/cloud/* \ + $(CURDIR)/debian/python3-swh.objstorage.cloud/usr/lib/python$$pyvers/dist-packages/swh/objstorage/cloud/ ; \ done override_dh_auto_test: diff --git a/requirements.txt b/requirements.txt index f31c469..4df4188 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,17 @@ # Add here external Python modules dependencies, one per line. Module names # should match https://pypi.python.org/pypi names. For the full spec or # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html vcversioner # remote storage API client requests # remote storage API server flask # Internal dependencies swh.core >= 0.0.20 swh.storage.archiver >= 0.0.52 click + +apache-libcloud diff --git a/swh.objstorage.egg-info/PKG-INFO b/swh.objstorage.egg-info/PKG-INFO index c5bcb1e..73486eb 100644 --- a/swh.objstorage.egg-info/PKG-INFO +++ b/swh.objstorage.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.objstorage -Version: 0.0.7 +Version: 0.0.8 Summary: Software Heritage Object Storage Home-page: https://forge.softwareheritage.org/diffusion/DOBJS Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.objstorage.egg-info/SOURCES.txt b/swh.objstorage.egg-info/SOURCES.txt index 11dea39..8f38ae2 100644 --- a/swh.objstorage.egg-info/SOURCES.txt +++ b/swh.objstorage.egg-info/SOURCES.txt @@ -1,46 +1,47 @@ .gitignore AUTHORS LICENSE MANIFEST.in Makefile requirements.txt setup.py version.txt bin/swh-objstorage-add-dir bin/swh-objstorage-fsck debian/changelog debian/compat debian/control debian/copyright debian/rules debian/source/format swh.objstorage.egg-info/PKG-INFO swh.objstorage.egg-info/SOURCES.txt swh.objstorage.egg-info/dependency_links.txt swh.objstorage.egg-info/requires.txt swh.objstorage.egg-info/top_level.txt swh/objstorage/__init__.py swh/objstorage/checker.py swh/objstorage/exc.py swh/objstorage/objstorage.py swh/objstorage/objstorage_pathslicing.py swh/objstorage/api/__init__.py swh/objstorage/api/client.py swh/objstorage/api/common.py swh/objstorage/api/server.py swh/objstorage/cloud/__init__.py swh/objstorage/cloud/objstorage_cloud.py swh/objstorage/multiplexer/__init__.py swh/objstorage/multiplexer/multiplexer_objstorage.py swh/objstorage/multiplexer/filter/__init__.py swh/objstorage/multiplexer/filter/filter.py swh/objstorage/multiplexer/filter/id_filter.py swh/objstorage/multiplexer/filter/read_write_filter.py swh/objstorage/tests/objstorage_testing.py swh/objstorage/tests/server_testing.py swh/objstorage/tests/test_checker.py swh/objstorage/tests/test_multiplexer_filter.py swh/objstorage/tests/test_objstorage_api.py +swh/objstorage/tests/test_objstorage_cloud.py swh/objstorage/tests/test_objstorage_instantiation.py swh/objstorage/tests/test_objstorage_multiplexer.py swh/objstorage/tests/test_objstorage_pathslicing.py \ No newline at end of file diff --git a/swh.objstorage.egg-info/requires.txt b/swh.objstorage.egg-info/requires.txt index 921badb..5b39dd3 100644 --- a/swh.objstorage.egg-info/requires.txt +++ b/swh.objstorage.egg-info/requires.txt @@ -1,6 +1,7 @@ +apache-libcloud click flask requests swh.core>=0.0.20 swh.storage.archiver>=0.0.52 vcversioner diff --git a/swh/objstorage/cloud/__init__.py b/swh/objstorage/cloud/__init__.py index bee9658..f9965e4 100644 --- a/swh/objstorage/cloud/__init__.py +++ b/swh/objstorage/cloud/__init__.py @@ -1,3 +1,3 @@ -from objstorage_cloud import AwsCloudObjStorage, OpenStackCloudObjStorage +from .objstorage_cloud import AwsCloudObjStorage, OpenStackCloudObjStorage __all__ = ['AwsCloudObjStorage', 'OpenStackCloudObjStorage'] diff --git a/swh/objstorage/cloud/objstorage_cloud.py b/swh/objstorage/cloud/objstorage_cloud.py index b9384a7..6d169d6 100644 --- a/swh/objstorage/cloud/objstorage_cloud.py +++ b/swh/objstorage/cloud/objstorage_cloud.py @@ -1,150 +1,158 @@ # Copyright (C) 2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import abc -from ..objstorage import ObjStorage, compute_hash -from ..exc import ObjNotFoundError, Error - from swh.core import hashutil +from swh.objstorage.objstorage import ObjStorage, compute_hash +from swh.objstorage.exc import ObjNotFoundError, Error from libcloud.storage import providers from libcloud.storage.types import Provider, ObjectDoesNotExistError class CloudObjStorage(ObjStorage, metaclass=abc.ABCMeta): - """ Abstract ObjStorage that allows connection to a cloud using Libcloud + """Abstract ObjStorage that connect to a cloud using Libcloud - Implementations of this class must redefine the _get_provider method to - make it return a driver provider (i.e. object that supports `get_driver` - method) which return a LibCloud driver - (see https://libcloud.readthedocs.io/en/latest/storage/api.html). - """ + Implementations of this class must redefine the _get_provider + method to make it return a driver provider (i.e. object that + supports `get_driver` method) which return a LibCloud driver (see + https://libcloud.readthedocs.io/en/latest/storage/api.html). + """ def __init__(self, api_key, api_secret_key, container_name): self.driver = self._get_driver(api_key, api_secret_key) self.container_name = container_name self.container = self.driver.get_container( container_name=container_name) def _get_driver(self, api_key, api_secret_key): - """ Initialize a driver to communicate with the cloud + """Initialize a driver to communicate with the cloud Args: api_key: key to connect to the API. api_secret_key: secret key for authentification. Returns: a Libcloud driver to a cloud storage. + """ # Get the driver class from its description. cls = providers.get_driver(self._get_provider()) # Initialize the driver. return cls(api_key, api_secret_key) @abc.abstractmethod def _get_provider(self): - """ Get a libcloud driver provider + """Get a libcloud driver provider + + This method must be overriden by subclasses to specify which + of the native libcloud driver the current storage should + connect to. Alternatively, provider for a custom driver may + be returned, in which case the provider will have tu support + `get_driver` method. - This method must be overriden by subclasses to specify which of the - native libcloud driver the current storage should connect to. - Alternatively, provider for a custom driver may be returned, in which - case the provider will have tu support `get_driver` method. """ raise NotImplementedError('%s must implement `get_provider` method' % type(self)) def __contains__(self, obj_id): try: self._get_object(obj_id) - except ObjectDoesNotExistError: + except ObjNotFoundError: return False else: return True def __iter__(self): """ Iterate over the objects present in the storage Warning: Iteration over the contents of a cloud-based object storage may have bad efficiency: due to the very high amount of objects in it and the fact that it is remote, get all the contents of the current object storage may result in a lot of network requests. You almost certainly don't want to use this method in production. """ yield from map(lambda obj: obj.name, self.driver.iterate_container_objects(self.container)) def __len__(self): - """ Compute the number of objects in the current object storage. + """Compute the number of objects in the current object storage. Warning: this currently uses `__iter__`, its warning about bad performance applies. Returns: number of objects contained in the storage. + """ return sum(1 for i in self) def add(self, content, obj_id=None, check_presence=True): if obj_id is None: # Checksum is missing, compute it on the fly. obj_id = compute_hash(content) if check_presence and obj_id in self: return obj_id self._put_object(content, obj_id) return obj_id def restore(self, content, obj_id=None): - return self.add(content, obj_id, chech_presence=False) + return self.add(content, obj_id, check_presence=False) def get(self, obj_id): return bytes(self._get_object(obj_id).as_stream()) def check(self, obj_id): # Check that the file exists, as _get_object raises ObjNotFoundError self._get_object(obj_id) # Check the content integrity obj_content = self.get(obj_id) content_obj_id = compute_hash(obj_content) if content_obj_id != obj_id: raise Error(obj_id) def _get_object(self, obj_id): - """ Get a Libcloud wrapper for an object pointer. + """Get a Libcloud wrapper for an object pointer. + + This wrapper does not retrieve the content of the object + directly. - This wrapper does not retrieve the content of the object direclty. """ hex_obj_id = hashutil.hash_to_hex(obj_id) try: return self.driver.get_object(self.container_name, hex_obj_id) except ObjectDoesNotExistError as e: raise ObjNotFoundError(e.object_name) def _put_object(self, content, obj_id): - """ Create an object in the cloud storage. + """Create an object in the cloud storage. + + Created object will contains the content and be referenced by + the given id. - Created object will contains the content and be referenced by the - given id. """ hex_obj_id = hashutil.hash_to_hex(obj_id) self.driver.upload_object_via_stream(iter(content), self.container, hex_obj_id) class AwsCloudObjStorage(CloudObjStorage): - """ Cloud-based object storage that works with Amazon's S3 + """ Amazon's S3 Cloud-based object storage + """ def _get_provider(self): return Provider.S3 class OpenStackCloudObjStorage(CloudObjStorage): - """ Cloud-based object storage based on OpenStack Swift + """ OpenStack Swift Cloud based object storage + """ def _get_provider(self): return Provider.OPENSTACK_SWIFT diff --git a/swh/objstorage/tests/test_objstorage_cloud.py b/swh/objstorage/tests/test_objstorage_cloud.py new file mode 100644 index 0000000..7715032 --- /dev/null +++ b/swh/objstorage/tests/test_objstorage_cloud.py @@ -0,0 +1,87 @@ +# Copyright (C) 2016 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import unittest + +from swh.objstorage.cloud.objstorage_cloud import CloudObjStorage +from libcloud.storage.types import (ObjectDoesNotExistError, + ContainerDoesNotExistError) +from libcloud.common.types import InvalidCredsError + +from objstorage_testing import ObjStorageTestFixture + + +API_KEY = 'API_KEY' +API_SECRET_KEY = 'API SECRET KEY' +CONTAINER_NAME = 'test_container' + + +class MockLibcloudObject(): + """ Libcloud object mock that replicates its API """ + def __init__(self, name, content): + self.name = name + self.content = list(content) + + def as_stream(self): + yield from iter(self.content) + + +class MockLibcloudDriver(): + """ Mock driver that replicates the used LibCloud API """ + def __init__(self, api_key, api_secret_key): + self.containers = {CONTAINER_NAME: {}} # Storage is initialized + self.api_key = api_key + self.api_secret_key = api_secret_key + + def _check_credentials(self): + # Private method may be known as another name in Libcloud but is used + # to replicate libcloud behavior (i.e. check credential at each + # request) + if self.api_key != API_KEY or self.api_secret_key != API_SECRET_KEY: + raise InvalidCredsError() + + def get_container(self, container_name): + try: + return self.containers[container_name] + except KeyError: + raise ContainerDoesNotExistError(container_name=container_name, + driver=self, value=None) + + def iterate_container_objects(self, container): + self._check_credentials() + yield from container.values() + + def get_object(self, container_name, obj_id): + self._check_credentials() + try: + container = self.get_container(container_name) + return container[obj_id] + except KeyError: + raise ObjectDoesNotExistError(object_name=obj_id, + driver=self, value=None) + + def upload_object_via_stream(self, content, container, obj_id): + self._check_credentials() + obj = MockLibcloudObject(obj_id, content) + container[obj_id] = obj + + +class MockCloudObjStorage(CloudObjStorage): + """ Cloud object storage that uses a mocked driver """ + def _get_driver(self, api_key, api_secret_key): + return MockLibcloudDriver(api_key, api_secret_key) + + def _get_provider(self): + # Implement this for the abc requirement, but behavior is defined in + # _get_driver. + pass + + +class TestCloudObjStorage(ObjStorageTestFixture, unittest.TestCase): + + def setUp(self): + super().setUp() + self.storage = MockCloudObjStorage(API_KEY, API_SECRET_KEY, + CONTAINER_NAME) diff --git a/version.txt b/version.txt index d0c7b00..79d2ebe 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.7-0-gd729254 \ No newline at end of file +v0.0.8-0-gea7ba18 \ No newline at end of file