def parse_requirements(name=None):
    """Read requirement specifiers from a requirements file.

    Args:
        name: optional suffix. When truthy, ``requirements-<name>.txt`` is
            read; otherwise ``requirements.txt``. The file name is relative
            to the current working directory.

    Returns:
        list of str: the stripped lines of the file, in file order, without
        blank lines and without lines starting with ``#``. An empty list
        when the file does not exist.
    """
    reqf = 'requirements-%s.txt' % name if name else 'requirements.txt'

    try:
        # Explicit encoding: the result must not depend on the build
        # machine's locale.
        f = open(reqf, encoding='utf-8')
    except FileNotFoundError:
        # A missing requirements file simply means "no requirements".
        return []

    with f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped
                if line and not line.startswith('#')]
__all__ = ['get_objstorage', 'ObjStorage']

# Registry used by get_objstorage(): maps a backend key to the class or
# factory function that builds the corresponding objstorage.
_STORAGE_CLASSES: Dict[
    str,
    Union[type, Callable[..., type]]
] = {
    'pathslicing': PathSlicingObjStorage,
    'remote': RemoteObjStorage,
    'memory': InMemoryObjStorage,
    'weed': WeedObjStorage,
    'random': RandomGeneratorObjStorage,
}

# Backend key -> first argument of the ImportError raised while importing
# that backend; read by get_objstorage() to build its error message.
_STORAGE_CLASSES_MISSING = {}

# The backends below are registered only when their module imports
# cleanly; otherwise the import failure is recorded instead.
try:
    from swh.objstorage.backends.azure import (
        AzureCloudObjStorage,
        PrefixedAzureCloudObjStorage,
    )
except ImportError as e:
    _STORAGE_CLASSES_MISSING.update(
        dict.fromkeys(('azure', 'azure-prefixed'), e.args[0]))
else:
    _STORAGE_CLASSES.update({
        'azure': AzureCloudObjStorage,
        'azure-prefixed': PrefixedAzureCloudObjStorage,
    })

try:
    from swh.objstorage.backends.rados import RADOSObjStorage
except ImportError as e:
    _STORAGE_CLASSES_MISSING['rados'] = e.args[0]
else:
    _STORAGE_CLASSES['rados'] = RADOSObjStorage

try:
    from swh.objstorage.backends.libcloud import (
        AwsCloudObjStorage,
        OpenStackCloudObjStorage,
    )
except ImportError as e:
    _STORAGE_CLASSES_MISSING.update(
        dict.fromkeys(('s3', 'swift'), e.args[0]))
else:
    _STORAGE_CLASSES.update({
        's3': AwsCloudObjStorage,
        'swift': OpenStackCloudObjStorage,
    })


def get_objstorage(cls, args):
    """Instantiate the objstorage registered under the key `cls`.

    Args:
        cls (str): key of the wanted implementation in the
            `_STORAGE_CLASSES` registry.
        args (dict): keyword arguments, expanded as-is into the call of
            the registered class or factory.

    Returns:
        whatever the registered class or factory returns when called
        with `**args`.

    Raises:
        ValueError: if `cls` is not a registered key. The message carries
            the import error recorded for that key when there is one,
            'unknown name' otherwise.

    """
    if cls not in _STORAGE_CLASSES:
        reason = _STORAGE_CLASSES_MISSING.get(cls, 'unknown name')
        raise ValueError(
            'Storage class {} is not available: {}'.format(cls, reason))

    factory = _STORAGE_CLASSES[cls]
    return factory(**args)


def _construct_filtered_objstorage(storage_conf, filters_conf):
    """Build the objstorage described by `storage_conf` and pass it,
    together with `filters_conf`, through add_filters()."""
    wrapped = get_objstorage(**storage_conf)
    return add_filters(wrapped, filters_conf)


def _construct_multiplexer_objstorage(objstorages):
    """Build one objstorage per configuration in `objstorages` and hand
    the resulting list to MultiplexerObjStorage."""
    backends = []
    for conf in objstorages:
        backends.append(get_objstorage(**conf))
    return MultiplexerObjStorage(backends)


def _construct_striping_objstorage(objstorages):
    """Build one objstorage per configuration in `objstorages` and hand
    the resulting list to StripingObjStorage."""
    backends = []
    for conf in objstorages:
        backends.append(get_objstorage(**conf))
    return StripingObjStorage(backends)


# Composite storages are exposed through the same registry as the plain
# backends, so get_objstorage() can build them recursively.
_STORAGE_CLASSES.update({
    'filtered': _construct_filtered_objstorage,
    'multiplexer': _construct_multiplexer_objstorage,
    'striping': _construct_striping_objstorage,
})
""" - _data = {} # type: Dict[str, Any] + _data: Dict[str, Any] = {} def __init__(self, account_name, account_key, **kwargs): # do not care for the account_name and the api_secret_key here self._data = defaultdict(dict) def get_container_properties(self, container_name): self._data[container_name] return container_name in self._data def create_blob_from_bytes(self, container_name, blob_name, blob): self._data[container_name][blob_name] = blob def get_blob_to_bytes(self, container_name, blob_name): if blob_name not in self._data[container_name]: raise AzureMissingResourceHttpError( 'Blob %s not found' % blob_name, 404) return MockBlob(name=blob_name, content=self._data[container_name][blob_name]) def delete_blob(self, container_name, blob_name): try: self._data[container_name].pop(blob_name) except KeyError: raise AzureMissingResourceHttpError( 'Blob %s not found' % blob_name, 404) return True def exists(self, container_name, blob_name): return blob_name in self._data[container_name] def list_blobs(self, container_name, marker=None, maxresults=None): for blob_name, content in sorted(self._data[container_name].items()): if marker is None or blob_name > marker: yield MockBlob(name=blob_name, content=content) class TestAzureCloudObjStorage(ObjStorageTestFixture, unittest.TestCase): compression = 'none' def setUp(self): super().setUp() patcher = patch( 'swh.objstorage.backends.azure.BlockBlobService', MockBlockBlobService, ) patcher.start() self.addCleanup(patcher.stop) self.storage = get_objstorage('azure', { 'account_name': 'account-name', 'api_secret_key': 'api-secret-key', 'container_name': 'container-name', 'compression': self.compression, }) def test_compression(self): content, obj_id = self.hash_content(b'test content is compressed') self.storage.add(content, obj_id=obj_id) blob_service, container = self.storage.get_blob_service(obj_id) internal_id = self.storage._internal_id(obj_id) raw_blob = blob_service.get_blob_to_bytes(container, internal_id) d = 
decompressors[self.compression]() assert d.decompress(raw_blob.content) == content assert d.unused_data == b'' def test_trailing_data_on_stored_blob(self): content, obj_id = self.hash_content(b'test content without garbage') self.storage.add(content, obj_id=obj_id) blob_service, container = self.storage.get_blob_service(obj_id) internal_id = self.storage._internal_id(obj_id) blob_service._data[container][internal_id] += b'trailing garbage' if self.compression == 'none': with self.assertRaises(Error) as e: self.storage.check(obj_id) else: with self.assertRaises(Error) as e: self.storage.get(obj_id) assert 'trailing data' in e.exception.args[0] class TestAzureCloudObjStorageGzip(TestAzureCloudObjStorage): compression = 'gzip' class TestAzureCloudObjStorageZlib(TestAzureCloudObjStorage): compression = 'zlib' class TestAzureCloudObjStorageLzma(TestAzureCloudObjStorage): compression = 'lzma' class TestAzureCloudObjStorageBz2(TestAzureCloudObjStorage): compression = 'bz2' class TestPrefixedAzureCloudObjStorage(ObjStorageTestFixture, unittest.TestCase): def setUp(self): super().setUp() patcher = patch( 'swh.objstorage.backends.azure.BlockBlobService', MockBlockBlobService, ) patcher.start() self.addCleanup(patcher.stop) self.accounts = {} for prefix in '0123456789abcdef': self.accounts[prefix] = { 'account_name': 'account_%s' % prefix, 'api_secret_key': 'secret_key_%s' % prefix, 'container_name': 'container_%s' % prefix, } self.storage = get_objstorage('azure-prefixed', { 'accounts': self.accounts }) def test_prefixedazure_instantiation_missing_prefixes(self): del self.accounts['d'] del self.accounts['e'] with self.assertRaisesRegex(ValueError, 'Missing prefixes'): get_objstorage('azure-prefixed', { 'accounts': self.accounts }) def test_prefixedazure_instantiation_inconsistent_prefixes(self): self.accounts['00'] = self.accounts['0'] with self.assertRaisesRegex(ValueError, 'Inconsistent prefixes'): get_objstorage('azure-prefixed', { 'accounts': self.accounts }) def 
test_prefixedazure_sharding_behavior(self): for i in range(100): content, obj_id = self.hash_content(b'test_content_%02d' % i) self.storage.add(content, obj_id=obj_id) hex_obj_id = hash_to_hex(obj_id) prefix = hex_obj_id[0] self.assertTrue( self.storage.prefixes[prefix][0].exists( self.accounts[prefix]['container_name'], hex_obj_id ))