diff --git a/swh/objstorage/backends/azure.py b/swh/objstorage/backends/azure.py --- a/swh/objstorage/backends/azure.py +++ b/swh/objstorage/backends/azure.py @@ -3,7 +3,6 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import gzip import string from itertools import dropwhile, islice, product @@ -11,7 +10,8 @@ from azure.common import AzureMissingResourceHttpError import requests -from swh.objstorage.objstorage import ObjStorage, compute_hash, DEFAULT_LIMIT +from swh.objstorage.objstorage import (ObjStorage, compute_hash, DEFAULT_LIMIT, + compressors, decompressors) from swh.objstorage.exc import ObjNotFoundError, Error from swh.model import hashutil @@ -20,7 +20,8 @@ """ObjStorage with azure abilities. """ - def __init__(self, account_name, api_secret_key, container_name, **kwargs): + def __init__(self, account_name, api_secret_key, container_name, + compression='gzip', **kwargs): super().__init__(**kwargs) self.block_blob_service = BlockBlobService( account_name=account_name, @@ -28,6 +29,7 @@ request_session=requests.Session(), ) self.container_name = container_name + self.compression = compression def get_blob_service(self, hex_obj_id): """Get the block_blob_service and container that contains the object with @@ -96,12 +98,16 @@ hex_obj_id = self._internal_id(obj_id) - # Send the gzipped content + # Send the compressed content + compressor = compressors[self.compression]() + blob = [compressor.compress(content), compressor.flush()] + service, container = self.get_blob_service(hex_obj_id) service.create_blob_from_bytes( container_name=container, blob_name=hex_obj_id, - blob=gzip.compress(content)) + blob=b''.join(blob), + ) return obj_id @@ -124,7 +130,11 @@ except AzureMissingResourceHttpError: raise ObjNotFoundError(obj_id) - return gzip.decompress(blob.content) + decompressor = decompressors[self.compression]() + ret = decompressor.decompress(blob.content) + if decompressor.unused_data: + raise Error('Corrupt object %s: trailing data found' % hex_obj_id) + return ret def check(self, obj_id): """Check the content integrity. @@ -177,10 +187,12 @@ api_secret_key: container_name: """ - def __init__(self, accounts, **kwargs): + def __init__(self, accounts, compression='gzip', **kwargs): # shortcut AzureCloudObjStorage __init__ ObjStorage.__init__(self, **kwargs) + self.compression = compression + # Definition sanity check prefix_lengths = set(len(prefix) for prefix in accounts) if not len(prefix_lengths) == 1: diff --git a/swh/objstorage/tests/test_objstorage_azure.py b/swh/objstorage/tests/test_objstorage_azure.py --- a/swh/objstorage/tests/test_objstorage_azure.py +++ b/swh/objstorage/tests/test_objstorage_azure.py @@ -7,11 +7,13 @@ from collections import defaultdict from unittest.mock import patch -from typing import Any, Dict +from typing import Any, Dict, Optional from azure.common import AzureMissingResourceHttpError from swh.model.hashutil import hash_to_hex + from swh.objstorage import get_objstorage +from swh.objstorage.objstorage import decompressors from .objstorage_testing import ObjStorageTestFixture @@ -66,6 +68,7 @@ class TestAzureCloudObjStorage(ObjStorageTestFixture, unittest.TestCase): + compression = None # type: Optional[str] def setUp(self): super().setUp() @@ -80,8 +83,38 @@ 'account_name': 'account-name', 'api_secret_key': 'api-secret-key', 'container_name': 'container-name', + 'compression': self.compression, }) + def test_compression(self): + content, obj_id = self.hash_content(b'test content is compressed') + self.storage.add(content, obj_id=obj_id) + + blob_service, container = self.storage.get_blob_service(obj_id) + internal_id = self.storage._internal_id(obj_id) + + raw_blob = blob_service.get_blob_to_bytes(container, internal_id) + + d = decompressors[self.compression]() + assert d.decompress(raw_blob.content) == content + assert d.unused_data == b'' + + +class TestAzureCloudObjStorageGzip(TestAzureCloudObjStorage): + compression = 'gzip' + + +class TestAzureCloudObjStorageZlib(TestAzureCloudObjStorage): + compression = 'zlib' + + +class TestAzureCloudObjStorageLzma(TestAzureCloudObjStorage): + compression = 'lzma' + + +class TestAzureCloudObjStorageBz2(TestAzureCloudObjStorage): + compression = 'bz2' + class TestPrefixedAzureCloudObjStorage(ObjStorageTestFixture, unittest.TestCase):