Page Menu | Home | Software Heritage

D2047.id6896.diff
No One | Temporary

D2047.id6896.diff

diff --git a/swh/objstorage/backends/azure.py b/swh/objstorage/backends/azure.py
--- a/swh/objstorage/backends/azure.py
+++ b/swh/objstorage/backends/azure.py
@@ -3,7 +3,6 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import gzip
import string
from itertools import dropwhile, islice, product
@@ -11,7 +10,8 @@
from azure.common import AzureMissingResourceHttpError
import requests
-from swh.objstorage.objstorage import ObjStorage, compute_hash, DEFAULT_LIMIT
+from swh.objstorage.objstorage import (ObjStorage, compute_hash, DEFAULT_LIMIT,
+ compressors, decompressors)
from swh.objstorage.exc import ObjNotFoundError, Error
from swh.model import hashutil
@@ -20,7 +20,8 @@
"""ObjStorage with azure abilities.
"""
- def __init__(self, account_name, api_secret_key, container_name, **kwargs):
+ def __init__(self, account_name, api_secret_key, container_name,
+ compression='gzip', **kwargs):
super().__init__(**kwargs)
self.block_blob_service = BlockBlobService(
account_name=account_name,
@@ -28,6 +29,7 @@
request_session=requests.Session(),
)
self.container_name = container_name
+ self.compression = compression
def get_blob_service(self, hex_obj_id):
"""Get the block_blob_service and container that contains the object with
@@ -96,12 +98,16 @@
hex_obj_id = self._internal_id(obj_id)
- # Send the gzipped content
+ # Send the compressed content
+ compressor = compressors[self.compression]()
+ blob = [compressor.compress(content), compressor.flush()]
+
service, container = self.get_blob_service(hex_obj_id)
service.create_blob_from_bytes(
container_name=container,
blob_name=hex_obj_id,
- blob=gzip.compress(content))
+ blob=b''.join(blob),
+ )
return obj_id
@@ -124,7 +130,11 @@
except AzureMissingResourceHttpError:
raise ObjNotFoundError(obj_id)
- return gzip.decompress(blob.content)
+ decompressor = decompressors[self.compression]()
+ ret = decompressor.decompress(blob.content)
+ if decompressor.unused_data:
+ raise Error('Corrupt object %s: trailing data found' % hex_obj_id)
+ return ret
def check(self, obj_id):
"""Check the content integrity.
@@ -177,10 +187,12 @@
api_secret_key: <api_secret_key>
container_name: <container_name>
"""
- def __init__(self, accounts, **kwargs):
+ def __init__(self, accounts, compression='gzip', **kwargs):
# shortcut AzureCloudObjStorage __init__
ObjStorage.__init__(self, **kwargs)
+ self.compression = compression
+
# Definition sanity check
prefix_lengths = set(len(prefix) for prefix in accounts)
if not len(prefix_lengths) == 1:
diff --git a/swh/objstorage/tests/test_objstorage_azure.py b/swh/objstorage/tests/test_objstorage_azure.py
--- a/swh/objstorage/tests/test_objstorage_azure.py
+++ b/swh/objstorage/tests/test_objstorage_azure.py
@@ -7,11 +7,13 @@
from collections import defaultdict
from unittest.mock import patch
-from typing import Any, Dict
+from typing import Any, Dict, Optional
from azure.common import AzureMissingResourceHttpError
from swh.model.hashutil import hash_to_hex
+
from swh.objstorage import get_objstorage
+from swh.objstorage.objstorage import decompressors
from .objstorage_testing import ObjStorageTestFixture
@@ -66,6 +68,7 @@
class TestAzureCloudObjStorage(ObjStorageTestFixture, unittest.TestCase):
+ compression = None # type: Optional[str]
def setUp(self):
super().setUp()
@@ -80,8 +83,38 @@
'account_name': 'account-name',
'api_secret_key': 'api-secret-key',
'container_name': 'container-name',
+ 'compression': self.compression,
})
+ def test_compression(self):
+ content, obj_id = self.hash_content(b'test content is compressed')
+ self.storage.add(content, obj_id=obj_id)
+
+ blob_service, container = self.storage.get_blob_service(obj_id)
+ internal_id = self.storage._internal_id(obj_id)
+
+ raw_blob = blob_service.get_blob_to_bytes(container, internal_id)
+
+ d = decompressors[self.compression]()
+ assert d.decompress(raw_blob.content) == content
+ assert d.unused_data == b''
+
+
+class TestAzureCloudObjStorageGzip(TestAzureCloudObjStorage):
+ compression = 'gzip'
+
+
+class TestAzureCloudObjStorageZlib(TestAzureCloudObjStorage):
+ compression = 'zlib'
+
+
+class TestAzureCloudObjStorageLzma(TestAzureCloudObjStorage):
+ compression = 'lzma'
+
+
+class TestAzureCloudObjStorageBz2(TestAzureCloudObjStorage):
+ compression = 'bz2'
+
class TestPrefixedAzureCloudObjStorage(ObjStorageTestFixture,
unittest.TestCase):

File Metadata

Mime Type
text/plain
Expires
Jul 3 2025, 7:42 AM (10 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218613

Event Timeline