Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9336757
D2047.id6896.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D2047.id6896.diff
View Options
diff --git a/swh/objstorage/backends/azure.py b/swh/objstorage/backends/azure.py
--- a/swh/objstorage/backends/azure.py
+++ b/swh/objstorage/backends/azure.py
@@ -3,7 +3,6 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import gzip
import string
from itertools import dropwhile, islice, product
@@ -11,7 +10,8 @@
from azure.common import AzureMissingResourceHttpError
import requests
-from swh.objstorage.objstorage import ObjStorage, compute_hash, DEFAULT_LIMIT
+from swh.objstorage.objstorage import (ObjStorage, compute_hash, DEFAULT_LIMIT,
+ compressors, decompressors)
from swh.objstorage.exc import ObjNotFoundError, Error
from swh.model import hashutil
@@ -20,7 +20,8 @@
"""ObjStorage with azure abilities.
"""
- def __init__(self, account_name, api_secret_key, container_name, **kwargs):
+ def __init__(self, account_name, api_secret_key, container_name,
+ compression='gzip', **kwargs):
super().__init__(**kwargs)
self.block_blob_service = BlockBlobService(
account_name=account_name,
@@ -28,6 +29,7 @@
request_session=requests.Session(),
)
self.container_name = container_name
+ self.compression = compression
def get_blob_service(self, hex_obj_id):
"""Get the block_blob_service and container that contains the object with
@@ -96,12 +98,16 @@
hex_obj_id = self._internal_id(obj_id)
- # Send the gzipped content
+ # Send the compressed content
+ compressor = compressors[self.compression]()
+ blob = [compressor.compress(content), compressor.flush()]
+
service, container = self.get_blob_service(hex_obj_id)
service.create_blob_from_bytes(
container_name=container,
blob_name=hex_obj_id,
- blob=gzip.compress(content))
+ blob=b''.join(blob),
+ )
return obj_id
@@ -124,7 +130,11 @@
except AzureMissingResourceHttpError:
raise ObjNotFoundError(obj_id)
- return gzip.decompress(blob.content)
+ decompressor = decompressors[self.compression]()
+ ret = decompressor.decompress(blob.content)
+ if decompressor.unused_data:
+ raise Error('Corrupt object %s: trailing data found' % hex_obj_id)
+ return ret
def check(self, obj_id):
"""Check the content integrity.
@@ -177,10 +187,12 @@
api_secret_key: <api_secret_key>
container_name: <container_name>
"""
- def __init__(self, accounts, **kwargs):
+ def __init__(self, accounts, compression='gzip', **kwargs):
# shortcut AzureCloudObjStorage __init__
ObjStorage.__init__(self, **kwargs)
+ self.compression = compression
+
# Definition sanity check
prefix_lengths = set(len(prefix) for prefix in accounts)
if not len(prefix_lengths) == 1:
diff --git a/swh/objstorage/tests/test_objstorage_azure.py b/swh/objstorage/tests/test_objstorage_azure.py
--- a/swh/objstorage/tests/test_objstorage_azure.py
+++ b/swh/objstorage/tests/test_objstorage_azure.py
@@ -7,11 +7,13 @@
from collections import defaultdict
from unittest.mock import patch
-from typing import Any, Dict
+from typing import Any, Dict, Optional
from azure.common import AzureMissingResourceHttpError
from swh.model.hashutil import hash_to_hex
+
from swh.objstorage import get_objstorage
+from swh.objstorage.objstorage import decompressors
from .objstorage_testing import ObjStorageTestFixture
@@ -66,6 +68,7 @@
class TestAzureCloudObjStorage(ObjStorageTestFixture, unittest.TestCase):
+ compression = None # type: Optional[str]
def setUp(self):
super().setUp()
@@ -80,8 +83,38 @@
'account_name': 'account-name',
'api_secret_key': 'api-secret-key',
'container_name': 'container-name',
+ 'compression': self.compression,
})
+ def test_compression(self):
+ content, obj_id = self.hash_content(b'test content is compressed')
+ self.storage.add(content, obj_id=obj_id)
+
+ blob_service, container = self.storage.get_blob_service(obj_id)
+ internal_id = self.storage._internal_id(obj_id)
+
+ raw_blob = blob_service.get_blob_to_bytes(container, internal_id)
+
+ d = decompressors[self.compression]()
+ assert d.decompress(raw_blob.content) == content
+ assert d.unused_data == b''
+
+
+class TestAzureCloudObjStorageGzip(TestAzureCloudObjStorage):
+ compression = 'gzip'
+
+
+class TestAzureCloudObjStorageZlib(TestAzureCloudObjStorage):
+ compression = 'zlib'
+
+
+class TestAzureCloudObjStorageLzma(TestAzureCloudObjStorage):
+ compression = 'lzma'
+
+
+class TestAzureCloudObjStorageBz2(TestAzureCloudObjStorage):
+ compression = 'bz2'
+
class TestPrefixedAzureCloudObjStorage(ObjStorageTestFixture,
unittest.TestCase):
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Jul 3 2025, 7:42 AM (10 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218613
Attached To
D2047: Support pluggable compression in azure objstorage
Event Timeline
Log In to Comment