Page Menu | Home | Software Heritage

D2045.diff
No One | Temporary

D2045.diff

diff --git a/swh/objstorage/backends/libcloud.py b/swh/objstorage/backends/libcloud.py
--- a/swh/objstorage/backends/libcloud.py
+++ b/swh/objstorage/backends/libcloud.py
@@ -142,7 +142,12 @@
def get(self, obj_id):
obj = b''.join(self._get_object(obj_id).as_stream())
- return decompressors[self.compression](obj)
+ d = decompressors[self.compression]()
+ ret = d.decompress(obj)
+ if d.unused_data:
+ hex_obj_id = hashutil.hash_to_hex(obj_id)
+ raise Error('Corrupt object %s: trailing data found' % hex_obj_id)
+ return ret
def check(self, obj_id):
# Check that the file exists, as _get_object raises ObjNotFoundError
diff --git a/swh/objstorage/backends/seaweed.py b/swh/objstorage/backends/seaweed.py
--- a/swh/objstorage/backends/seaweed.py
+++ b/swh/objstorage/backends/seaweed.py
@@ -149,10 +149,16 @@
def get(self, obj_id):
try:
obj = self.wf.get(self._path(obj_id))
- return decompressors[self.compression](obj)
except Exception:
raise ObjNotFoundError(obj_id)
+ d = decompressors[self.compression]()
+ ret = d.decompress(obj)
+ if d.unused_data:
+ hex_obj_id = hashutil.hash_to_hex(obj_id)
+ raise Error('Corrupt object %s: trailing data found' % hex_obj_id)
+ return ret
+
def check(self, obj_id):
# Check the content integrity
obj_content = self.get(obj_id)
diff --git a/swh/objstorage/objstorage.py b/swh/objstorage/objstorage.py
--- a/swh/objstorage/objstorage.py
+++ b/swh/objstorage/objstorage.py
@@ -6,7 +6,6 @@
import abc
from itertools import dropwhile, islice
import bz2
-import gzip
import lzma
import zlib
@@ -46,13 +45,22 @@
return b''
+class NullDecompressor:
+ def decompress(self, data):
+ return data
+
+ @property
+ def unused_data(self):
+ return b''
+
+
decompressors = {
- 'bz2': bz2.decompress,
- 'lzma': lzma.decompress,
- 'gzip': gzip.decompress,
- 'zlib': zlib.decompress,
- None: lambda x: x,
- }
+ 'bz2': bz2.BZ2Decompressor,
+ 'lzma': lzma.LZMADecompressor,
+ 'gzip': lambda: zlib.decompressobj(wbits=31),
+ 'zlib': zlib.decompressobj,
+ None: NullDecompressor,
+}
compressors = {
'bz2': bz2.BZ2Compressor,
diff --git a/swh/objstorage/tests/test_objstorage_cloud.py b/swh/objstorage/tests/test_objstorage_cloud.py
--- a/swh/objstorage/tests/test_objstorage_cloud.py
+++ b/swh/objstorage/tests/test_objstorage_cloud.py
@@ -111,7 +111,9 @@
obj_id = hashutil.hash_to_hex(obj_id)
raw_content = b''.join(data[obj_id].content)
- assert decompressors[self.compression](raw_content) == content
+ d = decompressors[self.compression]()
+ assert d.decompress(raw_content) == content
+ assert d.unused_data == b''
class TestCloudObjStorageBz2(TestCloudObjStorage):

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 12:21 PM (2 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219977

Event Timeline