diff --git a/swh/objstorage/tests/test_objstorage_azure.py b/swh/objstorage/tests/test_objstorage_azure.py index 69bc383..3ea1e14 100644 --- a/swh/objstorage/tests/test_objstorage_azure.py +++ b/swh/objstorage/tests/test_objstorage_azure.py @@ -1,168 +1,186 @@ # Copyright (C) 2016-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from collections import defaultdict from unittest.mock import patch from typing import Any, Dict, Optional from azure.common import AzureMissingResourceHttpError from swh.model.hashutil import hash_to_hex from swh.objstorage import get_objstorage from swh.objstorage.objstorage import decompressors +from swh.objstorage.exc import Error from .objstorage_testing import ObjStorageTestFixture class MockBlob(): """ Libcloud object mock that replicates its API """ def __init__(self, name, content): self.name = name self.content = content class MockBlockBlobService(): """Mock internal azure library which AzureCloudObjStorage depends upon. """ _data = {} # type: Dict[str, Any] def __init__(self, account_name, account_key, **kwargs): # do not care for the account_name and the api_secret_key here self._data = defaultdict(dict) def get_container_properties(self, container_name): self._data[container_name] return container_name in self._data def create_blob_from_bytes(self, container_name, blob_name, blob): self._data[container_name][blob_name] = blob def get_blob_to_bytes(self, container_name, blob_name): if blob_name not in self._data[container_name]: raise AzureMissingResourceHttpError( 'Blob %s not found' % blob_name, 404) return MockBlob(name=blob_name, content=self._data[container_name][blob_name]) def delete_blob(self, container_name, blob_name): try: self._data[container_name].pop(blob_name) except KeyError: raise AzureMissingResourceHttpError( 'Blob %s not found' % blob_name, 404) return True def exists(self, container_name, blob_name): return blob_name in self._data[container_name] def list_blobs(self, container_name, marker=None, maxresults=None): for blob_name, content in sorted(self._data[container_name].items()): if marker is None or blob_name > marker: yield MockBlob(name=blob_name, content=content) class TestAzureCloudObjStorage(ObjStorageTestFixture, unittest.TestCase): compression = None # type: Optional[str] def setUp(self): super().setUp() patcher = patch( 'swh.objstorage.backends.azure.BlockBlobService', MockBlockBlobService, ) patcher.start() self.addCleanup(patcher.stop) self.storage = get_objstorage('azure', { 'account_name': 'account-name', 'api_secret_key': 'api-secret-key', 'container_name': 'container-name', 'compression': self.compression, }) def test_compression(self): content, obj_id = self.hash_content(b'test content is compressed') self.storage.add(content, obj_id=obj_id) blob_service, container = self.storage.get_blob_service(obj_id) internal_id = self.storage._internal_id(obj_id) raw_blob = blob_service.get_blob_to_bytes(container, internal_id) d = decompressors[self.compression]() assert d.decompress(raw_blob.content) == content assert d.unused_data == b'' + def test_trailing_data_on_stored_blob(self): + content, obj_id = self.hash_content(b'test content without garbage') + self.storage.add(content, obj_id=obj_id) + + blob_service, container = self.storage.get_blob_service(obj_id) + internal_id = self.storage._internal_id(obj_id) + + blob_service._data[container][internal_id] += b'trailing garbage' + + if self.compression is not None: + with self.assertRaises(Error) as e: + self.storage.get(obj_id) + assert 'trailing data' in e.exception.args[0] + else: + with self.assertRaises(Error) as e: + self.storage.check(obj_id) + class TestAzureCloudObjStorageGzip(TestAzureCloudObjStorage): compression = 'gzip' class TestAzureCloudObjStorageZlib(TestAzureCloudObjStorage): compression = 'zlib' class TestAzureCloudObjStorageLzma(TestAzureCloudObjStorage): compression = 'lzma' class TestAzureCloudObjStorageBz2(TestAzureCloudObjStorage): compression = 'bz2' class TestPrefixedAzureCloudObjStorage(ObjStorageTestFixture, unittest.TestCase): def setUp(self): super().setUp() patcher = patch( 'swh.objstorage.backends.azure.BlockBlobService', MockBlockBlobService, ) patcher.start() self.addCleanup(patcher.stop) self.accounts = {} for prefix in '0123456789abcdef': self.accounts[prefix] = { 'account_name': 'account_%s' % prefix, 'api_secret_key': 'secret_key_%s' % prefix, 'container_name': 'container_%s' % prefix, } self.storage = get_objstorage('azure-prefixed', { 'accounts': self.accounts }) def test_prefixedazure_instantiation_missing_prefixes(self): del self.accounts['d'] del self.accounts['e'] with self.assertRaisesRegex(ValueError, 'Missing prefixes'): get_objstorage('azure-prefixed', { 'accounts': self.accounts }) def test_prefixedazure_instantiation_inconsistent_prefixes(self): self.accounts['00'] = self.accounts['0'] with self.assertRaisesRegex(ValueError, 'Inconsistent prefixes'): get_objstorage('azure-prefixed', { 'accounts': self.accounts }) def test_prefixedazure_sharding_behavior(self): for i in range(100): content, obj_id = self.hash_content(b'test_content_%02d' % i) self.storage.add(content, obj_id=obj_id) hex_obj_id = hash_to_hex(obj_id) prefix = hex_obj_id[0] self.assertTrue( self.storage.prefixes[prefix][0].exists( self.accounts[prefix]['container_name'], hex_obj_id )) diff --git a/swh/objstorage/tests/test_objstorage_cloud.py b/swh/objstorage/tests/test_objstorage_cloud.py index 1461528..7cab501 100644 --- a/swh/objstorage/tests/test_objstorage_cloud.py +++ b/swh/objstorage/tests/test_objstorage_cloud.py @@ -1,132 +1,150 @@ # Copyright (C) 2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from libcloud.common.types import InvalidCredsError from libcloud.storage.types import (ContainerDoesNotExistError, ObjectDoesNotExistError) from typing import Optional from swh.model import hashutil from swh.objstorage.objstorage import decompressors +from swh.objstorage.exc import Error from swh.objstorage.backends.libcloud import CloudObjStorage from .objstorage_testing import ObjStorageTestFixture API_KEY = 'API_KEY' API_SECRET_KEY = 'API SECRET KEY' CONTAINER_NAME = 'test_container' class MockLibcloudObject(): """ Libcloud object mock that replicates its API """ def __init__(self, name, content): self.name = name self.content = list(content) def as_stream(self): yield from iter(self.content) class MockLibcloudDriver(): """ Mock driver that replicates the used LibCloud API """ def __init__(self, api_key, api_secret_key): self.containers = {CONTAINER_NAME: {}} # Storage is initialized self.api_key = api_key self.api_secret_key = api_secret_key def _check_credentials(self): # Private method may be known as another name in Libcloud but is used # to replicate libcloud behavior (i.e. check credential at each # request) if self.api_key != API_KEY or self.api_secret_key != API_SECRET_KEY: raise InvalidCredsError() def get_container(self, container_name): try: return self.containers[container_name] except KeyError: raise ContainerDoesNotExistError(container_name=container_name, driver=self, value=None) def iterate_container_objects(self, container): self._check_credentials() yield from (v for k, v in sorted(container.items())) def get_object(self, container_name, obj_id): self._check_credentials() try: container = self.get_container(container_name) return container[obj_id] except KeyError: raise ObjectDoesNotExistError(object_name=obj_id, driver=self, value=None) def delete_object(self, obj): self._check_credentials() try: container = self.get_container(CONTAINER_NAME) container.pop(obj.name) return True except KeyError: raise ObjectDoesNotExistError(object_name=obj.name, driver=self, value=None) def upload_object_via_stream(self, content, container, obj_id): self._check_credentials() obj = MockLibcloudObject(obj_id, content) container[obj_id] = obj class MockCloudObjStorage(CloudObjStorage): """ Cloud object storage that uses a mocked driver """ def _get_driver(self, **kwargs): return MockLibcloudDriver(**kwargs) def _get_provider(self): # Implement this for the abc requirement, but behavior is defined in # _get_driver. pass class TestCloudObjStorage(ObjStorageTestFixture, unittest.TestCase): compression = None # type: Optional[str] def setUp(self): super().setUp() self.storage = MockCloudObjStorage( CONTAINER_NAME, api_key=API_KEY, api_secret_key=API_SECRET_KEY, compression=self.compression, ) def test_compression(self): content, obj_id = self.hash_content(b'add_get_w_id') self.storage.add(content, obj_id=obj_id) data = self.storage.driver.containers[CONTAINER_NAME] obj_id = hashutil.hash_to_hex(obj_id) raw_content = b''.join(data[obj_id].content) d = decompressors[self.compression]() assert d.decompress(raw_content) == content assert d.unused_data == b'' + def test_trailing_data_on_stored_blob(self): + content, obj_id = self.hash_content(b'test content without garbage') + self.storage.add(content, obj_id=obj_id) + + data = self.storage.driver.containers[CONTAINER_NAME] + obj_id = hashutil.hash_to_hex(obj_id) + + data[obj_id].content.append(b'trailing garbage') + + if self.compression is not None: + with self.assertRaises(Error) as e: + self.storage.get(obj_id) + assert 'trailing data' in e.exception.args[0] + else: + with self.assertRaises(Error) as e: + self.storage.check(obj_id) + class TestCloudObjStorageBz2(TestCloudObjStorage): compression = 'bz2' class TestCloudObjStorageGzip(TestCloudObjStorage): compression = 'gzip' class TestCloudObjStorageLzma(TestCloudObjStorage): compression = 'lzma' class TestCloudObjStorageZlib(TestCloudObjStorage): compression = 'zlib' diff --git a/swh/objstorage/tests/test_objstorage_seaweedfs.py b/swh/objstorage/tests/test_objstorage_seaweedfs.py index 036fcdc..d069676 100644 --- a/swh/objstorage/tests/test_objstorage_seaweedfs.py +++ b/swh/objstorage/tests/test_objstorage_seaweedfs.py @@ -1,77 +1,93 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from typing import Optional from swh.objstorage.objstorage import decompressors +from swh.objstorage.exc import Error from swh.objstorage.backends.seaweed import WeedObjStorage, DEFAULT_LIMIT from swh.objstorage.tests.objstorage_testing import ObjStorageTestFixture class MockWeedFiler: """ WeedFiler mock that replicates its API """ def __init__(self, url): self.url = url self.content = {} def get(self, remote_path): return self.content[remote_path] def put(self, fp, remote_path): self.content[remote_path] = fp.read() def exists(self, remote_path): return remote_path in self.content def delete(self, remote_path): del self.content[remote_path] def list(self, dir, last_file_name=None, limit=DEFAULT_LIMIT): keys = sorted(self.content.keys()) if last_file_name is None: idx = 0 else: idx = keys.index(last_file_name) + 1 return {'Entries': [{'FullPath': x} for x in keys[idx:idx+limit]]} class TestWeedObjStorage(ObjStorageTestFixture, unittest.TestCase): compression = None # type: Optional[str] def setUp(self): super().setUp() self.url = 'http://127.0.0.1/test' self.storage = WeedObjStorage(url=self.url, compression=self.compression) self.storage.wf = MockWeedFiler(self.url) def test_compression(self): content, obj_id = self.hash_content(b'test compression') self.storage.add(content, obj_id=obj_id) raw_content = self.storage.wf.get(self.storage._path(obj_id)) d = decompressors[self.compression]() assert d.decompress(raw_content) == content assert d.unused_data == b'' + def test_trailing_data_on_stored_blob(self): + content, obj_id = self.hash_content(b'test content without garbage') + self.storage.add(content, obj_id=obj_id) + + path = self.storage._path(obj_id) + self.storage.wf.content[path] += b'trailing garbage' + + if self.compression is not None: + with self.assertRaises(Error) as e: + self.storage.get(obj_id) + assert 'trailing data' in e.exception.args[0] + else: + with self.assertRaises(Error) as e: + self.storage.check(obj_id) + class TestWeedObjStorageBz2(TestWeedObjStorage): compression = 'bz2' class TestWeedObjStorageGzip(TestWeedObjStorage): compression = 'gzip' class TestWeedObjStorageLzma(TestWeedObjStorage): compression = 'lzma' class TestWeedObjStorageZlib(TestWeedObjStorage): compression = 'zlib'