diff --git a/swh/objstorage/api/client.py b/swh/objstorage/api/client.py --- a/swh/objstorage/api/client.py +++ b/swh/objstorage/api/client.py @@ -23,8 +23,8 @@ session: The session to send requests. """ - def __init__(self, url): - super().__init__(api_exception=ObjStorageAPIError, url=url) + def __init__(self, url, **kwargs): + super().__init__(api_exception=ObjStorageAPIError, url=url, **kwargs) def check_config(self, *, check_write): return self.post('check_config', {'check_write': check_write}) @@ -45,6 +45,10 @@ def check(self, obj_id): return self.post('content/check', {'obj_id': obj_id}) + def delete(self, obj_id): + super().delete(obj_id) + return self.post('content/delete', {'obj_id': obj_id}) + # Management methods def get_random(self, batch_size): diff --git a/swh/objstorage/api/server.py b/swh/objstorage/api/server.py --- a/swh/objstorage/api/server.py +++ b/swh/objstorage/api/server.py @@ -66,6 +66,12 @@ return encode_data(request.app['objstorage'].check(**req)) +@asyncio.coroutine +def delete(request): + req = yield from decode_request(request) + return encode_data(request.app['objstorage'].delete(**req)) + + # Management methods @asyncio.coroutine @@ -120,6 +126,7 @@ app.router.add_route('POST', '/content/get/batch', get_batch) app.router.add_route('POST', '/content/get/random', get_random_contents) app.router.add_route('POST', '/content/check', check) + app.router.add_route('POST', '/content/delete', delete) app.router.add_route('POST', '/content/add_stream/{hex_id}', add_stream) app.router.add_route('GET', '/content/get_stream/{hex_id}', get_stream) app.update(config) diff --git a/swh/objstorage/cloud/objstorage_azure.py b/swh/objstorage/cloud/objstorage_azure.py --- a/swh/objstorage/cloud/objstorage_azure.py +++ b/swh/objstorage/cloud/objstorage_azure.py @@ -17,7 +17,8 @@ """ObjStorage with azure abilities. """ - def __init__(self, account_name, api_secret_key, container_name): + def __init__(self, account_name, api_secret_key, container_name, **kwargs): + super().__init__(**kwargs) self.block_blob_service = BlockBlobService( account_name=account_name, account_key=api_secret_key) @@ -112,3 +113,16 @@ content_obj_id = compute_hash(obj_content) if content_obj_id != obj_id: raise Error(obj_id) + + def delete(self, obj_id): + """Delete an object.""" + super().delete(obj_id) + hex_obj_id = self._internal_id(obj_id) + try: + self.block_blob_service.delete_blob( + container_name=self.container_name, + blob_name=hex_obj_id) + except AzureMissingResourceHttpError: + raise ObjNotFoundError('Content {} not found!'.format(hex_obj_id)) + + return True diff --git a/swh/objstorage/cloud/objstorage_cloud.py b/swh/objstorage/cloud/objstorage_cloud.py --- a/swh/objstorage/cloud/objstorage_cloud.py +++ b/swh/objstorage/cloud/objstorage_cloud.py @@ -22,7 +22,8 @@ https://libcloud.readthedocs.io/en/latest/storage/api.html). """ - def __init__(self, api_key, api_secret_key, container_name): + def __init__(self, api_key, api_secret_key, container_name, **kwargs): + super().__init__(**kwargs) self.driver = self._get_driver(api_key, api_secret_key) self.container_name = container_name self.container = self.driver.get_container( @@ -122,6 +123,11 @@ if content_obj_id != obj_id: raise Error(obj_id) + def delete(self, obj_id): + super().delete(obj_id) + obj = self._get_object(obj_id) + return self.driver.delete_object(obj) + def _get_object(self, obj_id): """Get a Libcloud wrapper for an object pointer. diff --git a/swh/objstorage/multiplexer/filter/filter.py b/swh/objstorage/multiplexer/filter/filter.py --- a/swh/objstorage/multiplexer/filter/filter.py +++ b/swh/objstorage/multiplexer/filter/filter.py @@ -71,5 +71,8 @@ def check(self, obj_id, *args, **kwargs): return self.storage.check(obj_id, *args, **kwargs) + def delete(self, obj_id, *args, **kwargs): + return self.storage.delete(obj_id, *args, **kwargs) + def get_random(self, batch_size, *args, **kwargs): return self.storage.get_random(batch_size, *args, **kwargs) diff --git a/swh/objstorage/multiplexer/filter/read_write_filter.py b/swh/objstorage/multiplexer/filter/read_write_filter.py --- a/swh/objstorage/multiplexer/filter/read_write_filter.py +++ b/swh/objstorage/multiplexer/filter/read_write_filter.py @@ -19,3 +19,6 @@ def restore(self, *args, **kwargs): return + + def delete(self, *args, **kwargs): + return True diff --git a/swh/objstorage/multiplexer/multiplexer_objstorage.py b/swh/objstorage/multiplexer/multiplexer_objstorage.py --- a/swh/objstorage/multiplexer/multiplexer_objstorage.py +++ b/swh/objstorage/multiplexer/multiplexer_objstorage.py @@ -52,7 +52,8 @@ """ - def __init__(self, storages): + def __init__(self, storages, **kwargs): + super().__init__(**kwargs) self.storages = storages def check_config(self, *, check_write): @@ -153,6 +154,10 @@ if nb_present == 0: raise ObjNotFoundError(obj_id) + def delete(self, obj_id): + super().delete(obj_id) + return all(storage.delete(obj_id) for storage in self.storages) + def get_random(self, batch_size): storages_set = [storage for storage in self.storages if len(storage) > 0] diff --git a/swh/objstorage/objstorage.py b/swh/objstorage/objstorage.py --- a/swh/objstorage/objstorage.py +++ b/swh/objstorage/objstorage.py @@ -33,6 +33,7 @@ - restore() same as add() but erase an already existed content - get() retrieve the content of an object, by object id - check() check the integrity of an object, by object id + - delete() remove an object And some management methods: @@ -48,6 +49,11 @@ Each implementation of this interface can have a different behavior and its own way to store the contents. """ + def __init__(self, *, allow_delete=False, **kwargs): + # A more complete permission system could be used in place of that if + # it becomes needed + super().__init__(**kwargs) + self.allow_delete = allow_delete @abc.abstractmethod def check_config(self, *, check_write): @@ -171,6 +177,20 @@ """ pass + @abc.abstractmethod + def delete(self, obj_id, *args, **kwargs): + """Delete an object. + + Args: + obj_id (bytes): object identifier. + + Raises: + ObjNotFoundError: if the requested object is missing. + + """ + if not self.allow_delete: + raise PermissionError("Delete is not allowed.") + # Management methods def get_random(self, batch_size, *args, **kwargs): diff --git a/swh/objstorage/objstorage_pathslicing.py b/swh/objstorage/objstorage_pathslicing.py --- a/swh/objstorage/objstorage_pathslicing.py +++ b/swh/objstorage/objstorage_pathslicing.py @@ -103,7 +103,7 @@ """ - def __init__(self, root, slicing): + def __init__(self, root, slicing, **kwargs): """ Create an object to access a hash-slicing based object storage. Args: @@ -113,6 +113,7 @@ on the hash of the content to know the path where it should be stored. """ + super().__init__(**kwargs) self.root = root # Make a list of tuples where each tuple contains the beginning # and the end of each slicing. @@ -271,6 +272,18 @@ # IOError is for compatibility with older python versions raise Error('Corrupt object %s is not a gzip file' % obj_id) + def delete(self, obj_id): + super().delete(obj_id) + if obj_id not in self: + raise ObjNotFoundError(obj_id) + + hex_obj_id = hashutil.hash_to_hex(obj_id) + try: + os.remove(self._obj_path(hex_obj_id)) + except FileNotFoundError: + raise ObjNotFoundError(obj_id) + return True + # Management methods def get_random(self, batch_size): diff --git a/swh/objstorage/tests/objstorage_testing.py b/swh/objstorage/tests/objstorage_testing.py --- a/swh/objstorage/tests/objstorage_testing.py +++ b/swh/objstorage/tests/objstorage_testing.py @@ -109,6 +109,30 @@ self.fail('Integrity check failed') @istest + def delete_missing(self): + self.storage.allow_delete = True + content, obj_id = self.hash_content(b'missing_content_to_delete') + with self.assertRaises(exc.Error): + self.storage.delete(obj_id) + + @istest + def delete_present(self): + self.storage.allow_delete = True + content, obj_id = self.hash_content(b'content_to_delete') + self.storage.add(content, obj_id=obj_id) + self.assertTrue(self.storage.delete(obj_id)) + with self.assertRaises(exc.Error): + self.storage.get(obj_id) + + @istest + def delete_not_allowed(self): + self.storage.allow_delete = False + content, obj_id = self.hash_content(b'content_to_delete') + self.storage.add(content, obj_id=obj_id) + with self.assertRaises(PermissionError): + self.assertTrue(self.storage.delete(obj_id)) + + @istest def add_stream(self): content = [b'chunk1', b'chunk2'] _, obj_id = self.hash_content(b''.join(content)) diff --git a/swh/objstorage/tests/server_testing.py b/swh/objstorage/tests/server_testing.py --- a/swh/objstorage/tests/server_testing.py +++ b/swh/objstorage/tests/server_testing.py @@ -56,7 +56,7 @@ # Worker function for multiprocessing def worker(app, port): - return aiohttp.web.run_app(app, port=port) + return aiohttp.web.run_app(app, port=port, print=lambda *_: None) self.process = multiprocessing.Process( target=worker, args=(self.app, self.port) diff --git a/swh/objstorage/tests/test_objstorage_api.py b/swh/objstorage/tests/test_objstorage_api.py --- a/swh/objstorage/tests/test_objstorage_api.py +++ b/swh/objstorage/tests/test_objstorage_api.py @@ -23,6 +23,7 @@ 'args': { 'root': tempfile.mkdtemp(), 'slicing': '0:1/0:5', + 'allow_delete': True, }, 'client_max_size': 8 * 1024 * 1024, } diff --git a/swh/objstorage/tests/test_objstorage_azure.py b/swh/objstorage/tests/test_objstorage_azure.py --- a/swh/objstorage/tests/test_objstorage_azure.py +++ b/swh/objstorage/tests/test_objstorage_azure.py @@ -43,6 +43,14 @@ return MockBlob(name=blob_name, content=self.data[container_name][blob_name]) + def delete_blob(self, container_name, blob_name): + try: + self.data[container_name].pop(blob_name) + except KeyError: + raise AzureMissingResourceHttpError( + 'Blob %s not found' % blob_name, 404) + return True + def exists(self, container_name, blob_name): return blob_name in self.data[container_name] diff --git a/swh/objstorage/tests/test_objstorage_cloud.py b/swh/objstorage/tests/test_objstorage_cloud.py --- a/swh/objstorage/tests/test_objstorage_cloud.py +++ b/swh/objstorage/tests/test_objstorage_cloud.py @@ -62,6 +62,16 @@ raise ObjectDoesNotExistError(object_name=obj_id, driver=self, value=None) + def delete_object(self, obj): + self._check_credentials() + try: + container = self.get_container(CONTAINER_NAME) + container.pop(obj.name) + return True + except KeyError: + raise ObjectDoesNotExistError(object_name=obj.name, + driver=self, value=None) + def upload_object_via_stream(self, content, container, obj_id): self._check_credentials() obj = MockLibcloudObject(obj_id, content) diff --git a/swh/objstorage/tests/test_objstorage_multiplexer.py b/swh/objstorage/tests/test_objstorage_multiplexer.py --- a/swh/objstorage/tests/test_objstorage_multiplexer.py +++ b/swh/objstorage/tests/test_objstorage_multiplexer.py @@ -62,6 +62,18 @@ self.assertEqual(len(self.storage), 2) @istest + def delete_missing(self): + self.storage_v1.allow_delete = True + self.storage_v2.allow_delete = True + super().delete_missing() + + @istest + def delete_present(self): + self.storage_v1.allow_delete = True + self.storage_v2.allow_delete = True + super().delete_present() + + @istest def get_random_contents(self): content, obj_id = self.hash_content(b'get_random_content') self.storage.add(content)