diff --git a/swh/objstorage/api/client.py b/swh/objstorage/api/client.py --- a/swh/objstorage/api/client.py +++ b/swh/objstorage/api/client.py @@ -45,6 +45,9 @@ def check(self, obj_id): return self.post('content/check', {'obj_id': obj_id}) + def delete(self, obj_id): + return self.post('content/delete', {'obj_id': obj_id}) + # Management methods def get_random(self, batch_size): diff --git a/swh/objstorage/api/server.py b/swh/objstorage/api/server.py --- a/swh/objstorage/api/server.py +++ b/swh/objstorage/api/server.py @@ -66,6 +66,12 @@ return encode_data(request.app['objstorage'].check(**req)) +@asyncio.coroutine +def delete(request): + req = yield from decode_request(request) + return encode_data(request.app['objstorage'].delete(**req)) + + # Management methods @asyncio.coroutine @@ -120,6 +126,7 @@ app.router.add_route('POST', '/content/get/batch', get_batch) app.router.add_route('POST', '/content/get/random', get_random_contents) app.router.add_route('POST', '/content/check', check) + app.router.add_route('POST', '/content/delete', delete) app.router.add_route('POST', '/content/add_stream/{hex_id}', add_stream) app.router.add_route('GET', '/content/get_stream/{hex_id}', get_stream) app.update(config) diff --git a/swh/objstorage/cloud/objstorage_azure.py b/swh/objstorage/cloud/objstorage_azure.py --- a/swh/objstorage/cloud/objstorage_azure.py +++ b/swh/objstorage/cloud/objstorage_azure.py @@ -112,3 +112,15 @@ content_obj_id = compute_hash(obj_content) if content_obj_id != obj_id: raise Error(obj_id) + + def delete(self, obj_id): + """Delete an object.""" + hex_obj_id = self._internal_id(obj_id) + try: + self.block_blob_service.delete_blob( + container_name=self.container_name, + blob_name=hex_obj_id) + except AzureMissingResourceHttpError: + raise ObjNotFoundError('Content {} not found!'.format(hex_obj_id)) + + return True diff --git a/swh/objstorage/cloud/objstorage_cloud.py b/swh/objstorage/cloud/objstorage_cloud.py --- a/swh/objstorage/cloud/objstorage_cloud.py +++ b/swh/objstorage/cloud/objstorage_cloud.py @@ -122,6 +122,10 @@ if content_obj_id != obj_id: raise Error(obj_id) + def delete(self, obj_id): + obj = self._get_object(obj_id) + return self.driver.delete_object(obj) + def _get_object(self, obj_id): """Get a Libcloud wrapper for an object pointer. diff --git a/swh/objstorage/multiplexer/filter/filter.py b/swh/objstorage/multiplexer/filter/filter.py --- a/swh/objstorage/multiplexer/filter/filter.py +++ b/swh/objstorage/multiplexer/filter/filter.py @@ -71,5 +71,8 @@ def check(self, obj_id, *args, **kwargs): return self.storage.check(obj_id, *args, **kwargs) + def delete(self, obj_id, *args, **kwargs): + return self.storage.delete(obj_id, *args, **kwargs) + def get_random(self, batch_size, *args, **kwargs): return self.storage.get_random(batch_size, *args, **kwargs) diff --git a/swh/objstorage/multiplexer/filter/read_write_filter.py b/swh/objstorage/multiplexer/filter/read_write_filter.py --- a/swh/objstorage/multiplexer/filter/read_write_filter.py +++ b/swh/objstorage/multiplexer/filter/read_write_filter.py @@ -19,3 +19,6 @@ def restore(self, *args, **kwargs): return + + def delete(self, *args, **kwargs): + return True diff --git a/swh/objstorage/multiplexer/filter/resilient_filter.py b/swh/objstorage/multiplexer/filter/resilient_filter.py new file mode 100644 --- /dev/null +++ b/swh/objstorage/multiplexer/filter/resilient_filter.py @@ -0,0 +1,15 @@ +# Copyright (C) 2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from .filter import ObjStorageFilter + + +class ResilientStorageFilter(ObjStorageFilter): + """ Filter that disable delete operation of the storage. + + Deletes will always succeed without doing any actual write operations. + """ + def delete(self, *args, **kwargs): + return diff --git a/swh/objstorage/multiplexer/multiplexer_objstorage.py b/swh/objstorage/multiplexer/multiplexer_objstorage.py --- a/swh/objstorage/multiplexer/multiplexer_objstorage.py +++ b/swh/objstorage/multiplexer/multiplexer_objstorage.py @@ -153,6 +153,9 @@ if nb_present == 0: raise ObjNotFoundError(obj_id) + def delete(self, obj_id): + return all(storage.delete(obj_id) for storage in self.storages) + def get_random(self, batch_size): storages_set = [storage for storage in self.storages if len(storage) > 0] diff --git a/swh/objstorage/objstorage.py b/swh/objstorage/objstorage.py --- a/swh/objstorage/objstorage.py +++ b/swh/objstorage/objstorage.py @@ -33,6 +33,7 @@ - restore() same as add() but erase an already existed content - get() retrieve the content of an object, by object id - check() check the integrity of an object, by object id + - delete() remove an object And some management methods: @@ -171,6 +172,19 @@ """ pass + @abc.abstractmethod + def delete(self, obj_id, *args, **kwargs): + """Delete an object. + + Args: + obj_id (bytes): object identifier. + + Raises: + ObjNotFoundError: if the requested object is missing. + + """ + pass + # Management methods def get_random(self, batch_size, *args, **kwargs): diff --git a/swh/objstorage/objstorage_pathslicing.py b/swh/objstorage/objstorage_pathslicing.py --- a/swh/objstorage/objstorage_pathslicing.py +++ b/swh/objstorage/objstorage_pathslicing.py @@ -271,6 +271,17 @@ # IOError is for compatibility with older python versions raise Error('Corrupt object %s is not a gzip file' % obj_id) + def delete(self, obj_id): + if obj_id not in self: + raise ObjNotFoundError(obj_id) + + hex_obj_id = hashutil.hash_to_hex(obj_id) + try: + os.remove(self._obj_path(hex_obj_id)) + except FileNotFoundError: + raise ObjNotFoundError(obj_id) + return True + # Management methods def get_random(self, batch_size): diff --git a/swh/objstorage/tests/objstorage_testing.py b/swh/objstorage/tests/objstorage_testing.py --- a/swh/objstorage/tests/objstorage_testing.py +++ b/swh/objstorage/tests/objstorage_testing.py @@ -109,6 +109,20 @@ self.fail('Integrity check failed') @istest + def delete_missing(self): + content, obj_id = self.hash_content(b'missing_content_to_delete') + with self.assertRaises(exc.Error): + self.storage.delete(obj_id) + + @istest + def delete_present(self): + content, obj_id = self.hash_content(b'content_to_delete') + self.storage.add(content, obj_id=obj_id) + self.assertTrue(self.storage.delete(obj_id)) + with self.assertRaises(exc.Error): + self.storage.get(obj_id) + + @istest def add_stream(self): content = [b'chunk1', b'chunk2'] _, obj_id = self.hash_content(b''.join(content)) diff --git a/swh/objstorage/tests/server_testing.py b/swh/objstorage/tests/server_testing.py --- a/swh/objstorage/tests/server_testing.py +++ b/swh/objstorage/tests/server_testing.py @@ -56,7 +56,7 @@ # Worker function for multiprocessing def worker(app, port): - return aiohttp.web.run_app(app, port=port) + return aiohttp.web.run_app(app, port=port, print=lambda *_: None) self.process = multiprocessing.Process( target=worker, args=(self.app, self.port) diff --git a/swh/objstorage/tests/test_objstorage_azure.py b/swh/objstorage/tests/test_objstorage_azure.py --- a/swh/objstorage/tests/test_objstorage_azure.py +++ b/swh/objstorage/tests/test_objstorage_azure.py @@ -43,6 +43,14 @@ return MockBlob(name=blob_name, content=self.data[container_name][blob_name]) + def delete_blob(self, container_name, blob_name): + try: + self.data[container_name].pop(blob_name) + except KeyError: + raise AzureMissingResourceHttpError( + 'Blob %s not found' % blob_name, 404) + return True + def exists(self, container_name, blob_name): return blob_name in self.data[container_name] diff --git a/swh/objstorage/tests/test_objstorage_cloud.py b/swh/objstorage/tests/test_objstorage_cloud.py --- a/swh/objstorage/tests/test_objstorage_cloud.py +++ b/swh/objstorage/tests/test_objstorage_cloud.py @@ -62,6 +62,16 @@ raise ObjectDoesNotExistError(object_name=obj_id, driver=self, value=None) + def delete_object(self, obj): + self._check_credentials() + try: + container = self.get_container(CONTAINER_NAME) + container.pop(obj.name) + return True + except KeyError: + raise ObjectDoesNotExistError(object_name=obj.name, + driver=self, value=None) + def upload_object_via_stream(self, content, container, obj_id): self._check_credentials() obj = MockLibcloudObject(obj_id, content)