diff --git a/swh/objstorage/backends/azure.py b/swh/objstorage/backends/azure.py --- a/swh/objstorage/backends/azure.py +++ b/swh/objstorage/backends/azure.py @@ -168,8 +168,10 @@ """Get all active block_blob_services""" yield self.get_container_client("") - def _internal_id(self, obj_id): + def _internal_id(self, obj_id: ObjId) -> str: """Internal id is the hex version in objstorage.""" + if isinstance(obj_id, dict): + obj_id = obj_id[self.PRIMARY_HASH] return hashutil.hash_to_hex(obj_id) def check_config(self, *, check_write): @@ -296,6 +298,8 @@ """Check the content integrity.""" obj_content = self.get(obj_id) content_obj_id = compute_hash(obj_content) + if isinstance(obj_id, dict): + obj_id = obj_id[self.PRIMARY_HASH] if content_obj_id != obj_id: raise Error(obj_id) diff --git a/swh/objstorage/backends/in_memory.py b/swh/objstorage/backends/in_memory.py --- a/swh/objstorage/backends/in_memory.py +++ b/swh/objstorage/backends/in_memory.py @@ -1,9 +1,9 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import Iterator +from typing import Dict, Iterator from typing_extensions import Literal @@ -23,13 +23,19 @@ def __init__(self, **args): super().__init__() - self.state = {} + self.state: Dict[bytes, bytes] = {} def check_config(self, *, check_write): return True + def _state_key(self, obj_id: ObjId) -> bytes: + if isinstance(obj_id, dict): + return obj_id[self.PRIMARY_HASH] + else: + return obj_id + def __contains__(self, obj_id: ObjId) -> bool: - return obj_id in self.state + return self._state_key(obj_id) in self.state def __iter__(self) -> Iterator[CompositeObjId]: for id_ in sorted(self.state): @@ -39,18 +45,18 @@ if check_presence and obj_id in self: return - self.state[obj_id] = content + self.state[self._state_key(obj_id)] = content def get(self, obj_id: ObjId) -> bytes: if obj_id not in self: raise ObjNotFoundError(obj_id) - return self.state[obj_id] + return self.state[self._state_key(obj_id)] def check(self, obj_id: ObjId) -> None: if obj_id not in self: raise ObjNotFoundError(obj_id) - if compute_hash(self.state[obj_id]) != obj_id: + if compute_hash(self.state[self._state_key(obj_id)]) != self._state_key(obj_id): raise Error("Corrupt object %s" % objid_to_default_hex(obj_id)) def delete(self, obj_id: ObjId): @@ -58,5 +64,5 @@ if obj_id not in self: raise ObjNotFoundError(obj_id) - self.state.pop(obj_id) + self.state.pop(self._state_key(obj_id)) return True diff --git a/swh/objstorage/backends/libcloud.py b/swh/objstorage/backends/libcloud.py --- a/swh/objstorage/backends/libcloud.py +++ b/swh/objstorage/backends/libcloud.py @@ -184,6 +184,8 @@ # Check the content integrity obj_content = self.get(obj_id) content_obj_id = compute_hash(obj_content) + if isinstance(obj_id, dict): + obj_id = obj_id[self.PRIMARY_HASH] if content_obj_id != obj_id: raise Error(obj_id) @@ -192,15 +194,18 @@ obj = self._get_object(obj_id) return self.driver.delete_object(obj) - def _object_path(self, obj_id): + def _object_path(self, obj_id: ObjId) -> str: """Get the full path to an object""" + if isinstance(obj_id, dict): + obj_id = obj_id[self.PRIMARY_HASH] + hex_obj_id = hashutil.hash_to_hex(obj_id) if self.path_prefix: return self.path_prefix + hex_obj_id else: return hex_obj_id - def _get_object(self, obj_id): + def _get_object(self, obj_id: ObjId): """Get a Libcloud wrapper for an object pointer. This wrapper does not retrieve the content of the object diff --git a/swh/objstorage/backends/seaweedfs/objstorage.py b/swh/objstorage/backends/seaweedfs/objstorage.py --- a/swh/objstorage/backends/seaweedfs/objstorage.py +++ b/swh/objstorage/backends/seaweedfs/objstorage.py @@ -114,6 +114,8 @@ # Check the content integrity obj_content = self.get(obj_id) content_obj_id = compute_hash(obj_content) + if isinstance(obj_id, dict): + obj_id = obj_id[self.PRIMARY_HASH] if content_obj_id != obj_id: raise Error(obj_id) diff --git a/swh/objstorage/multiplexer/striping_objstorage.py b/swh/objstorage/multiplexer/striping_objstorage.py --- a/swh/objstorage/multiplexer/striping_objstorage.py +++ b/swh/objstorage/multiplexer/striping_objstorage.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2020 The Software Heritage developers +# Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -7,6 +7,9 @@ import queue from typing import Dict +from typing_extensions import Literal + +from swh.objstorage.interface import ObjId from swh.objstorage.multiplexer.multiplexer_objstorage import ( MultiplexerObjStorage, ObjStorageThread, @@ -26,14 +29,17 @@ """ MOD_BYTES = 8 + PRIMARY_HASH: Literal["sha1"] = "sha1" def __init__(self, storages, **kwargs): super().__init__(storages, **kwargs) self.num_storages = len(storages) - def get_storage_index(self, obj_id): + def get_storage_index(self, obj_id: ObjId): if obj_id is None: raise ValueError("StripingObjStorage always needs obj_id to be set") + if isinstance(obj_id, dict): + obj_id = obj_id[self.PRIMARY_HASH] index = int.from_bytes(obj_id[: -self.MOD_BYTES], "big") return index % self.num_storages diff --git a/swh/objstorage/tests/objstorage_testing.py b/swh/objstorage/tests/objstorage_testing.py --- a/swh/objstorage/tests/objstorage_testing.py +++ b/swh/objstorage/tests/objstorage_testing.py @@ -4,9 +4,10 @@ # See top-level LICENSE file for more information import inspect +from typing import Tuple from swh.objstorage import exc -from swh.objstorage.interface import ObjStorageInterface +from swh.objstorage.interface import CompositeObjId, ObjStorageInterface from swh.objstorage.objstorage import compute_hash @@ -48,6 +49,10 @@ obj_id = compute_hash(content) return content, obj_id + def compositehash_content(self, content) -> Tuple[bytes, CompositeObjId]: + obj_id = compute_hash(content) + return content, {"sha1": obj_id} + def assertContentMatch(self, obj_id, expected_content): # noqa content = self.storage.get(obj_id) self.assertEqual(content, expected_content) @@ -63,11 +68,23 @@ self.assertIn(obj_id_p, self.storage) self.assertNotIn(obj_id_m, self.storage) + def test_contains_composite(self): + content_p, obj_id_p = self.compositehash_content(b"contains_present") + content_m, obj_id_m = self.compositehash_content(b"contains_missing") + self.storage.add(content_p, obj_id=obj_id_p) + self.assertIn(obj_id_p, self.storage) + self.assertNotIn(obj_id_m, self.storage) + def test_add_get_w_id(self): content, obj_id = self.hash_content(b"add_get_w_id") self.storage.add(content, obj_id=obj_id) self.assertContentMatch(obj_id, content) + def test_add_get_w_composite_id(self): + content, obj_id = self.compositehash_content(b"add_get_w_id") + self.storage.add(content, obj_id=obj_id) + self.assertContentMatch(obj_id, content) + def test_add_twice(self): content, obj_id = self.hash_content(b"add_twice") self.storage.add(content, obj_id=obj_id) @@ -89,6 +106,15 @@ self.assertEqual(cr1, content1) self.assertEqual(cr2, content2) + def test_add_get_batch_composite(self): + content1, obj_id1 = self.compositehash_content(b"add_get_batch_1") + content2, obj_id2 = self.compositehash_content(b"add_get_batch_2") + self.storage.add(content1, obj_id1) + self.storage.add(content2, obj_id2) + cr1, cr2 = self.storage.get_batch([obj_id1, obj_id2]) + self.assertEqual(cr1, content1) + self.assertEqual(cr2, content2) + def test_get_batch_unexisting_content(self): content, obj_id = self.hash_content(b"get_batch_unexisting_content") result = list(self.storage.get_batch([obj_id])) @@ -113,11 +139,23 @@ self.assertIn(obj_id, e.exception.args) + def test_get_missing_composite(self): + content, obj_id = self.compositehash_content(b"get_missing") + with self.assertRaises(exc.ObjNotFoundError) as e: + self.storage.get(obj_id) + + self.assertIn(obj_id, e.exception.args) + def test_check_missing(self): content, obj_id = self.hash_content(b"check_missing") with self.assertRaises(exc.Error): self.storage.check(obj_id) + def test_check_missing_composite(self): + content, obj_id = self.compositehash_content(b"check_missing") + with self.assertRaises(exc.Error): + self.storage.check(obj_id) + def test_check_present(self): content, obj_id = self.hash_content(b"check_present") self.storage.add(content, obj_id) @@ -126,12 +164,26 @@ except exc.Error: self.fail("Integrity check failed") + def test_check_present_composite(self): + content, obj_id = self.compositehash_content(b"check_present") + self.storage.add(content, obj_id) + try: + self.storage.check(obj_id) + except exc.Error: + self.fail("Integrity check failed") + def test_delete_missing(self): self.storage.allow_delete = True content, obj_id = self.hash_content(b"missing_content_to_delete") with self.assertRaises(exc.Error): self.storage.delete(obj_id) + def test_delete_missing_composite(self): + self.storage.allow_delete = True + content, obj_id = self.compositehash_content(b"missing_content_to_delete") + with self.assertRaises(exc.Error): + self.storage.delete(obj_id) + def test_delete_present(self): self.storage.allow_delete = True content, obj_id = self.hash_content(b"content_to_delete") @@ -140,6 +192,14 @@ with self.assertRaises(exc.Error): self.storage.get(obj_id) + def test_delete_present_composite(self): + self.storage.allow_delete = True + content, obj_id = self.compositehash_content(b"content_to_delete") + self.storage.add(content, obj_id=obj_id) + self.assertTrue(self.storage.delete(obj_id)) + with self.assertRaises(exc.Error): + self.storage.get(obj_id) + def test_delete_not_allowed(self): self.storage.allow_delete = False content, obj_id = self.hash_content(b"content_to_delete") diff --git a/swh/objstorage/tests/test_objstorage_multiplexer.py b/swh/objstorage/tests/test_objstorage_multiplexer.py --- a/swh/objstorage/tests/test_objstorage_multiplexer.py +++ b/swh/objstorage/tests/test_objstorage_multiplexer.py @@ -48,11 +48,21 @@ self.storage_v2.allow_delete = True super().test_delete_missing() + def test_delete_missing_composite(self): + self.storage_v1.allow_delete = True + self.storage_v2.allow_delete = True + super().test_delete_missing_composite() + def test_delete_present(self): self.storage_v1.allow_delete = True self.storage_v2.allow_delete = True super().test_delete_present() + def test_delete_present_composite(self): + self.storage_v1.allow_delete = True + self.storage_v2.allow_delete = True + super().test_delete_present_composite() + def test_access_readonly(self): # Add a content to the readonly storage content, obj_id = self.hash_content(b"content in read-only")