diff --git a/swh/objstorage/backends/pathslicing.py b/swh/objstorage/backends/pathslicing.py --- a/swh/objstorage/backends/pathslicing.py +++ b/swh/objstorage/backends/pathslicing.py @@ -6,9 +6,8 @@ from contextlib import contextmanager from itertools import islice import os -import random import tempfile -from typing import Iterable, Iterator, List, Optional +from typing import Iterator, List, Optional from swh.model import hashutil from swh.objstorage.constants import DEFAULT_LIMIT, ID_HASH_ALGO, ID_HEXDIGEST_LENGTH @@ -288,36 +287,6 @@ raise ObjNotFoundError(obj_id) return True - # Management methods - - def get_random(self, batch_size: int) -> Iterable[ObjId]: - def get_random_content(self, batch_size): - """Get a batch of content inside a single directory. - - Returns: - a tuple (batch size, batch). - """ - dirs = [] - for level in range(len(self.slicer)): - path = os.path.join(self.root, *dirs) - dir_list = next(os.walk(path))[1] - if "tmp" in dir_list: - dir_list.remove("tmp") - dirs.append(random.choice(dir_list)) - - path = os.path.join(self.root, *dirs) - content_list = next(os.walk(path))[2] - length = min(batch_size, len(content_list)) - return ( - length, - map(hashutil.hash_to_bytes, random.sample(content_list, length)), - ) - - while batch_size: - length, it = get_random_content(self, batch_size) - batch_size = batch_size - length - yield from it - # Streaming methods @contextmanager diff --git a/swh/objstorage/interface.py b/swh/objstorage/interface.py --- a/swh/objstorage/interface.py +++ b/swh/objstorage/interface.py @@ -3,7 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import Dict, Iterable, Iterator, List, Optional +from typing import Dict, Iterator, List, Optional from typing_extensions import Protocol, runtime_checkable @@ -28,11 +28,6 @@ - check() check the integrity of an object, by object id - delete() remove an object - And some management methods: - - - get_random() get random object id of existing contents (used for the - content integrity checker). - Each implementation of this interface can have a different behavior and its own way to store the contents. """ @@ -177,24 +172,6 @@ """ ... - # Management methods - - @remote_api_endpoint("content/get/random") - def get_random(self, batch_size: int) -> Iterable[ObjId]: - """Get random ids of existing contents. - - This method is used in order to get random ids to perform - content integrity verifications on random contents. - - Args: - batch_size: Number of ids that will be given - - Yields: - ids of contents that are in the current object storage. - - """ - ... - def __iter__(self) -> Iterator[ObjId]: ... diff --git a/swh/objstorage/multiplexer/filter/filter.py b/swh/objstorage/multiplexer/filter/filter.py --- a/swh/objstorage/multiplexer/filter/filter.py +++ b/swh/objstorage/multiplexer/filter/filter.py @@ -72,6 +72,3 @@ def delete(self, obj_id, *args, **kwargs): return self.storage.delete(obj_id, *args, **kwargs) - - def get_random(self, batch_size, *args, **kwargs): - return self.storage.get_random(batch_size, *args, **kwargs) diff --git a/swh/objstorage/multiplexer/filter/id_filter.py b/swh/objstorage/multiplexer/filter/id_filter.py --- a/swh/objstorage/multiplexer/filter/id_filter.py +++ b/swh/objstorage/multiplexer/filter/id_filter.py @@ -54,11 +54,6 @@ return self.storage.check(*args, obj_id=obj_id, **kwargs) raise ObjNotFoundError(obj_id) - def get_random(self, *args, **kwargs): - yield from filter( - lambda id: self.is_valid(id), self.storage.get_random(*args, **kwargs) - ) - class RegexIdObjStorageFilter(IdObjStorageFilter): """Filter that allow operations if the content's id as hex match a regex.""" diff --git a/swh/objstorage/multiplexer/multiplexer_objstorage.py b/swh/objstorage/multiplexer/multiplexer_objstorage.py --- a/swh/objstorage/multiplexer/multiplexer_objstorage.py +++ b/swh/objstorage/multiplexer/multiplexer_objstorage.py @@ -4,9 +4,8 @@ # See top-level LICENSE file for more information import queue -import random import threading -from typing import Dict, Iterable +from typing import Dict from swh.objstorage.exc import ObjNotFoundError from swh.objstorage.interface import ObjId @@ -307,20 +306,3 @@ def delete(self, obj_id: ObjId): super().delete(obj_id) # Check delete permission return all(self.wrap_call(self.get_write_threads(obj_id), "delete", obj_id)) - - def get_random(self, batch_size: int) -> Iterable[ObjId]: - storages_set = [storage for storage in self.storages if len(storage) > 0] - if len(storages_set) <= 0: - return [] - - while storages_set: - storage = random.choice(storages_set) - try: - return storage.get_random(batch_size) - except NotImplementedError: - storages_set.remove(storage) - # There is no storage that allow the get_random operation - raise NotImplementedError( - "There is no storage implementation into the multiplexer that " - "support the 'get_random' operation" - ) diff --git a/swh/objstorage/objstorage.py b/swh/objstorage/objstorage.py --- a/swh/objstorage/objstorage.py +++ b/swh/objstorage/objstorage.py @@ -7,7 +7,7 @@ import bz2 from itertools import dropwhile, islice import lzma -from typing import Callable, Dict, Iterable, Iterator, List, Optional +from typing import Callable, Dict, Iterator, List, Optional import zlib from swh.model import hashutil @@ -121,9 +121,6 @@ if not self.allow_delete: raise PermissionError("Delete is not allowed.") - def get_random(self, batch_size: int) -> Iterable[ObjId]: - pass - def list_content( self: ObjStorageInterface, last_obj_id: Optional[ObjId] = None, diff --git a/swh/objstorage/tests/test_multiplexer_filter.py b/swh/objstorage/tests/test_multiplexer_filter.py --- a/swh/objstorage/tests/test_multiplexer_filter.py +++ b/swh/objstorage/tests/test_multiplexer_filter.py @@ -75,12 +75,6 @@ self.storage.check(self.invalid_id) self.storage.check(self.valid_id) - def test_can_get_random(self): - self.assertEqual(1, len(list(self.storage.get_random(1)))) - self.assertEqual( - len(list(self.storage)), len(set(self.storage.get_random(1000))) - ) - def test_cannot_add(self): new_id = self.storage.add(b"New content") result = self.storage.add(self.valid_content, self.valid_id) @@ -256,19 +250,6 @@ with self.assertRaises(ObjNotFoundError): self.storage.check(self.missing_corrupted_invalid_id) - def test_get_random(self): - self.assertEqual(0, len(list(self.storage.get_random(0)))) - - random_content = list(self.storage.get_random(1000)) - self.assertIn(self.present_valid_id, random_content) - self.assertNotIn(self.present_invalid_id, random_content) - self.assertNotIn(self.missing_valid_id, random_content) - self.assertNotIn(self.missing_invalid_id, random_content) - self.assertIn(self.present_corrupted_valid_id, random_content) - self.assertNotIn(self.present_corrupted_invalid_id, random_content) - self.assertNotIn(self.missing_corrupted_valid_id, random_content) - self.assertNotIn(self.missing_corrupted_invalid_id, random_content) - def test_add(self): # Add valid and invalid contents to the storage and check their # presence with the unfiltered storage. diff --git a/swh/objstorage/tests/test_objstorage_multiplexer.py b/swh/objstorage/tests/test_objstorage_multiplexer.py --- a/swh/objstorage/tests/test_objstorage_multiplexer.py +++ b/swh/objstorage/tests/test_objstorage_multiplexer.py @@ -53,13 +53,6 @@ self.storage_v2.allow_delete = True super().test_delete_present() - def test_get_random_contents(self): - content, obj_id = self.hash_content(b"get_random_content") - self.storage.add(content, obj_id=obj_id) - random_contents = list(self.storage.get_random(1)) - self.assertEqual(1, len(random_contents)) - self.assertIn(obj_id, random_contents) - def test_access_readonly(self): # Add a content to the readonly storage content, obj_id = self.hash_content(b"content in read-only") diff --git a/swh/objstorage/tests/test_objstorage_pathslicing.py b/swh/objstorage/tests/test_objstorage_pathslicing.py --- a/swh/objstorage/tests/test_objstorage_pathslicing.py +++ b/swh/objstorage/tests/test_objstorage_pathslicing.py @@ -69,13 +69,6 @@ error.exception.args, ) - def test_get_random_contents(self): - content, obj_id = self.hash_content(b"get_random_content") - self.storage.add(content, obj_id=obj_id) - random_contents = list(self.storage.get_random(1)) - self.assertEqual(1, len(random_contents)) - self.assertIn(obj_id, random_contents) - def test_iterate_from(self): all_ids = [] for i in range(100):