Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/test_tenacious.py
# Copyright (C) 2020-2021 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import Counter | |||||
from contextlib import contextmanager | from contextlib import contextmanager | ||||
from unittest.mock import patch | from unittest.mock import patch | ||||
import attr | import attr | ||||
import pytest | import pytest | ||||
from swh.model import model | from swh.model import model | ||||
from swh.model.tests.swh_model_data import TEST_OBJECTS | from swh.model.tests.swh_model_data import TEST_OBJECTS | ||||
▲ Show 20 Lines • Show All 163 Lines • ▼ Show 20 Lines | testdata = [ | ||||
), | ), | ||||
] | ] | ||||
class LimitedInMemoryStorage(InMemoryStorage):
    """In-memory storage that refuses a fixed set of objects and counts adds.

    Any ``*_add`` call whose batch contains at least one forbidden object
    raises ``ValueError`` instead of storing anything. Every ``*_add`` call,
    accepted or rejected, is tallied per object type in ``add_calls``.
    """

    # Forbidden objects are the 'bad1' and 'bad2' arguments of `testdata`
    # (positions 2 and 3 of each parameter set).
    forbidden = [x[0][position] for position in (2, 3) for x in testdata]

    def __init__(self, *args, **kw):
        # The counter is created before delegating to the parent constructor;
        # presumably the parent calls reset(), which needs it to exist --
        # TODO confirm against InMemoryStorage.__init__.
        self.add_calls = Counter()
        super().__init__(*args, **kw)

    def reset(self):
        """Reset the underlying storage and forget all recorded add calls."""
        super().reset()
        self.add_calls.clear()

    def content_add(self, contents):
        """Add contents unless the batch contains a forbidden object."""
        return self._maybe_add(super().content_add, "content", contents)

    def skipped_content_add(self, skipped_contents):
        """Add skipped contents unless the batch contains a forbidden object."""
        return self._maybe_add(
            super().skipped_content_add, "skipped_content", skipped_contents
        )

    def origin_add(self, origins):
        """Add origins unless the batch contains a forbidden object."""
        return self._maybe_add(super().origin_add, "origin", origins)

    def directory_add(self, directories):
        """Add directories unless the batch contains a forbidden object."""
        return self._maybe_add(super().directory_add, "directory", directories)

    def revision_add(self, revisions):
        """Add revisions unless the batch contains a forbidden object."""
        return self._maybe_add(super().revision_add, "revision", revisions)

    def release_add(self, releases):
        """Add releases unless the batch contains a forbidden object."""
        return self._maybe_add(super().release_add, "release", releases)

    def snapshot_add(self, snapshots):
        """Add snapshots unless the batch contains a forbidden object."""
        return self._maybe_add(super().snapshot_add, "snapshot", snapshots)

    def _maybe_add(self, add_func, object_type, objects):
        """Record the call, then either reject the batch or forward it.

        Args:
            add_func: the parent-class ``*_add`` method to delegate to.
            object_type: key under which the call is counted in ``add_calls``;
                also used in the error message.
            objects: the batch of objects to insert.

        Returns:
            Whatever ``add_func(objects)`` returns.

        Raises:
            ValueError: if any object of the batch is in ``forbidden``; the
                second exception argument lists the unique keys of the
                offending objects.
        """
        # Counted before the check so that rejected attempts are tallied too.
        self.add_calls[object_type] += 1
        rejected = [obj for obj in objects if obj in self.forbidden]
        if rejected:
            raise ValueError(
                f"{object_type} is forbidden",
                [obj.unique_key() for obj in rejected],
            )
        return add_func(objects)
@patch("swh.storage.in_memory.InMemoryStorage", LimitedInMemoryStorage)
@pytest.mark.parametrize("object_type, objects, bad1, bad2", testdata)
def test_tenacious_proxy_storage(object_type, objects, bad1, bad2):
    """Check that forbidden objects are reported as errors, not failures.

    For each placement of the forbidden objects (``bad1``, ``bad2``) within a
    batch of valid ``objects``, the ``<object_type>_add`` summary must report
    every valid object as added and every forbidden one as an error, and the
    backend must have been called at least (or exactly) the expected number
    of times.
    """
    storage = get_tenacious_storage()
    tenacious = storage.storage
    in_memory = tenacious.storage
    assert isinstance(tenacious, TenaciousProxyStorage)
    assert isinstance(in_memory, LimitedInMemoryStorage)

    size = len(objects)
    half = size // 2
    third = size // 3
    add_func = getattr(storage, f"{object_type}_add")

    # Note: when checking the LimitedInMemoryStorage.add_calls counter, it's
    # hard to guess the exact number of calls in the end (depends on the size
    # of batch and the position of bad objects in this batch). So we will only
    # check a lower limit of the form (n + m), where n is the minimum expected
    # number of additions (due to the batch being split), and m is the fact
    # that bad objects are tried (individually) several (3) times before giving
    # up. So for one bad object, m is 3; for 2 bad objects, m is 6, etc.
    #
    # Each scenario is:
    #   (description, batch, expected added, expected errors,
    #    whether the call count is exact, expected call count or lower bound)
    scenarios = [
        ("only valid objects", objects, size, 0, True, 1 + 0),
        ("bad1 is the last element", objects + [bad1], size, 1, False, 2 + 3),
        (
            "bad1 and bad2 are the last elements",
            objects + [bad1, bad2],
            size,
            2,
            False,
            3 + 6,
        ),
        ("bad1 is the first element", [bad1] + objects, size, 1, False, 2 + 3),
        (
            "bad1 and bad2 are the first elements",
            [bad1, bad2] + objects,
            size,
            2,
            False,
            3 + 6,
        ),
        (
            "bad1 is in the middle of the list of inserted elements",
            objects[:half] + [bad1] + objects[half:],
            size,
            1,
            False,
            3 + 3,
        ),
        (
            "bad1 and bad2 are together in the middle of the list",
            objects[:half] + [bad1, bad2] + objects[half:],
            size,
            2,
            False,
            3 + 6,
        ),
        (
            "bad1 and bad2 are spread in the middle of the list",
            objects[:third]
            + [bad1]
            + objects[third : 2 * third]
            + [bad2]
            + objects[2 * third :],
            size,
            2,
            False,
            3 + 6,
        ),
        ("bad1 is the only element", [bad1], 0, 1, True, 0 + 3),
        ("bad1 and bad2 are the only elements", [bad1, bad2], 0, 2, True, 1 + 6),
    ]

    for description, batch, added, errors, exact, calls in scenarios:
        s = add_func(batch)
        assert s.get(f"{object_type}:add", 0) == added, description
        assert s.get(f"{object_type}:add:errors", 0) == errors, description
        # Read the counter from the object that owns it rather than through
        # the proxy chain.
        observed = in_memory.add_calls[object_type]
        if exact:
            assert observed == calls, description
        else:
            assert observed >= calls, description
        # Start every scenario from an empty backend and a fresh proxy state.
        in_memory.reset()
        tenacious.reset()
@patch("swh.storage.in_memory.InMemoryStorage", LimitedInMemoryStorage) | @patch("swh.storage.in_memory.InMemoryStorage", LimitedInMemoryStorage) | ||||
@pytest.mark.parametrize("object_type, objects, bad1, bad2", testdata) | @pytest.mark.parametrize("object_type, objects, bad1, bad2", testdata) | ||||
def test_tenacious_proxy_storage_rate_limit(object_type, objects, bad1, bad2): | def test_tenacious_proxy_storage_rate_limit(object_type, objects, bad1, bad2): | ||||
storage = get_tenacious_storage(error_rate_limit={"errors": 1, "window_size": 3}) | storage = get_tenacious_storage(error_rate_limit={"errors": 1, "window_size": 3}) | ||||
▲ Show 20 Lines • Show All 56 Lines • Show Last 20 Lines |