diff --git a/swh/storage/proxies/filter.py b/swh/storage/proxies/filter.py --- a/swh/storage/proxies/filter.py +++ b/swh/storage/proxies/filter.py @@ -6,7 +6,14 @@ from typing import Dict, Iterable, List, Set -from swh.model.model import Content, Directory, Revision, Sha1Git, SkippedContent +from swh.model.model import ( + Content, + Directory, + Release, + Revision, + Sha1Git, + SkippedContent, +) from swh.storage import get_storage from swh.storage.interface import StorageInterface @@ -27,7 +34,7 @@ """ - object_types = ["content", "skipped_content", "directory", "revision"] + object_types = ["content", "skipped_content", "directory", "revision", "release"] def __init__(self, storage): self.storage: StorageInterface = get_storage(**storage) @@ -38,27 +45,59 @@ return getattr(self.storage, key) def content_add(self, content: List[Content]) -> Dict[str, int]: + empty_stat = { + "content:add": 0, + "content:add:bytes": 0, + } + if not content: + return empty_stat contents_to_add = self._filter_missing_contents(content) + if not contents_to_add: + return empty_stat return self.storage.content_add( [x for x in content if x.sha256 in contents_to_add] ) def skipped_content_add(self, content: List[SkippedContent]) -> Dict[str, int]: + empty_stat = {"skipped_content:add": 0} + if not content: + return empty_stat contents_to_add = self._filter_missing_skipped_contents(content) + if not contents_to_add and not any(c.sha1_git is None for c in content): + return empty_stat return self.storage.skipped_content_add( [x for x in content if x.sha1_git is None or x.sha1_git in contents_to_add] ) def directory_add(self, directories: List[Directory]) -> Dict[str, int]: + empty_stat = {"directory:add": 0} + if not directories: + return empty_stat missing_ids = self._filter_missing_ids("directory", (d.id for d in directories)) + if not missing_ids: + return empty_stat return self.storage.directory_add( [d for d in directories if d.id in missing_ids] ) def revision_add(self, revisions: List[Revision]) -> Dict[str, int]: + empty_stat = {"revision:add": 0} + if not revisions: + return empty_stat missing_ids = self._filter_missing_ids("revision", (r.id for r in revisions)) + if not missing_ids: + return empty_stat return self.storage.revision_add([r for r in revisions if r.id in missing_ids]) + def release_add(self, releases: List[Release]) -> Dict[str, int]: + empty_stat = {"release:add": 0} + if not releases: + return empty_stat + missing_ids = self._filter_missing_ids("release", (r.id for r in releases)) + if not missing_ids: + return empty_stat + return self.storage.release_add([r for r in releases if r.id in missing_ids]) + def _filter_missing_contents(self, contents: List[Content]) -> Set[bytes]: """Return only the content keys missing from swh @@ -103,14 +142,4 @@ Missing ids from the storage for object_type """ - missing_ids = [] - for id in ids: - missing_ids.append(id) - - fn_by_object_type = { - "revision": self.storage.revision_missing, - "directory": self.storage.directory_missing, - } - - fn = fn_by_object_type[object_type] - return set(fn(missing_ids)) + return set(getattr(self.storage, f"{object_type}_missing")(list(ids))) diff --git a/swh/storage/tests/test_buffer.py b/swh/storage/tests/test_buffer.py --- a/swh/storage/tests/test_buffer.py +++ b/swh/storage/tests/test_buffer.py @@ -620,3 +620,13 @@ methods_called, ) prev = cur + + +def test_buffer_empty_batches() -> None: + "Flushing an empty buffer storage doesn't call any underlying _add method" + storage = get_storage_with_buffer_config() + storage.storage = mocked_storage = Mock(wraps=storage.storage) + + storage.flush() + methods_called = {c[0] for c in mocked_storage.method_calls} + assert methods_called == {"flush", "clear_buffers"} diff --git a/swh/storage/tests/test_filter.py b/swh/storage/tests/test_filter.py --- a/swh/storage/tests/test_filter.py +++ b/swh/storage/tests/test_filter.py @@ -3,6 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from unittest.mock import Mock + import attr import pytest @@ -110,6 +112,26 @@ } +def test_filtering_proxy_storage_release(swh_storage, sample_data): + sample_release = sample_data.release + + release = swh_storage.release_get([sample_release.id])[0] + assert release is None + + s = swh_storage.release_add([sample_release]) + assert s == { + "release:add": 1, + } + + release = swh_storage.release_get([sample_release.id])[0] + assert release is not None + + s = swh_storage.release_add([sample_release]) + assert s == { + "release:add": 0, + } + + def test_filtering_proxy_storage_directory(swh_storage, sample_data): sample_directory = sample_data.directory @@ -128,3 +150,32 @@ assert s == { "directory:add": 0, } + + +def test_filtering_proxy_storage_empty_list(swh_storage, sample_data): + swh_storage.storage = mock_storage = Mock(wraps=swh_storage.storage) + + calls = 0 + for object_type in swh_storage.object_types: + calls += 1 + + method_name = f"{object_type}_add" + method = getattr(swh_storage, method_name) + one_object = getattr(sample_data, object_type) + + # Call with empty list: ensure underlying storage not called + method([]) + assert method_name not in {c[0] for c in mock_storage.method_calls} + mock_storage.reset_mock() + + # Call with an object: ensure underlying storage is called + method([one_object]) + assert method_name in {c[0] for c in mock_storage.method_calls} + mock_storage.reset_mock() + + # Call with the same object: ensure underlying storage is not called again + method([one_object]) + assert method_name not in {c[0] for c in mock_storage.method_calls} + mock_storage.reset_mock() + + assert calls > 0, "Empty list never tested"