diff --git a/swh/storage/__init__.py b/swh/storage/__init__.py --- a/swh/storage/__init__.py +++ b/swh/storage/__init__.py @@ -14,7 +14,6 @@ "filter", "buffer", "retry", - "validate", "cassandra", } @@ -66,8 +65,6 @@ from .buffer import BufferingProxyStorage as Storage elif cls == "retry": from .retry import RetryingProxyStorage as Storage - elif cls == "validate": - from .validate import ValidatingProxyStorage as Storage return Storage(**kwargs) diff --git a/swh/storage/converters.py b/swh/storage/converters.py --- a/swh/storage/converters.py +++ b/swh/storage/converters.py @@ -67,8 +67,8 @@ def db_to_git_headers(db_git_headers): ret = [] for key, value in db_git_headers: - ret.append([key.encode("utf-8"), encode_with_unescape(value)]) - return ret + ret.append((key.encode("utf-8"), encode_with_unescape(value))) + return tuple(ret) def db_to_date(date, offset, neg_utc_offset): @@ -223,7 +223,7 @@ "metadata": metadata, "synthetic": db_revision["synthetic"], "extra_headers": extra_headers, - "parents": parents, + "parents": tuple(parents), } if "object_id" in db_revision: diff --git a/swh/storage/tests/conftest.py b/swh/storage/tests/conftest.py --- a/swh/storage/tests/conftest.py +++ b/swh/storage/tests/conftest.py @@ -17,7 +17,6 @@ from swh.model.model import BaseContent, Origin from swh.model.tests.generate_testdata import gen_contents, gen_origins -from swh.storage import get_storage from swh.storage.interface import StorageInterface # define tests profile. 
Full documentation is at: @@ -49,11 +48,6 @@ yield {**swh_storage_backend_config, "journal_writer": {"cls": "memory",}} -@pytest.fixture -def swh_storage(swh_storage_backend_config): - return get_storage(cls="validate", storage=swh_storage_backend_config) - - @pytest.fixture def swh_contents(swh_storage: StorageInterface) -> Iterable[BaseContent]: contents = [BaseContent.from_dict(c) for c in gen_contents(n=20)] diff --git a/swh/storage/tests/test_api_client.py b/swh/storage/tests/test_api_client.py --- a/swh/storage/tests/test_api_client.py +++ b/swh/storage/tests/test_api_client.py @@ -34,7 +34,10 @@ @pytest.fixture def swh_rpc_client_class(): def storage_factory(**kwargs): - storage_config = {"cls": "validate", "storage": {"cls": "remote", **kwargs,}} + storage_config = { + "cls": "remote", + **kwargs, + } return get_storage(**storage_config) return storage_factory diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py --- a/swh/storage/tests/test_retry.py +++ b/swh/storage/tests/test_retry.py @@ -13,7 +13,6 @@ MetadataTargetType, ) -from swh.storage import get_storage from swh.storage.exc import HashCollision, StorageArgumentException from .storage_data import date_visit1 @@ -46,16 +45,6 @@ } -@pytest.fixture -def swh_storage(swh_storage_backend_config): - return get_storage(**swh_storage_backend_config) - - -@pytest.fixture -def swh_storage_validate(swh_storage_backend_config): - return get_storage(cls="validate", storage=swh_storage_backend_config) - - def test_retrying_proxy_storage_content_add(swh_storage, sample_data_model): """Standard content_add works as before @@ -346,33 +335,23 @@ ) -def test_retrying_proxy_storage_metadata_fetcher_add( - swh_storage_validate, sample_data_model -): +def test_retrying_proxy_storage_metadata_fetcher_add(swh_storage, sample_data_model): """Standard metadata_fetcher_add works as before """ fetcher = sample_data_model["fetcher"][0] - metadata_fetcher = swh_storage_validate.metadata_fetcher_get( - 
fetcher.name, fetcher.version - ) + metadata_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version) assert not metadata_fetcher - swh_storage_validate.metadata_fetcher_add([fetcher]) + swh_storage.metadata_fetcher_add([fetcher]) - actual_fetcher = swh_storage_validate.metadata_fetcher_get( - fetcher.name, fetcher.version - ) + actual_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version) assert actual_fetcher == fetcher def test_retrying_proxy_storage_metadata_fetcher_add_with_retry( - monkeypatch_sleep, - swh_storage_validate, - sample_data_model, - mocker, - fake_hash_collision, + monkeypatch_sleep, swh_storage, sample_data_model, mocker, fake_hash_collision, ): """Multiple retries for hash collision and psycopg2 error but finally ok @@ -390,12 +369,10 @@ [fetcher], ] - actual_fetcher = swh_storage_validate.metadata_fetcher_get( - fetcher.name, fetcher.version - ) + actual_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version) assert not actual_fetcher - swh_storage_validate.metadata_fetcher_add([fetcher]) + swh_storage.metadata_fetcher_add([fetcher]) mock_memory.assert_has_calls( [call([fetcher]), call([fetcher]), call([fetcher]),] @@ -403,7 +380,7 @@ def test_retrying_proxy_swh_storage_metadata_fetcher_add_failure( - swh_storage_validate, sample_data_model, mocker + swh_storage, sample_data_model, mocker ): """Unfiltered errors are raising without retry @@ -417,43 +394,31 @@ fetcher = sample_data_model["fetcher"][0] - actual_fetcher = swh_storage_validate.metadata_fetcher_get( - fetcher.name, fetcher.version - ) + actual_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version) assert not actual_fetcher with pytest.raises(StorageArgumentException, match="Refuse to add"): - swh_storage_validate.metadata_fetcher_add([fetcher]) + swh_storage.metadata_fetcher_add([fetcher]) assert mock_memory.call_count == 1 -def test_retrying_proxy_storage_metadata_authority_add( - swh_storage_validate, 
sample_data_model -): +def test_retrying_proxy_storage_metadata_authority_add(swh_storage, sample_data_model): """Standard metadata_authority_add works as before """ authority = sample_data_model["authority"][0] - assert not swh_storage_validate.metadata_authority_get( - authority.type, authority.url - ) + assert not swh_storage.metadata_authority_get(authority.type, authority.url) - swh_storage_validate.metadata_authority_add([authority]) + swh_storage.metadata_authority_add([authority]) - actual_authority = swh_storage_validate.metadata_authority_get( - authority.type, authority.url - ) + actual_authority = swh_storage.metadata_authority_get(authority.type, authority.url) assert actual_authority == authority def test_retrying_proxy_storage_metadata_authority_add_with_retry( - monkeypatch_sleep, - swh_storage_validate, - sample_data_model, - mocker, - fake_hash_collision, + monkeypatch_sleep, swh_storage, sample_data_model, mocker, fake_hash_collision, ): """Multiple retries for hash collision and psycopg2 error but finally ok @@ -472,11 +437,9 @@ None, ] - assert not swh_storage_validate.metadata_authority_get( - authority.type, authority.url - ) + assert not swh_storage.metadata_authority_get(authority.type, authority.url) - swh_storage_validate.metadata_authority_add([authority]) + swh_storage.metadata_authority_add([authority]) mock_memory.assert_has_calls( [call([authority]), call([authority]), call([authority])] @@ -484,7 +447,7 @@ def test_retrying_proxy_swh_storage_metadata_authority_add_failure( - swh_storage_validate, sample_data_model, mocker + swh_storage, sample_data_model, mocker ): """Unfiltered errors are raising without retry @@ -498,47 +461,41 @@ authority = sample_data_model["authority"][0] - swh_storage_validate.metadata_authority_get(authority.type, authority.url) + swh_storage.metadata_authority_get(authority.type, authority.url) with pytest.raises(StorageArgumentException, match="Refuse to add"): - 
swh_storage_validate.metadata_authority_add([authority]) + swh_storage.metadata_authority_add([authority]) assert mock_memory.call_count == 1 -def test_retrying_proxy_storage_object_metadata_add( - swh_storage_validate, sample_data_model -): +def test_retrying_proxy_storage_object_metadata_add(swh_storage, sample_data_model): """Standard object_metadata_add works as before """ origin = sample_data_model["origin"][0] ori_meta = sample_data_model["origin_metadata"][0] assert origin.url == ori_meta.id - swh_storage_validate.origin_add([origin]) - swh_storage_validate.metadata_authority_add([sample_data_model["authority"][0]]) - swh_storage_validate.metadata_fetcher_add([sample_data_model["fetcher"][0]]) + swh_storage.origin_add([origin]) + swh_storage.metadata_authority_add([sample_data_model["authority"][0]]) + swh_storage.metadata_fetcher_add([sample_data_model["fetcher"][0]]) - origin_metadata = swh_storage_validate.object_metadata_get( + origin_metadata = swh_storage.object_metadata_get( MetadataTargetType.ORIGIN, ori_meta.id, ori_meta.authority ) assert origin_metadata["next_page_token"] is None assert not origin_metadata["results"] - swh_storage_validate.object_metadata_add([ori_meta]) + swh_storage.object_metadata_add([ori_meta]) - origin_metadata = swh_storage_validate.object_metadata_get( + origin_metadata = swh_storage.object_metadata_get( MetadataTargetType.ORIGIN, ori_meta.id, ori_meta.authority ) assert origin_metadata def test_retrying_proxy_storage_object_metadata_add_with_retry( - monkeypatch_sleep, - swh_storage_validate, - sample_data_model, - mocker, - fake_hash_collision, + monkeypatch_sleep, swh_storage, sample_data_model, mocker, fake_hash_collision, ): """Multiple retries for hash collision and psycopg2 error but finally ok @@ -546,9 +503,9 @@ origin = sample_data_model["origin"][0] ori_meta = sample_data_model["origin_metadata"][0] assert origin.url == ori_meta.id - swh_storage_validate.origin_add([origin]) - 
swh_storage_validate.metadata_authority_add([sample_data_model["authority"][0]]) - swh_storage_validate.metadata_fetcher_add([sample_data_model["fetcher"][0]]) + swh_storage.origin_add([origin]) + swh_storage.metadata_authority_add([sample_data_model["authority"][0]]) + swh_storage.metadata_fetcher_add([sample_data_model["fetcher"][0]]) mock_memory = mocker.patch( "swh.storage.in_memory.InMemoryStorage.object_metadata_add" ) @@ -563,7 +520,7 @@ ] # No exception raised as insertion finally came through - swh_storage_validate.object_metadata_add([ori_meta]) + swh_storage.object_metadata_add([ori_meta]) mock_memory.assert_has_calls( [ # 3 calls, as long as error raised @@ -575,7 +532,7 @@ def test_retrying_proxy_swh_storage_object_metadata_add_failure( - swh_storage_validate, sample_data_model, mocker + swh_storage, sample_data_model, mocker ): """Unfiltered errors are raising without retry @@ -588,10 +545,10 @@ origin = sample_data_model["origin"][0] ori_meta = sample_data_model["origin_metadata"][0] assert origin.url == ori_meta.id - swh_storage_validate.origin_add([origin]) + swh_storage.origin_add([origin]) with pytest.raises(StorageArgumentException, match="Refuse to add"): - swh_storage_validate.object_metadata_add([ori_meta]) + swh_storage.object_metadata_add([ori_meta]) assert mock_memory.call_count == 1 diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -17,7 +17,6 @@ from unittest.mock import Mock import attr -import psycopg2 import pytest from hypothesis import given, strategies, settings, HealthCheck @@ -223,8 +222,7 @@ insertion_start_time = now() - # bypass the validation proxy for now, to directly put a dict - actual_result = swh_storage.storage.content_add([lazy_content]) + actual_result = swh_storage.content_add([lazy_content]) insertion_end_time = now() @@ -254,33 +252,6 @@ swh_storage.refresh_stat_counters() assert 
swh_storage.stat_counters()["content"] == 1 - def test_content_add_validation(self, swh_storage, sample_data_model): - cont = sample_data_model["content"][0].to_dict() - - with pytest.raises(StorageArgumentException, match="status"): - swh_storage.content_add([{**cont, "status": "absent"}]) - - with pytest.raises(StorageArgumentException, match="status"): - swh_storage.content_add([{**cont, "status": "foobar"}]) - - with pytest.raises(StorageArgumentException, match="(?i)length"): - swh_storage.content_add([{**cont, "length": -2}]) - - with pytest.raises(StorageArgumentException, match="reason"): - swh_storage.content_add([{**cont, "reason": "foobar"}]) - - def test_skipped_content_add_validation(self, swh_storage, sample_data_model): - cont = attr.evolve(sample_data_model["content"][0], data=None).to_dict() - - with pytest.raises(StorageArgumentException, match="status"): - swh_storage.skipped_content_add([{**cont, "status": "visible"}]) - - with pytest.raises(StorageArgumentException, match="reason") as cm: - swh_storage.skipped_content_add([{**cont, "status": "absent"}]) - - if type(cm.value) == psycopg2.IntegrityError: - assert cm.exception.pgcode == psycopg2.errorcodes.NOT_NULL_VIOLATION - def test_content_get_missing(self, swh_storage, sample_data_model): cont, cont2 = sample_data_model["content"][:2] @@ -366,7 +337,7 @@ def test_content_update(self, swh_storage, sample_data_model): cont1 = sample_data_model["content"][0] - if hasattr(swh_storage, "storage"): + if hasattr(swh_storage, "journal_writer"): swh_storage.journal_writer.journal = None # TODO, not supported swh_storage.content_add([cont1]) @@ -739,23 +710,6 @@ swh_storage.refresh_stat_counters() assert swh_storage.stat_counters()["directory"] == 1 - def test_directory_add_validation(self, swh_storage, sample_data_model): - directory = sample_data_model["directory"][1] - dir_ = directory.to_dict() - dir_["entries"][0]["type"] = "foobar" - - with pytest.raises(StorageArgumentException, 
match="type.*foobar"): - swh_storage.directory_add([dir_]) - - dir_ = directory.to_dict() - del dir_["entries"][0]["target"] - - with pytest.raises(StorageArgumentException, match="target") as cm: - swh_storage.directory_add([dir_]) - - if type(cm.value) == psycopg2.IntegrityError: - assert cm.value.pgcode == psycopg2.errorcodes.NOT_NULL_VIOLATION - def test_directory_add_twice(self, swh_storage, sample_data_model): directory = sample_data_model["directory"][1] @@ -956,36 +910,6 @@ swh_storage.refresh_stat_counters() assert swh_storage.stat_counters()["revision"] == 1 - def test_revision_add_validation(self, swh_storage, sample_data_model): - revision = sample_data_model["revision"][0] - - rev = revision.to_dict() - rev["date"]["offset"] = 2 ** 16 - - with pytest.raises(StorageArgumentException, match="offset") as cm: - swh_storage.revision_add([rev]) - - if type(cm.value) == psycopg2.DataError: - assert cm.value.pgcode == psycopg2.errorcodes.NUMERIC_VALUE_OUT_OF_RANGE - - rev = revision.to_dict() - rev["committer_date"]["offset"] = 2 ** 16 - - with pytest.raises(StorageArgumentException, match="offset") as cm: - swh_storage.revision_add([rev]) - - if type(cm.value) == psycopg2.DataError: - assert cm.value.pgcode == psycopg2.errorcodes.NUMERIC_VALUE_OUT_OF_RANGE - - rev = revision.to_dict() - rev["type"] = "foobar" - - with pytest.raises(StorageArgumentException, match="(?i)type") as cm: - swh_storage.revision_add([rev]) - - if type(cm.value) == psycopg2.DataError: - assert cm.value.pgcode == psycopg2.errorcodes.INVALID_TEXT_REPRESENTATION - def test_revision_add_twice(self, swh_storage, sample_data_model): revision, revision2 = sample_data_model["revision"][:2] @@ -1183,34 +1107,6 @@ ("release", release) ] - def test_release_add_validation(self, swh_storage, sample_data_model): - release = sample_data_model["release"][0] - - rel = release.to_dict() - rel["date"]["offset"] = 2 ** 16 - - with pytest.raises(StorageArgumentException, match="offset") as cm: - 
swh_storage.release_add([rel]) - - if type(cm.value) == psycopg2.DataError: - assert cm.value.pgcode == psycopg2.errorcodes.NUMERIC_VALUE_OUT_OF_RANGE - - rel = release.to_dict() - rel["author"] = None - - with pytest.raises(StorageArgumentException, match="date") as cm: - swh_storage.release_add([rel]) - - if type(cm.value) == psycopg2.IntegrityError: - assert cm.value.pgcode == psycopg2.errorcodes.CHECK_VIOLATION - - def test_release_add_validation_type(self, swh_storage, sample_data_model): - release = sample_data_model["release"][0] - rel = release.to_dict() - rel["date"]["offset"] = "toto" - with pytest.raises(StorageArgumentException): - swh_storage.release_add([rel]) - def test_release_add_twice(self, swh_storage, sample_data_model): release, release2 = sample_data_model["release"][:2] @@ -1349,17 +1245,6 @@ ) assert add2 == {"origin:add": 0} - def test_origin_add_validation(self, swh_storage): - """Incorrect formatted origin should fail the validation - - """ - with pytest.raises(StorageArgumentException, match="url"): - swh_storage.origin_add([{}]) - with pytest.raises( - StorageArgumentException, match="unexpected keyword argument" - ): - swh_storage.origin_add([{"ul": "mistyped url key"}]) - def test_origin_get_legacy(self, swh_storage, sample_data_model): origin, origin2 = sample_data_model["origin"][:2] origin_dict, origin2_dict = [o.to_dict() for o in [origin, origin2]] @@ -2532,21 +2417,6 @@ ("snapshot", snapshot), ] - def test_snapshot_add_validation(self, swh_storage, sample_data_model): - snapshot = sample_data_model["snapshot"][0] - - snap = snapshot.to_dict() - snap["branches"][b"foo"] = {"target_type": "revision"} - - with pytest.raises(StorageArgumentException, match="target"): - swh_storage.snapshot_add([snap]) - - snap = snapshot.to_dict() - snap["branches"][b"foo"] = {"target": b"\x42" * 20} - - with pytest.raises(StorageArgumentException, match="target_type"): - swh_storage.snapshot_add([snap]) - def test_snapshot_add_count_branches(self, 
swh_storage, sample_data_model): complete_snapshot = sample_data_model["snapshot"][2] diff --git a/swh/storage/validate.py b/swh/storage/validate.py --- a/swh/storage/validate.py +++ b/swh/storage/validate.py @@ -4,21 +4,9 @@ # See top-level LICENSE file for more information import contextlib -from typing import Dict, Iterable, Iterator, Optional, Tuple, Type, TypeVar, Union -from swh.model.model import ( - SkippedContent, - Content, - Directory, - Revision, - Release, - Snapshot, - OriginVisit, - Origin, -) -from . import get_storage -from .exc import StorageArgumentException +from swh.storage.exc import StorageArgumentException VALIDATION_EXCEPTIONS = [ @@ -36,113 +24,3 @@ yield except tuple(VALIDATION_EXCEPTIONS) as e: raise StorageArgumentException(str(e)) - - -ModelObject = TypeVar( - "ModelObject", - Content, - SkippedContent, - Directory, - Revision, - Release, - Snapshot, - OriginVisit, - Origin, -) - - -def dict_converter( - model: Type[ModelObject], obj: Union[Dict, ModelObject] -) -> ModelObject: - """Convert dicts to model objects; Passes through model objects as well.""" - if isinstance(obj, dict): - with convert_validation_exceptions(): - return model.from_dict(obj) - else: - return obj - - -class ValidatingProxyStorage: - """Storage implementation converts dictionaries to swh-model objects - before calling its backend, and back to dicts before returning results - - For test purposes. 
- """ - - def __init__(self, storage): - self.storage = get_storage(**storage) - - def __getattr__(self, key): - if key == "storage": - raise AttributeError(key) - return getattr(self.storage, key) - - def content_add(self, content: Iterable[Union[Content, Dict]]) -> Dict: - return self.storage.content_add([dict_converter(Content, c) for c in content]) - - def content_add_metadata(self, content: Iterable[Union[Content, Dict]]) -> Dict: - return self.storage.content_add_metadata( - [dict_converter(Content, c) for c in content] - ) - - def skipped_content_add( - self, content: Iterable[Union[SkippedContent, Dict]] - ) -> Dict: - return self.storage.skipped_content_add( - [dict_converter(SkippedContent, c) for c in content] - ) - - def directory_add(self, directories: Iterable[Union[Directory, Dict]]) -> Dict: - return self.storage.directory_add( - [dict_converter(Directory, d) for d in directories] - ) - - def revision_add(self, revisions: Iterable[Union[Revision, Dict]]) -> Dict: - return self.storage.revision_add( - [dict_converter(Revision, r) for r in revisions] - ) - - def revision_get(self, revisions: Iterable[bytes]) -> Iterator[Optional[Dict]]: - rev_dicts = self.storage.revision_get(revisions) - with convert_validation_exceptions(): - for rev_dict in rev_dicts: - if rev_dict is None: - yield None - else: - yield Revision.from_dict(rev_dict).to_dict() - - def revision_log( - self, revisions: Iterable[bytes], limit: Optional[int] = None - ) -> Iterator[Dict]: - for rev_dict in self.storage.revision_log(revisions, limit): - with convert_validation_exceptions(): - rev_obj = Revision.from_dict(rev_dict) - yield rev_obj.to_dict() - - def revision_shortlog( - self, revisions: Iterable[bytes], limit: Optional[int] = None - ) -> Iterator[Tuple[bytes, Tuple]]: - for rev, parents in self.storage.revision_shortlog(revisions, limit): - yield (rev, tuple(parents)) - - def release_add(self, releases: Iterable[Union[Dict, Release]]) -> Dict: - return 
self.storage.release_add( - [dict_converter(Release, release) for release in releases] - ) - - def snapshot_add(self, snapshots: Iterable[Union[Dict, Snapshot]]) -> Dict: - return self.storage.snapshot_add( - [dict_converter(Snapshot, snapshot) for snapshot in snapshots] - ) - - def origin_visit_add(self, visits: Iterable[OriginVisit]) -> Iterable[OriginVisit]: - return self.storage.origin_visit_add(visits) - - def origin_add(self, origins: Iterable[Union[Dict, Origin]]) -> Dict[str, int]: - return self.storage.origin_add([dict_converter(Origin, o) for o in origins]) - - def clear_buffers(self, object_types: Optional[Iterable[str]] = None) -> None: - return self.storage.clear_buffers(object_types) - - def flush(self, object_types: Optional[Iterable[str]] = None) -> Dict: - return self.storage.flush(object_types)