diff --git a/requirements-swh-journal.txt b/requirements-swh-journal.txt
--- a/requirements-swh-journal.txt
+++ b/requirements-swh-journal.txt
@@ -1 +1 @@
-swh.journal >= 0.1.0
+swh.journal >= 0.2
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,3 +1,3 @@
 swh.core[db,http] >= 0.0.94
-swh.model >= 0.0.66
+swh.model >= 0.3
 swh.objstorage >= 0.0.40
diff --git a/swh/storage/cassandra/converters.py b/swh/storage/cassandra/converters.py
--- a/swh/storage/cassandra/converters.py
+++ b/swh/storage/cassandra/converters.py
@@ -5,7 +5,7 @@
 from copy import deepcopy
 import json
-from typing import Any, Dict, List
+from typing import Any, Dict, Tuple
 
 import attr
 
@@ -38,7 +38,7 @@
     return db_revision
 
 
-def revision_from_db(db_revision: Row, parents: List[Sha1Git]) -> Revision:
+def revision_from_db(db_revision: Row, parents: Tuple[Sha1Git, ...]) -> Revision:
     revision = db_revision._asdict()  # type: ignore
     metadata = json.loads(revision.pop("metadata", None))
     if metadata and "extra_headers" in metadata:
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -458,7 +458,7 @@
             parent_rows = self._cql_runner.revision_parent_get(row.id)
             # parent_rank is the clustering key, so results are already
             # sorted by rank.
-            parents = [row.parent_id for row in parent_rows]
+            parents = tuple([row.parent_id for row in parent_rows])
             rev = revision_from_db(row, parents=parents)
             revs[rev.id] = rev.to_dict()
 
@@ -490,7 +490,7 @@
             # parent_rank is the clustering key, so results are already
             # sorted by rank.
-            parents = [row.parent_id for row in parent_rows]
+            parents = tuple([row.parent_id for row in parent_rows])
 
             if short:
                 yield (row.id, parents)
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -113,7 +113,7 @@
 dir = {
     "id": hash_to_bytes("340133423253310030f531e632a733ff37c3a930"),
-    "entries": [
+    "entries": (
         {
             "name": b"foo",
             "type": "file",
@@ -126,12 +126,12 @@
             "target": b"12345678901234567890",
             "perms": from_disk.DentryPerms.directory,
         },
-    ],
+    ),
 }
 
 dir2 = {
     "id": hash_to_bytes("340133423253310030f531e632a733ff37c3a935"),
-    "entries": [
+    "entries": (
         {
             "name": b"oof",
             "type": "file",
@@ -139,13 +139,13 @@
             "target": hash_to_bytes(
                 "36fade77193cb6d2bd826161a0979d64c28ab4fa"
             ),
             "perms": from_disk.DentryPerms.content,
-        }
-    ],
+        },
+    ),
 }
 
 dir3 = {
     "id": hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8921"),
-    "entries": [
+    "entries": (
         {
             "name": b"foo",
             "type": "file",
@@ -164,19 +164,19 @@
             "target": b"12345678901234567890",
             "perms": from_disk.DentryPerms.content,
         },
-    ],
+    ),
 }
 
 dir4 = {
     "id": hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8922"),
-    "entries": [
+    "entries": (
         {
             "name": b"subdir1",
             "type": "dir",
             "target": hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8921"),  # dir3
             "perms": from_disk.DentryPerms.directory,
         },
-    ],
+    ),
 }
 
 directories = (dir, dir2, dir3, dir4)
@@ -208,7 +208,7 @@
         "offset": 0,
         "negative_utc": True,
     },
-    "parents": [b"01234567890123456789", b"23434512345123456789"],
+    "parents": (b"01234567890123456789", b"23434512345123456789"),
     "type": "git",
     "directory": hash_to_bytes("340133423253310030f531e632a733ff37c3a930"),  # dir
     "metadata": {
@@ -245,7 +245,7 @@
         "offset": 0,
         "negative_utc": False,
     },
-    "parents": [b"01234567890123456789"],
+    "parents": (b"01234567890123456789",),
     "type": "git",
     "directory": hash_to_bytes("340133423253310030f531e632a733ff37c3a935"),  # dir2
     "metadata": None,
@@ -275,7 +275,7 @@
         "offset": 0,
         "negative_utc": False,
     },
-    "parents": [],
+    "parents": (),
     "type": "git",
     "directory": hash_to_bytes("340133423253310030f531e632a733ff37c3a935"),  # dir2
     "metadata": None,
@@ -305,7 +305,9 @@
         "offset": -720,
         "negative_utc": False,
     },
-    "parents": [hash_to_bytes("7026b7c1a2af56521e951c01ed20f255fa054238")],  # revision3
+    "parents": (
+        hash_to_bytes("7026b7c1a2af56521e951c01ed20f255fa054238"),
+    ),  # revision3
     "type": "git",
     "directory": hash_to_bytes("340133423253310030f531e632a733ff37c3a930"),  # dir
     "metadata": None,
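Note on the version bumps and the pervasive list-to-tuple changes above: the newer swh.model classes are immutable attrs-based objects, and immutable objects need immutable, hashable field values, which tuples provide and lists do not. A minimal sketch of the constraint, with illustrative class and field names (this is not swh.model itself):

```python
import attr


@attr.s(frozen=True)
class FrozenRevision:
    """Stand-in for an immutable model object; illustrative only."""

    id = attr.ib(type=bytes)
    parents = attr.ib(type=tuple)  # tuple keeps instances hashable


rev = FrozenRevision(id=b"rev-id", parents=(b"parent1", b"parent2"))
seen = {rev}  # works: frozen attrs classes hash over their field values
# With parents as a list, hash(rev) would raise TypeError, and "frozen"
# would be hollow anyway, since the list itself would stay mutable.
```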
hash_to_bytes("340133423253310030f531e632a733ff37c3a935"), # dir2 "metadata": None, @@ -275,7 +275,7 @@ "offset": 0, "negative_utc": False, }, - "parents": [], + "parents": (), "type": "git", "directory": hash_to_bytes("340133423253310030f531e632a733ff37c3a935"), # dir2 "metadata": None, @@ -305,7 +305,9 @@ "offset": -720, "negative_utc": False, }, - "parents": [hash_to_bytes("7026b7c1a2af56521e951c01ed20f255fa054238")], # revision3 + "parents": ( + hash_to_bytes("7026b7c1a2af56521e951c01ed20f255fa054238"), + ), # revision3 "type": "git", "directory": hash_to_bytes("340133423253310030f531e632a733ff37c3a930"), # dir "metadata": None, diff --git a/swh/storage/tests/test_api_client_dicts.py b/swh/storage/tests/test_api_client_dicts.py deleted file mode 100644 --- a/swh/storage/tests/test_api_client_dicts.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (C) 2015-2020 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from unittest.mock import patch - -import pytest - -from swh.storage.api.client import RemoteStorage -import swh.storage.api.server as server -import swh.storage.storage -from swh.storage.tests.test_storage import TestStorageGeneratedData # noqa -from swh.storage.tests.test_storage import TestStorage as _TestStorage -from swh.storage.tests.test_api_client import swh_storage # noqa - -# tests are executed using imported classes (TestStorage and -# TestStorageGeneratedData) using overloaded swh_storage fixture -# below - - -@pytest.fixture -def app_server(): - storage_config = { - "cls": "validate", - "storage": {"cls": "memory", "journal_writer": {"cls": "memory",},}, - } - server.storage = swh.storage.get_storage(**storage_config) - yield server - - -@pytest.fixture -def app(app_server): - return app_server.app - - -@pytest.fixture -def swh_rpc_client_class(): - return RemoteStorage - - -class TestStorage(_TestStorage): - def test_content_update(self, swh_storage, app_server): # noqa - # TODO, journal_writer not supported - swh_storage.journal_writer.journal = None - with patch.object(server.storage.journal_writer, "journal", None): - super().test_content_update(swh_storage) - - @pytest.mark.skip("non-applicable test") - def test_content_add_from_lazy_content(self): - pass diff --git a/swh/storage/tests/test_replay.py b/swh/storage/tests/test_replay.py --- a/swh/storage/tests/test_replay.py +++ b/swh/storage/tests/test_replay.py @@ -25,7 +25,7 @@ from swh.journal.client import JournalClient from swh.journal.tests.utils import MockedJournalClient, MockedKafkaWriter -from swh.journal.tests.conftest import ( +from swh.journal.tests.journal_data import ( TEST_OBJECT_DICTS, DUPLICATE_CONTENTS, ) @@ -62,7 +62,6 @@ # Fill Kafka nb_sent = 0 - nb_visits = 0 for object_type, objects in TEST_OBJECT_DICTS.items(): topic = f"{kafka_prefix}.{object_type}" for object_ in objects: @@ -70,9 +69,6 @@ object_ = object_.copy() if object_type == "content": object_["ctime"] = now - elif object_type == "origin_visit": - nb_visits += 1 - object_["visit"] = nb_visits producer.produce( topic=topic, key=key_to_kafka(key), value=value_to_kafka(object_), ) @@ -116,8 +112,6 @@ if visit["origin"] == origin["url"] ] actual_visits = list(storage.origin_visit_get(origin_url)) - for visit in actual_visits: - del visit["visit"] # opaque identifier assert expected_visits == actual_visits input_contents = TEST_OBJECT_DICTS["content"] @@ -161,7 
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -366,7 +366,7 @@
         results = swh_storage.content_get_metadata([cont["sha1"]])
 
         del cont["data"]
-        assert results == {cont["sha1"]: [cont]}
+        assert tuple(results[cont["sha1"]]) == (cont,)
 
     def test_content_add_metadata(self, swh_storage):
         cont = data.cont
@@ -380,9 +380,9 @@
 
         expected_cont = cont.copy()
         del expected_cont["ctime"]
-        assert swh_storage.content_get_metadata([cont["sha1"]]) == {
-            cont["sha1"]: [expected_cont]
-        }
+        assert tuple(
+            swh_storage.content_get_metadata([cont["sha1"]])[cont["sha1"]]
+        ) == (expected_cont,)
 
         contents = [
             obj for (obj_type, obj) in swh_storage.journal_writer.journal.objects
@@ -684,8 +684,8 @@
         cont1.pop("data")
         cont2.pop("data")
 
-        assert actual_md[cont1["sha1"]] == [cont1]
-        assert actual_md[cont2["sha1"]] == [cont2]
+        assert tuple(actual_md[cont1["sha1"]]) == (cont1,)
+        assert tuple(actual_md[cont2["sha1"]]) == (cont2,)
 
         assert len(actual_md.keys()) == 2
 
     def test_content_get_metadata_missing_sha1(self, swh_storage):
@@ -697,7 +697,8 @@
 
         actual_contents = swh_storage.content_get_metadata([missing_cont["sha1"]])
 
-        assert actual_contents == {missing_cont["sha1"]: []}
+        assert len(actual_contents) == 1
+        assert tuple(actual_contents[missing_cont["sha1"]]) == ()
 
     def test_content_get_random(self, swh_storage):
         swh_storage.content_add([data.cont, data.cont2, data.cont3])
@@ -1109,7 +1110,7 @@
         get = list(swh_storage.revision_get([data.revision3["id"]]))
 
         assert len(get) == 1
-        assert get[0]["parents"] == []  # no parents on this one
+        assert get[0]["parents"] == ()  # no parents on this one
 
     def test_revision_get_random(self, swh_storage):
         swh_storage.revision_add([data.revision, data.revision2, data.revision3])
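The reworked assertions stop pinning content_get_metadata to a dict-of-lists literal; they only require a mapping from sha1 to some iterable of metadata dicts, normalized through tuple(). That keeps the tests valid whichever sequence type a given backend returns. Sketch with made-up values:

```python
# Both result shapes satisfy the normalized assertion:
from_list_backend = {b"\x01" * 20: [{"length": 3}]}
from_tuple_backend = {b"\x01" * 20: ({"length": 3},)}

for results in (from_list_backend, from_tuple_backend):
    assert tuple(results[b"\x01" * 20]) == ({"length": 3},)
```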
diff --git a/swh/storage/validate.py b/swh/storage/validate.py
--- a/swh/storage/validate.py
+++ b/swh/storage/validate.py
@@ -5,7 +5,7 @@
 import datetime
 import contextlib
-from typing import Dict, Iterable, Optional, List
+from typing import Dict, Iterable, Iterator, List, Optional, Tuple
 
 from swh.model.model import (
     BaseModel,
@@ -79,6 +79,29 @@
         revisions = [Revision.from_dict(r) for r in revisions]
         return self.storage.revision_add(revisions)
 
+    def revision_get(self, revisions: Iterable[bytes]) -> Iterator[Optional[Dict]]:
+        rev_dicts = self.storage.revision_get(revisions)
+        with convert_validation_exceptions():
+            for rev_dict in rev_dicts:
+                if rev_dict is None:
+                    yield None
+                else:
+                    yield Revision.from_dict(rev_dict).to_dict()
+
+    def revision_log(
+        self, revisions: Iterable[bytes], limit: Optional[int] = None
+    ) -> Iterator[Dict]:
+        for rev_dict in self.storage.revision_log(revisions, limit):
+            with convert_validation_exceptions():
+                rev_obj = Revision.from_dict(rev_dict)
+            yield rev_obj.to_dict()
+
+    def revision_shortlog(
+        self, revisions: Iterable[bytes], limit: Optional[int] = None
+    ) -> Iterator[Tuple[bytes, Tuple]]:
+        for rev, parents in self.storage.revision_shortlog(revisions, limit):
+            yield (rev, tuple(parents))
+
     def release_add(self, releases: Iterable[Dict]) -> Dict:
         with convert_validation_exceptions():
             releases = [Release.from_dict(r) for r in releases]
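With these additions the validating proxy covers reads as well as writes: each dict coming back from the wrapped storage is round-tripped through Revision.from_dict(...).to_dict(), which normalizes field types (parents come back as a tuple, matching the test changes above) and surfaces malformed rows as validation errors inside convert_validation_exceptions(). The proxy is stacked through configuration; a minimal sketch using the same layering as the removed app_server fixture:

```python
from swh.storage import get_storage

# Validation proxy in front, in-memory backend behind (same config shape
# as the deleted test fixture above).
storage = get_storage(
    cls="validate",
    storage={"cls": "memory", "journal_writer": {"cls": "memory"}},
)
revisions = list(storage.revision_get([b"\x00" * 20]))  # [None] for unknown ids
```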