diff --git a/swh/journal/pytest_plugin.py b/swh/journal/pytest_plugin.py --- a/swh/journal/pytest_plugin.py +++ b/swh/journal/pytest_plugin.py @@ -13,7 +13,7 @@ from confluent_kafka.admin import AdminClient import pytest -from swh.journal.serializers import kafka_to_key, kafka_to_value, object_key, pprint_key +from swh.journal.serializers import kafka_to_key, kafka_to_value, pprint_key from swh.journal.tests.journal_data import TEST_OBJECTS @@ -69,7 +69,7 @@ """ for object_type, known_objects in TEST_OBJECTS.items(): - known_keys = [object_key(object_type, obj) for obj in known_objects] + known_keys = [obj.unique_key() for obj in known_objects] if not consumed_messages[object_type]: return diff --git a/swh/journal/serializers.py b/swh/journal/serializers.py --- a/swh/journal/serializers.py +++ b/swh/journal/serializers.py @@ -3,15 +3,15 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import Any, Dict, Union, overload +from typing import Any, Union import msgpack from swh.core.api.serializers import msgpack_dumps, msgpack_loads -from swh.model.hashutil import DEFAULT_ALGORITHMS from swh.model.model import ( Content, Directory, + KeyType, MetadataAuthority, MetadataFetcher, Origin, @@ -39,82 +39,6 @@ Snapshot, ] -KeyType = Union[Dict[str, str], Dict[str, bytes], bytes] - - -# these @overload'ed versions of the object_key method aim at helping mypy figuring -# the correct type-ing. -@overload -def object_key( - object_type: str, object_: Union[Content, Directory, Revision, Release, Snapshot] -) -> bytes: - ... - - -@overload -def object_key( - object_type: str, object_: Union[Origin, SkippedContent] -) -> Dict[str, bytes]: - ... - - -@overload -def object_key( - object_type: str, - object_: Union[ - MetadataAuthority, - MetadataFetcher, - OriginVisit, - OriginVisitStatus, - RawExtrinsicMetadata, - ], -) -> Dict[str, str]: - ... - - -def object_key(object_type: str, object_) -> KeyType: - if object_type in ("revision", "release", "directory", "snapshot"): - return object_.id - elif object_type == "content": - return object_.sha1 # TODO: use a dict of hashes - elif object_type == "skipped_content": - return {hash: getattr(object_, hash) for hash in DEFAULT_ALGORITHMS} - elif object_type == "origin": - return {"url": object_.url} - elif object_type == "origin_visit": - return { - "origin": object_.origin, - "date": str(object_.date), - } - elif object_type == "origin_visit_status": - return { - "origin": object_.origin, - "visit": str(object_.visit), - "date": str(object_.date), - } - elif object_type == "metadata_authority": - return { - "type": object_.type.value, - "url": object_.url, - } - elif object_type == "metadata_fetcher": - return { - "name": object_.name, - "version": object_.version, - } - elif object_type == "raw_extrinsic_metadata": - return { - "type": object_.type.value, - "id": str(object_.id), - "authority_type": object_.authority.type.value, - "authority_url": object_.authority.url, - "discovery_date": str(object_.discovery_date), - "fetcher_name": object_.fetcher.name, - "fetcher_version": object_.fetcher.version, - } - else: - raise ValueError("Unknown object type: %s." % object_type) - def stringify_key_item(k: str, v: Union[str, bytes]) -> str: """Turn the item of a dict key into a string""" diff --git a/swh/journal/tests/test_serializers.py b/swh/journal/tests/test_serializers.py --- a/swh/journal/tests/test_serializers.py +++ b/swh/journal/tests/test_serializers.py @@ -30,18 +30,11 @@ assert key == serializers.key_to_kafka(d) -def test_get_key(): - """Test whether get_key works on all our objects""" - for object_type, objects in TEST_OBJECTS.items(): - for obj in objects: - assert serializers.object_key(object_type, obj) is not None - - def test_pprint_key(): """Test whether get_key works on all our objects""" for object_type, objects in TEST_OBJECTS.items(): for obj in objects: - key = serializers.object_key(object_type, obj) + key = obj.unique_key() pprinted_key = serializers.pprint_key(key) assert isinstance(pprinted_key, str) @@ -66,7 +59,7 @@ ] for object_type, objects in TEST_OBJECTS.items(): for obj in objects: - key = serializers.object_key(object_type, obj) + key = obj.unique_key() keys.append(key) for key in keys: diff --git a/swh/journal/writer/kafka.py b/swh/journal/writer/kafka.py --- a/swh/journal/writer/kafka.py +++ b/swh/journal/writer/kafka.py @@ -13,7 +13,6 @@ KeyType, ModelObject, key_to_kafka, - object_key, pprint_key, value_to_kafka, ) @@ -206,7 +205,7 @@ def _write_addition(self, object_type: str, object_: ModelObject) -> None: """Write a single object to the journal""" - key = object_key(object_type, object_) + key = object_.unique_key() if self.anonymize: anon_object_ = object_.anonymize()