diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -3,6 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import base64 import datetime import itertools import json @@ -1072,7 +1073,7 @@ after: Optional[datetime.datetime] = None, page_token: Optional[bytes] = None, limit: int = 1000, - ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]: + ) -> PagedResult[RawExtrinsicMetadata]: if object_type == MetadataTargetType.ORIGIN: if isinstance(id, SWHID): raise StorageArgumentException( @@ -1088,7 +1089,7 @@ if page_token is not None: (after_date, after_fetcher_name, after_fetcher_url) = msgpack_loads( - page_token + base64.b64decode(page_token) ) if after and after_date < after: raise StorageArgumentException( @@ -1148,20 +1149,19 @@ results.pop() assert len(results) == limit last_result = results[-1] - next_page_token: Optional[bytes] = msgpack_dumps( - ( - last_result.discovery_date, - last_result.fetcher.name, - last_result.fetcher.version, + next_page_token: Optional[str] = base64.b64encode( + msgpack_dumps( + ( + last_result.discovery_date, + last_result.fetcher.name, + last_result.fetcher.version, + ) ) - ) + ).decode() else: next_page_token = None - return { - "next_page_token": next_page_token, - "results": results, - } + return PagedResult(next_page_token=next_page_token, results=results,) def metadata_fetcher_add(self, fetchers: List[MetadataFetcher]) -> None: self.journal_writer.metadata_fetcher_add(fetchers) diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -3,13 +3,14 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import re +import base64 import bisect import collections import copy import datetime import itertools import random +import re from collections import defaultdict from datetime import timedelta @@ -1114,7 +1115,7 @@ after: Optional[datetime.datetime] = None, page_token: Optional[bytes] = None, limit: int = 1000, - ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]: + ) -> PagedResult[RawExtrinsicMetadata]: authority_key = self._metadata_authority_key(authority) if object_type == MetadataTargetType.ORIGIN: @@ -1131,7 +1132,7 @@ ) if page_token is not None: - (after_time, after_fetcher) = msgpack_loads(page_token) + (after_time, after_fetcher) = msgpack_loads(base64.b64decode(page_token)) after_fetcher = tuple(after_fetcher) if after is not None and after > after_time: raise StorageArgumentException( @@ -1173,19 +1174,18 @@ results.pop() assert len(results) == limit last_result = results[-1] - next_page_token: Optional[bytes] = msgpack_dumps( - ( - last_result.discovery_date, - self._metadata_fetcher_key(last_result.fetcher), + next_page_token: Optional[str] = base64.b64encode( + msgpack_dumps( + ( + last_result.discovery_date, + self._metadata_fetcher_key(last_result.fetcher), + ) ) - ) + ).decode() else: next_page_token = None - return { - "next_page_token": next_page_token, - "results": results, - } + return PagedResult(next_page_token=next_page_token, results=results,) def metadata_fetcher_add(self, fetchers: List[MetadataFetcher]) -> None: self.journal_writer.metadata_fetcher_add(fetchers) diff --git a/swh/storage/interface.py b/swh/storage/interface.py --- a/swh/storage/interface.py +++ b/swh/storage/interface.py @@ -1143,7 +1143,7 @@ after: Optional[datetime.datetime] = None, page_token: Optional[bytes] = None, limit: int = 1000, - ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]: + ) -> PagedResult[RawExtrinsicMetadata]: """Retrieve list of all raw_extrinsic_metadata entries for the id Args: @@ -1155,10 +1155,7 @@ limit: maximum number of results to be returned Returns: - dict with keys `next_page_token` and `results`. - `next_page_token` is an opaque token that is used to get the - next page of results, or `None` if there are no more results. - `results` is a list of RawExtrinsicMetadata objects: + PagedResult of RawExtrinsicMetadata """ ... diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -3,6 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import base64 import contextlib import datetime import itertools @@ -1234,7 +1235,7 @@ limit: int = 1000, db=None, cur=None, - ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]: + ) -> PagedResult[RawExtrinsicMetadata]: if object_type == MetadataTargetType.ORIGIN: if isinstance(id, SWHID): raise StorageArgumentException( @@ -1249,7 +1250,7 @@ ) if page_token: - (after_time, after_fetcher) = msgpack_loads(page_token) + (after_time, after_fetcher) = msgpack_loads(base64.b64decode(page_token)) if after and after_time < after: raise StorageArgumentException( "page_token is inconsistent with the value of 'after'." @@ -1260,10 +1261,7 @@ authority_id = self._get_authority_id(authority, db, cur) if not authority_id: - return { - "next_page_token": None, - "results": [], - } + return PagedResult(next_page_token=None, results=[],) rows = db.raw_extrinsic_metadata_get( object_type, @@ -1284,19 +1282,18 @@ results.pop() assert len(results) == limit last_returned_row = rows[-2] # rows[-1] corresponds to the popped result - next_page_token: Optional[bytes] = msgpack_dumps( - ( - last_returned_row["discovery_date"], - last_returned_row["metadata_fetcher.id"], + next_page_token: Optional[str] = base64.b64encode( + msgpack_dumps( + ( + last_returned_row["discovery_date"], + last_returned_row["metadata_fetcher.id"], + ) ) - ) + ).decode() else: next_page_token = None - return { - "next_page_token": next_page_token, - "results": results, - } + return PagedResult(next_page_token=next_page_token, results=results,) @timed @db_transaction() diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py --- a/swh/storage/tests/test_retry.py +++ b/swh/storage/tests/test_retry.py @@ -485,8 +485,8 @@ origin_metadata = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, ori_meta.id, ori_meta.authority ) - assert origin_metadata["next_page_token"] is None - assert not origin_metadata["results"] + assert origin_metadata.next_page_token is None + assert not origin_metadata.results swh_storage.raw_extrinsic_metadata_add([ori_meta]) diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -3350,8 +3350,8 @@ result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority ) - assert result["next_page_token"] is None - assert list(sorted(result["results"], key=lambda x: x.discovery_date,)) == list( + assert result.next_page_token is None + assert list(sorted(result.results, key=lambda x: x.discovery_date,)) == list( content_metadata ) @@ -3387,12 +3387,12 @@ result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority ) - assert result["next_page_token"] is None + assert result.next_page_token is None expected_results1 = (content_metadata, new_content_metadata2) expected_results2 = (content_metadata, content_metadata2) - assert tuple(sorted(result["results"], key=lambda x: x.discovery_date,)) in ( + assert tuple(sorted(result.results, key=lambda x: x.discovery_date,)) in ( expected_results1, # cassandra expected_results2, # postgresql ) @@ -3426,24 +3426,24 @@ result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content1_swhid, authority ) - assert result["next_page_token"] is None + assert result.next_page_token is None assert [content1_metadata1, content1_metadata2] == list( - sorted(result["results"], key=lambda x: x.discovery_date,) + sorted(result.results, key=lambda x: x.discovery_date,) ) result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content1_swhid, authority2 ) - assert result["next_page_token"] is None + assert result.next_page_token is None assert [content1_metadata3] == list( - sorted(result["results"], key=lambda x: x.discovery_date,) + sorted(result.results, key=lambda x: x.discovery_date,) ) result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content2_swhid, authority ) - assert result["next_page_token"] is None - assert [content2_metadata] == list(result["results"],) + assert result.next_page_token is None + assert [content2_metadata] == list(result.results,) def test_content_metadata_get_after(self, swh_storage, sample_data): content = sample_data.content @@ -3464,9 +3464,9 @@ authority, after=content_metadata.discovery_date - timedelta(seconds=1), ) - assert result["next_page_token"] is None + assert result.next_page_token is None assert [content_metadata, content_metadata2] == list( - sorted(result["results"], key=lambda x: x.discovery_date,) + sorted(result.results, key=lambda x: x.discovery_date,) ) result = swh_storage.raw_extrinsic_metadata_get( @@ -3475,8 +3475,8 @@ authority, after=content_metadata.discovery_date, ) - assert result["next_page_token"] is None - assert result["results"] == [content_metadata2] + assert result.next_page_token is None + assert result.results == [content_metadata2] result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, @@ -3484,8 +3484,8 @@ authority, after=content_metadata2.discovery_date, ) - assert result["next_page_token"] is None - assert result["results"] == [] + assert result.next_page_token is None + assert result.results == [] def test_content_metadata_get_paginate(self, swh_storage, sample_data): content = sample_data.content @@ -3505,18 +3505,18 @@ result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority, limit=1 ) - assert result["next_page_token"] is not None - assert result["results"] == [content_metadata] + assert result.next_page_token is not None + assert result.results == [content_metadata] result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority, limit=1, - page_token=result["next_page_token"], + page_token=result.next_page_token, ) - assert result["next_page_token"] is None - assert result["results"] == [content_metadata2] + assert result.next_page_token is None + assert result.results == [content_metadata2] def test_content_metadata_get_paginate_same_date(self, swh_storage, sample_data): content = sample_data.content @@ -3542,18 +3542,18 @@ result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority, limit=1 ) - assert result["next_page_token"] is not None - assert result["results"] == [content_metadata] + assert result.next_page_token is not None + assert result.results == [content_metadata] result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority, limit=1, - page_token=result["next_page_token"], + page_token=result.next_page_token, ) - assert result["next_page_token"] is None - assert result["results"] == [new_content_metadata2] + assert result.next_page_token is None + assert result.results == [new_content_metadata2] def test_content_metadata_get__invalid_id(self, swh_storage, sample_data): origin = sample_data.origin @@ -3586,8 +3586,8 @@ result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority ) - assert result["next_page_token"] is None - assert list(sorted(result["results"], key=lambda x: x.discovery_date)) == [ + assert result.next_page_token is None + assert list(sorted(result.results, key=lambda x: x.discovery_date)) == [ origin_metadata, origin_metadata2, ] @@ -3624,13 +3624,13 @@ result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority ) - assert result["next_page_token"] is None + assert result.next_page_token is None # which of the two behavior happens is backend-specific. expected_results1 = (origin_metadata, new_origin_metadata2) expected_results2 = (origin_metadata, origin_metadata2) - assert tuple(sorted(result["results"], key=lambda x: x.discovery_date,)) in ( + assert tuple(sorted(result.results, key=lambda x: x.discovery_date,)) in ( expected_results1, # cassandra expected_results2, # postgresql ) @@ -3659,24 +3659,24 @@ result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority ) - assert result["next_page_token"] is None + assert result.next_page_token is None assert [origin1_metadata1, origin1_metadata2] == list( - sorted(result["results"], key=lambda x: x.discovery_date,) + sorted(result.results, key=lambda x: x.discovery_date,) ) result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority2 ) - assert result["next_page_token"] is None + assert result.next_page_token is None assert [origin1_metadata3] == list( - sorted(result["results"], key=lambda x: x.discovery_date,) + sorted(result.results, key=lambda x: x.discovery_date,) ) result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin2.url, authority ) - assert result["next_page_token"] is None - assert [origin2_metadata] == list(result["results"],) + assert result.next_page_token is None + assert [origin2_metadata] == list(result.results,) def test_origin_metadata_get_after(self, swh_storage, sample_data): origin = sample_data.origin @@ -3696,8 +3696,8 @@ authority, after=origin_metadata.discovery_date - timedelta(seconds=1), ) - assert result["next_page_token"] is None - assert list(sorted(result["results"], key=lambda x: x.discovery_date,)) == [ + assert result.next_page_token is None + assert list(sorted(result.results, key=lambda x: x.discovery_date,)) == [ origin_metadata, origin_metadata2, ] @@ -3708,8 +3708,8 @@ authority, after=origin_metadata.discovery_date, ) - assert result["next_page_token"] is None - assert result["results"] == [origin_metadata2] + assert result.next_page_token is None + assert result.results == [origin_metadata2] result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, @@ -3717,8 +3717,8 @@ authority, after=origin_metadata2.discovery_date, ) - assert result["next_page_token"] is None - assert result["results"] == [] + assert result.next_page_token is None + assert result.results == [] def test_origin_metadata_get_paginate(self, swh_storage, sample_data): origin = sample_data.origin @@ -3739,18 +3739,18 @@ result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority, limit=1 ) - assert result["next_page_token"] is not None - assert result["results"] == [origin_metadata] + assert result.next_page_token is not None + assert result.results == [origin_metadata] result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority, limit=1, - page_token=result["next_page_token"], + page_token=result.next_page_token, ) - assert result["next_page_token"] is None - assert result["results"] == [origin_metadata2] + assert result.next_page_token is None + assert result.results == [origin_metadata2] def test_origin_metadata_get_paginate_same_date(self, swh_storage, sample_data): origin = sample_data.origin @@ -3773,18 +3773,18 @@ result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority, limit=1 ) - assert result["next_page_token"] is not None - assert result["results"] == [origin_metadata] + assert result.next_page_token is not None + assert result.results == [origin_metadata] result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority, limit=1, - page_token=result["next_page_token"], + page_token=result.next_page_token, ) - assert result["next_page_token"] is None - assert result["results"] == [new_origin_metadata2] + assert result.next_page_token is None + assert result.results == [new_origin_metadata2] def test_origin_metadata_add_missing_authority(self, swh_storage, sample_data): origin = sample_data.origin