diff --git a/swh/web/api/views/metadata.py b/swh/web/api/views/metadata.py --- a/swh/web/api/views/metadata.py +++ b/swh/web/api/views/metadata.py @@ -3,7 +3,6 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -import base64 import re import iso8601 @@ -76,7 +75,7 @@ authority_str: str = request.query_params.get("authority") after_str: str = request.query_params.get("after") limit_str: str = request.query_params.get("limit", "100") - page_token_str: str = request.query_params.get("page_token") + page_token: str = request.query_params.get("page_token") if not authority_str: raise BadInputExc("The 'authority' query parameter is required.") @@ -110,12 +109,7 @@ try: target = identifiers.CoreSWHID.from_string(target).to_extended() except identifiers.ValidationError as e: - raise BadInputExc(f"Invalid target SWHID: {e.args[0]}") from None - - if page_token_str: - page_token = base64.urlsafe_b64decode(page_token_str) - else: - page_token = None + raise BadInputExc(f"Invalid target SWHID: {e.args[2]['args'][0]}") from None result_page = archive.storage.raw_extrinsic_metadata_get( target=target, @@ -145,14 +139,16 @@ "results": results, "headers": {}, } + assert page_token is None or result_page.next_page_token != page_token if result_page.next_page_token is not None: response["headers"]["link-next"] = reverse( - "api-1-raw-extrinsic-metadata", + "api-1-raw-extrinsic-metadata-swhid", + url_args={"target": target}, query_params=dict( authority=authority_str, after=after_str, limit=limit_str, - page_token=base64.urlsafe_b64encode(result_page.next_page_token), + page_token=result_page.next_page_token, ), request=request, ) diff --git a/swh/web/tests/api/views/test_metadata.py b/swh/web/tests/api/views/test_metadata.py --- a/swh/web/tests/api/views/test_metadata.py +++ b/swh/web/tests/api/views/test_metadata.py @@ -7,19 +7,40 @@ from hypothesis import given, strategies import pytest -from swh.model.hypothesis_strategies import raw_extrinsic_metadata +from swh.model.hypothesis_strategies import raw_extrinsic_metadata, sha1 +from swh.model.identifiers import ExtendedObjectType, ExtendedSWHID from swh.web.common.utils import reverse from swh.web.tests.api.views.utils import scroll_results from swh.web.tests.utils import check_api_get_responses, check_http_get_response -@given(raw_extrinsic_metadata()) +def unique_authority(archive_data, authority): + # FIXME: this is awful, but we need a unique authority for this test... + while archive_data.metadata_authority_get(type=authority.type, url=authority.url): + authority = attr.evolve(authority, url=authority.url + "a") + + return authority + + +def queryable_remd(): + """Some REMD objects are not queryable at the moment, because they depend on + Extended SWHIDs, which are private. This strategy excludes these objects.""" + return raw_extrinsic_metadata().filter( + lambda m: m.target.object_type + not in (ExtendedObjectType.ORIGIN, ExtendedObjectType.RAW_EXTRINSIC_METADATA) + ) + + +@given(queryable_remd()) def test_api_raw_extrinsic_metadata(api_client, archive_data, metadata): - archive_data.metadata_authority_add([metadata.authority]) + authority = unique_authority(archive_data, metadata.authority) + metadata = attr.evolve(metadata, authority=authority) + metadata = attr.evolve(metadata, id=metadata.compute_hash()) + + archive_data.metadata_authority_add([authority]) archive_data.metadata_fetcher_add([metadata.fetcher]) archive_data.raw_extrinsic_metadata_add([metadata]) - authority = metadata.authority url = reverse( "api-1-raw-extrinsic-metadata-swhid", url_args={"target": str(metadata.target)}, @@ -47,29 +68,40 @@ @pytest.mark.parametrize("limit", [1, 2, 10, 100]) -@given(strategies.sets(raw_extrinsic_metadata(), min_size=1)) -def test_api_raw_extrinsic_metadata_scroll(api_client, archive_data, limit, metadata): +@given(strategies.sets(queryable_remd(), min_size=1), sha1()) +def test_api_raw_extrinsic_metadata_scroll( + api_client, archive_data, limit, metadata, target_sha1 +): + target_swhid = ExtendedSWHID( + object_type=ExtendedObjectType.CONTENT, object_id=target_sha1 + ) # Make all metadata objects use the same authority and target metadata0 = next(iter(metadata)) - metadata = { - attr.evolve(m, authority=metadata0.authority, target=metadata0.target) - for m in metadata - } - authority = metadata0.authority + authority = unique_authority(archive_data, metadata0.authority) + + metadata = [ + attr.evolve(m, target=target_swhid, authority=authority) for m in metadata + ] + + # recompute hashes + metadata = {attr.evolve(m, id=m.compute_hash()) for m in metadata} archive_data.metadata_authority_add([authority]) archive_data.metadata_fetcher_add(list({m.fetcher for m in metadata})) - archive_data.raw_extrinsic_metadata_add(metadata) url = reverse( "api-1-raw-extrinsic-metadata-swhid", - url_args={"target": str(metadata0.target)}, + url_args={"target": str(target_swhid)}, query_params={ "authority": f"{authority.type.value} {authority.url}", "limit": limit, }, ) + assert scroll_results(api_client, url) == [], "results before insertion in DB?!" + + archive_data.raw_extrinsic_metadata_add(metadata) + results = scroll_results(api_client, url) expected_results = [m.to_dict() for m in metadata] @@ -83,7 +115,9 @@ for result in results: del result["metadata_url"] - assert results == expected_results + assert len(results) == len(expected_results), results + for result in results: + assert result in expected_results _swhid = "swh:1:dir:a2faa28028657859c16ff506924212b33f0e1307" @@ -139,32 +173,34 @@ check_api_get_responses(api_client, url, status_code=status_code) -@given(raw_extrinsic_metadata()) +@given(queryable_remd()) def test_api_raw_extrinsic_metadata_list_authorities( api_client, archive_data, metadata ): - archive_data.metadata_authority_add([metadata.authority]) + authority = unique_authority(archive_data, metadata.authority) + metadata = attr.evolve(metadata, authority=authority) + metadata = attr.evolve(metadata, id=metadata.compute_hash()) + + archive_data.metadata_authority_add([authority]) archive_data.metadata_fetcher_add([metadata.fetcher]) archive_data.raw_extrinsic_metadata_add([metadata]) - authority = metadata.authority url = reverse( "api-1-raw-extrinsic-metadata-swhid-authorities", url_args={"target": str(metadata.target)}, ) rv = check_api_get_responses(api_client, url, status_code=200) - expected_results = [ - { - "type": authority.type.value, - "url": authority.url, - "metadata_list_url": "http://testserver" - + reverse( - "api-1-raw-extrinsic-metadata-swhid", - url_args={"target": str(metadata.target)}, - query_params={"authority": f"{authority.type.value} {authority.url}"}, - ), - } - ] + expected_result = { + "type": authority.type.value, + "url": authority.url, + "metadata_list_url": "http://testserver" + + reverse( + "api-1-raw-extrinsic-metadata-swhid", + url_args={"target": str(metadata.target)}, + query_params={"authority": f"{authority.type.value} {authority.url}"}, + ), + } - assert rv.data == expected_results + # can't use list equality, as other tests added their own authorities. + assert expected_result in rv.data diff --git a/swh/web/tests/api/views/utils.py b/swh/web/tests/api/views/utils.py --- a/swh/web/tests/api/views/utils.py +++ b/swh/web/tests/api/views/utils.py @@ -15,6 +15,8 @@ while True: rv = check_api_get_responses(api_client, url, status_code=200) + for i in rv.data: + assert i not in results results.extend(rv.data) if "Link" in rv: