Page MenuHomeSoftware Heritage

D6081.diff
No OneTemporary

D6081.diff

diff --git a/swh/web/api/views/metadata.py b/swh/web/api/views/metadata.py
--- a/swh/web/api/views/metadata.py
+++ b/swh/web/api/views/metadata.py
@@ -3,7 +3,6 @@
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import base64
import re
import iso8601
@@ -76,7 +75,7 @@
authority_str: str = request.query_params.get("authority")
after_str: str = request.query_params.get("after")
limit_str: str = request.query_params.get("limit", "100")
- page_token_str: str = request.query_params.get("page_token")
+ page_token: str = request.query_params.get("page_token")
if not authority_str:
raise BadInputExc("The 'authority' query parameter is required.")
@@ -110,12 +109,7 @@
try:
target = identifiers.CoreSWHID.from_string(target).to_extended()
except identifiers.ValidationError as e:
- raise BadInputExc(f"Invalid target SWHID: {e.args[0]}") from None
-
- if page_token_str:
- page_token = base64.urlsafe_b64decode(page_token_str)
- else:
- page_token = None
+ raise BadInputExc(f"Invalid target SWHID: {e.args[2]['args'][0]}") from None
result_page = archive.storage.raw_extrinsic_metadata_get(
target=target,
@@ -145,14 +139,16 @@
"results": results,
"headers": {},
}
+ assert page_token is None or result_page.next_page_token != page_token
if result_page.next_page_token is not None:
response["headers"]["link-next"] = reverse(
- "api-1-raw-extrinsic-metadata",
+ "api-1-raw-extrinsic-metadata-swhid",
+ url_args={"target": target},
query_params=dict(
authority=authority_str,
after=after_str,
limit=limit_str,
- page_token=base64.urlsafe_b64encode(result_page.next_page_token),
+ page_token=result_page.next_page_token,
),
request=request,
)
diff --git a/swh/web/tests/api/views/test_metadata.py b/swh/web/tests/api/views/test_metadata.py
--- a/swh/web/tests/api/views/test_metadata.py
+++ b/swh/web/tests/api/views/test_metadata.py
@@ -7,19 +7,40 @@
from hypothesis import given, strategies
import pytest
-from swh.model.hypothesis_strategies import raw_extrinsic_metadata
+from swh.model.hypothesis_strategies import raw_extrinsic_metadata, sha1
+from swh.model.identifiers import ExtendedObjectType, ExtendedSWHID
from swh.web.common.utils import reverse
from swh.web.tests.api.views.utils import scroll_results
from swh.web.tests.utils import check_api_get_responses, check_http_get_response
-@given(raw_extrinsic_metadata())
+def unique_authority(archive_data, authority):
+ # FIXME: this is awful, but we need a unique authority for this test...
+ while archive_data.metadata_authority_get(type=authority.type, url=authority.url):
+ authority = attr.evolve(authority, url=authority.url + "a")
+
+ return authority
+
+
+def queryable_remd():
+ """Some REMD objects are not queryable at the moment, because they depend on
+ Extended SWHIDs, which are private. This strategy excludes these objects."""
+ return raw_extrinsic_metadata().filter(
+ lambda m: m.target.object_type
+ not in (ExtendedObjectType.ORIGIN, ExtendedObjectType.RAW_EXTRINSIC_METADATA)
+ )
+
+
+@given(queryable_remd())
def test_api_raw_extrinsic_metadata(api_client, archive_data, metadata):
- archive_data.metadata_authority_add([metadata.authority])
+ authority = unique_authority(archive_data, metadata.authority)
+ metadata = attr.evolve(metadata, authority=authority)
+ metadata = attr.evolve(metadata, id=metadata.compute_hash())
+
+ archive_data.metadata_authority_add([authority])
archive_data.metadata_fetcher_add([metadata.fetcher])
archive_data.raw_extrinsic_metadata_add([metadata])
- authority = metadata.authority
url = reverse(
"api-1-raw-extrinsic-metadata-swhid",
url_args={"target": str(metadata.target)},
@@ -47,29 +68,40 @@
@pytest.mark.parametrize("limit", [1, 2, 10, 100])
-@given(strategies.sets(raw_extrinsic_metadata(), min_size=1))
-def test_api_raw_extrinsic_metadata_scroll(api_client, archive_data, limit, metadata):
+@given(strategies.sets(queryable_remd(), min_size=1), sha1())
+def test_api_raw_extrinsic_metadata_scroll(
+ api_client, archive_data, limit, metadata, target_sha1
+):
+ target_swhid = ExtendedSWHID(
+ object_type=ExtendedObjectType.CONTENT, object_id=target_sha1
+ )
# Make all metadata objects use the same authority and target
metadata0 = next(iter(metadata))
- metadata = {
- attr.evolve(m, authority=metadata0.authority, target=metadata0.target)
- for m in metadata
- }
- authority = metadata0.authority
+ authority = unique_authority(archive_data, metadata0.authority)
+
+ metadata = [
+ attr.evolve(m, target=target_swhid, authority=authority) for m in metadata
+ ]
+
+ # recompute hashes
+ metadata = {attr.evolve(m, id=m.compute_hash()) for m in metadata}
archive_data.metadata_authority_add([authority])
archive_data.metadata_fetcher_add(list({m.fetcher for m in metadata}))
- archive_data.raw_extrinsic_metadata_add(metadata)
url = reverse(
"api-1-raw-extrinsic-metadata-swhid",
- url_args={"target": str(metadata0.target)},
+ url_args={"target": str(target_swhid)},
query_params={
"authority": f"{authority.type.value} {authority.url}",
"limit": limit,
},
)
+ assert scroll_results(api_client, url) == [], "results before insertion in DB?!"
+
+ archive_data.raw_extrinsic_metadata_add(metadata)
+
results = scroll_results(api_client, url)
expected_results = [m.to_dict() for m in metadata]
@@ -83,7 +115,9 @@
for result in results:
del result["metadata_url"]
- assert results == expected_results
+ assert len(results) == len(expected_results), results
+ for result in results:
+ assert result in expected_results
_swhid = "swh:1:dir:a2faa28028657859c16ff506924212b33f0e1307"
@@ -139,32 +173,34 @@
check_api_get_responses(api_client, url, status_code=status_code)
-@given(raw_extrinsic_metadata())
+@given(queryable_remd())
def test_api_raw_extrinsic_metadata_list_authorities(
api_client, archive_data, metadata
):
- archive_data.metadata_authority_add([metadata.authority])
+ authority = unique_authority(archive_data, metadata.authority)
+ metadata = attr.evolve(metadata, authority=authority)
+ metadata = attr.evolve(metadata, id=metadata.compute_hash())
+
+ archive_data.metadata_authority_add([authority])
archive_data.metadata_fetcher_add([metadata.fetcher])
archive_data.raw_extrinsic_metadata_add([metadata])
- authority = metadata.authority
url = reverse(
"api-1-raw-extrinsic-metadata-swhid-authorities",
url_args={"target": str(metadata.target)},
)
rv = check_api_get_responses(api_client, url, status_code=200)
- expected_results = [
- {
- "type": authority.type.value,
- "url": authority.url,
- "metadata_list_url": "http://testserver"
- + reverse(
- "api-1-raw-extrinsic-metadata-swhid",
- url_args={"target": str(metadata.target)},
- query_params={"authority": f"{authority.type.value} {authority.url}"},
- ),
- }
- ]
+ expected_result = {
+ "type": authority.type.value,
+ "url": authority.url,
+ "metadata_list_url": "http://testserver"
+ + reverse(
+ "api-1-raw-extrinsic-metadata-swhid",
+ url_args={"target": str(metadata.target)},
+ query_params={"authority": f"{authority.type.value} {authority.url}"},
+ ),
+ }
- assert rv.data == expected_results
+ # can't use list equality, as other tests added their own authorities.
+ assert expected_result in rv.data
diff --git a/swh/web/tests/api/views/utils.py b/swh/web/tests/api/views/utils.py
--- a/swh/web/tests/api/views/utils.py
+++ b/swh/web/tests/api/views/utils.py
@@ -15,6 +15,8 @@
while True:
rv = check_api_get_responses(api_client, url, status_code=200)
+ for i in rv.data:
+ assert i not in results
results.extend(rv.data)
if "Link" in rv:

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 6:12 PM (11 w, 14 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3230933

Event Timeline