Page MenuHomeSoftware Heritage

D8114.diff
No OneTemporary

D8114.diff

diff --git a/swh/web/api/utils.py b/swh/web/api/utils.py
--- a/swh/web/api/utils.py
+++ b/swh/web/api/utils.py
@@ -7,6 +7,7 @@
from django.http import HttpRequest
+from swh.model.model import Origin
from swh.web.common.query import parse_hash
from swh.web.common.typing import OriginInfo
from swh.web.common.utils import resolve_branch_alias, reverse
@@ -283,7 +284,7 @@
request: Absolute URIs will be generated if provided
Returns:
- An enriched origin dict filled with an additional url
+ An enriched origin dict filled with additional urls
"""
origin_dict = dict(origin)
if "url" in origin_dict:
@@ -292,6 +293,11 @@
url_args={"origin_url": origin_dict["url"]},
request=request,
)
+ origin_dict["metadata_authorities_url"] = reverse(
+ "api-1-raw-extrinsic-metadata-swhid-authorities",
+ url_args={"target": Origin(url=origin_dict["url"]).swhid()},
+ request=request,
+ )
return origin_dict
diff --git a/swh/web/api/views/metadata.py b/swh/web/api/views/metadata.py
--- a/swh/web/api/views/metadata.py
+++ b/swh/web/api/views/metadata.py
@@ -44,6 +44,8 @@
{common_headers}
:>jsonarr string target: SWHID of the object described by this metadata
+ (absent when ``target`` is not a core SWHID, i.e. it does not have type
+ ``cnt``/``dir``/``rev``/``rel``/``snp``)
:>jsonarr string discovery_date: ISO8601/RFC3339 timestamp of the moment this
metadata was collected.
:>jsonarr object authority: authority this metadata is coming from
@@ -108,9 +110,17 @@
limit = min(limit, 10000)
try:
- parsed_target = swhids.CoreSWHID.from_string(target).to_extended()
+ parsed_target = swhids.ExtendedSWHID.from_string(target)
except swhids.ValidationError as e:
- raise BadInputExc(f"Invalid target SWHID: {e.args[0]}") from None
+ raise BadInputExc(f"Invalid target SWHID: {e}") from None
+
+ try:
+ swhids.CoreSWHID.from_string(target)
+ except swhids.ValidationError:
+ # Can be parsed as an extended SWHID, but not as a core SWHID
+ extended_swhid = True
+ else:
+ extended_swhid = False
if page_token_str is not None:
page_token = base64.urlsafe_b64decode(page_token_str)
@@ -125,17 +135,32 @@
limit=limit,
)
+ filename = None
+ if parsed_target.object_type == swhids.ExtendedObjectType.ORIGIN:
+ origin_sha1 = hashutil.hash_to_hex(parsed_target.object_id)
+ (origin_info,) = list(archive.lookup_origins_by_sha1s([origin_sha1]))
+ if origin_info is not None:
+ filename = re.sub("[:/_.]+", "_", origin_info["url"]) + "_metadata"
+ if filename is None:
+ filename = f"{target}_metadata"
+
results = []
for metadata in result_page.results:
result = converters.from_raw_extrinsic_metadata(metadata)
+ if extended_swhid:
+ # Keep extended SWHIDs away from the public API as much as possible.
+ # (It is part of the URL, but not documented, and only accessed via
+ # the link in the response of api-1-origin)
+ del result["target"]
+
# We can't reliably send metadata directly, because it is a bytestring,
# and we have to return JSON documents.
result["metadata_url"] = reverse(
"api-1-raw-extrinsic-metadata-get",
url_args={"id": hashutil.hash_to_hex(metadata.id)},
- query_params={"filename": f"{target}_metadata"},
+ query_params={"filename": filename},
request=request,
)
@@ -208,6 +233,13 @@
They can then be used to get the raw `extrinsic metadata <https://docs.softwareheritage.org/devel/glossary.html#term-extrinsic-metadata>`__ collected on
that object from each of the authorities.
+ This endpoint should only be used directly to retrieve metadata from
+ core SWHIDs (with type ``cnt``, ``dir``, ``rev``, ``rel``, and ``snp``).
+ For "extended" SWHIDs such as origins, the URL in the
+ ``metadata_authorities_url`` field of
+ :http:get:`/api/1/origin/(origin_url)/get/` should be used instead of building
+ this URL directly.
+
:param string target: The core SWHID of the object whose metadata-providing
authorities should be returned
@@ -228,9 +260,9 @@
""" # noqa
try:
- parsed_target = swhids.CoreSWHID.from_string(target).to_extended()
+ parsed_target = swhids.ExtendedSWHID.from_string(target)
except swhids.ValidationError as e:
- raise BadInputExc(f"Invalid target SWHID: {e.args[0]}") from None
+ raise BadInputExc(f"Invalid target SWHID: {e}") from None
authorities = archive.storage.raw_extrinsic_metadata_get_authorities(
target=parsed_target
diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py
--- a/swh/web/api/views/origin.py
+++ b/swh/web/api/views/origin.py
@@ -45,6 +45,10 @@
:>json string status: status of the visit (either **full**,
**partial** or **ongoing**)
:>json number visit: the unique identifier of the visit
+ :>json string metadata_authorities_url: link to
+ :http:get:`/api/1/raw-extrinsic-metadata/swhid/(target)/authorities/`
+ to get the list of metadata authorities providing extrinsic metadata
+ on this origin (and, indirectly, to the origin's extrinsic metadata itself)
"""
DOC_RETURN_ORIGIN_VISIT_ARRAY = DOC_RETURN_ORIGIN_VISIT.replace(":>json", ":>jsonarr")
diff --git a/swh/web/tests/api/test_utils.py b/swh/web/tests/api/test_utils.py
--- a/swh/web/tests/api/test_utils.py
+++ b/swh/web/tests/api/test_utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2021 The Software Heritage developers
+# Copyright (C) 2015-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -6,6 +6,7 @@
import random
from swh.model.hashutil import DEFAULT_ALGORITHMS
+from swh.model.model import Origin
from swh.web.api import utils
from swh.web.common.origin_visits import get_origin_visits
from swh.web.common.utils import resolve_branch_alias, reverse
@@ -533,6 +534,11 @@
origin_data["origin_visits_url"] = reverse(
"api-1-origin-visits", url_args={"origin_url": origin["url"]}, request=request
)
+ origin_data["metadata_authorities_url"] = reverse(
+ "api-1-raw-extrinsic-metadata-swhid-authorities",
+ url_args={"target": Origin(url=origin["url"]).swhid()},
+ request=request,
+ )
assert actual_origin == origin_data
@@ -544,6 +550,11 @@
origin_visits_url = reverse(
"api-1-origin-visits", url_args={"origin_url": origin["url"]}, request=request
)
+ metadata_authorities_url = reverse(
+ "api-1-raw-extrinsic-metadata-swhid-authorities",
+ url_args={"target": Origin(url=origin["url"]).swhid()},
+ request=request,
+ )
origin_search_result_data = (
[{"url": origin["url"]}],
@@ -551,7 +562,13 @@
)
enriched_origin_search_result = (
- [{"url": origin["url"], "origin_visits_url": origin_visits_url}],
+ [
+ {
+ "url": origin["url"],
+ "origin_visits_url": origin_visits_url,
+ "metadata_authorities_url": metadata_authorities_url,
+ }
+ ],
None,
)
diff --git a/swh/web/tests/api/views/test_metadata.py b/swh/web/tests/api/views/test_metadata.py
--- a/swh/web/tests/api/views/test_metadata.py
+++ b/swh/web/tests/api/views/test_metadata.py
@@ -1,28 +1,21 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import attr
-from hypothesis import given
-from hypothesis.strategies import composite, sampled_from, sets
+from hypothesis import given, settings
+from hypothesis.strategies import sets
import pytest
-from swh.model.hypothesis_strategies import raw_extrinsic_metadata, sha1_git
-from swh.model.swhids import CoreSWHID, ObjectType
+from swh.model.hypothesis_strategies import raw_extrinsic_metadata
+from swh.model.model import Origin
from swh.web.common.utils import reverse
from swh.web.tests.api.views.utils import scroll_results
from swh.web.tests.utils import check_api_get_responses, check_http_get_response
-@composite
-def core_swhids(draw):
- object_type = draw(sampled_from(ObjectType))
- object_id = draw(sha1_git())
- return CoreSWHID(object_type=object_type, object_id=object_id).to_extended()
-
-
-@given(raw_extrinsic_metadata(target=core_swhids()))
+@given(raw_extrinsic_metadata())
def test_api_raw_extrinsic_metadata(api_client, subtest, metadata):
# ensure archive_data fixture will be reset between each hypothesis
# example test run
@@ -50,6 +43,9 @@
expected_result["discovery_date"] = expected_result[
"discovery_date"
].isoformat()
+ if expected_result["target"].startswith(("swh:1:ori:", "swh:1:emd:")):
+ # non-core SWHIDs are hidden from the API
+ del expected_result["target"]
assert rv.data == [expected_result]
rv = check_http_get_response(api_client, metadata_url, status_code=200)
@@ -61,8 +57,43 @@
assert rv.content == metadata.metadata
+@settings(max_examples=1)
+@given(raw_extrinsic_metadata())
+def test_api_raw_extrinsic_metadata_origin_filename(api_client, subtest, metadata):
+ # ensure archive_data fixture will be reset between each hypothesis
+ # example test run
+ @subtest
+ def test_inner(archive_data):
+ nonlocal metadata
+ origin = Origin(url="http://example.com/repo.git")
+ metadata = attr.evolve(metadata, target=origin.swhid())
+ metadata = attr.evolve(metadata, id=metadata.compute_hash())
+ archive_data.origin_add([origin])
+ archive_data.metadata_authority_add([metadata.authority])
+ archive_data.metadata_fetcher_add([metadata.fetcher])
+ archive_data.raw_extrinsic_metadata_add([metadata])
+
+ authority = metadata.authority
+ url = reverse(
+ "api-1-raw-extrinsic-metadata-swhid",
+ url_args={"target": str(metadata.target)},
+ query_params={"authority": f"{authority.type.value} {authority.url}"},
+ )
+ rv = check_api_get_responses(api_client, url, status_code=200)
+
+ assert len(rv.data) == 1
+ metadata_url = rv.data[0]["metadata_url"]
+ rv = check_http_get_response(api_client, metadata_url, status_code=200)
+ assert rv["Content-Type"] == "application/octet-stream"
+ assert (
+ rv["Content-Disposition"]
+ == 'attachment; filename="http_example_com_repo_git_metadata"'
+ )
+ assert rv.content == metadata.metadata
+
+
@pytest.mark.parametrize("limit", [1, 2, 10, 100])
-@given(sets(raw_extrinsic_metadata(target=core_swhids()), min_size=1))
+@given(sets(raw_extrinsic_metadata(), min_size=1))
def test_api_raw_extrinsic_metadata_scroll(api_client, subtest, limit, meta):
# ensure archive_data fixture will be reset between each hypothesis
# example test run
@@ -101,12 +132,15 @@
expected_result["discovery_date"] = expected_result[
"discovery_date"
].isoformat()
+ if expected_result["target"].startswith(("swh:1:ori:", "swh:1:emd:")):
+ # non-core SWHIDs are hidden from the API
+ del expected_result["target"]
assert len(results) == len(expected_results)
for result in results:
del result["metadata_url"]
- assert result in expected_results
+ assert result in expected_results, str(expected_results)
_swhid = "swh:1:dir:a2faa28028657859c16ff506924212b33f0e1307"
@@ -168,7 +202,7 @@
check_api_get_responses(api_client, url, status_code=status_code)
-@given(raw_extrinsic_metadata(target=core_swhids()))
+@given(raw_extrinsic_metadata())
def test_api_raw_extrinsic_metadata_list_authorities(api_client, subtest, metadata):
# ensure archive_data fixture will be reset between each hypothesis
# example test run

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 12:28 PM (2 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3215818

Event Timeline