Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123174
D5875.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
18 KB
Subscribers
None
D5875.diff
View Options
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -2,8 +2,8 @@
swh.core >= 0.0.95
swh.counters >= 0.5.1
swh.indexer >= 0.4.1
-swh.model >= 0.5.0
+swh.model >= 2.6.3
swh.scheduler >= 0.7.0
swh.search >= 0.2.0
-swh.storage >= 0.11.10
+swh.storage >= 0.31.0
swh.vault >= 0.0.33
diff --git a/swh/web/api/urls.py b/swh/web/api/urls.py
--- a/swh/web/api/urls.py
+++ b/swh/web/api/urls.py
@@ -8,6 +8,7 @@
import swh.web.api.views.directory # noqa
import swh.web.api.views.graph # noqa
import swh.web.api.views.identifiers # noqa
+import swh.web.api.views.metadata # noqa
import swh.web.api.views.origin # noqa
import swh.web.api.views.origin_save # noqa
import swh.web.api.views.ping # noqa
diff --git a/swh/web/api/views/metadata.py b/swh/web/api/views/metadata.py
new file mode 100644
--- /dev/null
+++ b/swh/web/api/views/metadata.py
@@ -0,0 +1,196 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import base64
+from typing import Optional
+
+import iso8601
+
+from django.http import HttpResponse
+
+from swh.model import hashutil, identifiers
+from swh.model.model import MetadataAuthority, MetadataAuthorityType
+from swh.web.api.apidoc import api_doc, format_docstring
+from swh.web.api.apiurls import api_route
+from swh.web.common import archive, converters
+from swh.web.common.exc import BadInputExc, NotFoundExc
+from swh.web.common.utils import reverse
+
+SWHID_RE = "swh:1:[a-z]{3}:[0-9a-z]{40}"
+
+
+@api_route(
+    f"/raw-extrinsic-metadata/swhid/(?P<target>{SWHID_RE})/",
+    "api-1-raw-extrinsic-metadata-swhid",
+)
+@api_doc("/raw-extrinsic-metadata/swhid/")
+@format_docstring()
+def api_raw_extrinsic_metadata_swhid(request, target):
+    """
+    .. http:get:: /api/1/raw-extrinsic-metadata/swhid/(target)
+
+        Returns raw `extrinsic metadata`_ collected on a given object.
+
+        .. _extrinsic metadata: https://docs.softwareheritage.org/devel/glossary.html#term-extrinsic-metadata
+
+        :param string target: The SWHID of the object whose metadata should be returned
+        :query string authority: A metadata authority identifier, formatted as
+            `<type> <IRI>`. Required.
+        :query string after: An ISO representation of the minimum timestamp of metadata
+            to fetch. Defaults to allowing all metadata.
+        :query int limit: Maximum number of metadata objects to return.
+            Defaults to 100, capped at 10000.
+
+        {common_headers}
+
+        :>jsonarr string target: SWHID of the object described by this metadata
+        :>jsonarr string discovery_date: ISO8601 timestamp of the moment this
+            metadata was collected.
+        :>jsonarr object authority: authority this metadata is coming from
+        :>jsonarr object fetcher: tool used to fetch the metadata
+        :>jsonarr string format: short identifier of the format of the metadata
+        :>jsonarr string metadata_url: link to download the metadata "blob" itself
+        :>jsonarr string origin: URL of the origin in whose context
+            the metadata is valid, if any
+        :>jsonarr int visit: identifier of the visit in whose context
+            the metadata is valid, if any
+        :>jsonarr string snapshot: SWHID of the snapshot in whose context
+            the metadata is valid, if any
+        :>jsonarr string release: SWHID of the release in whose context
+            the metadata is valid, if any
+        :>jsonarr string revision: SWHID of the revision in whose context
+            the metadata is valid, if any
+        :>jsonarr string path: path in whose context the metadata is valid,
+            if any, relative to a release or revision as anchor
+        :>jsonarr string directory: SWHID of the directory in whose context
+            the metadata is valid, if any
+
+        :statuscode 200: no error
+        :statuscode 400: a query parameter is missing or invalid
+
+        **Example:**
+
+        .. parsed-literal::
+
+            :swh_web_api:`raw-extrinsic-metadata/swhid/swh:1:dir:a2faa28028657859c16ff506924212b33f0e1307/?authority=forge%20https://pypi.org/`
+    """  # noqa
+    authority_str: Optional[str] = request.query_params.get("authority")
+    after_str: Optional[str] = request.query_params.get("after")
+    limit_str: str = request.query_params.get("limit", "100")
+    page_token_str: Optional[str] = request.query_params.get("page_token")
+
+    if not authority_str:
+        raise BadInputExc("The 'authority' query parameter is required.")
+    if " " not in authority_str.strip():
+        raise BadInputExc("The 'authority' query parameter should contain a space.")
+
+    (authority_type_str, authority_url) = authority_str.split(" ", 1)
+    try:
+        authority_type = MetadataAuthorityType(authority_type_str)
+    except ValueError:
+        raise BadInputExc(
+            f"Invalid 'authority' type, should be one of: "
+            f"{', '.join(member.value for member in MetadataAuthorityType)}"
+        ) from None
+    authority = MetadataAuthority(authority_type, authority_url)
+
+    if after_str:
+        try:
+            after = iso8601.parse_date(after_str)
+        except iso8601.ParseError:
+            raise BadInputExc("Invalid format for 'after' parameter.") from None
+    else:
+        after = None
+
+    try:
+        limit = int(limit_str)
+    except ValueError:
+        raise BadInputExc("'limit' parameter must be an integer.") from None
+    if limit < 1:
+        raise BadInputExc("'limit' parameter must be a positive integer.")
+    limit = min(limit, 10000)
+
+    try:
+        target_swhid = identifiers.CoreSWHID.from_string(target).to_extended()
+    except identifiers.ValidationError as e:
+        raise BadInputExc(f"Invalid target SWHID: {e.args[0]}") from None
+
+    if page_token_str:
+        try:
+            page_token = base64.urlsafe_b64decode(page_token_str)
+        except ValueError:
+            # binascii.Error (bad padding or alphabet) is a ValueError subclass.
+            raise BadInputExc("Invalid 'page_token' parameter.") from None
+    else:
+        page_token = None
+
+    result_page = archive.storage.raw_extrinsic_metadata_get(
+        target=target_swhid,
+        authority=authority,
+        after=after,
+        page_token=page_token,
+        limit=limit,
+    )
+
+    results = []
+
+    for metadata in result_page.results:
+        result = converters.from_raw_extrinsic_metadata(metadata)
+
+        # We can't reliably send metadata directly, because it is a bytestring,
+        # and we have to return JSON documents.
+        result["metadata_url"] = reverse(
+            "api-1-raw-extrinsic-metadata-get",
+            url_args={"id": hashutil.hash_to_hex(metadata.id)},
+            request=request,
+        )
+
+        results.append(result)
+
+    response = {
+        "results": results,
+        "headers": {},
+    }
+    if result_page.next_page_token is not None:
+        # The next page is served by this same route, so the original target
+        # string goes back in as a URL argument. The storage token is
+        # base64-encoded to text, mirroring the decoding done above.
+        response["headers"]["link-next"] = reverse(
+            "api-1-raw-extrinsic-metadata-swhid",
+            url_args={"target": target},
+            query_params=dict(
+                authority=authority_str,
+                after=after_str,
+                limit=limit_str,
+                page_token=base64.urlsafe_b64encode(
+                    result_page.next_page_token
+                ).decode("ascii"),
+            ),
+            request=request,
+        )
+
+    return response
+
+
+@api_route(
+    "/raw-extrinsic-metadata/get/(?P<id>[0-9a-z]+)/",
+    "api-1-raw-extrinsic-metadata-get",
+)
+def api_raw_extrinsic_metadata_get(request, id):
+    # This is an internal endpoint that should only be accessed via URLs given
+    # by /raw-extrinsic-metadata/swhid/; so it is not documented.
+    try:
+        metadata_id = hashutil.hash_to_bytes(id)
+    except ValueError:
+        # The URL pattern also lets non-hexadecimal characters through.
+        raise BadInputExc("Invalid metadata identifier.") from None
+
+    metadata = archive.storage.raw_extrinsic_metadata_get_by_ids([metadata_id])
+    if not metadata:
+        raise NotFoundExc(
+            "Metadata not found. Use /raw-extrinsic-metadata/swhid/ to access metadata."
+        )
+
+    return HttpResponse(metadata[0].metadata, content_type="application/octet-stream")
diff --git a/swh/web/common/converters.py b/swh/web/common/converters.py
--- a/swh/web/common/converters.py
+++ b/swh/web/common/converters.py
@@ -9,7 +9,7 @@
from swh.core.utils import decode_with_escape
from swh.model import hashutil
-from swh.model.model import Release, Revision
+from swh.model.model import RawExtrinsicMetadata, Release, Revision
from swh.storage.interface import PartialBranches
from swh.web.common.typing import OriginInfo, OriginVisitInfo
@@ -246,11 +246,11 @@
return json.JSONEncoder.default(self, obj)
-def convert_revision_metadata(metadata):
+def convert_metadata(metadata):
"""Convert json specific dict to a json serializable one.
"""
- if not metadata:
+ if metadata is None:
return {}
return json.loads(json.dumps(metadata, cls=SWHMetadataEncoder))
@@ -281,7 +281,7 @@
hashess={"id", "directory", "parents", "children"},
bytess={"name", "fullname", "email", "extra_headers", "message"},
convert={"metadata"},
- convert_fn=convert_revision_metadata,
+ convert_fn=convert_metadata,
dates={"date", "committer_date"},
)
@@ -292,6 +292,18 @@
return revision_d
+def from_raw_extrinsic_metadata(
+    metadata: Union[Dict[str, Any], RawExtrinsicMetadata]
+) -> Dict[str, Any]:
+    """Convert a RawExtrinsicMetadata object, or its dict form, to a JSON-serializable
+    dict; ``id`` and ``metadata`` are blacklisted, ``discovery_date`` is a date."""
+    return from_swh(
+        metadata.to_dict() if isinstance(metadata, RawExtrinsicMetadata) else metadata,
+        blacklist={"id", "metadata"},
+        dates={"discovery_date"},
+    )
+
+
def from_content(content):
"""Convert swh content to serializable content dictionary.
diff --git a/swh/web/tests/api/views/test_metadata.py b/swh/web/tests/api/views/test_metadata.py
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/api/views/test_metadata.py
@@ -0,0 +1,143 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import attr
+from hypothesis import given, strategies
+import pytest
+
+from swh.model.hypothesis_strategies import raw_extrinsic_metadata
+from swh.web.common.utils import reverse
+from swh.web.tests.api.views.utils import scroll_results
+from swh.web.tests.utils import check_api_get_responses, check_http_get_response
+
+
+@given(raw_extrinsic_metadata())
+def test_api_raw_extrinsic_metadata(api_client, archive_data, metadata):
+    """A stored metadata object is listed by SWHID and its blob is downloadable."""
+    archive_data.metadata_authority_add([metadata.authority])
+    archive_data.metadata_fetcher_add([metadata.fetcher])
+    archive_data.raw_extrinsic_metadata_add([metadata])
+
+    authority = metadata.authority
+    url = reverse(
+        "api-1-raw-extrinsic-metadata-swhid",
+        url_args={"target": str(metadata.target)},
+        query_params={"authority": f"{authority.type.value} {authority.url}"},
+    )
+    rv = check_api_get_responses(api_client, url, status_code=200)
+
+    assert len(rv.data) == 1
+
+    expected_result = metadata.to_dict()
+    del expected_result["id"]
+    del expected_result["metadata"]
+    # metadata_url is generated by the endpoint: reuse it here, fetch it below.
+    metadata_url = rv.data[0]["metadata_url"]
+    expected_result["metadata_url"] = metadata_url
+    expected_result["discovery_date"] = expected_result["discovery_date"].isoformat()
+    assert rv.data == [expected_result]
+
+    rv = check_http_get_response(api_client, metadata_url, status_code=200)
+    assert rv["Content-Type"] == "application/octet-stream"
+    assert rv.content == metadata.metadata
+
+
+@pytest.mark.parametrize("limit", [1, 2, 10, 100])
+@given(strategies.sets(raw_extrinsic_metadata(), min_size=1))
+def test_api_raw_extrinsic_metadata_scroll(api_client, archive_data, limit, metadata):
+    """Following the pagination links returns every stored metadata object."""
+    # Make all metadata objects use the same authority and target
+    metadata0 = next(iter(metadata))
+    metadata = {
+        attr.evolve(m, authority=metadata0.authority, target=metadata0.target)
+        for m in metadata
+    }
+    authority = metadata0.authority
+
+    archive_data.metadata_authority_add([authority])
+    archive_data.metadata_fetcher_add(list({m.fetcher for m in metadata}))
+    archive_data.raw_extrinsic_metadata_add(metadata)
+
+    url = reverse(
+        "api-1-raw-extrinsic-metadata-swhid",
+        url_args={"target": str(metadata0.target)},
+        query_params={
+            "authority": f"{authority.type.value} {authority.url}",
+            "limit": limit,
+        },
+    )
+
+    results = scroll_results(api_client, url)
+
+    expected_results = [m.to_dict() for m in metadata]
+    for expected_result in expected_results:
+        del expected_result["id"]
+        del expected_result["metadata"]
+        expected_result["discovery_date"] = expected_result[
+            "discovery_date"
+        ].isoformat()
+
+    for result in results:
+        del result["metadata_url"]
+
+    # `metadata` is a set, so `expected_results` has no defined order; compare
+    # the two lists without relying on the order the API returns results in.
+    assert len(results) == len(expected_results)
+    for result in results:
+        assert result in expected_results
+
+
+_swhid = "swh:1:dir:a2faa28028657859c16ff506924212b33f0e1307"
+
+
+@pytest.mark.parametrize(
+    "status_code,url_args,query_params",
+    [
+        pytest.param(
+            200,
+            {"target": _swhid},
+            {"authority": "forge http://example.org"},
+            id="minimal working",
+        ),
+        pytest.param(
+            200,
+            {"target": _swhid},
+            {
+                "authority": "forge http://example.org",
+                "after": "2021-06-18T09:31:09",
+                "limit": 100,
+            },
+            id="maximal working",
+        ),
+        pytest.param(
+            400,
+            {"target": _swhid},
+            {"authority": "foo http://example.org"},
+            id="invalid authority type",
+        ),
+        pytest.param(
+            400,
+            {"target": _swhid},
+            {"authority": "forge http://example.org", "after": "yesterday",},
+            id="invalid 'after' format",
+        ),
+        pytest.param(
+            400,
+            {"target": _swhid},
+            {"authority": "forge http://example.org", "limit": "abc",},
+            id="invalid 'limit'",
+        ),
+    ],
+)
+def test_api_raw_extrinsic_metadata_check_params(
+    api_client, archive_data, status_code, url_args, query_params
+):
+    """Each set of query parameters yields the HTTP status it is paired with."""
+    url = reverse(
+        "api-1-raw-extrinsic-metadata-swhid",
+        url_args=url_args,
+        query_params=query_params,
+    )
+    check_api_get_responses(api_client, url, status_code=status_code)
diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py
--- a/swh/web/tests/api/views/test_origin.py
+++ b/swh/web/tests/api/views/test_origin.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2020 The Software Heritage developers
+# Copyright (C) 2015-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -8,7 +8,6 @@
from hypothesis import given
import pytest
-from requests.utils import parse_header_links
from swh.indexer.storage.model import OriginIntrinsicMetadataRow
from swh.model.hashutil import hash_to_bytes
@@ -19,6 +18,7 @@
from swh.web.common.exc import BadInputExc
from swh.web.common.origin_visits import get_origin_visits
from swh.web.common.utils import reverse
+from swh.web.tests.api.views.utils import scroll_results
from swh.web.tests.data import (
INDEXER_TOOL,
ORIGIN_MASTER_REVISION,
@@ -29,31 +29,6 @@
from swh.web.tests.utils import check_api_get_responses
-def _scroll_results(api_client, url):
- """Iterates through pages of results, and returns them all."""
- results = []
-
- while True:
- rv = check_api_get_responses(api_client, url, status_code=200)
-
- results.extend(rv.data)
-
- if "Link" in rv:
- for link in parse_header_links(rv["Link"]):
- if link["rel"] == "next":
- # Found link to next page of results
- url = link["url"]
- break
- else:
- # No link with 'rel=next'
- break
- else:
- # No Link header
- break
-
- return results
-
-
def test_api_lookup_origin_visits_raise_error(api_client, mocker):
mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits")
err_msg = "voluntary error to check the bad request middleware."
@@ -389,7 +364,7 @@
url = reverse("api-1-origins", query_params={"origin_count": origin_count})
- results = _scroll_results(api_client, url)
+ results = scroll_results(api_client, url)
assert len(results) == len(origins)
assert {origin["url"] for origin in results} == origin_urls
@@ -566,7 +541,7 @@
query_params={"limit": limit},
)
- results = _scroll_results(api_client, url)
+ results = scroll_results(api_client, url)
assert {origin["url"] for origin in results} == expected_origins
diff --git a/swh/web/tests/api/views/utils.py b/swh/web/tests/api/views/utils.py
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/api/views/utils.py
@@ -0,0 +1,30 @@
+# Copyright (C) 2015-2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from requests.utils import parse_header_links
+
+from swh.web.tests.utils import check_api_get_responses
+
+
+def scroll_results(api_client, url):
+    """Fetch ``url`` and every following page, returning all results in one list.
+
+    Each page is requested through ``check_api_get_responses`` with
+    ``status_code=200``. The next page is the first ``rel == "next"`` entry of
+    the response's ``Link`` header; scrolling stops when there is none.
+    """
+    collected = []
+
+    while True:
+        response = check_api_get_responses(api_client, url, status_code=200)
+        collected.extend(response.data)
+
+        links = parse_header_links(response["Link"]) if "Link" in response else []
+        following = next(
+            (link["url"] for link in links if link["rel"] == "next"), None
+        )
+        if following is None:
+            return collected
+        url = following
diff --git a/swh/web/tests/common/test_converters.py b/swh/web/tests/common/test_converters.py
--- a/swh/web/tests/common/test_converters.py
+++ b/swh/web/tests/common/test_converters.py
@@ -97,7 +97,7 @@
empty_dict={"u"},
empty_list={"v"},
convert={"p", "q", "w"},
- convert_fn=converters.convert_revision_metadata,
+ convert_fn=converters.convert_metadata,
)
assert expected_output == actual_output
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 18, 1:26 AM (2 d, 11 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217149
Attached To
D5875: Add an endpoint to list and access raw extrinsic metadata.
Event Timeline
Log In to Comment