Changeset View
Changeset View
Standalone View
Standalone View
swh/web/api/views/metadata.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import base64 | import base64 | ||||
import re | import re | ||||
from typing import Dict, Optional | |||||
import iso8601 | import iso8601 | ||||
from django.http import HttpResponse | from django.http import HttpResponse | ||||
from rest_framework.request import Request | |||||
from swh.model import hashutil, swhids | from swh.model import hashutil, swhids | ||||
from swh.model.model import MetadataAuthority, MetadataAuthorityType | from swh.model.model import MetadataAuthority, MetadataAuthorityType | ||||
from swh.web.api.apidoc import api_doc, format_docstring | from swh.web.api.apidoc import api_doc, format_docstring | ||||
from swh.web.api.apiurls import api_route | from swh.web.api.apiurls import api_route | ||||
from swh.web.common import archive, converters | from swh.web.common import archive, converters | ||||
from swh.web.common.exc import BadInputExc, NotFoundExc | from swh.web.common.exc import BadInputExc, NotFoundExc | ||||
from swh.web.common.utils import SWHID_RE, reverse | from swh.web.common.utils import SWHID_RE, reverse | ||||
@api_route( | @api_route( | ||||
f"/raw-extrinsic-metadata/swhid/(?P<target>{SWHID_RE})/", | f"/raw-extrinsic-metadata/swhid/(?P<target>{SWHID_RE})/", | ||||
"api-1-raw-extrinsic-metadata-swhid", | "api-1-raw-extrinsic-metadata-swhid", | ||||
) | ) | ||||
@api_doc("/raw-extrinsic-metadata/swhid/") | @api_doc("/raw-extrinsic-metadata/swhid/") | ||||
@format_docstring() | @format_docstring() | ||||
def api_raw_extrinsic_metadata_swhid(request, target): | def api_raw_extrinsic_metadata_swhid(request: Request, target: str): | ||||
""" | """ | ||||
.. http:get:: /api/1/raw-extrinsic-metadata/swhid/(target) | .. http:get:: /api/1/raw-extrinsic-metadata/swhid/(target) | ||||
Returns raw `extrinsic metadata <https://docs.softwareheritage.org/devel/glossary.html#term-extrinsic-metadata>`__ collected on a given object. | Returns raw `extrinsic metadata <https://docs.softwareheritage.org/devel/glossary.html#term-extrinsic-metadata>`__ collected on a given object. | ||||
:param string target: The core SWHID of the object whose metadata | :param string target: The core SWHID of the object whose metadata | ||||
should be returned | should be returned | ||||
:query string authority: A metadata authority identifier, formatted as | :query string authority: A metadata authority identifier, formatted as | ||||
Show All 29 Lines | .. http:get:: /api/1/raw-extrinsic-metadata/swhid/(target) | ||||
:statuscode 200: no error | :statuscode 200: no error | ||||
**Example:** | **Example:** | ||||
.. parsed-literal:: | .. parsed-literal:: | ||||
:swh_web_api:`raw-extrinsic-metadata/swhid/swh:1:dir:a2faa28028657859c16ff506924212b33f0e1307/?authority=forge%20https://pypi.org/` | :swh_web_api:`raw-extrinsic-metadata/swhid/swh:1:dir:a2faa28028657859c16ff506924212b33f0e1307/?authority=forge%20https://pypi.org/` | ||||
""" # noqa | """ # noqa | ||||
authority_str: str = request.query_params.get("authority") | authority_str: Optional[str] = request.query_params.get("authority") | ||||
after_str: str = request.query_params.get("after") | after_str: Optional[str] = request.query_params.get("after") | ||||
limit_str: str = request.query_params.get("limit", "100") | limit_str: str = request.query_params.get("limit", "100") | ||||
page_token_str: str = request.query_params.get("page_token") | page_token_str: Optional[str] = request.query_params.get("page_token") | ||||
if not authority_str: | if authority_str is None: | ||||
raise BadInputExc("The 'authority' query parameter is required.") | raise BadInputExc("The 'authority' query parameter is required.") | ||||
vlorentz: This error message is now slightly inaccurate, since it may also be raised when the 'authority' parameter is provided but empty.
if " " not in authority_str.strip(): | if " " not in authority_str.strip(): | ||||
raise BadInputExc("The 'authority' query parameter should contain a space.") | raise BadInputExc("The 'authority' query parameter should contain a space.") | ||||
(authority_type_str, authority_url) = authority_str.split(" ", 1) | (authority_type_str, authority_url) = authority_str.split(" ", 1) | ||||
try: | try: | ||||
authority_type = MetadataAuthorityType(authority_type_str) | authority_type = MetadataAuthorityType(authority_type_str) | ||||
except ValueError: | except ValueError: | ||||
raise BadInputExc( | raise BadInputExc( | ||||
Show All 12 Lines | def api_raw_extrinsic_metadata_swhid(request: Request, target: str): | ||||
try: | try: | ||||
limit = int(limit_str) | limit = int(limit_str) | ||||
except ValueError: | except ValueError: | ||||
raise BadInputExc("'limit' parameter must be an integer.") from None | raise BadInputExc("'limit' parameter must be an integer.") from None | ||||
limit = min(limit, 10000) | limit = min(limit, 10000) | ||||
try: | try: | ||||
target = swhids.CoreSWHID.from_string(target).to_extended() | parsed_target = swhids.CoreSWHID.from_string(target).to_extended() | ||||
except swhids.ValidationError as e: | except swhids.ValidationError as e: | ||||
raise BadInputExc(f"Invalid target SWHID: {e.args[0]}") from None | raise BadInputExc(f"Invalid target SWHID: {e.args[0]}") from None | ||||
if page_token_str: | if page_token_str is not None: | ||||
page_token = base64.urlsafe_b64decode(page_token_str) | page_token = base64.urlsafe_b64decode(page_token_str) | ||||
else: | else: | ||||
page_token = None | page_token = None | ||||
result_page = archive.storage.raw_extrinsic_metadata_get( | result_page = archive.storage.raw_extrinsic_metadata_get( | ||||
target=target, | target=parsed_target, | ||||
authority=authority, | authority=authority, | ||||
after=after, | after=after, | ||||
page_token=page_token, | page_token=page_token, | ||||
limit=limit, | limit=limit, | ||||
) | ) | ||||
results = [] | results = [] | ||||
for metadata in result_page.results: | for metadata in result_page.results: | ||||
result = converters.from_raw_extrinsic_metadata(metadata) | result = converters.from_raw_extrinsic_metadata(metadata) | ||||
# We can't reliably send metadata directly, because it is a bytestring, | # We can't reliably send metadata directly, because it is a bytestring, | ||||
# and we have to return JSON documents. | # and we have to return JSON documents. | ||||
result["metadata_url"] = reverse( | result["metadata_url"] = reverse( | ||||
"api-1-raw-extrinsic-metadata-get", | "api-1-raw-extrinsic-metadata-get", | ||||
url_args={"id": hashutil.hash_to_hex(metadata.id)}, | url_args={"id": hashutil.hash_to_hex(metadata.id)}, | ||||
query_params={"filename": f"{target}_metadata"}, | query_params={"filename": f"{target}_metadata"}, | ||||
request=request, | request=request, | ||||
) | ) | ||||
results.append(result) | results.append(result) | ||||
response = { | headers: Dict[str, str] = {} | ||||
"results": results, | |||||
"headers": {}, | |||||
} | |||||
if result_page.next_page_token is not None: | if result_page.next_page_token is not None: | ||||
response["headers"]["link-next"] = reverse( | headers["link-next"] = reverse( | ||||
"api-1-raw-extrinsic-metadata-swhid", | "api-1-raw-extrinsic-metadata-swhid", | ||||
url_args={"target": target}, | url_args={"target": target}, | ||||
query_params=dict( | query_params=dict( | ||||
authority=authority_str, | authority=authority_str, | ||||
after=after_str, | after=after_str, | ||||
limit=limit_str, | limit=limit_str, | ||||
page_token=base64.urlsafe_b64encode( | page_token=base64.urlsafe_b64encode( | ||||
result_page.next_page_token.encode() | result_page.next_page_token.encode() | ||||
).decode(), | ).decode(), | ||||
), | ), | ||||
request=request, | request=request, | ||||
) | ) | ||||
return response | return { | ||||
"results": results, | |||||
"headers": headers, | |||||
} | |||||
@api_route( | @api_route( | ||||
"/raw-extrinsic-metadata/get/(?P<id>[0-9a-z]+)/", | "/raw-extrinsic-metadata/get/(?P<id>[0-9a-z]+)/", | ||||
"api-1-raw-extrinsic-metadata-get", | "api-1-raw-extrinsic-metadata-get", | ||||
) | ) | ||||
def api_raw_extrinsic_metadata_get(request, id): | def api_raw_extrinsic_metadata_get(request: Request, id: str): | ||||
# This is an internal endpoint that should only be accessed via URLs given | # This is an internal endpoint that should only be accessed via URLs given | ||||
# by /raw-extrinsic-metadata/swhid/; so it is not documented. | # by /raw-extrinsic-metadata/swhid/; so it is not documented. | ||||
metadata = archive.storage.raw_extrinsic_metadata_get_by_ids( | metadata = archive.storage.raw_extrinsic_metadata_get_by_ids( | ||||
[hashutil.hash_to_bytes(id)] | [hashutil.hash_to_bytes(id)] | ||||
) | ) | ||||
if not metadata: | if not metadata: | ||||
raise NotFoundExc( | raise NotFoundExc( | ||||
"Metadata not found. Use /raw-extrinsic-metadata/swhid/ to access metadata." | "Metadata not found. Use /raw-extrinsic-metadata/swhid/ to access metadata." | ||||
Show All 15 Lines | |||||
@api_route( | @api_route( | ||||
f"/raw-extrinsic-metadata/swhid/(?P<target>{SWHID_RE})/authorities/", | f"/raw-extrinsic-metadata/swhid/(?P<target>{SWHID_RE})/authorities/", | ||||
"api-1-raw-extrinsic-metadata-swhid-authorities", | "api-1-raw-extrinsic-metadata-swhid-authorities", | ||||
) | ) | ||||
@api_doc("/raw-extrinsic-metadata/swhid/authorities/") | @api_doc("/raw-extrinsic-metadata/swhid/authorities/") | ||||
@format_docstring() | @format_docstring() | ||||
def api_raw_extrinsic_metadata_swhid_authorities(request, target): | def api_raw_extrinsic_metadata_swhid_authorities(request: Request, target: str): | ||||
""" | """ | ||||
.. http:get:: /api/1/raw-extrinsic-metadata/swhid/(target)/authorities/ | .. http:get:: /api/1/raw-extrinsic-metadata/swhid/(target)/authorities/ | ||||
Returns a list of metadata authorities that provided metadata on | Returns a list of metadata authorities that provided metadata on | ||||
the given target. | the given target. | ||||
They can then be used to get the raw `extrinsic metadata <https://docs.softwareheritage.org/devel/glossary.html#term-extrinsic-metadata>`__ collected on | They can then be used to get the raw `extrinsic metadata <https://docs.softwareheritage.org/devel/glossary.html#term-extrinsic-metadata>`__ collected on | ||||
that object from each of the authorities. | that object from each of the authorities. | ||||
Show All 10 Lines | .. http:get:: /api/1/raw-extrinsic-metadata/swhid/(target)/authorities/ | ||||
:statuscode 200: no error | :statuscode 200: no error | ||||
**Example:** | **Example:** | ||||
.. parsed-literal:: | .. parsed-literal:: | ||||
:swh_web_api:`raw-extrinsic-metadata/swhid/swh:1:dir:a2faa28028657859c16ff506924212b33f0e1307/authorities/` | :swh_web_api:`raw-extrinsic-metadata/swhid/swh:1:dir:a2faa28028657859c16ff506924212b33f0e1307/authorities/` | ||||
""" # noqa | """ # noqa | ||||
target_str = target | |||||
try: | try: | ||||
target = swhids.CoreSWHID.from_string(target_str).to_extended() | parsed_target = swhids.CoreSWHID.from_string(target).to_extended() | ||||
except swhids.ValidationError as e: | except swhids.ValidationError as e: | ||||
raise BadInputExc(f"Invalid target SWHID: {e.args[0]}") from None | raise BadInputExc(f"Invalid target SWHID: {e.args[0]}") from None | ||||
authorities = archive.storage.raw_extrinsic_metadata_get_authorities(target=target) | authorities = archive.storage.raw_extrinsic_metadata_get_authorities( | ||||
target=parsed_target | |||||
) | |||||
results = [ | results = [ | ||||
{ | { | ||||
**authority.to_dict(), | **authority.to_dict(), | ||||
"metadata_list_url": reverse( | "metadata_list_url": reverse( | ||||
"api-1-raw-extrinsic-metadata-swhid", | "api-1-raw-extrinsic-metadata-swhid", | ||||
url_args={"target": target_str}, | url_args={"target": target}, | ||||
query_params={"authority": f"{authority.type.value} {authority.url}"}, | query_params={"authority": f"{authority.type.value} {authority.url}"}, | ||||
request=request, | request=request, | ||||
), | ), | ||||
} | } | ||||
for authority in authorities | for authority in authorities | ||||
] | ] | ||||
return { | return { | ||||
"results": results, | "results": results, | ||||
"headers": {}, | "headers": {}, | ||||
} | } |
This error message is now slightly inaccurate, since it may also be raised when the 'authority' parameter is provided but empty.