# ---------------------------------------------------------------------------
# File: swh/web/api/apiresponse.py (post-patch contents)
# ---------------------------------------------------------------------------
# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

import json
import logging
import traceback
from typing import Any, Dict, Optional

import sentry_sdk

from django.http import HttpResponse
from django.shortcuts import render
from django.utils.cache import add_never_cache_headers
from django.utils.html import escape
from rest_framework.exceptions import APIException
from rest_framework.request import Request
from rest_framework.response import Response
from rest_framework.utils.encoders import JSONEncoder

from swh.storage.exc import StorageAPIError, StorageDBError
from swh.web.api import utils
from swh.web.common.exc import BadInputExc, ForbiddenExc, LargePayloadExc, NotFoundExc
from swh.web.common.utils import gen_path_info, shorten_path
from swh.web.config import get_config

logger = logging.getLogger("django")


def compute_link_header(rv: Dict[str, Any], options: Dict[str, Any]) -> Dict[str, Any]:
    """Build the HTTP ``Link`` header from pagination links found in ``rv``.

    Args:
        rv (dict): dictionary with keys:

            - headers: potential headers with 'link-next' and 'link-prev'
              keys
            - results: containing the result to return

        options (dict): the initial dict whose 'headers' entry (if any) is
            updated with the computed header

    Returns:
        dict: the headers dict taken from ``options`` (or a new one) with a
        'Link' key added, or an empty dict when ``rv`` carries no
        'link-next' / 'link-prev' information
    """
    if "headers" not in rv:
        return {}

    rv_headers = rv["headers"]
    link_headers = []

    if "link-next" in rv_headers:
        link_headers.append('<%s>; rel="next"' % rv_headers["link-next"])
    if "link-prev" in rv_headers:
        link_headers.append('<%s>; rel="previous"' % rv_headers["link-prev"])

    if not link_headers:
        return {}

    # NOTE: when options already holds a 'headers' dict it is updated in place
    headers = options.get("headers", {})
    headers.update({"Link": ",".join(link_headers)})
    return headers


def filter_by_fields(request: Request, data: Dict[str, Any]) -> Dict[str, Any]:
    """Extract a request parameter 'fields' if it exists to permit the filtering on
    the data dict's keys.

    If such field is not provided, returns the data as is.

    """
    fields = request.query_params.get("fields")
    if fields:
        data = utils.filter_field_keys(data, set(fields.split(",")))

    return data


def transform(rv: Dict[str, Any]) -> Dict[str, Any]:
    """Transform an eventual returned value with multiple layer of
    information with only what's necessary.

    If the returned value rv contains the 'results' key, this is the
    associated value which is returned.

    Otherwise, return the initial dict without the potential 'headers'
    key (the key is removed from ``rv`` itself).

    """
    if "results" in rv:
        return rv["results"]

    if "headers" in rv:
        rv.pop("headers")

    return rv


def make_api_response(
    request: Request,
    data: Dict[str, Any],
    doc_data: Optional[Dict[str, Any]] = None,
    options: Optional[Dict[str, Any]] = None,
) -> HttpResponse:
    """Generates an API response based on the requested mimetype.

    If the request carries a truthy ``never_cache`` attribute, never-cache
    headers are added to the generated response.

    Args:
        request: a DRF Request object
        data: raw data to return in the API response
        doc_data: documentation data for HTML response
        options: optional data that can be used to generate the response
            ('status' and 'headers' keys are read)

    Returns:
        a DRF Response object, or the rendered ``api/apidoc.html`` template
        when the accepted media type is ``text/html``

    """
    options = options or {}
    if data:
        options["headers"] = compute_link_header(data, options)
        data = transform(data)
        data = filter_by_fields(request, data)
    doc_data = doc_data or {}
    headers = {}
    if "headers" in options:
        doc_data["headers_data"] = options["headers"]
        headers = options["headers"]

    # get request status code
    doc_data["status_code"] = options.get("status", 200)

    # when requesting HTML, typically when browsing the API through its
    # documented views, we need to enrich the input data with documentation
    # and render the apidoc HTML template
    if request.accepted_media_type == "text/html":
        doc_data["response_data"] = data
        if data is not None:
            doc_data["response_data"] = json.dumps(
                data, cls=JSONEncoder, sort_keys=True, indent=4, separators=(",", ": ")
            )
        doc_data["heading"] = shorten_path(str(request.path))

        # generate breadcrumbs data
        if "route" in doc_data:
            endpoint_path = gen_path_info(doc_data["route"])
            doc_data["endpoint_path"] = endpoint_path
            # every breadcrumb but the last one links to a documentation page
            for crumb in endpoint_path[:-1]:
                crumb["path"] += "/doc/"
            if not doc_data["noargs"]:
                endpoint_path[-1]["path"] += "/doc/"

        response = render(
            request, "api/apidoc.html", doc_data, status=doc_data["status_code"]
        )

    # otherwise simply return the raw data and let DRF pick
    # the correct renderer (JSON or YAML)
    else:
        response = Response(
            data,
            headers=headers,
            content_type=request.accepted_media_type,
            status=doc_data["status_code"],
        )

    # the flag is set on the request by the api_route decorator; handling it
    # here also covers error responses built by the DRF exception handler
    if getattr(request, "never_cache", False):
        add_never_cache_headers(response)

    return response


def error_response(
    request: Request, exception: Exception, doc_data: Optional[Dict[str, Any]]
) -> HttpResponse:
    """Private function to create a custom error response.

    Args:
        request: a DRF Request object
        exception: the exception that caused the error
        doc_data: documentation data for HTML response (may be None, e.g.
            when the exception carries no ``doc_data`` attribute)

    Returns:
        the response produced by :func:`make_api_response`, whose status is
        derived from the exception type (500 when unrecognized)

    """
    error_code = 500
    if isinstance(exception, BadInputExc):
        error_code = 400
    elif isinstance(exception, NotFoundExc):
        error_code = 404
    elif isinstance(exception, ForbiddenExc):
        error_code = 403
    elif isinstance(exception, LargePayloadExc):
        error_code = 413
    elif isinstance(exception, (StorageDBError, StorageAPIError)):
        error_code = 503
    elif isinstance(exception, APIException):
        error_code = exception.status_code

    error_opts = {"status": error_code}
    error_data = {
        "exception": exception.__class__.__name__,
        "reason": str(exception),
    }

    # the reason ends up in the HTML documentation page: escape it
    if request.accepted_media_type == "text/html":
        error_data["reason"] = escape(error_data["reason"])

    if get_config()["debug"]:
        error_data["traceback"] = traceback.format_exc()
        logger.debug(error_data["traceback"])

    return make_api_response(request, error_data, doc_data, options=error_opts)


def error_response_handler(
    exc: Exception, context: Dict[str, Any]
) -> Optional[HttpResponse]:
    """Custom DRF exception handler used to generate API error responses.

    The exception is reported to Sentry before the response is built from
    ``context["request"]``.
    """
    sentry_sdk.capture_exception(exc)
    doc_data = getattr(exc, "doc_data", None)
    return error_response(context["request"], exc, doc_data)


# ---------------------------------------------------------------------------
# File: swh/web/api/apiurls.py (post-patch contents)
# ---------------------------------------------------------------------------
# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

import functools
from typing import Dict, List, Optional

from django.http.response import HttpResponseBase
from django.utils.cache import add_never_cache_headers
from rest_framework.decorators import api_view

from swh.web.api import throttling
from swh.web.api.apiresponse import make_api_response
from swh.web.common.urlsindex import UrlsIndex


class APIUrls(UrlsIndex):
    """
    Class to manage API documentation URLs.

    - Indexes all routes documented using apidoc's decorators.
    - Tracks endpoint/request processing method relationships for use in
      generating related urls in API documentation

    """

    _apidoc_routes = {}  # type: Dict[str, Dict[str, str]]
    scope = "api"

    @classmethod
    def get_app_endpoints(cls) -> Dict[str, Dict[str, str]]:
        """Return the documented routes registered so far, keyed by route."""
        return cls._apidoc_routes

    @classmethod
    def add_doc_route(
        cls,
        route: str,
        docstring: str,
        noargs: bool = False,
        api_version: str = "1",
        **kwargs,
    ) -> None:
        """
        Add a route to the self-documenting API reference

        A route that is already registered is left untouched. Extra keyword
        arguments are stored alongside the route description.
        """
        route_name = route[1:-1].replace("/", "-")
        if not noargs:
            route_name = "%s-doc" % route_name
        route_view_name = "api-%s-%s" % (api_version, route_name)
        if route not in cls._apidoc_routes:
            d = {
                "docstring": docstring,
                "route": "/api/%s%s" % (api_version, route),
                "route_view_name": route_view_name,
            }
            d.update(kwargs)
            cls._apidoc_routes[route] = d


def api_route(
    url_pattern: str,
    view_name: Optional[str] = None,
    methods: Optional[List[str]] = None,
    throttle_scope: str = "swh_api",
    api_version: str = "1",
    checksum_args: Optional[List[str]] = None,
    never_cache: bool = False,
):
    """
    Decorator to ease the registration of an API endpoint
    using the Django REST Framework.

    Args:
        url_pattern: the url pattern used by DRF to identify the API route
        view_name: the name of the API view associated to the route used to
            reverse the url
        methods: array of HTTP methods supported by the API route
            (defaults to GET, HEAD and OPTIONS)
        throttle_scope: Named scope for rate limiting
        api_version: web API version
        checksum_args: list of view argument names holding checksum values
        never_cache: define if api response must NOT be cached (never-cache
            headers are added to the response when True)

    Returns:
        a decorator that registers the generated DRF view and returns the
        wrapped function unchanged

    """
    # a fresh list per call: a list literal used as default value would be
    # shared by every route relying on the default
    if methods is None:
        methods = ["GET", "HEAD", "OPTIONS"]

    url_pattern = "^" + api_version + url_pattern + "$"

    def decorator(f):
        # create a DRF view from the wrapped function
        @api_view(methods)
        @throttling.throttle_scope(throttle_scope)
        @functools.wraps(f)
        def api_view_f(request, **kwargs):
            # never_cache will be handled in apiresponse module
            request.never_cache = never_cache
            response = f(request, **kwargs)
            doc_data = None
            # check if response has been forwarded by api_doc decorator
            if isinstance(response, dict) and "doc_data" in response:
                doc_data = response["doc_data"]
                response = response["data"]
            # check if HTTP response needs to be created
            if not isinstance(response, HttpResponseBase):
                api_response = make_api_response(
                    request, data=response, doc_data=doc_data
                )
            else:
                api_response = response
                # a response built by the view itself does not go through
                # make_api_response, so the never_cache flag must be
                # honoured here to keep such responses uncached
                if never_cache:
                    add_never_cache_headers(api_response)

            return api_response

        # small hacks for correctly generating API endpoints index doc
        api_view_f.__name__ = f.__name__
        api_view_f.http_method_names = methods

        # register the route and its view in the endpoints index
        APIUrls.add_url_pattern(url_pattern, api_view_f, view_name)

        if checksum_args:
            APIUrls.add_redirect_for_checksum_args(
                view_name, [url_pattern], checksum_args
            )
        return f

    return decorator


# ---------------------------------------------------------------------------
# File: swh/web/tests/api/test_apiurls.py (post-patch contents)
# ---------------------------------------------------------------------------
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

from swh.web.api.apiurls import api_route
from swh.web.common.utils import reverse
from swh.web.tests.utils import check_api_get_responses


@api_route(r"/some/route/(?P<int_arg>[0-9]+)/", "api-1-some-route")
def api_some_route(request, int_arg):
    return {"result": int(int_arg)}


@api_route(
    r"/never/cache/route/(?P<int_arg>[0-9]+)/",
    "api-1-never-cache-route",
    never_cache=True,
)
def api_never_cache_route(request, int_arg):
    return {"result": int(int_arg)}


@api_route(
    r"/never/cache/route/error/",
    "api-1-never-cache-route-with-error",
    never_cache=True,
)
def api_never_cache_route_with_error(request):
    raise Exception("error")


def test_api_route_with_cache(api_client):
    url = reverse("api-1-some-route", url_args={"int_arg": 1})
    resp = check_api_get_responses(api_client, url, status_code=200)
    assert resp.data == {"result": 1}
    assert "Cache-Control" not in resp


# Cache-Control value expected on responses of never_cache routes
_cache_control = "max-age=0, no-cache, no-store, must-revalidate"


def test_api_route_never_cache(api_client):
    url = reverse("api-1-never-cache-route", url_args={"int_arg": 1})
    resp = check_api_get_responses(api_client, url, status_code=200)
    assert resp.data == {"result": 1}
    assert "Cache-Control" in resp
    assert resp["Cache-Control"] == _cache_control


def test_api_route_never_cache_with_error(api_client):
    # the error response is built by the exception handler, outside of the
    # decorated view: it must still carry the never-cache headers
    url = reverse("api-1-never-cache-route-with-error")
    resp = check_api_get_responses(api_client, url, status_code=500)
    assert "Cache-Control" in resp
    assert resp["Cache-Control"] == _cache_control