diff --git a/swh/web/api/apiresponse.py b/swh/web/api/apiresponse.py index edea7cd5..aaf63ad7 100644 --- a/swh/web/api/apiresponse.py +++ b/swh/web/api/apiresponse.py @@ -1,225 +1,229 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json import logging import traceback from typing import Any, Dict, Optional -import sentry_sdk - from django.http import HttpResponse from django.shortcuts import render from django.utils.cache import add_never_cache_headers from django.utils.html import escape from rest_framework.exceptions import APIException from rest_framework.request import Request from rest_framework.response import Response from rest_framework.utils.encoders import JSONEncoder from swh.storage.exc import StorageAPIError, StorageDBError from swh.web.api import utils -from swh.web.common.exc import BadInputExc, ForbiddenExc, LargePayloadExc, NotFoundExc +from swh.web.common.exc import ( + BadInputExc, + ForbiddenExc, + LargePayloadExc, + NotFoundExc, + sentry_capture_exception, +) from swh.web.common.utils import gen_path_info, shorten_path from swh.web.config import get_config logger = logging.getLogger("django") def compute_link_header(rv: Dict[str, Any], options: Dict[str, Any]) -> Dict[str, Any]: """Add Link header in returned value results. Args: request: a DRF Request object rv (dict): dictionary with keys: - headers: potential headers with 'link-next' and 'link-prev' keys - results: containing the result to return options (dict): the initial dict to update with result if any Returns: dict: dictionary with optional keys 'link-next' and 'link-prev' """ link_headers = [] if "headers" not in rv: return {} rv_headers = rv["headers"] if "link-next" in rv_headers: link_headers.append('<%s>; rel="next"' % rv_headers["link-next"]) if "link-prev" in rv_headers: link_headers.append('<%s>; rel="previous"' % rv_headers["link-prev"]) if link_headers: link_header_str = ",".join(link_headers) headers = options.get("headers", {}) headers.update({"Link": link_header_str}) return headers return {} def filter_by_fields(request: Request, data: Dict[str, Any]) -> Dict[str, Any]: """Extract a request parameter 'fields' if it exists to permit the filtering on the data dict's keys. If such field is not provided, returns the data as is. """ fields = request.query_params.get("fields") if fields: data = utils.filter_field_keys(data, set(fields.split(","))) return data def transform(rv: Dict[str, Any]) -> Dict[str, Any]: """Transform an eventual returned value with multiple layer of information with only what's necessary. If the returned value rv contains the 'results' key, this is the associated value which is returned. Otherwise, return the initial dict without the potential 'headers' key. """ if "results" in rv: return rv["results"] if "headers" in rv: rv.pop("headers") return rv def make_api_response( request: Request, data: Dict[str, Any], doc_data: Optional[Dict[str, Any]] = None, options: Optional[Dict[str, Any]] = None, ) -> HttpResponse: """Generates an API response based on the requested mimetype. Args: request: a DRF Request object data: raw data to return in the API response doc_data: documentation data for HTML response options: optional data that can be used to generate the response Returns: a DRF Response a object """ options = options or {} if data: options["headers"] = compute_link_header(data, options) data = transform(data) data = filter_by_fields(request, data) doc_data = doc_data or {} headers = {} if "headers" in options: doc_data["headers_data"] = options["headers"] headers = options["headers"] # get request status code doc_data["status_code"] = options.get("status", 200) # when requesting HTML, typically when browsing the API through its # documented views, we need to enrich the input data with documentation # and render the apidoc HTML template if request.accepted_media_type == "text/html": doc_data["response_data"] = data if data is not None: doc_data["response_data"] = json.dumps( data, cls=JSONEncoder, sort_keys=True, indent=4, separators=(",", ": ") ) doc_data["heading"] = shorten_path(str(request.path)) # generate breadcrumbs data if "route" in doc_data: doc_data["endpoint_path"] = gen_path_info(doc_data["route"]) for i in range(len(doc_data["endpoint_path"]) - 1): doc_data["endpoint_path"][i]["path"] += "/doc/" if not doc_data["noargs"]: doc_data["endpoint_path"][-1]["path"] += "/doc/" response = render( request, "api/apidoc.html", doc_data, status=doc_data["status_code"] ) # otherwise simply return the raw data and let DRF picks # the correct renderer (JSON or YAML) else: response = Response( data, headers=headers, content_type=request.accepted_media_type, status=doc_data["status_code"], ) if getattr(request, "never_cache", False): add_never_cache_headers(response) return response def error_response( request: Request, exception: Exception, doc_data: Dict[str, Any] ) -> HttpResponse: """Private function to create a custom error response. Args: request: a DRF Request object error: the exception that caused the error doc_data: documentation data for HTML response """ error_code = 500 if isinstance(exception, BadInputExc): error_code = 400 elif isinstance(exception, NotFoundExc): error_code = 404 elif isinstance(exception, ForbiddenExc): error_code = 403 elif isinstance(exception, LargePayloadExc): error_code = 413 elif isinstance(exception, StorageDBError): error_code = 503 elif isinstance(exception, StorageAPIError): error_code = 503 elif isinstance(exception, APIException): error_code = exception.status_code error_opts = {"status": error_code} error_data = { "exception": exception.__class__.__name__, "reason": str(exception), } if request.accepted_media_type == "text/html": error_data["reason"] = escape(error_data["reason"]) if get_config()["debug"]: error_data["traceback"] = traceback.format_exc() logger.debug(error_data["traceback"]) return make_api_response(request, error_data, doc_data, options=error_opts) def error_response_handler( exc: Exception, context: Dict[str, Any] ) -> Optional[HttpResponse]: """Custom DRF exception handler used to generate API error responses. """ - sentry_sdk.capture_exception(exc) + sentry_capture_exception(exc) doc_data = getattr(exc, "doc_data", None) return error_response(context["request"], exc, doc_data) diff --git a/swh/web/common/exc.py b/swh/web/common/exc.py index a7c6a142..b688ccb8 100644 --- a/swh/web/common/exc.py +++ b/swh/web/common/exc.py @@ -1,152 +1,165 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import traceback import sentry_sdk +from django.core import exceptions from django.shortcuts import render from django.utils.html import escape from django.utils.safestring import mark_safe from swh.web.config import get_config logger = logging.getLogger("django") class BadInputExc(ValueError): """Wrong request to the api. Example: Asking a content with the wrong identifier format. """ pass -class NotFoundExc(Exception): +class NotFoundExc(exceptions.ObjectDoesNotExist): """Good request to the api but no result were found. Example: Asking a content with the right identifier format but that content does not exist. """ pass -class ForbiddenExc(Exception): +class ForbiddenExc(exceptions.PermissionDenied): """Good request to the api, forbidden result to return due to enforce policy. Example: Asking for a raw content which exists but whose mimetype is not text. """ pass class LargePayloadExc(Exception): """The input size is too large. Example: Asking to resolve 10000 SWHIDs when the limit is 1000. """ pass http_status_code_message = { 400: "Bad Request", 401: "Unauthorized", 403: "Access Denied", 404: "Resource not found", 413: "Payload Too Large", 500: "Internal Server Error", 501: "Not Implemented", 502: "Bad Gateway", 503: "Service unavailable", } def _generate_error_page(request, error_code, error_description): return render( request, "error.html", { "error_code": error_code, "error_message": http_status_code_message[error_code], "error_description": mark_safe(error_description), }, status=error_code, ) def swh_handle400(request, exception=None): """ Custom Django HTTP error 400 handler for swh-web. """ error_description = ( "The server cannot process the request to %s due to " "something that is perceived to be a client error." % escape(request.META["PATH_INFO"]) ) return _generate_error_page(request, 400, error_description) def swh_handle403(request, exception=None): """ Custom Django HTTP error 403 handler for swh-web. """ error_description = "The resource %s requires an authentication." % escape( request.META["PATH_INFO"] ) return _generate_error_page(request, 403, error_description) def swh_handle404(request, exception=None): """ Custom Django HTTP error 404 handler for swh-web. """ error_description = "The resource %s could not be found on the server." % escape( request.META["PATH_INFO"] ) return _generate_error_page(request, 404, error_description) def swh_handle500(request): """ Custom Django HTTP error 500 handler for swh-web. """ error_description = ( "An unexpected condition was encountered when " "requesting resource %s." % escape(request.META["PATH_INFO"]) ) return _generate_error_page(request, 500, error_description) +def sentry_capture_exception(exc): + if not isinstance( + exc, + ( + exceptions.ObjectDoesNotExist, + exceptions.DisallowedHost, + exceptions.PermissionDenied, + ), + ): + sentry_sdk.capture_exception(exc) + + def handle_view_exception(request, exc): """ Function used to generate an error page when an exception was raised inside a swh-web browse view. """ - sentry_sdk.capture_exception(exc) + sentry_capture_exception(exc) error_code = 500 error_description = "%s: %s" % (type(exc).__name__, str(exc)) if get_config()["debug"]: error_description = traceback.format_exc() logger.debug(error_description) if isinstance(exc, BadInputExc): error_code = 400 if isinstance(exc, ForbiddenExc): error_code = 403 if isinstance(exc, NotFoundExc): error_code = 404 resp = _generate_error_page(request, error_code, error_description) if get_config()["debug"]: resp.traceback = error_description return resp diff --git a/swh/web/common/middlewares.py b/swh/web/common/middlewares.py index 0c589fe0..7f30cc9d 100644 --- a/swh/web/common/middlewares.py +++ b/swh/web/common/middlewares.py @@ -1,91 +1,90 @@ # Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from htmlmin import minify -import sentry_sdk -from swh.web.common.exc import handle_view_exception +from swh.web.common.exc import handle_view_exception, sentry_capture_exception from swh.web.common.utils import prettify_html class HtmlPrettifyMiddleware(object): """ Django middleware for prettifying generated HTML in development mode. """ def __init__(self, get_response): self.get_response = get_response def __call__(self, request): response = self.get_response(request) if "text/html" in response.get("Content-Type", ""): if hasattr(response, "content"): content = response.content response.content = prettify_html(content) elif hasattr(response, "streaming_content"): content = b"".join(response.streaming_content) response.streaming_content = prettify_html(content) return response class HtmlMinifyMiddleware(object): """ Django middleware for minifying generated HTML in production mode. """ def __init__(self, get_response=None): self.get_response = get_response def __call__(self, request): response = self.get_response(request) if "text/html" in response.get("Content-Type", ""): try: minified_html = minify( response.content.decode("utf-8"), convert_charrefs=False ) response.content = minified_html.encode("utf-8") except Exception as exc: - sentry_sdk.capture_exception(exc) + sentry_capture_exception(exc) return response class ThrottlingHeadersMiddleware(object): """ Django middleware for inserting rate limiting related headers in HTTP response. """ def __init__(self, get_response=None): self.get_response = get_response def __call__(self, request): resp = self.get_response(request) if "RateLimit-Limit" in request.META: resp["X-RateLimit-Limit"] = request.META["RateLimit-Limit"] if "RateLimit-Remaining" in request.META: resp["X-RateLimit-Remaining"] = request.META["RateLimit-Remaining"] if "RateLimit-Reset" in request.META: resp["X-RateLimit-Reset"] = request.META["RateLimit-Reset"] return resp class ExceptionMiddleware(object): """ Django middleware for handling uncaught exception raised when processing a view. """ def __init__(self, get_response=None): self.get_response = get_response def __call__(self, request): return self.get_response(request) def process_exception(self, request, exception): return handle_view_exception(request, exception)