diff --git a/swh/web/api/apidoc.py b/swh/web/api/apidoc.py index c571ddc7..25eeffa2 100644 --- a/swh/web/api/apidoc.py +++ b/swh/web/api/apidoc.py @@ -1,454 +1,440 @@ # Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import defaultdict import functools from functools import wraps import os import re import textwrap from typing import List import docutils.nodes import docutils.parsers.rst import docutils.utils -import sentry_sdk from rest_framework.decorators import api_view -from swh.web.api.apiresponse import error_response, make_api_response +from swh.web.api.apiresponse import make_api_response from swh.web.api.apiurls import APIUrls from swh.web.common.utils import parse_rst class _HTTPDomainDocVisitor(docutils.nodes.NodeVisitor): """ docutils visitor for walking on a parsed rst document containing sphinx httpdomain roles. Its purpose is to extract relevant info regarding swh api endpoints (for instance url arguments) from their docstring written using sphinx httpdomain. 
""" # httpdomain roles we want to parse (based on sphinxcontrib.httpdomain 1.6) parameter_roles = ("param", "parameter", "arg", "argument") request_json_object_roles = ("reqjsonobj", "reqjson", "jsonobj", ">json") response_json_array_roles = ("resjsonarr", ">jsonarr") query_parameter_roles = ("queryparameter", "queryparam", "qparam", "query") request_header_roles = ("header", "resheader", "responseheader") status_code_roles = ("statuscode", "status", "code") def __init__(self, document, data): super().__init__(document) self.data = data self.args_set = set() self.params_set = set() self.inputs_set = set() self.returns_set = set() self.status_codes_set = set() self.reqheaders_set = set() self.resheaders_set = set() self.field_list_visited = False self.current_json_obj = None def process_paragraph(self, par): """ Process extracted paragraph text before display. Cleanup document model markups and transform the paragraph into a valid raw rst string (as the apidoc documentation transform rst to html when rendering). 
""" par = par.replace("\n", " ") # keep emphasized, strong and literal text par = par.replace("", "*") par = par.replace("", "*") par = par.replace("", "**") par = par.replace("", "**") par = par.replace("", "``") par = par.replace("", "``") # keep links to web pages if "', r"`\1 <\2>`_", par, ) # remove parsed document markups but keep rst links par = re.sub(r"<[^<]+?>(?!`_)", "", par) # api urls cleanup to generate valid links afterwards subs_made = 1 while subs_made: (par, subs_made) = re.subn(r"(:http:.*)(\(\w+\))", r"\1", par) subs_made = 1 while subs_made: (par, subs_made) = re.subn(r"(:http:.*)(\[.*\])", r"\1", par) par = re.sub(r"([^:])//", r"\1/", par) # transform references to api endpoints doc into valid rst links par = re.sub(":http:get:`([^,`]*)`", r"`\1 <\1doc/>`_", par) # transform references to some elements into bold text par = re.sub(":http:header:`(.*)`", r"**\1**", par) par = re.sub(":func:`(.*)`", r"**\1**", par) return par def visit_field_list(self, node): """ Visit parsed rst field lists to extract relevant info regarding api endpoint. 
""" self.field_list_visited = True for child in node.traverse(): # get the parsed field name if isinstance(child, docutils.nodes.field_name): field_name = child.astext() # parse field text elif isinstance(child, docutils.nodes.paragraph): text = self.process_paragraph(str(child)) field_data = field_name.split(" ") # Parameters if field_data[0] in self.parameter_roles: if field_data[2] not in self.args_set: self.data["args"].append( {"name": field_data[2], "type": field_data[1], "doc": text} ) self.args_set.add(field_data[2]) # Query Parameters if field_data[0] in self.query_parameter_roles: if field_data[2] not in self.params_set: self.data["params"].append( {"name": field_data[2], "type": field_data[1], "doc": text} ) self.params_set.add(field_data[2]) # Request data type if ( field_data[0] in self.request_json_array_roles or field_data[0] in self.request_json_object_roles ): # array if field_data[0] in self.request_json_array_roles: self.data["input_type"] = "array" # object else: self.data["input_type"] = "object" # input object field if field_data[2] not in self.inputs_set: self.data["inputs"].append( {"name": field_data[2], "type": field_data[1], "doc": text} ) self.inputs_set.add(field_data[2]) self.current_json_obj = self.data["inputs"][-1] # Response type if ( field_data[0] in self.response_json_array_roles or field_data[0] in self.response_json_object_roles ): # array if field_data[0] in self.response_json_array_roles: self.data["return_type"] = "array" # object else: self.data["return_type"] = "object" # returned object field if field_data[2] not in self.returns_set: self.data["returns"].append( {"name": field_data[2], "type": field_data[1], "doc": text} ) self.returns_set.add(field_data[2]) self.current_json_obj = self.data["returns"][-1] # Status Codes if field_data[0] in self.status_code_roles: if field_data[1] not in self.status_codes_set: self.data["status_codes"].append( {"code": field_data[1], "doc": text} ) self.status_codes_set.add(field_data[1]) 
# Request Headers if field_data[0] in self.request_header_roles: if field_data[1] not in self.reqheaders_set: self.data["reqheaders"].append( {"name": field_data[1], "doc": text} ) self.reqheaders_set.add(field_data[1]) # Response Headers if field_data[0] in self.response_header_roles: if field_data[1] not in self.resheaders_set: resheader = {"name": field_data[1], "doc": text} self.data["resheaders"].append(resheader) self.resheaders_set.add(field_data[1]) if ( resheader["name"] == "Content-Type" and resheader["doc"] == "application/octet-stream" ): self.data["return_type"] = "octet stream" def visit_paragraph(self, node): """ Visit relevant paragraphs to parse """ # only parsed top level paragraphs if isinstance(node.parent, docutils.nodes.block_quote): text = self.process_paragraph(str(node)) # endpoint description if not text.startswith("**") and text not in self.data["description"]: self.data["description"] += "\n\n" if self.data["description"] else "" self.data["description"] += text def visit_literal_block(self, node): """ Visit literal blocks """ text = node.astext() # literal block in endpoint description if not self.field_list_visited: self.data["description"] += ":\n\n%s\n" % textwrap.indent(text, "\t") # extract example urls if ":swh_web_api:" in text: examples_str = re.sub(".*`(.+)`.*", r"/api/1/\1", text) self.data["examples"] += examples_str.split("\n") def visit_bullet_list(self, node): # bullet list in endpoint description if not self.field_list_visited: self.data["description"] += "\n\n" for child in node.traverse(): # process list item if isinstance(child, docutils.nodes.paragraph): line_text = self.process_paragraph(str(child)) self.data["description"] += "\t* %s\n" % line_text elif self.current_json_obj: self.current_json_obj["doc"] += "\n\n" for child in node.traverse(): # process list item if isinstance(child, docutils.nodes.paragraph): line_text = self.process_paragraph(str(child)) self.current_json_obj["doc"] += "\t\t* %s\n" % line_text 
self.current_json_obj = None def visit_warning(self, node): text = self.process_paragraph(str(node)) rst_warning = "\n\n.. warning::\n%s\n" % textwrap.indent(text, "\t") if rst_warning not in self.data["description"]: self.data["description"] += rst_warning def unknown_visit(self, node): pass def unknown_departure(self, node): pass def _parse_httpdomain_doc(doc, data): doc_lines = doc.split("\n") doc_lines_filtered = [] urls = defaultdict(list) default_http_methods = ["HEAD", "OPTIONS"] # httpdomain is a sphinx extension that is unknown to docutils but # fortunately we can still parse its directives' content, # so remove lines with httpdomain directives before executing the # rst parser from docutils for doc_line in doc_lines: if ".. http" not in doc_line: doc_lines_filtered.append(doc_line) else: url = doc_line[doc_line.find("/") :] # emphasize url arguments for html rendering url = re.sub(r"\((\w+)\)", r" **\(\1\)** ", url) method = re.search(r"http:(\w+)::", doc_line).group(1) urls[url].append(method.upper()) for url, methods in urls.items(): data["urls"].append({"rule": url, "methods": methods + default_http_methods}) # parse the rst docstring and do not print system messages about # unknown httpdomain roles document = parse_rst("\n".join(doc_lines_filtered), report_level=5) # remove the system_message nodes from the parsed document for node in document.traverse(docutils.nodes.system_message): node.parent.remove(node) # visit the document nodes to extract relevant endpoint info visitor = _HTTPDomainDocVisitor(document, data) document.walkabout(visitor) class APIDocException(Exception): """ Custom exception to signal errors in the use of the APIDoc decorators """ def api_doc( - route: str, - noargs: bool = False, - tags: List[str] = [], - handle_response: bool = False, - api_version: str = "1", + route: str, noargs: bool = False, tags: List[str] = [], api_version: str = "1", ): """ Decorator for an API endpoint implementation used to generate a dedicated view 
displaying its HTML documentation. The documentation will be generated from the endpoint docstring based on sphinxcontrib-httpdomain format. Args: route: documentation page's route noargs: set to True if the route has no arguments, and its result should be displayed anytime its documentation is requested. Default to False tags: Further information on api endpoints. Two values are possibly expected: * hidden: remove the entry points from the listing * upcoming: display the entry point but it is not followable - - handle_response: indicate if the decorated function takes - care of creating the HTTP response or delegates that task to the - apiresponse module api_version: api version string """ tags_set = set(tags) # @api_doc() Decorator call def decorator(f): # if the route is not hidden, add it to the index if "hidden" not in tags_set: doc_data = get_doc_data(f, route, noargs) doc_desc = doc_data["description"] first_dot_pos = doc_desc.find(".") APIUrls.add_doc_route( route, doc_desc[: first_dot_pos + 1], noargs=noargs, api_version=api_version, tags=tags_set, ) # create a dedicated view to display endpoint HTML doc @api_view(["GET", "HEAD"]) @wraps(f) def doc_view(request): doc_data = get_doc_data(f, route, noargs) return make_api_response(request, None, doc_data) route_name = "%s-doc" % route[1:-1].replace("/", "-") urlpattern = f"^{api_version}{route}doc/$" view_name = "api-%s-%s" % (api_version, route_name) APIUrls.add_url_pattern(urlpattern, doc_view, view_name) @wraps(f) def documented_view(request, **kwargs): doc_data = get_doc_data(f, route, noargs) try: - response = f(request, **kwargs) + return {"data": f(request, **kwargs), "doc_data": doc_data} except Exception as exc: - sentry_sdk.capture_exception(exc) - return error_response(request, exc, doc_data) - - if handle_response: - return response - else: - return make_api_response(request, response, doc_data) + exc.doc_data = doc_data + raise exc return documented_view return decorator 
@functools.lru_cache(maxsize=32) def get_doc_data(f, route, noargs): """ Build documentation data for the decorated api endpoint function """ data = { "description": "", "response_data": None, "urls": [], "args": [], "params": [], "input_type": "", "inputs": [], "resheaders": [], "reqheaders": [], "return_type": "", "returns": [], "status_codes": [], "examples": [], "route": route, "noargs": noargs, } if not f.__doc__: raise APIDocException( "apidoc: expected a docstring" " for function %s" % (f.__name__,) ) # use raw docstring as endpoint documentation if sphinx # httpdomain is not used if ".. http" not in f.__doc__: data["description"] = f.__doc__ # else parse the sphinx httpdomain docstring with docutils # (except when building the swh-web documentation through autodoc # sphinx extension, not needed and raise errors with sphinx >= 1.7) elif "SWH_WEB_DOC_BUILD" not in os.environ: _parse_httpdomain_doc(f.__doc__, data) # process input/returned object info for nicer html display inputs_list = "" returns_list = "" for inp in data["inputs"]: # special case for array of non object type, for instance # :jsonarr string -: an array of string if ret["name"] != "-": returns_list += "\t* **%s (%s)**: %s\n" % ( ret["name"], ret["type"], ret["doc"], ) data["inputs_list"] = inputs_list data["returns_list"] = returns_list return data DOC_COMMON_HEADERS = """ :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request""" DOC_RESHEADER_LINK = """ :resheader Link: indicates that a subsequent result page is available and contains the url pointing to it """ DEFAULT_SUBSTITUTIONS = { "common_headers": DOC_COMMON_HEADERS, "resheader_link": DOC_RESHEADER_LINK, } def format_docstring(**substitutions): def decorator(f): f.__doc__ = f.__doc__.format(**{**DEFAULT_SUBSTITUTIONS, **substitutions}) return f return decorator diff --git 
a/swh/web/api/apiresponse.py b/swh/web/api/apiresponse.py index 69b38970..a7ed42df 100644 --- a/swh/web/api/apiresponse.py +++ b/swh/web/api/apiresponse.py @@ -1,200 +1,215 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json import traceback from typing import Any, Dict, Optional +import sentry_sdk + +from django.http import HttpResponse +from django.shortcuts import render from django.utils.html import escape +from rest_framework.exceptions import APIException from rest_framework.request import Request from rest_framework.response import Response from rest_framework.utils.encoders import JSONEncoder from swh.storage.exc import StorageAPIError, StorageDBError from swh.web.api import utils from swh.web.common.exc import BadInputExc, ForbiddenExc, LargePayloadExc, NotFoundExc from swh.web.common.utils import gen_path_info, shorten_path from swh.web.config import get_config def compute_link_header(rv: Dict[str, Any], options: Dict[str, Any]) -> Dict[str, Any]: """Add Link header in returned value results. 
Args: request: a DRF Request object rv (dict): dictionary with keys: - headers: potential headers with 'link-next' and 'link-prev' keys - results: containing the result to return options (dict): the initial dict to update with result if any Returns: dict: dictionary with optional keys 'link-next' and 'link-prev' """ link_headers = [] if "headers" not in rv: return {} rv_headers = rv["headers"] if "link-next" in rv_headers: link_headers.append('<%s>; rel="next"' % rv_headers["link-next"]) if "link-prev" in rv_headers: link_headers.append('<%s>; rel="previous"' % rv_headers["link-prev"]) if link_headers: link_header_str = ",".join(link_headers) headers = options.get("headers", {}) headers.update({"Link": link_header_str}) return headers return {} def filter_by_fields(request: Request, data: Dict[str, Any]) -> Dict[str, Any]: """Extract a request parameter 'fields' if it exists to permit the filtering on the data dict's keys. If such field is not provided, returns the data as is. """ fields = request.query_params.get("fields") if fields: data = utils.filter_field_keys(data, set(fields.split(","))) return data def transform(rv: Dict[str, Any]) -> Dict[str, Any]: """Transform an eventual returned value with multiple layer of information with only what's necessary. If the returned value rv contains the 'results' key, this is the associated value which is returned. Otherwise, return the initial dict without the potential 'headers' key. """ if "results" in rv: return rv["results"] if "headers" in rv: rv.pop("headers") return rv def make_api_response( request: Request, data: Dict[str, Any], doc_data: Optional[Dict[str, Any]] = None, options: Optional[Dict[str, Any]] = None, -) -> Response: +) -> HttpResponse: """Generates an API response based on the requested mimetype. 
Args: request: a DRF Request object data: raw data to return in the API response doc_data: documentation data for HTML response options: optional data that can be used to generate the response Returns: a DRF Response a object """ options = options or {} if data: options["headers"] = compute_link_header(data, options) data = transform(data) data = filter_by_fields(request, data) doc_data = doc_data or {} headers = {} if "headers" in options: doc_data["headers_data"] = options["headers"] headers = options["headers"] # get request status code doc_data["status_code"] = options.get("status", 200) - response_args = { - "status": doc_data["status_code"], - "headers": headers, - "content_type": request.accepted_media_type, - } - # when requesting HTML, typically when browsing the API through its # documented views, we need to enrich the input data with documentation - # related ones and inform DRF that we request HTML template rendering + # and render the apidoc HTML template if request.accepted_media_type == "text/html": doc_data["response_data"] = data if data: doc_data["response_data"] = json.dumps( data, cls=JSONEncoder, sort_keys=True, indent=4, separators=(",", ": ") ) doc_data["heading"] = shorten_path(str(request.path)) # generate breadcrumbs data if "route" in doc_data: doc_data["endpoint_path"] = gen_path_info(doc_data["route"]) for i in range(len(doc_data["endpoint_path"]) - 1): doc_data["endpoint_path"][i]["path"] += "/doc/" if not doc_data["noargs"]: doc_data["endpoint_path"][-1]["path"] += "/doc/" - response_args["data"] = doc_data - response_args["template_name"] = "api/apidoc.html" + return render( + request, "api/apidoc.html", doc_data, status=doc_data["status_code"] + ) # otherwise simply return the raw data and let DRF picks # the correct renderer (JSON or YAML) else: - response_args["data"] = data - - return Response(**response_args) + return Response( + data, + headers=headers, + content_type=request.accepted_media_type, + 
status=doc_data["status_code"], + ) def error_response( - request: Request, error: Exception, doc_data: Dict[str, Any] -) -> Response: + request: Request, exception: Exception, doc_data: Dict[str, Any] +) -> HttpResponse: """Private function to create a custom error response. Args: request: a DRF Request object error: the exception that caused the error doc_data: documentation data for HTML response """ error_code = 500 - if isinstance(error, BadInputExc): + if isinstance(exception, BadInputExc): error_code = 400 - elif isinstance(error, NotFoundExc): + elif isinstance(exception, NotFoundExc): error_code = 404 - elif isinstance(error, ForbiddenExc): + elif isinstance(exception, ForbiddenExc): error_code = 403 - elif isinstance(error, LargePayloadExc): + elif isinstance(exception, LargePayloadExc): error_code = 413 - elif isinstance(error, StorageDBError): + elif isinstance(exception, StorageDBError): error_code = 503 - elif isinstance(error, StorageAPIError): + elif isinstance(exception, StorageAPIError): error_code = 503 + elif isinstance(exception, APIException): + error_code = exception.status_code error_opts = {"status": error_code} error_data = { - "exception": error.__class__.__name__, - "reason": str(error), + "exception": exception.__class__.__name__, + "reason": str(exception), } if request.accepted_media_type == "text/html": error_data["reason"] = escape(error_data["reason"]) if get_config()["debug"]: error_data["traceback"] = traceback.format_exc() return make_api_response(request, error_data, doc_data, options=error_opts) + + +def error_response_handler( + exc: Exception, context: Dict[str, Any] +) -> Optional[HttpResponse]: + """Custom DRF exception handler used to generate API error responses. 
+ """ + sentry_sdk.capture_exception(exc) + doc_data = getattr(exc, "doc_data", None) + return error_response(context["request"], exc, doc_data) diff --git a/swh/web/api/apiurls.py b/swh/web/api/apiurls.py index 4efe8ce5..3bfb29f2 100644 --- a/swh/web/api/apiurls.py +++ b/swh/web/api/apiurls.py @@ -1,95 +1,117 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import functools -from typing import Dict +from typing import Dict, List, Optional +from django.http import HttpResponse from rest_framework.decorators import api_view from swh.web.api import throttling +from swh.web.api.apiresponse import make_api_response from swh.web.common.urlsindex import UrlsIndex class APIUrls(UrlsIndex): """ Class to manage API documentation URLs. - Indexes all routes documented using apidoc's decorators. 
- Tracks endpoint/request processing method relationships for use in generating related urls in API documentation """ _apidoc_routes = {} # type: Dict[str, Dict[str, str]] scope = "api" @classmethod - def get_app_endpoints(cls): + def get_app_endpoints(cls) -> Dict[str, Dict[str, str]]: return cls._apidoc_routes @classmethod - def add_doc_route(cls, route, docstring, noargs=False, api_version="1", **kwargs): + def add_doc_route( + cls, + route: str, + docstring: str, + noargs: bool = False, + api_version: str = "1", + **kwargs, + ) -> None: """ Add a route to the self-documenting API reference """ route_name = route[1:-1].replace("/", "-") if not noargs: route_name = "%s-doc" % route_name route_view_name = "api-%s-%s" % (api_version, route_name) if route not in cls._apidoc_routes: d = { "docstring": docstring, "route": "/api/%s%s" % (api_version, route), "route_view_name": route_view_name, } for k, v in kwargs.items(): d[k] = v cls._apidoc_routes[route] = d def api_route( - url_pattern=None, - view_name=None, - methods=["GET", "HEAD", "OPTIONS"], - throttle_scope="swh_api", - api_version="1", - checksum_args=None, + url_pattern: str, + view_name: Optional[str] = None, + methods: List[str] = ["GET", "HEAD", "OPTIONS"], + throttle_scope: str = "swh_api", + api_version: str = "1", + checksum_args: Optional[List[str]] = None, ): """ Decorator to ease the registration of an API endpoint using the Django REST Framework. 
Args: url_pattern: the url pattern used by DRF to identify the API route view_name: the name of the API view associated to the route used to reverse the url methods: array of HTTP methods supported by the API route + throttle_scope: Named scope for rate limiting + api_version: web API version + checksum_args: list of view argument names holding checksum values """ url_pattern = "^" + api_version + url_pattern + "$" def decorator(f): # create a DRF view from the wrapped function @api_view(methods) @throttling.throttle_scope(throttle_scope) @functools.wraps(f) - def api_view_f(*args, **kwargs): - return f(*args, **kwargs) + def api_view_f(request, **kwargs): + response = f(request, **kwargs) + doc_data = None + # check if response has been forwarded by api_doc decorator + if isinstance(response, dict) and "doc_data" in response: + doc_data = response["doc_data"] + response = response["data"] + # check if HTTP response needs to be created + if not isinstance(response, HttpResponse): + return make_api_response(request, data=response, doc_data=doc_data) + else: + return response # small hacks for correctly generating API endpoints index doc api_view_f.__name__ = f.__name__ api_view_f.http_method_names = methods # register the route and its view in the endpoints index APIUrls.add_url_pattern(url_pattern, api_view_f, view_name) if checksum_args: APIUrls.add_redirect_for_checksum_args( view_name, [url_pattern], checksum_args ) return f return decorator diff --git a/swh/web/api/views/content.py b/swh/web/api/views/content.py index 39c85547..aade2fe9 100644 --- a/swh/web/api/views/content.py +++ b/swh/web/api/views/content.py @@ -1,409 +1,409 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import functools from django.http import HttpResponse from swh.web.api import 
utils from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup from swh.web.common import archive from swh.web.common.exc import NotFoundExc from swh.web.common.utils import reverse @api_route( r"/content/(?P[0-9a-z_:]*[0-9a-f]+)/filetype/", "api-1-content-filetype", checksum_args=["q"], ) @api_doc("/content/filetype/") @format_docstring() def api_content_filetype(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/filetype/ Get information about the detected MIME type of a content object. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is `sha1`. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. :>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for getting information about the content :>json string encoding: the detected content encoding :>json string id: the **sha1** identifier of the content :>json string mimetype: the detected MIME type of the content :>json object tool: information about the tool used to detect the content filetype {common_headers} :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. 
parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/filetype/` """ return api_lookup( archive.lookup_content_filetype, q, notfound_msg="No filetype information found for content {}.".format(q), enrich_fn=utils.enrich_metadata_endpoint, request=request, ) @api_route( r"/content/(?P[0-9a-z_:]*[0-9a-f]+)/language/", "api-1-content-language", checksum_args=["q"], ) @api_doc("/content/language/") @format_docstring() def api_content_language(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/language/ Get information about the programming language used in a content object. Note: this endpoint currently returns no data. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is ``sha1``. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. :>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for getting information about the content :>json string id: the **sha1** identifier of the content :>json string lang: the detected programming language if any :>json object tool: information about the tool used to detect the programming language {common_headers} :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. 
parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/language/` """ return api_lookup( archive.lookup_content_language, q, notfound_msg="No language information found for content {}.".format(q), enrich_fn=utils.enrich_metadata_endpoint, request=request, ) @api_route( r"/content/(?P[0-9a-z_:]*[0-9a-f]+)/license/", "api-1-content-license", checksum_args=["q"], ) @api_doc("/content/license/") @format_docstring() def api_content_license(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/license/ Get information about the license of a content object. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is ``sha1``. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. :>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for getting information about the content :>json string id: the **sha1** identifier of the content :>json array licenses: array of strings containing the detected license names :>json object tool: information about the tool used to detect the license {common_headers} :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. 
parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/license/` """ return api_lookup( archive.lookup_content_license, q, notfound_msg="No license information found for content {}.".format(q), enrich_fn=utils.enrich_metadata_endpoint, request=request, ) @api_route(r"/content/(?P[0-9a-z_:]*[0-9a-f]+)/ctags/", "api-1-content-ctags") @api_doc("/content/ctags/", tags=["hidden"]) def api_content_ctags(request, q): """ Get information about all `Ctags `_-style symbols defined in a content object. """ return api_lookup( archive.lookup_content_ctags, q, notfound_msg="No ctags symbol found for content {}.".format(q), enrich_fn=utils.enrich_metadata_endpoint, request=request, ) @api_route( r"/content/(?P[0-9a-z_:]*[0-9a-f]+)/raw/", "api-1-content-raw", checksum_args=["q"], ) -@api_doc("/content/raw/", handle_response=True) +@api_doc("/content/raw/") def api_content_raw(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/raw/ Get the raw content of a content object (aka a "blob"), as a byte sequence. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is ``sha1``. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. :query string filename: if provided, the downloaded content will get that filename :resheader Content-Type: application/octet-stream :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. 
parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/raw/` """ def generate(content): yield content["data"] content_raw = archive.lookup_content_raw(q) if not content_raw: raise NotFoundExc("Content %s is not found." % q) filename = request.query_params.get("filename") if not filename: filename = "content_%s_raw" % q.replace(":", "_") response = HttpResponse( generate(content_raw), content_type="application/octet-stream" ) response["Content-disposition"] = "attachment; filename=%s" % filename return response @api_route(r"/content/symbol/(?P.+)/", "api-1-content-symbol") @api_doc("/content/symbol/", tags=["hidden"]) def api_content_symbol(request, q=None): """Search content objects by `Ctags `_-style symbol (e.g., function name, data type, method, ...). """ result = {} last_sha1 = request.query_params.get("last_sha1", None) per_page = int(request.query_params.get("per_page", "10")) def lookup_exp(exp, last_sha1=last_sha1, per_page=per_page): exp = list(archive.lookup_expression(exp, last_sha1, per_page)) return exp if exp else None symbols = api_lookup( lookup_exp, q, notfound_msg="No indexed raw content match expression '{}'.".format(q), enrich_fn=functools.partial(utils.enrich_content, top_url=True), request=request, ) if symbols: nb_symbols = len(symbols) if nb_symbols == per_page: query_params = {} new_last_sha1 = symbols[-1]["sha1"] query_params["last_sha1"] = new_last_sha1 if request.query_params.get("per_page"): query_params["per_page"] = per_page result["headers"] = { "link-next": reverse( "api-1-content-symbol", url_args={"q": q}, query_params=query_params, request=request, ) } result.update({"results": symbols}) return result @api_route(r"/content/known/search/", "api-1-content-known", methods=["POST"]) @api_route(r"/content/known/(?P(?!search).*)/", "api-1-content-known") @api_doc("/content/known/", tags=["hidden"]) @format_docstring() def api_check_content_known(request, q=None): """ .. 
# The URL group is named ``q``: it has to agree with the view's ``q``
# parameter and with ``checksum_args=["q"]``.  An unnamed ``(?P...)`` group
# is not a valid regular expression.
@api_route(
    r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/", "api-1-content", checksum_args=["q"]
)
@api_doc("/content/")
@format_docstring()
def api_content_metadata(request, q):
    """
    .. http:get:: /api/1/content/[(hash_type):](hash)/

        Get information about a content (aka a "blob") object.
        In the archive, a content object is identified based on checksum
        values computed using various hashing algorithms.

        :param string hash_type: optional parameter specifying which hashing
            algorithm has been used to compute the content checksum. It can be
            either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If
            that parameter is not provided, it is assumed that the hashing
            algorithm used is ``sha1``.
        :param string hash: hexadecimal representation of the checksum value
            computed with the specified hashing algorithm.

        {common_headers}

        :>json object checksums: object holding the computed checksum values
            for the requested content
        :>json string data_url: link to
            :http:get:`/api/1/content/[(hash_type):](hash)/raw/`
            for downloading the content raw bytes
        :>json string filetype_url: link to
            :http:get:`/api/1/content/[(hash_type):](hash)/filetype/`
            for getting information about the content MIME type
        :>json string language_url: link to
            :http:get:`/api/1/content/[(hash_type):](hash)/language/`
            for getting information about the programming language used in
            the content
        :>json number length: length of the content in bytes
        :>json string license_url: link to
            :http:get:`/api/1/content/[(hash_type):](hash)/license/`
            for getting information about the license of the content

        :statuscode 200: no error
        :statuscode 400: an invalid **hash_type** or **hash** has been provided
        :statuscode 404: requested content can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`content/sha1_git:fe95a46679d128ff167b7c55df5d02356c5a1ae1/`
    """
    # NOTE: the docstring above goes through format_docstring(), so it must
    # not contain stray curly braces besides the placeholder it declares.
    # The raw "[hash_type:]hash" string is handed both to the archive lookup
    # and to the enrichment step (as query_string).
    return api_lookup(
        archive.lookup_content,
        q,
        notfound_msg="Content with {} not found.".format(q),
        enrich_fn=functools.partial(utils.enrich_content, query_string=q),
        request=request,
    )
Depending of the provided origin url, the save request can either be: * immediately **accepted**, for well known code hosting providers like for instance GitHub or GitLab * **rejected**, in case the url is blacklisted by Software Heritage * **put in pending state** until a manual check is done in order to determine if it can be loaded or not Once a saving request has been accepted, its associated saving task status can then be checked through a GET request on the same url. Returned status can either be: * **not created**: no saving task has been created * **not yet scheduled**: saving task has been created but its execution has not yet been scheduled * **scheduled**: the task execution has been scheduled * **succeeded**: the saving task has been successfully executed * **failed**: the saving task has been executed but it failed When issuing a POST request an object will be returned while a GET request will return an array of objects (as multiple save requests might have been submitted for the same origin). 
:param string visit_type: the type of visit to perform (currently the supported types are ``git``, ``hg`` and ``svn``) :param string origin_url: the url of the origin to save {common_headers} :>json string origin_url: the url of the origin to save :>json string visit_type: the type of visit to perform :>json string save_request_date: the date (in iso format) the save request was issued :>json string save_request_status: the status of the save request, either **accepted**, **rejected** or **pending** :>json string save_task_status: the status of the origin saving task, either **not created**, **not yet scheduled**, **scheduled**, **succeeded** or **failed** :statuscode 200: no error :statuscode 400: an invalid visit type or origin url has been provided :statuscode 403: the provided origin url is blacklisted :statuscode 404: no save requests have been found for a given origin """ if request.method == "POST": sor = create_save_origin_request(visit_type, origin_url) del sor["id"] else: sor = get_save_origin_requests(visit_type, origin_url) for s in sor: del s["id"] return sor diff --git a/swh/web/api/views/revision.py b/swh/web/api/views/revision.py index f5cf924f..17cb1d25 100644 --- a/swh/web/api/views/revision.py +++ b/swh/web/api/views/revision.py @@ -1,237 +1,237 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.http import HttpResponse from swh.web.api import utils from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup from swh.web.common import archive DOC_RETURN_REVISION = """ :>json object author: information about the author of the revision :>json object committer: information about the committer of the revision :>json string committer_date: ISO 
def _revision_directory_by(revision, path, request_path, limit=100, with_data=False):
    """
    Look up what a path resolves to inside a revision and enrich the result.

    Args:
        revision: dictionary of criteria identifying the revision to look up
        path: path to resolve from the revision (forwarded as-is to the
            archive lookup)
        request_path: path of the originating request, used as context url
            when enriching directory entries
        limit: forwarded to the archive lookup (defaults to 100)
        with_data: forwarded to the archive lookup; per the original notes it
            asks for the raw data when the path resolves to a content

    Returns:
        The dictionary produced by the archive lookup, whose ``content``
        entry has been replaced by its enriched counterpart when ``type`` is
        one of ``dir``, ``file`` or ``rev``.
    """
    # The lookup also yields the revision identifier, which is not needed here.
    _, result = archive.lookup_directory_through_revision(
        revision, path, limit=limit, with_data=with_data
    )

    payload = result["content"]
    kind = result["type"]

    if kind == "dir":
        # directory: enrich every entry, keeping the request path as context
        result["content"] = [
            utils.enrich_directory(entry, request_path) for entry in payload
        ]
    elif kind == "file":
        # single content object
        result["content"] = utils.enrich_content(payload)
    elif kind == "rev":
        # revision
        result["content"] = utils.enrich_revision(payload)

    return result
# The URL group is named ``sha1_git``, matching both the view parameter and
# ``checksum_args``.  An unnamed ``(?P...)`` group is not a valid regular
# expression.
@api_route(
    r"/revision/(?P<sha1_git>[0-9a-f]+)/raw/",
    "api-1-revision-raw-message",
    checksum_args=["sha1_git"],
)
@api_doc("/revision/raw/", tags=["hidden"])
def api_revision_raw_message(request, sha1_git):
    """Return the raw data of the message of revision identified by sha1_git
    """
    # The archive lookup returns a mapping; only its "message" entry is sent.
    raw = archive.lookup_revision_message(sha1_git)
    # Serve the message as a downloadable binary attachment named after the
    # revision identifier.
    response = HttpResponse(raw["message"], content_type="application/octet-stream")
    response["Content-disposition"] = "attachment;filename=rev_%s_raw" % sha1_git
    return response
# The URL group is named ``sha1_git``, matching both the view parameter and
# ``checksum_args``.  An unnamed ``(?P...)`` group is not a valid regular
# expression.
@api_route(
    r"/revision/(?P<sha1_git>[0-9a-f]+)/log/",
    "api-1-revision-log",
    checksum_args=["sha1_git"],
)
@api_doc("/revision/log/")
@format_docstring(return_revision_array=DOC_RETURN_REVISION_ARRAY)
def api_revision_log(request, sha1_git):
    """
    .. http:get:: /api/1/revision/(sha1_git)/log/

        Get a list of all revisions heading to a given one, in other words show
        the commit log.

        The revisions are returned in the breadth-first search order while
        visiting the revision graph. The number of revisions to return is also
        bounded by the **limit** query parameter.

        .. warning::
            To get the full BFS traversal of the revision graph when the
            total number of revisions is greater than 1000, it is up to
            the client to keep track of the multiple branches of history
            when there's merge revisions in the returned objects.
            In other words, identify all the continuation points that need
            to be followed to get the full history through recursion.

        :param string sha1_git: hexadecimal representation of the revision
            **sha1_git** identifier
        :query int limit: maximum number of revisions to return when performing
            BFS traversal on the revision graph (default to 10, can not exceed
            1000)

        {common_headers}

        {return_revision_array}

        :statuscode 200: no error
        :statuscode 400: an invalid **sha1_git** value has been provided
        :statuscode 404: head revision can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`revision/e1a315fa3fa734e2a6154ed7b5b9ae0eb8987aad/log/`
    """
    # NOTE: the docstring above goes through format_docstring(), so it must
    # not contain stray curly braces besides the placeholders it declares.
    # Parse the requested page size (10 when absent) and cap it at 1000, the
    # upper bound advertised in the endpoint documentation.
    limit = int(request.query_params.get("limit", "10"))
    limit = min(limit, 1000)

    error_msg = "Revision with sha1_git %s not found." % sha1_git
    revisions = api_lookup(
        archive.lookup_revision_log,
        sha1_git,
        limit,
        notfound_msg=error_msg,
        enrich_fn=utils.enrich_revision,
        request=request,
    )

    # Wrap the enriched revisions under the "results" key.
    return {"results": revisions}
def _dispatch_cook_progress(request, obj_type, obj_id):
    """Route a vault request to the archive call matching its HTTP method.

    Args:
        request: incoming request; its ``method`` selects the action and, for
            POST, its ``POST``/``GET`` data may carry an ``email`` value
        obj_type: vault object type, forwarded unchanged to the archive; the
            part before the first underscore is used in error messages
        obj_id: object identifier, forwarded unchanged to the archive and
            rendered through ``hashutil.hash_to_hex`` for error messages

    Returns:
        What ``api_lookup`` returns for ``archive.vault_progress`` (GET) or
        ``archive.vault_cook`` (POST); ``None`` for any other method.
    """
    readable_id = hashutil.hash_to_hex(obj_id)
    kind = obj_type.split("_")[0]
    method = request.method

    if method == "GET":
        # Status check of an existing cooking task.
        missing = "Cooking of {} '{}' was never requested.".format(kind, readable_id)
        return api_lookup(
            archive.vault_progress,
            obj_type,
            obj_id,
            notfound_msg=missing,
            request=request,
        )

    if method == "POST":
        # New cooking request; form data takes precedence over the query
        # string for the optional email value.
        email = request.POST.get("email", request.GET.get("email", None))
        missing = "{} '{}' not found.".format(kind.title(), readable_id)
        return api_lookup(
            archive.vault_cook,
            obj_type,
            obj_id,
            email,
            notfound_msg=missing,
            request=request,
        )

    # Any other method is not dispatched.
    return None
# The URL group is named ``dir_id``, matching the view parameter,
# ``checksum_args`` and the ``url_args`` used when reversing
# "api-1-vault-fetch-directory".  An unnamed ``(?P...)`` group is not a valid
# regular expression.
@api_route(
    r"/vault/directory/(?P<dir_id>[0-9a-f]+)/raw/",
    "api-1-vault-fetch-directory",
    checksum_args=["dir_id"],
)
@api_doc("/vault/directory/raw/")
def api_vault_fetch_directory(request, dir_id):
    """
    .. http:get:: /api/1/vault/directory/(dir_id)/raw/

        Fetch the cooked archive for a directory.

        See :http:get:`/api/1/vault/directory/(dir_id)/` to get more
        details on directory cooking.

        :param string dir_id: the directory's sha1 identifier

        :resheader Content-Type: application/gzip

        :statuscode 200: no error
        :statuscode 400: an invalid directory identifier has been provided
        :statuscode 404: requested directory did not receive any cooking
            request yet (in case of GET) or can not be found in the archive
            (in case of POST)
    """
    # Validate the identifier; only the parsed hash value is needed below.
    _, obj_id = query.parse_hash_with_algorithms_or_throws(
        dir_id, ["sha1"], "Only sha1_git is supported."
    )
    res = api_lookup(
        archive.vault_fetch,
        "directory",
        obj_id,
        notfound_msg="Cooked archive for directory '{}' not found.".format(dir_id),
        request=request,
    )
    # Serve the cooked archive as a gzip attachment named after the directory.
    fname = "{}.tar.gz".format(dir_id)
    response = HttpResponse(res, content_type="application/gzip")
    response["Content-disposition"] = "attachment; filename={}".format(fname)
    return response
# The URL group is named ``rev_id``, matching the view parameter,
# ``checksum_args`` and the ``url_args`` used when reversing
# "api-1-vault-fetch-revision_gitfast".  An unnamed ``(?P...)`` group is not a
# valid regular expression.
@api_route(
    r"/vault/revision/(?P<rev_id>[0-9a-f]+)/gitfast/raw/",
    "api-1-vault-fetch-revision_gitfast",
    checksum_args=["rev_id"],
)
@api_doc("/vault/revision/gitfast/raw/")
def api_vault_fetch_revision_gitfast(request, rev_id):
    """
    .. http:get:: /api/1/vault/revision/(rev_id)/gitfast/raw/

        Fetch the cooked gitfast archive for a revision.

        See :http:get:`/api/1/vault/revision/(rev_id)/gitfast/` to get more
        details on revision cooking.

        :param string rev_id: the revision's sha1 identifier

        :resheader Content-Type: application/gzip

        :statuscode 200: no error
        :statuscode 400: an invalid revision identifier has been provided
        :statuscode 404: requested revision did not receive any cooking
            request yet (in case of GET) or can not be found in the archive
            (in case of POST)
    """
    # Validate the identifier; only the parsed hash value is needed below.
    _, obj_id = query.parse_hash_with_algorithms_or_throws(
        rev_id, ["sha1"], "Only sha1_git is supported."
    )
    res = api_lookup(
        archive.vault_fetch,
        "revision_gitfast",
        obj_id,
        notfound_msg="Cooked archive for revision '{}' not found.".format(rev_id),
        request=request,
    )
    # Serve the cooked archive as a gzip attachment named after the revision.
    fname = "{}.gitfast.gz".format(rev_id)
    response = HttpResponse(res, content_type="application/gzip")
    response["Content-disposition"] = "attachment; filename={}".format(fname)
    return response
""" import os import sys from typing import Any, Dict from swh.web.config import get_config swh_web_config = get_config() # Build paths inside the project like this: os.path.join(BASE_DIR, ...) PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! SECRET_KEY = swh_web_config["secret_key"] # SECURITY WARNING: don't run with debug turned on in production! DEBUG = swh_web_config["debug"] DEBUG_PROPAGATE_EXCEPTIONS = swh_web_config["debug"] ALLOWED_HOSTS = ["127.0.0.1", "localhost"] + swh_web_config["allowed_hosts"] # Application definition INSTALLED_APPS = [ "django.contrib.admin", "django.contrib.auth", "django.contrib.contenttypes", "django.contrib.sessions", "django.contrib.messages", "django.contrib.staticfiles", "rest_framework", "swh.web.common", "swh.web.api", "swh.web.auth", "swh.web.browse", "webpack_loader", "django_js_reverse", "corsheaders", ] MIDDLEWARE = [ "django.middleware.security.SecurityMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", "corsheaders.middleware.CorsMiddleware", "django.middleware.common.CommonMiddleware", "django.middleware.csrf.CsrfViewMiddleware", "django.contrib.auth.middleware.AuthenticationMiddleware", "swh.web.auth.middlewares.OIDCSessionRefreshMiddleware", "django.contrib.messages.middleware.MessageMiddleware", "django.middleware.clickjacking.XFrameOptionsMiddleware", "swh.web.common.middlewares.ThrottlingHeadersMiddleware", ] # Compress all assets (static ones and dynamically generated html) # served by django in a local development environment context. # In a production environment, assets compression will be directly # handled by web servers like apache or nginx. 
if swh_web_config["serve_assets"]: MIDDLEWARE.insert(0, "django.middleware.gzip.GZipMiddleware") ROOT_URLCONF = "swh.web.urls" TEMPLATES = [ { "BACKEND": "django.template.backends.django.DjangoTemplates", "DIRS": [os.path.join(PROJECT_DIR, "../templates")], "APP_DIRS": True, "OPTIONS": { "context_processors": [ "django.template.context_processors.debug", "django.template.context_processors.request", "django.contrib.auth.context_processors.auth", "django.contrib.messages.context_processors.messages", "swh.web.common.utils.context_processor", ], "libraries": {"swh_templatetags": "swh.web.common.swh_templatetags",}, }, }, ] DATABASES = { "default": { "ENGINE": "django.db.backends.sqlite3", "NAME": swh_web_config["development_db"], } } # Password validation # https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators AUTH_PASSWORD_VALIDATORS = [ { "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", # noqa }, {"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",}, {"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",}, {"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",}, ] # Internationalization # https://docs.djangoproject.com/en/1.11/topics/i18n/ LANGUAGE_CODE = "en-us" TIME_ZONE = "UTC" USE_I18N = True USE_L10N = True USE_TZ = True # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/1.11/howto/static-files/ STATIC_URL = "/static/" # static folder location when swh-web has been installed with pip STATIC_DIR = os.path.join(sys.prefix, "share/swh/web/static") if not os.path.exists(STATIC_DIR): # static folder location when developping swh-web STATIC_DIR = os.path.join(PROJECT_DIR, "../../../static") STATICFILES_DIRS = [STATIC_DIR] INTERNAL_IPS = ["127.0.0.1"] throttle_rates = {} http_requests = ["GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "PATCH"] throttling = swh_web_config["throttling"] for limiter_scope, limiter_conf 
in throttling["scopes"].items(): if "default" in limiter_conf["limiter_rate"]: throttle_rates[limiter_scope] = limiter_conf["limiter_rate"]["default"] # for backward compatibility else: throttle_rates[limiter_scope] = limiter_conf["limiter_rate"] # register sub scopes specific for HTTP request types for http_request in http_requests: if http_request in limiter_conf["limiter_rate"]: throttle_rates[limiter_scope + "_" + http_request.lower()] = limiter_conf[ "limiter_rate" ][http_request] REST_FRAMEWORK: Dict[str, Any] = { "DEFAULT_RENDERER_CLASSES": ( "rest_framework.renderers.JSONRenderer", "swh.web.api.renderers.YAMLRenderer", "rest_framework.renderers.TemplateHTMLRenderer", ), "DEFAULT_THROTTLE_CLASSES": ("swh.web.api.throttling.SwhWebRateThrottle",), "DEFAULT_THROTTLE_RATES": throttle_rates, "DEFAULT_AUTHENTICATION_CLASSES": [ "rest_framework.authentication.SessionAuthentication", "swh.web.auth.backends.OIDCBearerTokenAuthentication", ], + "EXCEPTION_HANDLER": "swh.web.api.apiresponse.error_response_handler", } LOGGING = { "version": 1, "disable_existing_loggers": False, "filters": { "require_debug_false": {"()": "django.utils.log.RequireDebugFalse",}, "require_debug_true": {"()": "django.utils.log.RequireDebugTrue",}, }, "formatters": { "request": { "format": "[%(asctime)s] [%(levelname)s] %(request)s %(status_code)s", "datefmt": "%d/%b/%Y %H:%M:%S", }, "simple": { "format": "[%(asctime)s] [%(levelname)s] %(message)s", "datefmt": "%d/%b/%Y %H:%M:%S", }, "verbose": { "format": ( "[%(asctime)s] [%(levelname)s] %(name)s.%(funcName)s:%(lineno)s " "- %(message)s" ), "datefmt": "%d/%b/%Y %H:%M:%S", }, }, "handlers": { "console": { "level": "DEBUG", "filters": ["require_debug_true"], "class": "logging.StreamHandler", "formatter": "simple", }, "file": { "level": "WARNING", "filters": ["require_debug_false"], "class": "logging.FileHandler", "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), "formatter": "simple", }, "file_request": { "level": "WARNING", 
"filters": ["require_debug_false"], "class": "logging.FileHandler", "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), "formatter": "request", }, "console_verbose": { "level": "DEBUG", "filters": ["require_debug_true"], "class": "logging.StreamHandler", "formatter": "verbose", }, "file_verbose": { "level": "WARNING", "filters": ["require_debug_false"], "class": "logging.FileHandler", "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), "formatter": "verbose", }, "null": {"class": "logging.NullHandler",}, }, "loggers": { "": { "handlers": ["console_verbose", "file_verbose"], "level": "DEBUG" if DEBUG else "WARNING", }, "django": { "handlers": ["console"], "level": "DEBUG" if DEBUG else "WARNING", "propagate": False, }, "django.request": { "handlers": ["file_request"], "level": "DEBUG" if DEBUG else "WARNING", "propagate": False, }, "django.db.backends": {"handlers": ["null"], "propagate": False}, "django.utils.autoreload": {"level": "INFO",}, }, } WEBPACK_LOADER = { "DEFAULT": { "CACHE": False, "BUNDLE_DIR_NAME": "./", "STATS_FILE": os.path.join(STATIC_DIR, "webpack-stats.json"), "POLL_INTERVAL": 0.1, "TIMEOUT": None, "IGNORE": [".+\\.hot-update.js", ".+\\.map"], } } LOGIN_URL = "/admin/login/" LOGIN_REDIRECT_URL = "admin" SESSION_ENGINE = "django.contrib.sessions.backends.cache" CACHES = { "default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}, } JS_REVERSE_JS_MINIFY = False CORS_ORIGIN_ALLOW_ALL = True CORS_URLS_REGEX = r"^/badge/.*$" AUTHENTICATION_BACKENDS = [ "django.contrib.auth.backends.ModelBackend", "swh.web.auth.backends.OIDCAuthorizationCodePKCEBackend", ] diff --git a/swh/web/tests/api/test_apiresponse.py b/swh/web/tests/api/test_apiresponse.py index 82067f61..5f84c6cb 100644 --- a/swh/web/tests/api/test_apiresponse.py +++ b/swh/web/tests/api/test_apiresponse.py @@ -1,140 +1,140 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this 
# distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

# NOTE(review): this span was a flattened unified-diff hunk. It is written out
# below as the post-patch text: "+" lines are kept, "-" lines are dropped.
# Blank lines inside functions are reconstructed -- confirm against the
# real file before relying on exact line numbers.

import json

from swh.web.api.apiresponse import (
    compute_link_header,
    filter_by_fields,
    make_api_response,
    transform,
)
from swh.web.common.utils import reverse
from swh.web.tests.django_asserts import assert_contains


def test_compute_link_header():
    """``link-next`` and ``link-prev`` are merged into a single ``Link`` header."""
    next_link = "/api/endpoint/next"
    prev_link = "/api/endpoint/prev"
    rv = {
        "headers": {"link-next": next_link, "link-prev": prev_link},
        "results": [1, 2, 3],
    }
    options = {}

    headers = compute_link_header(rv, options)

    assert headers == {
        "Link": (f'<{next_link}>; rel="next",' f'<{prev_link}>; rel="previous"')
    }


def test_compute_link_header_nothing_changed():
    """A return value without a ``headers`` entry yields no HTTP headers."""
    rv = {}
    options = {}

    headers = compute_link_header(rv, options)

    assert headers == {}


def test_compute_link_header_nothing_changed_2():
    """An empty ``headers`` entry yields no HTTP headers."""
    rv = {"headers": {}}
    options = {}

    headers = compute_link_header(rv, options)

    assert headers == {}


def test_transform_only_return_results_1():
    """``transform`` returns the value stored under ``results``."""
    rv = {"results": {"some-key": "some-value"}}

    assert transform(rv) == {"some-key": "some-value"}


def test_transform_only_return_results_2():
    """``transform`` returns ``results`` even when ``headers`` is also present."""
    rv = {"headers": {"something": "do changes"}, "results": {"some-key": "some-value"}}

    assert transform(rv) == {"some-key": "some-value"}


def test_transform_do_remove_headers():
    """Without a ``results`` entry, ``transform`` only strips ``headers``."""
    rv = {"headers": {"something": "do changes"}, "some-key": "some-value"}

    assert transform(rv) == {"some-key": "some-value"}


def test_transform_do_nothing():
    """A mapping with neither ``headers`` nor ``results`` is returned as is."""
    rv = {"some-key": "some-value"}

    assert transform(rv) == {"some-key": "some-value"}


def test_swh_multi_response_mimetype(mocker, api_request_factory):
    """``make_api_response`` answers 200 for every accepted media type.

    For YAML and JSON the response exposes the raw data; for HTML the
    response must contain the JSON dump of the data.
    """
    mock_shorten_path = mocker.patch("swh.web.api.apiresponse.shorten_path")
    mock_filter = mocker.patch("swh.web.api.apiresponse.filter_by_fields")
    mock_json = mocker.patch("swh.web.api.apiresponse.json")

    data = {"data": [12, 34], "id": "adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"}

    mock_filter.return_value = data
    mock_shorten_path.return_value = "my_short_path"
    # The module-level ``json`` is mocked, so give ``dumps`` a real value.
    mock_json.dumps.return_value = json.dumps(data)

    accepted_response_formats = {
        "html": "text/html",
        "yaml": "application/yaml",
        "json": "application/json",
    }

    for resp_format, content_type in accepted_response_formats.items():
        request = api_request_factory.get("/api/test/path/")
        setattr(request, "accepted_media_type", content_type)

        rv = make_api_response(request, data)

        mock_filter.assert_called_with(request, data)

        if resp_format == "html":
            assert rv.status_code == 200, rv.content
            assert_contains(rv, json.dumps(data))
        else:
            assert rv.status_code == 200, rv.data
            assert rv.data == data


def test_swh_filter_renderer_do_nothing(api_request_factory):
    """Without a ``fields`` query parameter the data comes back unchanged."""
    input_data = {"a": "some-data"}

    request = api_request_factory.get("/api/test/path/", data={})
    setattr(request, "query_params", request.GET)

    actual_data = filter_by_fields(request, input_data)

    assert actual_data == input_data


def test_swh_filter_renderer_do_filter(mocker, api_request_factory):
    """``fields=a,c`` is forwarded to ``filter_field_keys`` as the set ``{"a", "c"}``."""
    mock_ffk = mocker.patch("swh.web.api.apiresponse.utils.filter_field_keys")
    mock_ffk.return_value = {"a": "some-data"}

    request = api_request_factory.get("/api/test/path/", data={"fields": "a,c"})
    setattr(request, "query_params", request.GET)

    input_data = {"a": "some-data", "b": "some-other-data"}

    actual_data = filter_by_fields(request, input_data)

    assert actual_data == {"a": "some-data"}

    mock_ffk.assert_called_once_with(input_data, {"a", "c"})


def test_error_response_handler(mocker, api_client):
    """An exception raised by ``archive.stat_counters`` results in HTTP 500."""
    mock_archive = mocker.patch("swh.web.api.views.stat.archive")
    mock_archive.stat_counters.side_effect = Exception("Something went wrong")

    url = reverse("api-1-stat-counters")
    resp = api_client.get(url)

    assert resp.status_code == 500


# ---------------------------------------------------------------------------
# Next file in the patch:
# diff --git a/swh/web/tests/api/views/__init__.py
#            b/swh/web/tests/api/views/__init__.py
# index ec25c84a..f2f97daa 100644
# --- a/swh/web/tests/api/views/__init__.py
# +++ b/swh/web/tests/api/views/__init__.py
# @@ -1,71 +1,71 @@
# ---------------------------------------------------------------------------
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

from typing import Any, Dict, Optional

from rest_framework.response import Response
from rest_framework.test import APIClient


def check_api_get_responses(
    api_client: APIClient, url: str, status_code: int
) -> Response:
    """Helper function to check Web API responses to GET requests
    for all accepted content types.

    Args:
        api_client: DRF test client
        url: Web API URL to check responses
        status_code: expected HTTP status code

    Returns:
        The Web API JSON response
    """
    # check API Web UI
    html_content_type = "text/html"
    resp = api_client.get(url, HTTP_ACCEPT=html_content_type)
    assert resp.status_code == status_code, resp.content
    # prefix match: the HTML Content-Type may carry a suffix after the media type
    assert resp["Content-Type"].startswith(html_content_type)

    # check YAML response
    yaml_content_type = "application/yaml"
    resp = api_client.get(url, HTTP_ACCEPT=yaml_content_type)
    assert resp.status_code == status_code, resp.data
    assert resp["Content-Type"] == yaml_content_type

    # check JSON response
    resp = api_client.get(url)
    assert resp.status_code == status_code, resp.data
    assert resp["Content-Type"] == "application/json"

    return resp


def check_api_post_responses(
    api_client: APIClient, url: str, data: Optional[Dict[str, Any]], status_code: int
) -> Response:
    """Helper function to check Web API responses to POST requests
    for all accepted content types.

    Args:
        api_client: DRF test client
        url: Web API URL to check responses
        data: payload posted with ``format="json"`` (may be ``None``)
        status_code: expected HTTP status code

    Returns:
        The Web API JSON response
    """
    # check YAML response
    yaml_content_type = "application/yaml"
    resp = api_client.post(url, data=data, format="json", HTTP_ACCEPT=yaml_content_type)
    assert resp.status_code == status_code, resp.data
    assert resp["Content-Type"] == yaml_content_type

    # check JSON response
    resp = api_client.post(url, data=data, format="json")
    assert resp.status_code == status_code, resp.data
    assert resp["Content-Type"] == "application/json"

    return resp