diff --git a/swh/web/api/apiurls.py b/swh/web/api/apiurls.py index 3bfb29f2..206404ea 100644 --- a/swh/web/api/apiurls.py +++ b/swh/web/api/apiurls.py @@ -1,117 +1,127 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import functools from typing import Dict, List, Optional from django.http import HttpResponse +from django.utils.cache import add_never_cache_headers from rest_framework.decorators import api_view from swh.web.api import throttling from swh.web.api.apiresponse import make_api_response from swh.web.common.urlsindex import UrlsIndex class APIUrls(UrlsIndex): """ Class to manage API documentation URLs. - Indexes all routes documented using apidoc's decorators. - Tracks endpoint/request processing method relationships for use in generating related urls in API documentation """ _apidoc_routes = {} # type: Dict[str, Dict[str, str]] scope = "api" @classmethod def get_app_endpoints(cls) -> Dict[str, Dict[str, str]]: return cls._apidoc_routes @classmethod def add_doc_route( cls, route: str, docstring: str, noargs: bool = False, api_version: str = "1", **kwargs, ) -> None: """ Add a route to the self-documenting API reference """ route_name = route[1:-1].replace("/", "-") if not noargs: route_name = "%s-doc" % route_name route_view_name = "api-%s-%s" % (api_version, route_name) if route not in cls._apidoc_routes: d = { "docstring": docstring, "route": "/api/%s%s" % (api_version, route), "route_view_name": route_view_name, } for k, v in kwargs.items(): d[k] = v cls._apidoc_routes[route] = d def api_route( url_pattern: str, view_name: Optional[str] = None, methods: List[str] = ["GET", "HEAD", "OPTIONS"], throttle_scope: str = "swh_api", api_version: str = "1", checksum_args: Optional[List[str]] = None, + never_cache: bool = False, ): """ Decorator to ease the registration of an API endpoint using the Django REST Framework. Args: url_pattern: the url pattern used by DRF to identify the API route view_name: the name of the API view associated to the route used to reverse the url methods: array of HTTP methods supported by the API route throttle_scope: Named scope for rate limiting api_version: web API version checksum_args: list of view argument names holding checksum values + never_cache: define if api response must be cached """ url_pattern = "^" + api_version + url_pattern + "$" def decorator(f): # create a DRF view from the wrapped function @api_view(methods) @throttling.throttle_scope(throttle_scope) @functools.wraps(f) def api_view_f(request, **kwargs): response = f(request, **kwargs) doc_data = None # check if response has been forwarded by api_doc decorator if isinstance(response, dict) and "doc_data" in response: doc_data = response["doc_data"] response = response["data"] # check if HTTP response needs to be created if not isinstance(response, HttpResponse): - return make_api_response(request, data=response, doc_data=doc_data) + api_response = make_api_response( + request, data=response, doc_data=doc_data + ) else: - return response + api_response = response + + if never_cache: + add_never_cache_headers(api_response) + + return api_response # small hacks for correctly generating API endpoints index doc api_view_f.__name__ = f.__name__ api_view_f.http_method_names = methods # register the route and its view in the endpoints index APIUrls.add_url_pattern(url_pattern, api_view_f, view_name) if checksum_args: APIUrls.add_redirect_for_checksum_args( view_name, [url_pattern], checksum_args ) return f return decorator diff --git a/swh/web/api/views/origin_save.py b/swh/web/api/views/origin_save.py index 490bc811..150ee3e0 100644 --- a/swh/web/api/views/origin_save.py +++ b/swh/web/api/views/origin_save.py @@ -1,89 +1,87 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -from django.views.decorators.cache import never_cache - from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.common.origin_save import ( create_save_origin_request, get_save_origin_requests, ) -@never_cache @api_route( r"/origin/save/(?P.+)/url/(?P.+)/", "api-1-save-origin", methods=["GET", "POST"], throttle_scope="swh_save_origin", + never_cache=True, ) @api_doc("/origin/save/") @format_docstring() def api_save_origin(request, visit_type, origin_url): """ .. http:get:: /api/1/origin/save/(visit_type)/url/(origin_url)/ .. http:post:: /api/1/origin/save/(visit_type)/url/(origin_url)/ Request the saving of a software origin into the archive or check the status of previously created save requests. That endpoint enables to create a saving task for a software origin through a POST request. Depending of the provided origin url, the save request can either be: * immediately **accepted**, for well known code hosting providers like for instance GitHub or GitLab * **rejected**, in case the url is blacklisted by Software Heritage * **put in pending state** until a manual check is done in order to determine if it can be loaded or not Once a saving request has been accepted, its associated saving task status can then be checked through a GET request on the same url. Returned status can either be: * **not created**: no saving task has been created * **not yet scheduled**: saving task has been created but its execution has not yet been scheduled * **scheduled**: the task execution has been scheduled * **succeeded**: the saving task has been successfully executed * **failed**: the saving task has been executed but it failed When issuing a POST request an object will be returned while a GET request will return an array of objects (as multiple save requests might have been submitted for the same origin). :param string visit_type: the type of visit to perform (currently the supported types are ``git``, ``hg`` and ``svn``) :param string origin_url: the url of the origin to save {common_headers} :>json string origin_url: the url of the origin to save :>json string visit_type: the type of visit to perform :>json string save_request_date: the date (in iso format) the save request was issued :>json string save_request_status: the status of the save request, either **accepted**, **rejected** or **pending** :>json string save_task_status: the status of the origin saving task, either **not created**, **not yet scheduled**, **scheduled**, **succeeded** or **failed** :statuscode 200: no error :statuscode 400: an invalid visit type or origin url has been provided :statuscode 403: the provided origin url is blacklisted :statuscode 404: no save requests have been found for a given origin """ if request.method == "POST": sor = create_save_origin_request(visit_type, origin_url) del sor["id"] else: sor = get_save_origin_requests(visit_type, origin_url) for s in sor: del s["id"] return sor diff --git a/swh/web/api/views/vault.py b/swh/web/api/views/vault.py index df5eabc0..9789b2a9 100644 --- a/swh/web/api/views/vault.py +++ b/swh/web/api/views/vault.py @@ -1,287 +1,286 @@ # Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Any, Dict from django.http import HttpResponse from django.shortcuts import redirect -from django.views.decorators.cache import never_cache from swh.model import hashutil from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup from swh.web.common import archive, query from swh.web.common.utils import reverse # XXX: a bit spaghetti. Would be better with class-based views. def _dispatch_cook_progress(request, obj_type, obj_id): hex_id = hashutil.hash_to_hex(obj_id) object_name = obj_type.split("_")[0] if request.method == "GET": return api_lookup( archive.vault_progress, obj_type, obj_id, notfound_msg=( "Cooking of {} '{}' was never requested.".format(object_name, hex_id) ), request=request, ) elif request.method == "POST": email = request.POST.get("email", request.GET.get("email", None)) return api_lookup( archive.vault_cook, obj_type, obj_id, email, notfound_msg=("{} '{}' not found.".format(object_name.title(), hex_id)), request=request, ) def _vault_response(vault_response: Dict[str, Any]) -> Dict[str, Any]: return { "fetch_url": vault_response["fetch_url"], "obj_type": vault_response["type"], "progress_message": vault_response["progress_msg"], "id": vault_response["task_id"], "status": vault_response["task_status"], "obj_id": vault_response["object_id"], } -@never_cache @api_route( r"/vault/directory/(?P[0-9a-f]+)/", "api-1-vault-cook-directory", methods=["GET", "POST"], checksum_args=["dir_id"], throttle_scope="swh_vault_cooking", + never_cache=True, ) @api_doc("/vault/directory/") @format_docstring() def api_vault_cook_directory(request, dir_id): """ .. http:get:: /api/1/vault/directory/(dir_id)/ .. http:post:: /api/1/vault/directory/(dir_id)/ Request the cooking of an archive for a directory or check its cooking status. That endpoint enables to create a vault cooking task for a directory through a POST request or check the status of a previously created one through a GET request. Once the cooking task has been executed, the resulting archive can be downloaded using the dedicated endpoint :http:get:`/api/1/vault/directory/(dir_id)/raw/`. Then to extract the cooked directory in the current one, use:: $ tar xvf path/to/directory.tar.gz :param string dir_id: the directory's sha1 identifier :query string email: e-mail to notify when the archive is ready {common_headers} :>json string fetch_url: the url from which to download the archive once it has been cooked (see :http:get:`/api/1/vault/directory/(dir_id)/raw/`) :>json string obj_type: the type of object to cook (directory or revision) :>json string progress_message: message describing the cooking task progress :>json number id: the cooking task id :>json string status: the cooking task status (either **new**, **pending**, **done** or **failed**) :>json string obj_id: the identifier of the object to cook :statuscode 200: no error :statuscode 400: an invalid directory identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( dir_id, ["sha1"], "Only sha1_git is supported." ) res = _dispatch_cook_progress(request, "directory", obj_id) res["fetch_url"] = reverse( "api-1-vault-fetch-directory", url_args={"dir_id": dir_id}, request=request, ) return _vault_response(res) @api_route( r"/vault/directory/(?P[0-9a-f]+)/raw/", "api-1-vault-fetch-directory", checksum_args=["dir_id"], ) @api_doc("/vault/directory/raw/") def api_vault_fetch_directory(request, dir_id): """ .. http:get:: /api/1/vault/directory/(dir_id)/raw/ Fetch the cooked archive for a directory. See :http:get:`/api/1/vault/directory/(dir_id)/` to get more details on directory cooking. :param string dir_id: the directory's sha1 identifier :resheader Content-Type: application/octet-stream :statuscode 200: no error :statuscode 400: an invalid directory identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( dir_id, ["sha1"], "Only sha1_git is supported." ) res = api_lookup( archive.vault_fetch, "directory", obj_id, notfound_msg="Cooked archive for directory '{}' not found.".format(dir_id), request=request, ) fname = "{}.tar.gz".format(dir_id) response = HttpResponse(res, content_type="application/gzip") response["Content-disposition"] = "attachment; filename={}".format(fname) return response -@never_cache @api_route( r"/vault/revision/(?P[0-9a-f]+)/gitfast/", "api-1-vault-cook-revision_gitfast", methods=["GET", "POST"], checksum_args=["rev_id"], throttle_scope="swh_vault_cooking", + never_cache=True, ) @api_doc("/vault/revision/gitfast/") @format_docstring() def api_vault_cook_revision_gitfast(request, rev_id): """ .. http:get:: /api/1/vault/revision/(rev_id)/gitfast/ .. http:post:: /api/1/vault/revision/(rev_id)/gitfast/ Request the cooking of a gitfast archive for a revision or check its cooking status. That endpoint enables to create a vault cooking task for a revision through a POST request or check the status of a previously created one through a GET request. Once the cooking task has been executed, the resulting gitfast archive can be downloaded using the dedicated endpoint :http:get:`/api/1/vault/revision/(rev_id)/gitfast/raw/`. Then to import the revision in the current directory, use:: $ git init $ zcat path/to/revision.gitfast.gz | git fast-import $ git checkout HEAD :param string rev_id: the revision's sha1 identifier :query string email: e-mail to notify when the gitfast archive is ready {common_headers} :>json string fetch_url: the url from which to download the archive once it has been cooked (see :http:get:`/api/1/vault/revision/(rev_id)/gitfast/raw/`) :>json string obj_type: the type of object to cook (directory or revision) :>json string progress_message: message describing the cooking task progress :>json number id: the cooking task id :>json string status: the cooking task status (new/pending/done/failed) :>json string obj_id: the identifier of the object to cook :statuscode 200: no error :statuscode 400: an invalid revision identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( rev_id, ["sha1"], "Only sha1_git is supported." ) res = _dispatch_cook_progress(request, "revision_gitfast", obj_id) res["fetch_url"] = reverse( "api-1-vault-fetch-revision_gitfast", url_args={"rev_id": rev_id}, request=request, ) return _vault_response(res) @api_route( r"/vault/revision/(?P[0-9a-f]+)/gitfast/raw/", "api-1-vault-fetch-revision_gitfast", checksum_args=["rev_id"], ) @api_doc("/vault/revision/gitfast/raw/") def api_vault_fetch_revision_gitfast(request, rev_id): """ .. http:get:: /api/1/vault/revision/(rev_id)/gitfast/raw/ Fetch the cooked gitfast archive for a revision. See :http:get:`/api/1/vault/revision/(rev_id)/gitfast/` to get more details on directory cooking. :param string rev_id: the revision's sha1 identifier :resheader Content-Type: application/octet-stream :statuscode 200: no error :statuscode 400: an invalid revision identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( rev_id, ["sha1"], "Only sha1_git is supported." ) res = api_lookup( archive.vault_fetch, "revision_gitfast", obj_id, notfound_msg="Cooked archive for revision '{}' not found.".format(rev_id), request=request, ) fname = "{}.gitfast.gz".format(rev_id) response = HttpResponse(res, content_type="application/gzip") response["Content-disposition"] = "attachment; filename={}".format(fname) return response @api_route( r"/vault/revision_gitfast/(?P[0-9a-f]+)/raw/", "api-1-vault-revision_gitfast-raw", checksum_args=["rev_id"], ) @api_doc("/vault/revision_gitfast/raw/", tags=["hidden"]) def _api_vault_revision_gitfast_raw(request, rev_id): """ The vault backend sends an email containing an invalid url to fetch a gitfast archive. So setup a redirection to the correct one as a temporary workaround. """ rev_gitfast_raw_url = reverse( "api-1-vault-fetch-revision_gitfast", url_args={"rev_id": rev_id} ) return redirect(rev_gitfast_raw_url) diff --git a/swh/web/tests/api/test_apiurls.py b/swh/web/tests/api/test_apiurls.py new file mode 100644 index 00000000..0983f5de --- /dev/null +++ b/swh/web/tests/api/test_apiurls.py @@ -0,0 +1,38 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +from swh.web.api.apiurls import api_route +from swh.web.common.utils import reverse +from swh.web.tests.utils import check_api_get_responses + + +@api_route(r"/some/route/(?P[0-9]+)/", "api-1-some-route") +def api_some_route(request, int_arg): + return {"result": int(int_arg)} + + +@api_route( + r"/never/cache/route/(?P[0-9]+)/", + "api-1-never-cache-route", + never_cache=True, +) +def api_never_cache_route(request, int_arg): + return {"result": int(int_arg)} + + +def test_api_route_with_cache(api_client): + url = reverse("api-1-some-route", url_args={"int_arg": 1}) + resp = check_api_get_responses(api_client, url, status_code=200) + assert resp.data == {"result": 1} + assert "Cache-Control" not in resp + + +def test_api_route_never_cache(api_client): + url = reverse("api-1-never-cache-route", url_args={"int_arg": 1}) + resp = check_api_get_responses(api_client, url, status_code=200) + assert resp.data == {"result": 1} + assert "Cache-Control" in resp + assert resp["Cache-Control"] == "max-age=0, no-cache, no-store, must-revalidate"