diff --git a/swh/web/add_forge_now/api_views.py b/swh/web/add_forge_now/api_views.py index 42b00c5b..d5ce9468 100644 --- a/swh/web/add_forge_now/api_views.py +++ b/swh/web/add_forge_now/api_views.py @@ -1,391 +1,398 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json from typing import Any, Dict, Union from django.core.exceptions import ObjectDoesNotExist from django.core.paginator import Paginator from django.db import transaction from django.forms import CharField, ModelForm from django.http import HttpResponseBadRequest from django.http.request import HttpRequest from django.http.response import HttpResponse, HttpResponseForbidden from rest_framework import serializers from rest_framework.request import Request from rest_framework.response import Response from swh.web.add_forge_now.models import Request as AddForgeRequest from swh.web.add_forge_now.models import RequestActorRole as AddForgeNowRequestActorRole from swh.web.add_forge_now.models import RequestHistory as AddForgeNowRequestHistory from swh.web.add_forge_now.models import RequestStatus as AddForgeNowRequestStatus from swh.web.api.apidoc import api_doc, format_docstring -from swh.web.api.apiurls import api_route +from swh.web.api.apiurls import APIUrls, api_route from swh.web.auth.utils import is_add_forge_now_moderator from swh.web.utils import reverse from swh.web.utils.exc import BadInputExc def _block_while_testing(): """Replaced by tests to check concurrency behavior""" pass class AddForgeNowRequestForm(ModelForm): forge_contact_comment = CharField( required=False, ) class Meta: model = AddForgeRequest fields = ( "forge_type", "forge_url", "forge_contact_email", "forge_contact_name", "forge_contact_comment", "submitter_forward_username", ) class AddForgeNowRequestHistoryForm(ModelForm): new_status = CharField( max_length=200, required=False, ) class Meta: model = AddForgeNowRequestHistory fields = ("text", "new_status") class AddForgeNowRequestSerializer(serializers.ModelSerializer): inbound_email_address = serializers.CharField() forge_domain = serializers.CharField() class Meta: model = AddForgeRequest fields = "__all__" class AddForgeNowRequestPublicSerializer(serializers.ModelSerializer): """Serializes AddForgeRequest without private fields.""" class Meta: model = AddForgeRequest fields = ("id", "forge_url", "forge_type", "status", "submission_date") class AddForgeNowRequestHistorySerializer(serializers.ModelSerializer): message_source_url = serializers.SerializerMethodField() class Meta: model = AddForgeNowRequestHistory exclude = ("request", "message_source") def get_message_source_url(self, request_history): if request_history.message_source is None: return None return reverse( "forge-add-message-source", url_args={"id": request_history.pk}, request=self.context["request"], ) class AddForgeNowRequestHistoryPublicSerializer(serializers.ModelSerializer): class Meta: model = AddForgeNowRequestHistory fields = ("id", "date", "new_status", "actor_role") +add_forge_now_api_urls = APIUrls() + + @api_route( r"/add-forge/request/create/", "api-1-add-forge-request-create", methods=["POST"], + api_urls=add_forge_now_api_urls, ) @api_doc("/add-forge/request/create", category="Request archival") @format_docstring() @transaction.atomic def api_add_forge_request_create(request: Union[HttpRequest, Request]) -> 
HttpResponse: """ .. http:post:: /api/1/add-forge/request/create/ Create a new request to add a forge to the list of those crawled regularly by Software Heritage. .. warning:: That endpoint is not publicly available and requires authentication in order to be able to request it. {common_headers} :[0-9]+)/update/", "api-1-add-forge-request-update", methods=["POST"], + api_urls=add_forge_now_api_urls, ) @api_doc("/add-forge/request/update", category="Request archival", tags=["hidden"]) @format_docstring() @transaction.atomic def api_add_forge_request_update( request: Union[HttpRequest, Request], id: int ) -> HttpResponse: """ .. http:post:: /api/1/add-forge/request/update/ Update a request to add a forge to the list of those crawled regularly by Software Heritage. .. warning:: That endpoint is not publicly available and requires authentication in order to be able to request it. {common_headers} :[0-9]+)/get/", "api-1-add-forge-request-get", methods=["GET"], + api_urls=add_forge_now_api_urls, ) @api_doc("/add-forge/request/get", category="Request archival") @format_docstring() def api_add_forge_request_get(request: Request, id: int): """ .. http:get:: /api/1/add-forge/request/get/ Return all details about an add-forge request. {common_headers} :param int id: add-forge request identifier :statuscode 200: request details successfully returned :statuscode 400: request identifier does not exist """ try: add_forge_request = AddForgeRequest.objects.get(id=id) except ObjectDoesNotExist: raise BadInputExc("Request id does not exist") request_history = AddForgeNowRequestHistory.objects.filter( request=add_forge_request ).order_by("id") if is_add_forge_now_moderator(request.user): data = AddForgeNowRequestSerializer(add_forge_request).data history = AddForgeNowRequestHistorySerializer( request_history, many=True, context={"request": request} ).data else: data = AddForgeNowRequestPublicSerializer(add_forge_request).data history = AddForgeNowRequestHistoryPublicSerializer( request_history, many=True ).data return {"request": data, "history": history} diff --git a/swh/web/add_forge_now/urls.py b/swh/web/add_forge_now/urls.py index e8b1423e..d9642637 100644 --- a/swh/web/add_forge_now/urls.py +++ b/swh/web/add_forge_now/urls.py @@ -1,47 +1,47 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import re_path as url from swh.web.add_forge_now.admin_views import ( add_forge_now_request_dashboard, add_forge_now_requests_moderation_dashboard, ) # register Web API endpoints -import swh.web.add_forge_now.api_views # noqa +from swh.web.add_forge_now.api_views import add_forge_now_api_urls from swh.web.add_forge_now.views import ( add_forge_request_list_datatables, create_request_create, create_request_help, create_request_list, create_request_message_source, ) urlpatterns = [ url( r"^add-forge/request/list/datatables/$", add_forge_request_list_datatables, name="add-forge-request-list-datatables", ), url(r"^add-forge/request/create/$", create_request_create, name="forge-add-create"), url(r"^add-forge/request/list/$", create_request_list, name="forge-add-list"), url( r"^add-forge/request/message-source/(?P\d+)/$", create_request_message_source, name="forge-add-message-source", ), url(r"^add-forge/request/help/$", create_request_help, name="forge-add-help"), url( r"^admin/add-forge/requests/$", 
add_forge_now_requests_moderation_dashboard, name="add-forge-now-requests-moderation", ), url( r"^admin/add-forge/request/(?P(\d)+)/$", add_forge_now_request_dashboard, name="add-forge-now-request-dashboard", ), -] +] + add_forge_now_api_urls.get_url_patterns() diff --git a/swh/web/api/apidoc.py b/swh/web/api/apidoc.py index 96dd96c0..25b1f5c9 100644 --- a/swh/web/api/apidoc.py +++ b/swh/web/api/apidoc.py @@ -1,487 +1,487 @@ # Copyright (C) 2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import defaultdict import functools from functools import wraps import os import re import textwrap from typing import List import docutils.nodes import docutils.parsers.rst import docutils.utils from django.shortcuts import redirect from rest_framework.decorators import api_view from swh.web.api.apiresponse import make_api_response -from swh.web.api.apiurls import APIUrls, CategoryId +from swh.web.api.apiurls import CategoryId, api_urls from swh.web.utils import parse_rst, reverse class _HTTPDomainDocVisitor(docutils.nodes.NodeVisitor): """ docutils visitor for walking on a parsed docutils document containing sphinx httpdomain roles. Its purpose is to extract relevant info regarding swh api endpoints (for instance url arguments) from their docstring written using sphinx httpdomain; and produce the main description back into a ReST string """ # httpdomain roles we want to parse (based on sphinxcontrib.httpdomain 1.6) parameter_roles = ("param", "parameter", "arg", "argument") request_json_object_roles = ("reqjsonobj", "reqjson", "jsonobj", ">json") response_json_array_roles = ("resjsonarr", ">jsonarr") query_parameter_roles = ("queryparameter", "queryparam", "qparam", "query") request_header_roles = ("header", "resheader", "responseheader") status_code_roles = ("statuscode", "status", "code") def __init__(self, document, data): super().__init__(document) self.data = data self.args_set = set() self.params_set = set() self.inputs_set = set() self.returns_set = set() self.status_codes_set = set() self.reqheaders_set = set() self.resheaders_set = set() self.current_json_obj = None self.current_field_name = None def _default_visit(self, node: docutils.nodes.Element) -> str: """Simply visits a text node, drops its start and end tags, visits the children, and concatenates their results.""" return "".join(map(self.dispatch_visit, node.children)) def visit_emphasis(self, node: docutils.nodes.emphasis) -> str: return f"*{self._default_visit(node)}*" def visit_strong(self, node: docutils.nodes.emphasis) -> str: return f"**{self._default_visit(node)}**" def visit_reference(self, node: docutils.nodes.reference) -> str: text = self._default_visit(node) refuri = node.attributes.get("refuri") if refuri is not None: return f"`{text} <{refuri}>`__" else: return f"`{text}`_" def visit_target(self, node: docutils.nodes.reference) -> str: parts = ["\n"] parts.extend( f".. 
_{name}: {node.attributes['refuri']}" for name in node.attributes["names"] ) return "\n".join(parts) def visit_literal(self, node: docutils.nodes.literal) -> str: return f"``{self._default_visit(node)}``" def visit_field_name(self, node: docutils.nodes.field_name) -> str: self.current_field_name = node.astext() return "" def visit_field_body(self, node: docutils.nodes.field_body) -> str: text = self._default_visit(node).strip() assert text, str(node) field_data = self.current_field_name.split(" ") # Parameters if field_data[0] in self.parameter_roles: if field_data[2] not in self.args_set: self.data["args"].append( {"name": field_data[2], "type": field_data[1], "doc": text} ) self.args_set.add(field_data[2]) # Query Parameters if field_data[0] in self.query_parameter_roles: if field_data[2] not in self.params_set: self.data["params"].append( {"name": field_data[2], "type": field_data[1], "doc": text} ) self.params_set.add(field_data[2]) # Request data type if ( field_data[0] in self.request_json_array_roles or field_data[0] in self.request_json_object_roles ): # array if field_data[0] in self.request_json_array_roles: self.data["input_type"] = "array" # object else: self.data["input_type"] = "object" # input object field if field_data[2] not in self.inputs_set: self.data["inputs"].append( {"name": field_data[2], "type": field_data[1], "doc": text} ) self.inputs_set.add(field_data[2]) self.current_json_obj = self.data["inputs"][-1] # Response type if ( field_data[0] in self.response_json_array_roles or field_data[0] in self.response_json_object_roles ): # array if field_data[0] in self.response_json_array_roles: self.data["return_type"] = "array" # object else: self.data["return_type"] = "object" # returned object field if field_data[2] not in self.returns_set: self.data["returns"].append( {"name": field_data[2], "type": field_data[1], "doc": text} ) self.returns_set.add(field_data[2]) self.current_json_obj = self.data["returns"][-1] # Status Codes if field_data[0] in self.status_code_roles: if field_data[1] not in self.status_codes_set: self.data["status_codes"].append({"code": field_data[1], "doc": text}) self.status_codes_set.add(field_data[1]) # Request Headers if field_data[0] in self.request_header_roles: if field_data[1] not in self.reqheaders_set: self.data["reqheaders"].append({"name": field_data[1], "doc": text}) self.reqheaders_set.add(field_data[1]) # Response Headers if field_data[0] in self.response_header_roles: if field_data[1] not in self.resheaders_set: resheader = {"name": field_data[1], "doc": text} self.data["resheaders"].append(resheader) self.resheaders_set.add(field_data[1]) if ( resheader["name"] == "Content-Type" and resheader["doc"] == "application/octet-stream" ): self.data["return_type"] = "octet stream" # Don't return anything in the description; these nodes only add text # to other fields return "" # We ignore these nodes and handle their subtrees directly in # visit_field_name and visit_field_body visit_field = visit_field_list = _default_visit def visit_paragraph(self, node: docutils.nodes.paragraph) -> str: """ Visit relevant paragraphs to parse """ # only parsed top level paragraphs text = self._default_visit(node) return "\n\n" + text def visit_literal_block(self, node: docutils.nodes.literal_block) -> str: """ Visit literal blocks """ text = node.astext() return f"\n\n::\n\n{textwrap.indent(text, ' ')}\n" def visit_bullet_list(self, node: docutils.nodes.bullet_list) -> str: parts = ["\n\n"] for child in node.traverse(): # process list item if 
isinstance(child, docutils.nodes.paragraph): line_text = self.dispatch_visit(child) parts.append("\t* %s\n" % textwrap.indent(line_text, "\t ").strip()) return "".join(parts) # visit_bullet_list collects and handles this with a more global view: visit_list_item = _default_visit def visit_warning(self, node: docutils.nodes.warning) -> str: text = self._default_visit(node) return "\n\n.. warning::\n%s\n" % textwrap.indent(text, "\t") def visit_Text(self, node: docutils.nodes.Text) -> str: """Leaf node""" return str(node).replace("\n", " ") # Prettier in generated HTML def visit_problematic(self, node: docutils.nodes.problematic) -> str: # api urls cleanup to generate valid links afterwards text = self._default_visit(node) subs_made = 1 while subs_made: (text, subs_made) = re.subn(r"(:http:.*)(\(\w+\))", r"\1", text) subs_made = 1 while subs_made: (text, subs_made) = re.subn(r"(:http:.*)(\[.*\])", r"\1", text) text = re.sub(r"([^:])//", r"\1/", text) # transform references to api endpoints doc into valid rst links text = re.sub(":http:get:`([^,`]*)`", r"`\1 <\1doc/>`_", text) # transform references to some elements into bold text text = re.sub(":http:header:`(.*)`", r"**\1**", text) text = re.sub(":func:`(.*)`", r"**\1**", text) text = re.sub(":mod:`(.*)`", r"**\1**", text) # extract example urls if ":swh_web_api:" in text: # Extract examples to their own section examples_str = re.sub(":swh_web_api:`(.+)`.*", r"/api/1/\1", text) self.data["examples"] += examples_str.split("\n") return text def visit_block_quote(self, node: docutils.nodes.block_quote) -> str: return self._default_visit(node) return ( f".. code-block::\n" f"{textwrap.indent(self._default_visit(node), ' ')}\n" ) def visit_title_reference(self, node: docutils.nodes.title_reference) -> str: text = self._default_visit(node) raise Exception( f"Unexpected title reference. " f"Possible cause: you used `{text}` instead of ``{text}``" ) def visit_document(self, node: docutils.nodes.document) -> None: text = self._default_visit(node) # Strip examples; they are displayed separately text = re.split("\n\\*\\*Examples?:\\*\\*\n", text)[0] self.data["description"] = text.strip() def visit_system_message(self, node): return "" def unknown_visit(self, node) -> str: raise NotImplementedError( f"Unknown node type: {node.__class__.__name__}. Value: {node}" ) def unknown_departure(self, node): pass def _parse_httpdomain_doc(doc, data): doc_lines = doc.split("\n") doc_lines_filtered = [] urls = defaultdict(list) default_http_methods = ["HEAD", "OPTIONS"] # httpdomain is a sphinx extension that is unknown to docutils but # fortunately we can still parse its directives' content, # so remove lines with httpdomain directives before executing the # rst parser from docutils for doc_line in doc_lines: if ".. 
http" not in doc_line: doc_lines_filtered.append(doc_line) else: url = doc_line[doc_line.find("/") :] # emphasize url arguments for html rendering url = re.sub(r"\((\w+)\)", r" **\(\1\)** ", url) method = re.search(r"http:(\w+)::", doc_line).group(1) urls[url].append(method.upper()) for url, methods in urls.items(): data["urls"].append({"rule": url, "methods": methods + default_http_methods}) # parse the rst docstring and do not print system messages about # unknown httpdomain roles document = parse_rst("\n".join(doc_lines_filtered), report_level=5) # remove the system_message nodes from the parsed document for node in document.traverse(docutils.nodes.system_message): node.parent.remove(node) # visit the document nodes to extract relevant endpoint info visitor = _HTTPDomainDocVisitor(document, data) document.walkabout(visitor) class APIDocException(Exception): """ Custom exception to signal errors in the use of the APIDoc decorators """ def api_doc( route: str, *, category: CategoryId, noargs: bool = False, tags: List[str] = [], api_version: str = "1", ): """ Decorator for an API endpoint implementation used to generate a dedicated view displaying its HTML documentation. The documentation will be generated from the endpoint docstring based on sphinxcontrib-httpdomain format. Args: route: documentation page's route noargs: set to True if the route has no arguments, and its result should be displayed anytime its documentation is requested. Default to False tags: Further information on api endpoints. Two values are possibly expected: * hidden: remove the entry points from the listing * upcoming: display the entry point but it is not followable * deprecated: display the entry point as deprecated in the index api_version: api version string """ tags_set = set(tags) # @api_doc() Decorator call def decorator(f): # if the route is not hidden, add it to the index if "hidden" not in tags_set: doc_data = get_doc_data(f, route, noargs) doc_desc = doc_data["description"] - APIUrls.add_doc_route( + api_urls.add_doc_route( route, category, re.split(r"\.\s", doc_desc)[0], noargs=noargs, api_version=api_version, tags=tags_set, ) # create a dedicated view to display endpoint HTML doc @api_view(["GET", "HEAD"]) @wraps(f) def doc_view(request): doc_data = get_doc_data(f, route, noargs) return make_api_response(request, None, doc_data) route_name = "%s-doc" % route[1:-1].replace("/", "-") urlpattern = f"^api/{api_version}{route}doc/$" view_name = "api-%s-%s" % (api_version, route_name) - APIUrls.add_url_pattern(urlpattern, doc_view, view_name) + api_urls.add_url_pattern(urlpattern, doc_view, view_name) # for backward compatibility as previous apidoc URLs were missing # the /api prefix old_view_name = view_name.replace("api-", "") old_urlpattern = f"^{api_version}{route}doc/$" @api_view(["GET", "HEAD"]) def old_doc_view(request): return redirect(reverse(view_name)) - APIUrls.add_url_pattern(old_urlpattern, old_doc_view, old_view_name) + api_urls.add_url_pattern(old_urlpattern, old_doc_view, old_view_name) @wraps(f) def documented_view(request, **kwargs): doc_data = get_doc_data(f, route, noargs) try: return {"data": f(request, **kwargs), "doc_data": doc_data} except Exception as exc: exc.doc_data = doc_data raise exc return documented_view return decorator @functools.lru_cache(maxsize=32) def get_doc_data(f, route, noargs): """ Build documentation data for the decorated api endpoint function """ data = { "description": "", "response_data": None, "urls": [], "args": [], "params": [], "input_type": "", "inputs": 
[], "resheaders": [], "reqheaders": [], "return_type": "", "returns": [], "status_codes": [], "examples": [], "route": route, "noargs": noargs, } if not f.__doc__: raise APIDocException( "apidoc: expected a docstring" " for function %s" % (f.__name__,) ) # use raw docstring as endpoint documentation if sphinx # httpdomain is not used if ".. http" not in f.__doc__: data["description"] = f.__doc__ # else parse the sphinx httpdomain docstring with docutils # (except when building the swh-web documentation through autodoc # sphinx extension, not needed and raise errors with sphinx >= 1.7) elif "SWH_DOC_BUILD" not in os.environ: _parse_httpdomain_doc(f.__doc__, data) # process input/returned object info for nicer html display inputs_list = "" returns_list = "" for inp in data["inputs"]: # special case for array of non object type, for instance # :jsonarr string -: an array of string if ret["name"] != "-": returns_list += "\t* **%s (%s)**: %s\n" % ( ret["name"], ret["type"], textwrap.indent(ret["doc"], "\t "), ) data["inputs_list"] = inputs_list data["returns_list"] = returns_list return data DOC_COMMON_HEADERS = """ :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request""" DOC_RESHEADER_LINK = """ :resheader Link: indicates that a subsequent result page is available and contains the url pointing to it """ DEFAULT_SUBSTITUTIONS = { "common_headers": DOC_COMMON_HEADERS, "resheader_link": DOC_RESHEADER_LINK, } def format_docstring(**substitutions): def decorator(f): f.__doc__ = f.__doc__.format(**{**DEFAULT_SUBSTITUTIONS, **substitutions}) return f return decorator diff --git a/swh/web/api/apiurls.py b/swh/web/api/apiurls.py index 3d389204..ac932ee5 100644 --- a/swh/web/api/apiurls.py +++ b/swh/web/api/apiurls.py @@ -1,133 +1,127 @@ # Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import functools from typing import Dict, List, Optional from typing_extensions import Literal from django.http.response import HttpResponseBase from rest_framework.decorators import api_view from swh.web.api import throttling from swh.web.api.apiresponse import make_api_response from swh.web.utils.urlsindex import UrlsIndex CategoryId = Literal[ "Archive", "Batch download", "Metadata", "Request archival", "Miscellaneous", "test" ] class APIUrls(UrlsIndex): - """ - Class to manage API documentation URLs. - - - Indexes all routes documented using apidoc's decorators. 
- - Tracks endpoint/request processing method relationships for use in - generating related urls in API documentation - - """ + """Class to manage API URLs and endpoint documentation URLs.""" - _apidoc_routes: Dict[str, Dict[str, str]] = {} - scope = "api" + apidoc_routes: Dict[str, Dict[str, str]] = {} - @classmethod - def get_app_endpoints(cls) -> Dict[str, Dict[str, str]]: - return cls._apidoc_routes + def get_app_endpoints(self) -> Dict[str, Dict[str, str]]: + return self.apidoc_routes - @classmethod def add_doc_route( - cls, + self, route: str, category: CategoryId, docstring: str, noargs: bool = False, api_version: str = "1", **kwargs, ) -> None: """ Add a route to the self-documenting API reference """ route_name = route[1:-1].replace("/", "-") if not noargs: route_name = "%s-doc" % route_name route_view_name = "api-%s-%s" % (api_version, route_name) - if route not in cls._apidoc_routes: + if route not in self.apidoc_routes: d = { "category": category, "docstring": docstring, "route": "/api/%s%s" % (api_version, route), "route_view_name": route_view_name, } for k, v in kwargs.items(): d[k] = v - cls._apidoc_routes[route] = d + self.apidoc_routes[route] = d + + +api_urls = APIUrls() def api_route( url_pattern: str, view_name: str, methods: List[str] = ["GET", "HEAD", "OPTIONS"], throttle_scope: str = "swh_api", api_version: str = "1", checksum_args: Optional[List[str]] = None, never_cache: bool = False, + api_urls: APIUrls = api_urls, ): """ Decorator to ease the registration of an API endpoint using the Django REST Framework. Args: url_pattern: the url pattern used by DRF to identify the API route view_name: the name of the API view associated to the route used to reverse the url methods: array of HTTP methods supported by the API route throttle_scope: Named scope for rate limiting api_version: web API version checksum_args: list of view argument names holding checksum values never_cache: define if api response must be cached """ url_pattern = "^api/" + api_version + url_pattern + "$" def decorator(f): # create a DRF view from the wrapped function @api_view(methods) @throttling.throttle_scope(throttle_scope) @functools.wraps(f) def api_view_f(request, **kwargs): # never_cache will be handled in apiresponse module request.never_cache = never_cache response = f(request, **kwargs) doc_data = None # check if response has been forwarded by api_doc decorator if isinstance(response, dict) and "doc_data" in response: doc_data = response["doc_data"] response = response["data"] # check if HTTP response needs to be created if not isinstance(response, HttpResponseBase): api_response = make_api_response( request, data=response, doc_data=doc_data ) else: api_response = response return api_response # small hacks for correctly generating API endpoints index doc api_view_f.__name__ = f.__name__ api_view_f.http_method_names = methods # register the route and its view in the endpoints index - APIUrls.add_url_pattern(url_pattern, api_view_f, view_name) + api_urls.add_url_pattern(url_pattern, api_view_f, view_name) if checksum_args: - APIUrls.add_redirect_for_checksum_args( + api_urls.add_redirect_for_checksum_args( view_name, [url_pattern], checksum_args ) return f return decorator diff --git a/swh/web/api/urls.py b/swh/web/api/urls.py index 038e7e1c..41b48aaa 100644 --- a/swh/web/api/urls.py +++ b/swh/web/api/urls.py @@ -1,21 +1,21 @@ # Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public 
License version 3, or any later version # See top-level LICENSE file for more information -from swh.web.api.apiurls import APIUrls +from swh.web.api.apiurls import api_urls import swh.web.api.views.content # noqa import swh.web.api.views.directory # noqa import swh.web.api.views.graph # noqa import swh.web.api.views.identifiers # noqa import swh.web.api.views.metadata # noqa import swh.web.api.views.origin # noqa import swh.web.api.views.ping # noqa import swh.web.api.views.raw # noqa import swh.web.api.views.release # noqa import swh.web.api.views.revision # noqa import swh.web.api.views.snapshot # noqa import swh.web.api.views.stat # noqa -urlpatterns = APIUrls.get_url_patterns() +urlpatterns = api_urls.get_url_patterns() diff --git a/swh/web/api/views/utils.py b/swh/web/api/views/utils.py index f3b4346c..8265a75a 100644 --- a/swh/web/api/views/utils.py +++ b/swh/web/api/views/utils.py @@ -1,100 +1,100 @@ # Copyright (C) 2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from types import GeneratorType from typing import Any, Callable, Dict, List, Optional, Tuple, Union from django.http import HttpRequest from rest_framework.decorators import api_view from rest_framework.request import Request from rest_framework.response import Response -from swh.web.api.apiurls import APIUrls, api_route +from swh.web.api.apiurls import api_route, api_urls from swh.web.utils.exc import NotFoundExc EnrichFunction = Callable[[Dict[str, str], Optional[HttpRequest]], Dict[str, str]] EnrichFunctionSearchResult = Callable[ [Tuple[List[Dict[str, Any]], Optional[str]], Optional[HttpRequest]], Tuple[List[Dict[str, Any]], Optional[str]], ] def api_lookup( lookup_fn: Callable[..., Any], *args: Any, notfound_msg: Optional[str] = "Object not found", enrich_fn: Optional[Union[EnrichFunction, EnrichFunctionSearchResult]] = None, request: Optional[HttpRequest] = None, **kwargs: Any, ): r""" Capture a redundant behavior of: - looking up the backend with a criteria (be it an identifier or checksum) passed to the function lookup_fn - if nothing is found, raise an NotFoundExc exception with error message notfound_msg. - Otherwise if something is returned: - either as list, map or generator, map the enrich_fn function to it and return the resulting data structure as list. - either as dict and pass to enrich_fn and return the dict enriched. Args: - lookup_fn: function expects one criteria and optional supplementary \*args. - \*args: supplementary arguments to pass to lookup_fn. - notfound_msg: if nothing matching the criteria is found, raise NotFoundExc with this error message. - enrich_fn: Function to use to enrich the result returned by lookup_fn. Default to the identity function if not provided. - request: Input HTTP request that will be provided as parameter to enrich_fn. Raises: NotFoundExp or whatever `lookup_fn` raises. 
""" def _enrich_fn_noop(x, request): return x if enrich_fn is None: enrich_fn = _enrich_fn_noop res = lookup_fn(*args, **kwargs) if res is None: raise NotFoundExc(notfound_msg) if isinstance(res, (list, GeneratorType)) or type(res) == map: return [enrich_fn(x, request) for x in res] return enrich_fn(res, request) @api_view(["GET", "HEAD"]) def api_home(request: Request): return Response({}, template_name="api.html") -APIUrls.add_url_pattern(r"^api/$", api_home, view_name="api-1-homepage") +api_urls.add_url_pattern(r"^api/$", api_home, view_name="api-1-homepage") @api_route(r"/", "api-1-endpoints") def api_endpoints(request): """Display the list of opened api endpoints.""" routes_by_category = {} - for route, doc in APIUrls.get_app_endpoints().items(): + for route, doc in api_urls.get_app_endpoints().items(): doc["doc_intro"] = doc["docstring"].split("\n\n")[0] routes_by_category.setdefault(doc["category"], []).append(doc) for routes in routes_by_category.values(): routes.sort(key=lambda route: route["route"]) # sort routes by alphabetical category name, with 'miscellaneous' at the end misc_routes = routes_by_category.pop("Miscellaneous") sorted_routes = sorted(routes_by_category.items()) sorted_routes.append(("Miscellaneous", misc_routes)) env = {"doc_routes": sorted_routes} return Response(env, template_name="api-endpoints.html") diff --git a/swh/web/browse/browseurls.py b/swh/web/browse/browseurls.py index 3f67dfa5..48a98524 100644 --- a/swh/web/browse/browseurls.py +++ b/swh/web/browse/browseurls.py @@ -1,48 +1,42 @@ # Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import List, Optional from swh.web.utils.urlsindex import UrlsIndex - -class BrowseUrls(UrlsIndex): - """ - Class to manage swh-web browse application urls. 
- """ - - scope = "browse" +browse_urls = UrlsIndex() def browse_route( *url_patterns: str, view_name: Optional[str] = None, checksum_args: Optional[List[str]] = None, ): """ Decorator to ease the registration of a swh-web browse endpoint Args: url_patterns: list of url patterns used by Django to identify the browse routes view_name: the name of the Django view associated to the routes used to reverse the url """ url_patterns = tuple("^browse/" + url_pattern + "$" for url_pattern in url_patterns) view_name = view_name def decorator(f): # register the route and its view in the browse endpoints index for url_pattern in url_patterns: - BrowseUrls.add_url_pattern(url_pattern, f, view_name) + browse_urls.add_url_pattern(url_pattern, f, view_name) if checksum_args: - BrowseUrls.add_redirect_for_checksum_args( + browse_urls.add_redirect_for_checksum_args( view_name, url_patterns, checksum_args ) return f return decorator diff --git a/swh/web/browse/urls.py b/swh/web/browse/urls.py index 8b389151..8dbd1b4d 100644 --- a/swh/web/browse/urls.py +++ b/swh/web/browse/urls.py @@ -1,76 +1,75 @@ # Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.http import HttpRequest, HttpResponse from django.shortcuts import redirect, render from django.urls import re_path as url -from swh.web.browse.browseurls import BrowseUrls +from swh.web.browse.browseurls import browse_urls from swh.web.browse.identifiers import swhid_browse import swh.web.browse.views.content # noqa import swh.web.browse.views.directory # noqa import swh.web.browse.views.iframe # noqa import swh.web.browse.views.origin # noqa import swh.web.browse.views.release # noqa import swh.web.browse.views.revision # noqa import swh.web.browse.views.snapshot # noqa from swh.web.utils import is_swh_web_production, origin_visit_types, reverse def _browse_help_view(request: HttpRequest) -> HttpResponse: return render( request, "browse-help.html", {"heading": "How to browse the archive ?"} ) def _browse_search_view(request: HttpRequest) -> HttpResponse: return render( request, "browse-search.html", { "heading": "Search software origins to browse", "visit_types": origin_visit_types(use_cache=is_swh_web_production(request)), }, ) def _browse_origin_save_view(request: HttpRequest) -> HttpResponse: return redirect(reverse("origin-save")) def _browse_swhid_iframe_legacy(request: HttpRequest, swhid: str) -> HttpResponse: return redirect(reverse("browse-swhid-iframe", url_args={"swhid": swhid})) urlpatterns = [ url(r"^browse/$", _browse_search_view), url(r"^browse/help/$", _browse_help_view, name="browse-help"), url(r"^browse/search/$", _browse_search_view, name="browse-search"), # for backward compatibility url(r"^browse/origin/save/$", _browse_origin_save_view, name="browse-origin-save"), url( r"^browse/(?Pswh:[0-9]+:[a-z]+:[0-9a-f]+.*)/$", swhid_browse, name="browse-swhid-legacy", ), url( r"^embed/(?Pswh:[0-9]+:[a-z]+:[0-9a-f]+.*)/$", _browse_swhid_iframe_legacy, name="browse-swhid-iframe-legacy", ), # keep legacy SWHID resolving URL with trailing slash for backward compatibility url( r"^(?P(swh|SWH):[0-9]+:[A-Za-z]+:[0-9A-Fa-f]+.*)/$", swhid_browse, name="browse-swhid-legacy", ), url( r"^(?P(swh|SWH):[0-9]+:[A-Za-z]+:[0-9A-Fa-f]+.*)$", swhid_browse, name="browse-swhid", ), + *browse_urls.get_url_patterns(), ] - -urlpatterns += 
BrowseUrls.get_url_patterns() diff --git a/swh/web/save_code_now/api_views.py b/swh/web/save_code_now/api_views.py index d42bb786..bb796d86 100644 --- a/swh/web/save_code_now/api_views.py +++ b/swh/web/save_code_now/api_views.py @@ -1,127 +1,131 @@ # Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Optional, cast from rest_framework.request import Request from swh.web.api.apidoc import api_doc, format_docstring -from swh.web.api.apiurls import api_route +from swh.web.api.apiurls import APIUrls, api_route from swh.web.auth.utils import ( API_SAVE_ORIGIN_PERMISSION, SWH_AMBASSADOR_PERMISSION, privileged_user, ) from swh.web.save_code_now.origin_save import ( create_save_origin_request, get_savable_visit_types, get_save_origin_requests, ) def _savable_visit_types() -> str: docstring = "" if os.environ.get("DJANGO_SETTINGS_MODULE") != "swh.web.settings.tests": visit_types = sorted(get_savable_visit_types()) docstring = "" for visit_type in visit_types[:-1]: docstring += f"**{visit_type}**, " docstring += f"and **{visit_types[-1]}**" return docstring +save_code_now_api_urls = APIUrls() + + @api_route( r"/origin/save/(?P.+)/url/(?P.+)/", "api-1-save-origin", methods=["GET", "POST"], throttle_scope="swh_save_origin", never_cache=True, + api_urls=save_code_now_api_urls, ) @api_doc("/origin/save/", category="Request archival") @format_docstring(visit_types=_savable_visit_types()) def api_save_origin(request: Request, visit_type: str, origin_url: str): """ .. http:get:: /api/1/origin/save/(visit_type)/url/(origin_url)/ .. http:post:: /api/1/origin/save/(visit_type)/url/(origin_url)/ Request the saving of a software origin into the archive or check the status of previously created save requests. That endpoint enables to create a saving task for a software origin through a POST request. Depending of the provided origin url, the save request can either be: * immediately **accepted**, for well known code hosting providers like for instance GitHub or GitLab * **rejected**, in case the url is blacklisted by Software Heritage * **put in pending state** until a manual check is done in order to determine if it can be loaded or not Once a saving request has been accepted, its associated saving task status can then be checked through a GET request on the same url. Returned status can either be: * **not created**: no saving task has been created * **not yet scheduled**: saving task has been created but its execution has not yet been scheduled * **scheduled**: the task execution has been scheduled * **succeeded**: the saving task has been successfully executed * **failed**: the saving task has been executed but it failed When issuing a POST request an object will be returned while a GET request will return an array of objects (as multiple save requests might have been submitted for the same origin). 
:param string visit_type: the type of visit to perform (currently the supported types are {visit_types}) :param string origin_url: the url of the origin to save {common_headers} :>json string origin_url: the url of the origin to save :>json string visit_type: the type of visit to perform :>json string save_request_date: the date (in iso format) the save request was issued :>json string save_request_status: the status of the save request, either **accepted**, **rejected** or **pending** :>json string save_task_status: the status of the origin saving task, either **not created**, **not yet scheduled**, **scheduled**, **succeeded** or **failed** :>json string visit_date: the date (in iso format) of the visit if a visit occurred, null otherwise. :>json string visit_status: the status of the visit, either **full**, **partial**, **not_found** or **failed** if a visit occurred, null otherwise. :>json string note: optional note giving details about the save request, for instance why it has been rejected :statuscode 200: no error :statuscode 400: an invalid visit type or origin url has been provided :statuscode 403: the provided origin url is blacklisted :statuscode 404: no save requests have been found for a given origin """ data = request.data or {} if request.method == "POST": sor = create_save_origin_request( visit_type, origin_url, privileged_user( request, permissions=[SWH_AMBASSADOR_PERMISSION, API_SAVE_ORIGIN_PERMISSION], ), user_id=cast(Optional[int], request.user.id), **data, ) del sor["id"] return sor else: sors = get_save_origin_requests(visit_type, origin_url) for sor in sors: del sor["id"] return sors diff --git a/swh/web/save_code_now/urls.py b/swh/web/save_code_now/urls.py index 5703faf1..51404161 100644 --- a/swh/web/save_code_now/urls.py +++ b/swh/web/save_code_now/urls.py @@ -1,99 +1,100 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import re_path as url from swh.web.save_code_now.admin_views import ( admin_origin_save_add_authorized_url, admin_origin_save_add_unauthorized_url, admin_origin_save_authorized_urls_list, admin_origin_save_filters, admin_origin_save_remove_authorized_url, admin_origin_save_remove_unauthorized_url, admin_origin_save_request_accept, admin_origin_save_request_reject, admin_origin_save_request_remove, admin_origin_save_requests, admin_origin_save_unauthorized_urls_list, ) # register Web API endpoints -import swh.web.save_code_now.api_views # noqa +from swh.web.save_code_now.api_views import save_code_now_api_urls from swh.web.save_code_now.views import ( origin_save_help_view, origin_save_list_view, origin_save_requests_list, save_origin_task_info, ) urlpatterns = [ url(r"^save/$", origin_save_help_view, name="origin-save"), url(r"^save/list/$", origin_save_list_view, name="origin-save-list"), url( r"^save/requests/list/(?P.+)/$", origin_save_requests_list, name="origin-save-requests-list", ), url( r"^save/task/info/(?P.+)/$", save_origin_task_info, name="origin-save-task-info", ), url( r"^admin/origin/save/requests/$", admin_origin_save_requests, name="admin-origin-save-requests", ), url( r"^admin/origin/save/filters/$", admin_origin_save_filters, name="admin-origin-save-filters", ), url( r"^admin/origin/save/authorized_urls/list/$", admin_origin_save_authorized_urls_list, name="admin-origin-save-authorized-urls-list", ), url( 
r"^admin/origin/save/authorized_urls/add/(?P.+)/$", admin_origin_save_add_authorized_url, name="admin-origin-save-add-authorized-url", ), url( r"^admin/origin/save/authorized_urls/remove/(?P.+)/$", admin_origin_save_remove_authorized_url, name="admin-origin-save-remove-authorized-url", ), url( r"^admin/origin/save/unauthorized_urls/list/$", admin_origin_save_unauthorized_urls_list, name="admin-origin-save-unauthorized-urls-list", ), url( r"^admin/origin/save/unauthorized_urls/add/(?P.+)/$", admin_origin_save_add_unauthorized_url, name="admin-origin-save-add-unauthorized-url", ), url( r"^admin/origin/save/unauthorized_urls/remove/(?P.+)/$", admin_origin_save_remove_unauthorized_url, name="admin-origin-save-remove-unauthorized-url", ), url( r"^admin/origin/save/request/accept/(?P.+)/url/(?P.+)/$", admin_origin_save_request_accept, name="admin-origin-save-request-accept", ), url( r"^admin/origin/save/request/reject/(?P.+)/url/(?P.+)/$", admin_origin_save_request_reject, name="admin-origin-save-request-reject", ), url( r"^admin/origin/save/request/remove/(?P.+)/$", admin_origin_save_request_remove, name="admin-origin-save-request-remove", ), + *save_code_now_api_urls.get_url_patterns(), ] diff --git a/swh/web/save_origin_webhooks/generic_receiver.py b/swh/web/save_origin_webhooks/generic_receiver.py index 474eede7..b5a7bee9 100644 --- a/swh/web/save_origin_webhooks/generic_receiver.py +++ b/swh/web/save_origin_webhooks/generic_receiver.py @@ -1,114 +1,117 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import abc from typing import Any, Dict, Tuple from rest_framework.request import Request from swh.web.api.apidoc import api_doc -from swh.web.api.apiurls import api_route +from swh.web.api.apiurls import APIUrls, api_route from swh.web.save_code_now.origin_save import create_save_origin_request from swh.web.utils.exc import BadInputExc +webhooks_api_urls = APIUrls() + class OriginSaveWebhookReceiver(abc.ABC): FORGE_TYPE: str WEBHOOK_GUIDE_URL: str REPO_TYPES: str @abc.abstractmethod def is_forge_request(self, request: Request) -> bool: ... @abc.abstractmethod def is_push_event(self, request: Request) -> bool: ... @abc.abstractmethod def extract_repo_url_and_visit_type(self, request: Request) -> Tuple[str, str]: ... def __init__(self): self.__doc__ = f""" .. http:post:: /api/1/origin/save/webhook/{self.FORGE_TYPE.lower()}/ Webhook receiver for {self.FORGE_TYPE} to request or update the archival of a repository when new commits are pushed to it. To add such webhook to one of your {self.REPO_TYPES} repository hosted on {self.FORGE_TYPE}, please follow `{self.FORGE_TYPE}'s webhooks guide <{self.WEBHOOK_GUIDE_URL}>`_. The expected content type for the webhook payload must be ``application/json``. :>json string origin_url: the url of the origin to save :>json string visit_type: the type of visit to perform :>json string save_request_date: the date (in iso format) the save request was issued :>json string save_request_status: the status of the save request, either **accepted**, **rejected** or **pending** :statuscode 200: save request for repository has been successfully created from the webhook payload. 
:statuscode 400: no save request has been created due to invalid POST request or missing data in webhook payload """ self.__name__ = "api_origin_save_webhook_{self.FORGE_TYPE.lower()}" api_doc( f"/origin/save/webhook/{self.FORGE_TYPE.lower()}/", category="Request archival", )(self) api_route( f"/origin/save/webhook/{self.FORGE_TYPE.lower()}/", f"api-1-origin-save-webhook-{self.FORGE_TYPE.lower()}", methods=["POST"], + api_urls=webhooks_api_urls, )(self) def __call__( self, request: Request, ) -> Dict[str, Any]: if not self.is_forge_request(request): raise BadInputExc( f"POST request was not sent by a {self.FORGE_TYPE} webhook and " "has not been processed." ) if not self.is_push_event(request): raise BadInputExc( f"Event sent by {self.FORGE_TYPE} webhook is not a push one, request " "has not been processed." ) content_type = request.headers.get("Content-Type") if content_type != "application/json": raise BadInputExc( f"Invalid content type '{content_type}' for the POST request sent by " f"{self.FORGE_TYPE} webhook, it should be 'application/json'." ) repo_url, visit_type = self.extract_repo_url_and_visit_type(request) if not repo_url: raise BadInputExc( f"Repository URL could not be extracted from {self.FORGE_TYPE} webhook " f"payload." ) if not visit_type: raise BadInputExc( f"Visit type could not be determined for repository {repo_url}." ) save_request = create_save_origin_request( visit_type=visit_type, origin_url=repo_url ) return { "origin_url": save_request["origin_url"], "visit_type": save_request["visit_type"], "save_request_date": save_request["save_request_date"], "save_request_status": save_request["save_request_status"], } diff --git a/swh/web/save_origin_webhooks/urls.py b/swh/web/save_origin_webhooks/urls.py index bc2ace8f..6fe8f379 100644 --- a/swh/web/save_origin_webhooks/urls.py +++ b/swh/web/save_origin_webhooks/urls.py @@ -1,17 +1,18 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import List, Union - -from django.urls import URLPattern, URLResolver # register Web API endpoints import swh.web.save_origin_webhooks.bitbucket # noqa import swh.web.save_origin_webhooks.gitea # noqa import swh.web.save_origin_webhooks.github # noqa import swh.web.save_origin_webhooks.gitlab # noqa import swh.web.save_origin_webhooks.sourceforge # noqa -urlpatterns: List[Union[URLPattern, URLResolver]] = [] +from swh.web.save_origin_webhooks.generic_receiver import ( # isort: skip + webhooks_api_urls, +) + +urlpatterns = webhooks_api_urls.get_url_patterns() diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py index 72f04e72..9641bcf1 100644 --- a/swh/web/settings/common.py +++ b/swh/web/settings/common.py @@ -1,364 +1,363 @@ # Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information """ Django common settings for swh-web. 
""" from importlib.util import find_spec import os import sys from typing import Any, Dict from django.utils import encoding from swh.web.config import get_config # Fix django-js-reverse 0.9.1 compatibility with django 4.x # TODO: Remove that hack once a new django-js-reverse release # is available on PyPI if not hasattr(encoding, "force_text"): setattr(encoding, "force_text", encoding.force_str) swh_web_config = get_config() # Build paths inside the project like this: os.path.join(BASE_DIR, ...) PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! SECRET_KEY = swh_web_config["secret_key"] # SECURITY WARNING: don't run with debug turned on in production! DEBUG = swh_web_config["debug"] DEBUG_PROPAGATE_EXCEPTIONS = swh_web_config["debug"] ALLOWED_HOSTS = ["127.0.0.1", "localhost"] + swh_web_config["allowed_hosts"] -# Application definition +# Applications definition SWH_BASE_DJANGO_APPS = [ - "swh.web.webapp", + "swh.web.api", "swh.web.auth", "swh.web.browse", - "swh.web.utils", "swh.web.tests", - "swh.web.api", + "swh.web.utils", + "swh.web.webapp", ] SWH_EXTRA_DJANGO_APPS = [ app for app in swh_web_config["swh_extra_django_apps"] if app not in SWH_BASE_DJANGO_APPS ] -# swh.web.api must be the last loaded application due to the way -# its URLS are registered -SWH_DJANGO_APPS = SWH_EXTRA_DJANGO_APPS + SWH_BASE_DJANGO_APPS + +SWH_DJANGO_APPS = SWH_BASE_DJANGO_APPS + SWH_EXTRA_DJANGO_APPS INSTALLED_APPS = [ "django.contrib.admin", "django.contrib.auth", "django.contrib.contenttypes", "django.contrib.sessions", "django.contrib.messages", "django.contrib.staticfiles", "rest_framework", "webpack_loader", "django_js_reverse", "corsheaders", ] + SWH_DJANGO_APPS MIDDLEWARE = [ "django.middleware.security.SecurityMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", "corsheaders.middleware.CorsMiddleware", "django.middleware.common.CommonMiddleware", "django.middleware.csrf.CsrfViewMiddleware", "django.contrib.auth.middleware.AuthenticationMiddleware", "django.contrib.messages.middleware.MessageMiddleware", "django.middleware.clickjacking.XFrameOptionsMiddleware", "swh.web.utils.middlewares.ThrottlingHeadersMiddleware", "swh.web.utils.middlewares.ExceptionMiddleware", ] # Compress all assets (static ones and dynamically generated html) # served by django in a local development environment context. # In a production environment, assets compression will be directly # handled by web servers like apache or nginx. if swh_web_config["serve_assets"]: MIDDLEWARE.insert(0, "django.middleware.gzip.GZipMiddleware") ROOT_URLCONF = "swh.web.urls" SWH_APP_TEMPLATES = [os.path.join(PROJECT_DIR, "../templates")] # Add templates directory from each SWH Django application for app in SWH_DJANGO_APPS: try: app_spec = find_spec(app) assert app_spec is not None, f"Django application {app} not found !" assert app_spec.origin is not None SWH_APP_TEMPLATES.append( os.path.join(os.path.dirname(app_spec.origin), "templates") ) except ModuleNotFoundError: assert False, f"Django application {app} not found !" 
TEMPLATES = [ { "BACKEND": "django.template.backends.django.DjangoTemplates", "DIRS": SWH_APP_TEMPLATES, "APP_DIRS": True, "OPTIONS": { "context_processors": [ "django.template.context_processors.debug", "django.template.context_processors.request", "django.contrib.auth.context_processors.auth", "django.contrib.messages.context_processors.messages", "swh.web.utils.context_processor", ], "libraries": { "swh_templatetags": "swh.web.utils.swh_templatetags", }, }, }, ] DATABASES = { "default": { "ENGINE": "django.db.backends.sqlite3", "NAME": swh_web_config.get("development_db", ""), } } # Password validation # https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators AUTH_PASSWORD_VALIDATORS = [ { "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", # noqa }, { "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", }, { "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", }, { "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", }, ] # Internationalization # https://docs.djangoproject.com/en/1.11/topics/i18n/ LANGUAGE_CODE = "en-us" TIME_ZONE = "UTC" USE_I18N = True USE_L10N = True USE_TZ = True # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/1.11/howto/static-files/ STATIC_URL = "/static/" # static folder location when swh-web has been installed with pip STATIC_DIR = os.path.join(sys.prefix, "share/swh/web/static") if not os.path.exists(STATIC_DIR): # static folder location when developping swh-web STATIC_DIR = os.path.join(PROJECT_DIR, "../../../static") STATICFILES_DIRS = [STATIC_DIR] INTERNAL_IPS = ["127.0.0.1"] throttle_rates = {} http_requests = ["GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "PATCH"] throttling = swh_web_config["throttling"] for limiter_scope, limiter_conf in throttling["scopes"].items(): if "default" in limiter_conf["limiter_rate"]: throttle_rates[limiter_scope] = limiter_conf["limiter_rate"]["default"] # for backward compatibility else: throttle_rates[limiter_scope] = limiter_conf["limiter_rate"] # register sub scopes specific for HTTP request types for http_request in http_requests: if http_request in limiter_conf["limiter_rate"]: throttle_rates[limiter_scope + "_" + http_request.lower()] = limiter_conf[ "limiter_rate" ][http_request] REST_FRAMEWORK: Dict[str, Any] = { "DEFAULT_RENDERER_CLASSES": ( "rest_framework.renderers.JSONRenderer", "swh.web.api.renderers.YAMLRenderer", "rest_framework.renderers.TemplateHTMLRenderer", ), "DEFAULT_THROTTLE_CLASSES": ( "swh.web.api.throttling.SwhWebRateThrottle", "swh.web.api.throttling.SwhWebUserRateThrottle", ), "DEFAULT_THROTTLE_RATES": throttle_rates, "DEFAULT_AUTHENTICATION_CLASSES": [ "rest_framework.authentication.SessionAuthentication", "swh.auth.django.backends.OIDCBearerTokenAuthentication", ], "EXCEPTION_HANDLER": "swh.web.api.apiresponse.error_response_handler", } LOGGING = { "version": 1, "disable_existing_loggers": False, "filters": { "require_debug_false": { "()": "django.utils.log.RequireDebugFalse", }, "require_debug_true": { "()": "django.utils.log.RequireDebugTrue", }, }, "formatters": { "request": { "format": "[%(asctime)s] [%(levelname)s] %(request)s %(status_code)s", "datefmt": "%d/%b/%Y %H:%M:%S", }, "simple": { "format": "[%(asctime)s] [%(levelname)s] %(message)s", "datefmt": "%d/%b/%Y %H:%M:%S", }, "verbose": { "format": ( "[%(asctime)s] [%(levelname)s] %(name)s.%(funcName)s:%(lineno)s " "- %(message)s" ), "datefmt": "%d/%b/%Y %H:%M:%S", }, }, "handlers": { 
"console": { "level": "DEBUG", "filters": ["require_debug_true"], "class": "logging.StreamHandler", "formatter": "simple", }, "file": { "level": "WARNING", "filters": ["require_debug_false"], "class": "logging.FileHandler", "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), "formatter": "simple", }, "file_request": { "level": "WARNING", "filters": ["require_debug_false"], "class": "logging.FileHandler", "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), "formatter": "request", }, "console_verbose": { "level": "DEBUG", "filters": ["require_debug_true"], "class": "logging.StreamHandler", "formatter": "verbose", }, "file_verbose": { "level": "WARNING", "filters": ["require_debug_false"], "class": "logging.FileHandler", "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), "formatter": "verbose", }, "null": { "class": "logging.NullHandler", }, }, "loggers": { "": { "handlers": ["console_verbose", "file_verbose"], "level": "DEBUG" if DEBUG else "WARNING", }, "django": { "handlers": ["console"], "level": "DEBUG" if DEBUG else "WARNING", "propagate": False, }, "django.request": { "handlers": ["file_request"], "level": "DEBUG" if DEBUG else "WARNING", "propagate": False, }, "django.db.backends": {"handlers": ["null"], "propagate": False}, "django.utils.autoreload": { "level": "INFO", }, "swh.core.statsd": { "level": "INFO", }, "urllib3": { "level": "INFO", }, }, } WEBPACK_LOADER = { "DEFAULT": { "CACHE": False, "BUNDLE_DIR_NAME": "./", "STATS_FILE": os.path.join(STATIC_DIR, "webpack-stats.json"), "POLL_INTERVAL": 0.1, "TIMEOUT": None, "IGNORE": [".+\\.hot-update.js", ".+\\.map"], } } AUTHENTICATION_BACKENDS = [ "django.contrib.auth.backends.ModelBackend", ] oidc_enabled = bool(get_config()["keycloak"]["server_url"]) if not oidc_enabled: LOGIN_URL = "login" LOGOUT_URL = "logout" else: LOGIN_URL = "oidc-login" LOGOUT_URL = "oidc-logout" AUTHENTICATION_BACKENDS.append( "swh.auth.django.backends.OIDCAuthorizationCodePKCEBackend", ) MIDDLEWARE.insert( MIDDLEWARE.index("django.contrib.auth.middleware.AuthenticationMiddleware") + 1, "swh.auth.django.middlewares.OIDCSessionExpiredMiddleware", ) LOGIN_REDIRECT_URL = "swh-web-homepage" SESSION_ENGINE = "django.contrib.sessions.backends.cache" CACHES = { "default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}, } JS_REVERSE_JS_MINIFY = False CORS_ORIGIN_ALLOW_ALL = True CORS_URLS_REGEX = r"^/(badge|api)/.*$" OIDC_SWH_WEB_CLIENT_ID = "swh-web" SWH_AUTH_SERVER_URL = swh_web_config["keycloak"]["server_url"] SWH_AUTH_REALM_NAME = swh_web_config["keycloak"]["realm_name"] SWH_AUTH_CLIENT_ID = OIDC_SWH_WEB_CLIENT_ID SWH_AUTH_SESSION_EXPIRED_REDIRECT_VIEW = "logout" DEFAULT_AUTO_FIELD = "django.db.models.AutoField" diff --git a/swh/web/tests/save_origin_webhooks/test_app.py b/swh/web/tests/save_origin_webhooks/test_app.py new file mode 100644 index 00000000..c43185e1 --- /dev/null +++ b/swh/web/tests/save_origin_webhooks/test_app.py @@ -0,0 +1,26 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import pytest + +from django.urls import get_resolver + +from swh.web.save_origin_webhooks.urls import urlpatterns + + +@pytest.mark.django_db +def test_save_origin_webhooks_deactivate(django_settings): + """Check webhooks feature is deactivated when the swh.web.save_origin_webhooks + django application 
is not in installed apps.""" + + django_settings.SWH_DJANGO_APPS = [ + app + for app in django_settings.SWH_DJANGO_APPS + if app != "swh.web.save_origin_webhooks" + ] + + save_origin_webhooks_view_names = set(urlpattern.name for urlpattern in urlpatterns) + all_view_names = set(get_resolver().reverse_dict.keys()) + assert save_origin_webhooks_view_names & all_view_names == set() diff --git a/swh/web/utils/urlsindex.py b/swh/web/utils/urlsindex.py index 21752418..67b28aab 100644 --- a/swh/web/utils/urlsindex.py +++ b/swh/web/utils/urlsindex.py @@ -1,76 +1,67 @@ # Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -from collections import defaultdict -from typing import Dict, List +from typing import Callable, List +from django.http.response import HttpResponseBase from django.shortcuts import redirect from django.urls import URLPattern from django.urls import re_path as url class UrlsIndex: - """ - Simple helper class for centralizing url patterns of a Django - web application. + """Simple helper class for centralizing URL patterns of a Django web application.""" - Derived classes should override the 'scope' class attribute otherwise - all declared patterns will be grouped under the default one. - """ + def __init__(self): + self.urlpatterns: List[URLPattern] = [] - _urlpatterns: Dict[str, List[URLPattern]] = defaultdict(list) - scope = "default" - - @classmethod - def add_url_pattern(cls, url_pattern, view, view_name=None): + def add_url_pattern( + self, + url_pattern: str, + view: Callable[..., HttpResponseBase], + view_name: str = "", + ) -> None: """ - Class method that adds an url pattern to the current scope. + Adds an URL pattern. Args: - url_pattern: regex describing a Django url + url_pattern: regex describing a Django URL view: function implementing the Django view - view_name: name of the view used to reverse the url + view_name: name of the view used to reverse the URL """ - if cls.scope not in cls._urlpatterns: - cls._urlpatterns[cls.scope] = [] if view_name: - cls._urlpatterns[cls.scope].append(url(url_pattern, view, name=view_name)) + self.urlpatterns.append(url(url_pattern, view, name=view_name)) else: - cls._urlpatterns[cls.scope].append(url(url_pattern, view)) + self.urlpatterns.append(url(url_pattern, view)) - @classmethod - def add_redirect_for_checksum_args(cls, view_name, url_patterns, checksum_args): + def add_redirect_for_checksum_args( + self, view_name: str, url_patterns: List[str], checksum_args: List[str] + ) -> None: """ - Class method that redirects to view with lowercase checksums - when upper/mixed case checksums are passed as url arguments. + Adds redirection to view with lowercase checksums when upper/mixed case + checksums are passed as url arguments. 
Args: - view_name (str): name of the view to redirect requests - url_patterns (List[str]): regexps describing the view urls - checksum_args (List[str]): url argument names corresponding - to checksum values + view_name: name of the view to redirect requests + url_patterns: regexps describing the view URLs + checksum_args: url argument names corresponding to checksum values """ new_view_name = view_name + "-uppercase-checksum" for url_pattern in url_patterns: url_pattern_upper = url_pattern.replace("[0-9a-f]", "[0-9a-fA-F]") def view_redirect(request, *args, **kwargs): for checksum_arg in checksum_args: checksum_upper = kwargs[checksum_arg] kwargs[checksum_arg] = checksum_upper.lower() return redirect(view_name, *args, **kwargs) - cls.add_url_pattern(url_pattern_upper, view_redirect, new_view_name) + self.add_url_pattern(url_pattern_upper, view_redirect, new_view_name) - @classmethod - def get_url_patterns(cls): + def get_url_patterns(self) -> List[URLPattern]: """ - Class method that returns the list of url pattern associated to - the current scope. - - Returns: - The list of url patterns associated to the current scope + Returns the list of registered URL patterns. """ - return cls._urlpatterns[cls.scope] + return self.urlpatterns diff --git a/swh/web/vault/api_views.py b/swh/web/vault/api_views.py index df07c9b6..5e9b3073 100644 --- a/swh/web/vault/api_views.py +++ b/swh/web/vault/api_views.py @@ -1,518 +1,530 @@ # Copyright (C) 2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Any, Dict from django.http import HttpResponse from django.shortcuts import redirect from rest_framework.request import Request from swh.model.hashutil import hash_to_hex from swh.model.swhids import CoreSWHID, ObjectType from swh.web.api.apidoc import api_doc, format_docstring -from swh.web.api.apiurls import api_route +from swh.web.api.apiurls import APIUrls, api_route from swh.web.api.views.utils import api_lookup from swh.web.utils import SWHID_RE, archive, query, reverse from swh.web.utils.exc import BadInputExc ###################################################### # Common # XXX: a bit spaghetti. Would be better with class-based views. 
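Since ``UrlsIndex`` is now instance-based, each Django application collects its URL patterns on its own index object (``vault_api_urls`` below is built on the same idea) instead of mutating shared class-level state. A minimal sketch of the new interface, using a hypothetical view and route name for illustration only:

    from django.http import HttpResponse
    from swh.web.utils.urlsindex import UrlsIndex

    def example_view(request):
        return HttpResponse("hello")

    example_urls = UrlsIndex()
    example_urls.add_url_pattern(r"^example/$", example_view, view_name="example")

    # an application's urls.py can then expose the collected patterns
    urlpatterns = example_urls.get_url_patterns()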
def _dispatch_cook_progress(request, bundle_type: str, swhid: CoreSWHID): if request.method == "GET": return api_lookup( archive.vault_progress, bundle_type, swhid, notfound_msg=f"Cooking of {swhid} was never requested.", request=request, ) elif request.method == "POST": email = request.POST.get("email", request.GET.get("email", None)) return api_lookup( archive.vault_cook, bundle_type, swhid, email, notfound_msg=f"{swhid} not found.", request=request, ) def _vault_response( vault_response: Dict[str, Any], add_legacy_items: bool ) -> Dict[str, Any]: d = { "fetch_url": vault_response["fetch_url"], "progress_message": vault_response["progress_msg"], "id": vault_response["task_id"], "status": vault_response["task_status"], "swhid": str(vault_response["swhid"]), } if add_legacy_items: d["obj_type"] = vault_response["swhid"].object_type.name.lower() d["obj_id"] = hash_to_hex(vault_response["swhid"].object_id) return d +vault_api_urls = APIUrls() + ###################################################### # Flat bundles @api_route( f"/vault/flat/(?P{SWHID_RE})/", "api-1-vault-cook-flat", methods=["GET", "POST"], throttle_scope="swh_vault_cooking", never_cache=True, + api_urls=vault_api_urls, ) @api_doc("/vault/flat/", category="Batch download") @format_docstring() def api_vault_cook_flat(request: Request, swhid: str): """ .. http:get:: /api/1/vault/flat/(swhid)/ .. http:post:: /api/1/vault/flat/(swhid)/ Request the cooking of a simple archive, typically for a directory. That endpoint enables to create a vault cooking task for a directory through a POST request or check the status of a previously created one through a GET request. Once the cooking task has been executed, the resulting archive can be downloaded using the dedicated endpoint :http:get:`/api/1/vault/flat/(swhid)/raw/`. Then to extract the cooked directory in the current one, use:: $ tar xvf path/to/swh_1_*.tar.gz :param string swhid: the object's SWHID :query string email: e-mail to notify when the archive is ready {common_headers} :>json string fetch_url: the url from which to download the archive once it has been cooked (see :http:get:`/api/1/vault/flat/(swhid)/raw/`) :>json string progress_message: message describing the cooking task progress :>json number id: the cooking task id :>json string status: the cooking task status (either **new**, **pending**, **done** or **failed**) :>json string swhid: the identifier of the object to cook :statuscode 200: no error :statuscode 400: an invalid directory identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ parsed_swhid = CoreSWHID.from_string(swhid) if parsed_swhid.object_type == ObjectType.DIRECTORY: res = _dispatch_cook_progress(request, "flat", parsed_swhid) res["fetch_url"] = reverse( "api-1-vault-fetch-flat", url_args={"swhid": swhid}, request=request, ) return _vault_response(res, add_legacy_items=False) elif parsed_swhid.object_type == ObjectType.CONTENT: raise BadInputExc( "Content objects do not need to be cooked, " "use `/api/1/content/raw/` instead." ) elif parsed_swhid.object_type == ObjectType.REVISION: # TODO: support revisions too? (the vault allows it) raise BadInputExc( "Only directories can be cooked as 'flat' bundles. " "Use `/api/1/vault/gitfast/` to cook revisions, as gitfast bundles." 
) else: raise BadInputExc("Only directories can be cooked as 'flat' bundles.") @api_route( r"/vault/directory/(?P[0-9a-f]+)/", "api-1-vault-cook-directory", methods=["GET", "POST"], checksum_args=["dir_id"], throttle_scope="swh_vault_cooking", never_cache=True, + api_urls=vault_api_urls, ) @api_doc("/vault/directory/", category="Batch download", tags=["deprecated"]) @format_docstring() def api_vault_cook_directory(request: Request, dir_id: str): """ .. http:get:: /api/1/vault/directory/(dir_id)/ This endpoint was replaced by :http:get:`/api/1/vault/flat/(swhid)/` """ _, obj_id = query.parse_hash_with_algorithms_or_throws( dir_id, ["sha1"], "Only sha1_git is supported." ) swhid = f"swh:1:dir:{obj_id.hex()}" res = _dispatch_cook_progress(request, "flat", CoreSWHID.from_string(swhid)) res["fetch_url"] = reverse( "api-1-vault-fetch-flat", url_args={"swhid": swhid}, request=request, ) return _vault_response(res, add_legacy_items=True) @api_route( f"/vault/flat/(?P{SWHID_RE})/raw/", "api-1-vault-fetch-flat", + api_urls=vault_api_urls, ) @api_doc("/vault/flat/raw/", category="Batch download") def api_vault_fetch_flat(request: Request, swhid: str): """ .. http:get:: /api/1/vault/flat/(swhid)/raw/ Fetch the cooked archive for a flat bundle. See :http:get:`/api/1/vault/flat/(swhid)/` to get more details on 'flat' bundle cooking. :param string swhid: the SWHID of the object to cook :resheader Content-Type: application/gzip :statuscode 200: no error :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ res = api_lookup( archive.vault_fetch, "flat", CoreSWHID.from_string(swhid), notfound_msg=f"Cooked archive for {swhid} not found.", request=request, ) fname = "{}.tar.gz".format(swhid) response = HttpResponse(res, content_type="application/gzip") response["Content-disposition"] = "attachment; filename={}".format( fname.replace(":", "_") ) return response @api_route( r"/vault/directory/(?P[0-9a-f]+)/raw/", "api-1-vault-fetch-directory", checksum_args=["dir_id"], + api_urls=vault_api_urls, ) @api_doc( "/vault/directory/raw/", category="Batch download", tags=["hidden", "deprecated"] ) def api_vault_fetch_directory(request: Request, dir_id: str): """ .. http:get:: /api/1/vault/directory/(dir_id)/raw/ This endpoint was replaced by :http:get:`/api/1/vault/flat/(swhid)/raw/` """ _, obj_id = query.parse_hash_with_algorithms_or_throws( dir_id, ["sha1"], "Only sha1_git is supported." ) rev_flat_raw_url = reverse( "api-1-vault-fetch-flat", url_args={"swhid": f"swh:1:dir:{dir_id}"} ) return redirect(rev_flat_raw_url) ###################################################### # gitfast bundles @api_route( f"/vault/gitfast/(?P{SWHID_RE})/", "api-1-vault-cook-gitfast", methods=["GET", "POST"], throttle_scope="swh_vault_cooking", never_cache=True, + api_urls=vault_api_urls, ) @api_doc("/vault/gitfast/", category="Batch download") @format_docstring() def api_vault_cook_gitfast(request: Request, swhid: str): """ .. http:get:: /api/1/vault/gitfast/(swhid)/ .. http:post:: /api/1/vault/gitfast/(swhid)/ Request the cooking of a gitfast archive for a revision or check its cooking status. That endpoint enables to create a vault cooking task for a revision through a POST request or check the status of a previously created one through a GET request. Once the cooking task has been executed, the resulting gitfast archive can be downloaded using the dedicated endpoint :http:get:`/api/1/vault/gitfast/(swhid)/raw/`. 
Then to import the revision in the current directory, use:: $ git init $ zcat path/to/swh_1_rev_*.gitfast.gz | git fast-import $ git checkout HEAD :param string swhid: the revision's permanent identifier :query string email: e-mail to notify when the gitfast archive is ready {common_headers} :>json string fetch_url: the url from which to download the archive once it has been cooked (see :http:get:`/api/1/vault/gitfast/(swhid)/raw/`) :>json string progress_message: message describing the cooking task progress :>json number id: the cooking task id :>json string status: the cooking task status (new/pending/done/failed) :>json string swhid: the identifier of the object to cook :statuscode 200: no error :statuscode 404: requested revision did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ parsed_swhid = CoreSWHID.from_string(swhid) if parsed_swhid.object_type == ObjectType.REVISION: res = _dispatch_cook_progress(request, "gitfast", parsed_swhid) res["fetch_url"] = reverse( "api-1-vault-fetch-gitfast", url_args={"swhid": swhid}, request=request, ) return _vault_response(res, add_legacy_items=False) elif parsed_swhid.object_type == ObjectType.CONTENT: raise BadInputExc( "Content objects do not need to be cooked, " "use `/api/1/content/raw/` instead." ) elif parsed_swhid.object_type == ObjectType.DIRECTORY: raise BadInputExc( "Only revisions can be cooked as 'gitfast' bundles. " "Use `/api/1/vault/flat/` to cook directories, as flat bundles." ) else: raise BadInputExc("Only revisions can be cooked as 'gitfast' bundles.") @api_route( r"/vault/revision/(?P[0-9a-f]+)/gitfast/", "api-1-vault-cook-revision_gitfast", methods=["GET", "POST"], checksum_args=["rev_id"], throttle_scope="swh_vault_cooking", never_cache=True, + api_urls=vault_api_urls, ) @api_doc("/vault/revision/gitfast/", category="Batch download", tags=["deprecated"]) @format_docstring() def api_vault_cook_revision_gitfast(request: Request, rev_id: str): """ .. http:get:: /api/1/vault/revision/(rev_id)/gitfast/ This endpoint was replaced by :http:get:`/api/1/vault/gitfast/(swhid)/` """ _, obj_id = query.parse_hash_with_algorithms_or_throws( rev_id, ["sha1"], "Only sha1_git is supported." ) swhid = f"swh:1:rev:{obj_id.hex()}" res = _dispatch_cook_progress(request, "gitfast", CoreSWHID.from_string(swhid)) res["fetch_url"] = reverse( "api-1-vault-fetch-gitfast", url_args={"swhid": swhid}, request=request, ) return _vault_response(res, add_legacy_items=True) @api_route( f"/vault/gitfast/(?P{SWHID_RE})/raw/", "api-1-vault-fetch-gitfast", + api_urls=vault_api_urls, ) @api_doc("/vault/gitfast/raw/", category="Batch download") def api_vault_fetch_revision_gitfast(request: Request, swhid: str): """ .. http:get:: /api/1/vault/gitfast/(swhid)/raw/ Fetch the cooked gitfast archive for a revision. See :http:get:`/api/1/vault/gitfast/(swhid)/` to get more details on gitfast cooking.
:param string swhid: the revision's permanent identifier :resheader Content-Type: application/gzip :statuscode 200: no error :statuscode 404: requested revision did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ res = api_lookup( archive.vault_fetch, "gitfast", CoreSWHID.from_string(swhid), notfound_msg="Cooked archive for {} not found.".format(swhid), request=request, ) fname = "{}.gitfast.gz".format(swhid) response = HttpResponse(res, content_type="application/gzip") response["Content-disposition"] = "attachment; filename={}".format( fname.replace(":", "_") ) return response @api_route( r"/vault/revision/(?P[0-9a-f]+)/gitfast/raw/", "api-1-vault-fetch-revision_gitfast", checksum_args=["rev_id"], + api_urls=vault_api_urls, ) @api_doc( "/vault/revision_gitfast/raw/", category="Batch download", tags=["hidden", "deprecated"], ) def _api_vault_revision_gitfast_raw(request: Request, rev_id: str): """ .. http:get:: /api/1/vault/revision/(rev_id)/gitfast/raw/ This endpoint was replaced by :http:get:`/api/1/vault/gitfast/(swhid)/raw/` """ rev_gitfast_raw_url = reverse( "api-1-vault-fetch-gitfast", url_args={"swhid": f"swh:1:rev:{rev_id}"} ) return redirect(rev_gitfast_raw_url) ###################################################### # git_bare bundles @api_route( f"/vault/git-bare/(?P{SWHID_RE})/", "api-1-vault-cook-git-bare", methods=["GET", "POST"], throttle_scope="swh_vault_cooking", never_cache=True, + api_urls=vault_api_urls, ) @api_doc("/vault/git-bare/", category="Batch download") @format_docstring() def api_vault_cook_git_bare(request: Request, swhid: str): """ .. http:get:: /api/1/vault/git-bare/(swhid)/ .. http:post:: /api/1/vault/git-bare/(swhid)/ Request the cooking of a git-bare archive for a revision or check its cooking status. That endpoint enables to create a vault cooking task for a revision through a POST request or check the status of a previously created one through a GET request. Once the cooking task has been executed, the resulting git-bare archive can be downloaded using the dedicated endpoint :http:get:`/api/1/vault/git-bare/(swhid)/raw/`. Then to import the revision in the current directory, use:: $ tar -xf path/to/swh_1_rev_*.git.tar $ git clone swh:1:rev:*.git new_repository (replace ``swh:1:rev:*`` with the SWHID of the requested revision) This will create a directory called ``new_repository``, which is a git repository containing the requested objects.
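A successful status check returns a JSON object of the following shape (illustrative values only)::

    {
        "id": 42,
        "status": "done",
        "swhid": "swh:1:rev:0000000000000000000000000000000000000000",
        "progress_message": "done",
        "fetch_url": "https://archive.softwareheritage.org/api/1/vault/git-bare/swh:1:rev:0000000000000000000000000000000000000000/raw/"
    }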
:param string swhid: the revision's permanent identifier :query string email: e-mail to notify when the git-bare archive is ready {common_headers} :>json string fetch_url: the url from which to download the archive once it has been cooked (see :http:get:`/api/1/vault/git-bare/(swhid)/raw/`) :>json string progress_message: message describing the cooking task progress :>json number id: the cooking task id :>json string status: the cooking task status (new/pending/done/failed) :>json string swhid: the identifier of the object to cook :statuscode 200: no error :statuscode 404: requested revision did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ parsed_swhid = CoreSWHID.from_string(swhid) if parsed_swhid.object_type == ObjectType.REVISION: res = _dispatch_cook_progress(request, "git_bare", parsed_swhid) res["fetch_url"] = reverse( "api-1-vault-fetch-git-bare", url_args={"swhid": swhid}, request=request, ) return _vault_response(res, add_legacy_items=False) elif parsed_swhid.object_type == ObjectType.CONTENT: raise BadInputExc( "Content objects do not need to be cooked, " "use `/api/1/content/raw/` instead." ) elif parsed_swhid.object_type == ObjectType.DIRECTORY: raise BadInputExc( "Only revisions can be cooked as 'git-bare' bundles. " "Use `/api/1/vault/flat/` to cook directories, as flat bundles." ) else: raise BadInputExc("Only revisions can be cooked as 'git-bare' bundles.") @api_route( f"/vault/git-bare/(?P{SWHID_RE})/raw/", "api-1-vault-fetch-git-bare", + api_urls=vault_api_urls, ) @api_doc("/vault/git-bare/raw/", category="Batch download") def api_vault_fetch_revision_git_bare(request: Request, swhid: str): """ .. http:get:: /api/1/vault/git-bare/(swhid)/raw/ Fetch the cooked git-bare archive for a revision. See :http:get:`/api/1/vault/git-bare/(swhid)/` to get more details on git-bare cooking.
:param string swhid: the revision's permanent identifier :resheader Content-Type: application/x-tar :statuscode 200: no error :statuscode 404: requested revision did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ res = api_lookup( archive.vault_fetch, "git_bare", CoreSWHID.from_string(swhid), notfound_msg="Cooked archive for {} not found.".format(swhid), request=request, ) fname = "{}.git.tar".format(swhid) response = HttpResponse(res, content_type="application/x-tar") response["Content-disposition"] = "attachment; filename={}".format( fname.replace(":", "_") ) return response diff --git a/swh/web/vault/urls.py b/swh/web/vault/urls.py index 749d7af9..b5b3cc72 100644 --- a/swh/web/vault/urls.py +++ b/swh/web/vault/urls.py @@ -1,30 +1,31 @@ # Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.http import HttpRequest, HttpResponse from django.shortcuts import redirect, render from django.urls import re_path as url # register Web API endpoints -import swh.web.vault.api_views # noqa +from swh.web.vault.api_views import vault_api_urls def vault_view(request: HttpRequest) -> HttpResponse: return render( request, "vault-ui.html", {"heading": "Download archive content from the Vault"}, ) def browse_vault_view(request: HttpRequest) -> HttpResponse: return redirect("vault") urlpatterns = [ url(r"^vault/$", vault_view, name="vault"), # for backward compatibility url(r"^browse/vault/$", browse_vault_view, name="browse-vault"), + *vault_api_urls.get_url_patterns(), ]
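Following the pattern of ``swh/web/tests/save_origin_webhooks/test_app.py`` above, a companion test could check that the vault Web API endpoints become resolvable once ``vault_api_urls`` is plugged into ``urlpatterns``. A sketch (hypothetical test module, assuming ``swh.web.vault`` is enabled in the test settings):

    import pytest

    from django.urls import get_resolver

    from swh.web.vault.api_views import vault_api_urls


    @pytest.mark.django_db
    def test_vault_api_view_names_registered():
        """Check vault Web API view names are known to the URL resolver."""
        vault_view_names = set(
            urlpattern.name
            for urlpattern in vault_api_urls.get_url_patterns()
            if urlpattern.name
        )
        all_view_names = set(get_resolver().reverse_dict.keys())
        assert vault_view_names <= all_view_names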