diff --git a/docs/conf.py b/docs/conf.py index f5b49402..675e63f8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,16 +1 @@ -# flake8: noqa - -import os - -import django - -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "swh.web.settings.development") - -django.setup() - -import swh.docs.sphinx.conf as sphinx_conf -from swh.web.doc_config import customize_sphinx_conf - -customize_sphinx_conf(sphinx_conf) - -from swh.docs.sphinx.conf import * +from swh.docs.sphinx.conf import * # noqa diff --git a/docs/developers-info.rst b/docs/developers-info.rst index 5909d3a6..876acf87 100644 --- a/docs/developers-info.rst +++ b/docs/developers-info.rst @@ -1,127 +1,126 @@ Developers Information ====================== Sample configuration -------------------- The configuration will be taken from the default configuration file: ``~/.config/swh/web/web.yml``. The following introduces a default configuration file: .. sourcecode:: yaml storage: cls: remote args: url: http://localhost:5002 debug: false throttling: cache_uri: None scopes: swh_api: limiter_rate: default: 120/h exempted_networks: - 127.0.0.0/8 Run server ---------- Either use the django manage script directly (useful in development mode as it offers various commands): .. sourcecode:: shell $ python3 -m swh.web.manage runserver or use the following shortcut: .. sourcecode:: shell $ make run Modules description ------------------- Common to all web applications ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Configuration and settings """""""""""""""""""""""""" * :mod:`swh.web.config`: holds the configuration for the web applications. - * :mod:`swh.web.doc_config`: utility module used to extend the sphinx configuration when building the documentation. * :mod:`swh.web.manage`: Django management module for developers. * :mod:`swh.web.urls`: module that holds the whole URI scheme of all the web applications. * :mod:`swh.web.settings.common`: Common Django settings * :mod:`swh.web.settings.development`: Django settings for development * :mod:`swh.web.settings.production`: Django settings for production * :mod:`swh.web.settings.tests`: Django settings for tests Common utilities """""""""""""""" * :mod:`swh.web.common.converters`: conversion module used to transform raw data to serializable ones. It is used by :mod:`swh.web.common.archive`: to convert data before transmitting then to Django views. * :mod:`swh.web.common.exc`: module defining exceptions used in the web applications. * :mod:`swh.web.common.highlightjs`: utility module to ease the use of the highlightjs_ library in produced Django views. * :mod:`swh.web.common.query`: Utilities to parse data from HTTP endpoints. It is used by :mod:`swh.web.common.archive`. * :mod:`swh.web.common.archive`: Orchestration layer used by views module in charge of communication with :mod:`swh.storage` to retrieve information and perform conversion for the upper layer. * :mod:`swh.web.common.swh_templatetags`: Custom Django template tags library for swh. * :mod:`swh.web.common.urlsindex`: Utilities to help the registering of endpoints for the web applications * :mod:`swh.web.common.utils`: Utility functions used in the web applications implementation swh-web API application ^^^^^^^^^^^^^^^^^^^^^^^ * :mod:`swh.web.api.apidoc`: Utilities to document the web api for its html browsable rendering. * :mod:`swh.web.api.apiresponse`: Utility module to ease the generation of web api responses. * :mod:`swh.web.api.apiurls`: Utilities to facilitate the registration of web api endpoints. * :mod:`swh.web.api.throttling`: Custom request rate limiter to use with the `Django REST Framework `_ * :mod:`swh.web.api.urls`: Module that defines the whole URI scheme for the api endpoints * :mod:`swh.web.api.utils`: Utility functions used in the web api implementation. * :mod:`swh.web.api.views.content`: Implementation of API endpoints for getting information about contents. * :mod:`swh.web.api.views.directory`: Implementation of API endpoints for getting information about directories. * :mod:`swh.web.api.views.origin`: Implementation of API endpoints for getting information about origins. * :mod:`swh.web.api.views.person`: Implementation of API endpoints for getting information about persons. * :mod:`swh.web.api.views.release`: Implementation of API endpoints for getting information about releases. * :mod:`swh.web.api.views.revision`: Implementation of API endpoints for getting information about revisions. * :mod:`swh.web.api.views.snapshot`: Implementation of API endpoints for getting information about snapshots. * :mod:`swh.web.api.views.stat`: Implementation of API endpoints for getting information about archive statistics. * :mod:`swh.web.api.views.utils`: Utilities used in the web api endpoints implementation. swh-web browse application ^^^^^^^^^^^^^^^^^^^^^^^^^^ * :mod:`swh.web.browse.browseurls`: Utilities to facilitate the registration of browse endpoints. * :mod:`swh.web.browse.urls`: Module that defines the whole URI scheme for the browse endpoints. * :mod:`swh.web.browse.utils`: Utilities functions used throughout the browse endpoints implementation. * :mod:`swh.web.browse.views.content`: Implementation of endpoints for browsing contents. * :mod:`swh.web.browse.views.directory`: Implementation of endpoints for browsing directories. * :mod:`swh.web.browse.views.identifiers`: Implementation of endpoints for browsing objects through :ref:`persistent-identifiers`. * :mod:`swh.web.browse.views.origin`: Implementation of endpoints for browsing origins. * :mod:`swh.web.browse.views.person`: Implementation of endpoints for browsing persons. * :mod:`swh.web.browse.views.release`: Implementation of endpoints for browsing releases. * :mod:`swh.web.browse.views.revision`: Implementation of endpoints for browsing revisions. * :mod:`swh.web.browse.views.snapshot`: Implementation of endpoints for browsing snapshots. .. _highlightjs: https://highlightjs.org/ diff --git a/swh/web/api/apidoc.py b/swh/web/api/apidoc.py index 25eeffa2..0d484217 100644 --- a/swh/web/api/apidoc.py +++ b/swh/web/api/apidoc.py @@ -1,440 +1,440 @@ # Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import defaultdict import functools from functools import wraps import os import re import textwrap from typing import List import docutils.nodes import docutils.parsers.rst import docutils.utils from rest_framework.decorators import api_view from swh.web.api.apiresponse import make_api_response from swh.web.api.apiurls import APIUrls from swh.web.common.utils import parse_rst class _HTTPDomainDocVisitor(docutils.nodes.NodeVisitor): """ docutils visitor for walking on a parsed rst document containing sphinx httpdomain roles. Its purpose is to extract relevant info regarding swh api endpoints (for instance url arguments) from their docstring written using sphinx httpdomain. """ # httpdomain roles we want to parse (based on sphinxcontrib.httpdomain 1.6) parameter_roles = ("param", "parameter", "arg", "argument") request_json_object_roles = ("reqjsonobj", "reqjson", "jsonobj", ">json") response_json_array_roles = ("resjsonarr", ">jsonarr") query_parameter_roles = ("queryparameter", "queryparam", "qparam", "query") request_header_roles = ("header", "resheader", "responseheader") status_code_roles = ("statuscode", "status", "code") def __init__(self, document, data): super().__init__(document) self.data = data self.args_set = set() self.params_set = set() self.inputs_set = set() self.returns_set = set() self.status_codes_set = set() self.reqheaders_set = set() self.resheaders_set = set() self.field_list_visited = False self.current_json_obj = None def process_paragraph(self, par): """ Process extracted paragraph text before display. Cleanup document model markups and transform the paragraph into a valid raw rst string (as the apidoc documentation transform rst to html when rendering). """ par = par.replace("\n", " ") # keep emphasized, strong and literal text par = par.replace("", "*") par = par.replace("", "*") par = par.replace("", "**") par = par.replace("", "**") par = par.replace("", "``") par = par.replace("", "``") # keep links to web pages if "', r"`\1 <\2>`_", par, ) # remove parsed document markups but keep rst links par = re.sub(r"<[^<]+?>(?!`_)", "", par) # api urls cleanup to generate valid links afterwards subs_made = 1 while subs_made: (par, subs_made) = re.subn(r"(:http:.*)(\(\w+\))", r"\1", par) subs_made = 1 while subs_made: (par, subs_made) = re.subn(r"(:http:.*)(\[.*\])", r"\1", par) par = re.sub(r"([^:])//", r"\1/", par) # transform references to api endpoints doc into valid rst links par = re.sub(":http:get:`([^,`]*)`", r"`\1 <\1doc/>`_", par) # transform references to some elements into bold text par = re.sub(":http:header:`(.*)`", r"**\1**", par) par = re.sub(":func:`(.*)`", r"**\1**", par) return par def visit_field_list(self, node): """ Visit parsed rst field lists to extract relevant info regarding api endpoint. """ self.field_list_visited = True for child in node.traverse(): # get the parsed field name if isinstance(child, docutils.nodes.field_name): field_name = child.astext() # parse field text elif isinstance(child, docutils.nodes.paragraph): text = self.process_paragraph(str(child)) field_data = field_name.split(" ") # Parameters if field_data[0] in self.parameter_roles: if field_data[2] not in self.args_set: self.data["args"].append( {"name": field_data[2], "type": field_data[1], "doc": text} ) self.args_set.add(field_data[2]) # Query Parameters if field_data[0] in self.query_parameter_roles: if field_data[2] not in self.params_set: self.data["params"].append( {"name": field_data[2], "type": field_data[1], "doc": text} ) self.params_set.add(field_data[2]) # Request data type if ( field_data[0] in self.request_json_array_roles or field_data[0] in self.request_json_object_roles ): # array if field_data[0] in self.request_json_array_roles: self.data["input_type"] = "array" # object else: self.data["input_type"] = "object" # input object field if field_data[2] not in self.inputs_set: self.data["inputs"].append( {"name": field_data[2], "type": field_data[1], "doc": text} ) self.inputs_set.add(field_data[2]) self.current_json_obj = self.data["inputs"][-1] # Response type if ( field_data[0] in self.response_json_array_roles or field_data[0] in self.response_json_object_roles ): # array if field_data[0] in self.response_json_array_roles: self.data["return_type"] = "array" # object else: self.data["return_type"] = "object" # returned object field if field_data[2] not in self.returns_set: self.data["returns"].append( {"name": field_data[2], "type": field_data[1], "doc": text} ) self.returns_set.add(field_data[2]) self.current_json_obj = self.data["returns"][-1] # Status Codes if field_data[0] in self.status_code_roles: if field_data[1] not in self.status_codes_set: self.data["status_codes"].append( {"code": field_data[1], "doc": text} ) self.status_codes_set.add(field_data[1]) # Request Headers if field_data[0] in self.request_header_roles: if field_data[1] not in self.reqheaders_set: self.data["reqheaders"].append( {"name": field_data[1], "doc": text} ) self.reqheaders_set.add(field_data[1]) # Response Headers if field_data[0] in self.response_header_roles: if field_data[1] not in self.resheaders_set: resheader = {"name": field_data[1], "doc": text} self.data["resheaders"].append(resheader) self.resheaders_set.add(field_data[1]) if ( resheader["name"] == "Content-Type" and resheader["doc"] == "application/octet-stream" ): self.data["return_type"] = "octet stream" def visit_paragraph(self, node): """ Visit relevant paragraphs to parse """ # only parsed top level paragraphs if isinstance(node.parent, docutils.nodes.block_quote): text = self.process_paragraph(str(node)) # endpoint description if not text.startswith("**") and text not in self.data["description"]: self.data["description"] += "\n\n" if self.data["description"] else "" self.data["description"] += text def visit_literal_block(self, node): """ Visit literal blocks """ text = node.astext() # literal block in endpoint description if not self.field_list_visited: self.data["description"] += ":\n\n%s\n" % textwrap.indent(text, "\t") # extract example urls if ":swh_web_api:" in text: examples_str = re.sub(".*`(.+)`.*", r"/api/1/\1", text) self.data["examples"] += examples_str.split("\n") def visit_bullet_list(self, node): # bullet list in endpoint description if not self.field_list_visited: self.data["description"] += "\n\n" for child in node.traverse(): # process list item if isinstance(child, docutils.nodes.paragraph): line_text = self.process_paragraph(str(child)) self.data["description"] += "\t* %s\n" % line_text elif self.current_json_obj: self.current_json_obj["doc"] += "\n\n" for child in node.traverse(): # process list item if isinstance(child, docutils.nodes.paragraph): line_text = self.process_paragraph(str(child)) self.current_json_obj["doc"] += "\t\t* %s\n" % line_text self.current_json_obj = None def visit_warning(self, node): text = self.process_paragraph(str(node)) rst_warning = "\n\n.. warning::\n%s\n" % textwrap.indent(text, "\t") if rst_warning not in self.data["description"]: self.data["description"] += rst_warning def unknown_visit(self, node): pass def unknown_departure(self, node): pass def _parse_httpdomain_doc(doc, data): doc_lines = doc.split("\n") doc_lines_filtered = [] urls = defaultdict(list) default_http_methods = ["HEAD", "OPTIONS"] # httpdomain is a sphinx extension that is unknown to docutils but # fortunately we can still parse its directives' content, # so remove lines with httpdomain directives before executing the # rst parser from docutils for doc_line in doc_lines: if ".. http" not in doc_line: doc_lines_filtered.append(doc_line) else: url = doc_line[doc_line.find("/") :] # emphasize url arguments for html rendering url = re.sub(r"\((\w+)\)", r" **\(\1\)** ", url) method = re.search(r"http:(\w+)::", doc_line).group(1) urls[url].append(method.upper()) for url, methods in urls.items(): data["urls"].append({"rule": url, "methods": methods + default_http_methods}) # parse the rst docstring and do not print system messages about # unknown httpdomain roles document = parse_rst("\n".join(doc_lines_filtered), report_level=5) # remove the system_message nodes from the parsed document for node in document.traverse(docutils.nodes.system_message): node.parent.remove(node) # visit the document nodes to extract relevant endpoint info visitor = _HTTPDomainDocVisitor(document, data) document.walkabout(visitor) class APIDocException(Exception): """ Custom exception to signal errors in the use of the APIDoc decorators """ def api_doc( route: str, noargs: bool = False, tags: List[str] = [], api_version: str = "1", ): """ Decorator for an API endpoint implementation used to generate a dedicated view displaying its HTML documentation. The documentation will be generated from the endpoint docstring based on sphinxcontrib-httpdomain format. Args: route: documentation page's route noargs: set to True if the route has no arguments, and its result should be displayed anytime its documentation is requested. Default to False tags: Further information on api endpoints. Two values are possibly expected: * hidden: remove the entry points from the listing * upcoming: display the entry point but it is not followable api_version: api version string """ tags_set = set(tags) # @api_doc() Decorator call def decorator(f): # if the route is not hidden, add it to the index if "hidden" not in tags_set: doc_data = get_doc_data(f, route, noargs) doc_desc = doc_data["description"] first_dot_pos = doc_desc.find(".") APIUrls.add_doc_route( route, doc_desc[: first_dot_pos + 1], noargs=noargs, api_version=api_version, tags=tags_set, ) # create a dedicated view to display endpoint HTML doc @api_view(["GET", "HEAD"]) @wraps(f) def doc_view(request): doc_data = get_doc_data(f, route, noargs) return make_api_response(request, None, doc_data) route_name = "%s-doc" % route[1:-1].replace("/", "-") urlpattern = f"^{api_version}{route}doc/$" view_name = "api-%s-%s" % (api_version, route_name) APIUrls.add_url_pattern(urlpattern, doc_view, view_name) @wraps(f) def documented_view(request, **kwargs): doc_data = get_doc_data(f, route, noargs) try: return {"data": f(request, **kwargs), "doc_data": doc_data} except Exception as exc: exc.doc_data = doc_data raise exc return documented_view return decorator @functools.lru_cache(maxsize=32) def get_doc_data(f, route, noargs): """ Build documentation data for the decorated api endpoint function """ data = { "description": "", "response_data": None, "urls": [], "args": [], "params": [], "input_type": "", "inputs": [], "resheaders": [], "reqheaders": [], "return_type": "", "returns": [], "status_codes": [], "examples": [], "route": route, "noargs": noargs, } if not f.__doc__: raise APIDocException( "apidoc: expected a docstring" " for function %s" % (f.__name__,) ) # use raw docstring as endpoint documentation if sphinx # httpdomain is not used if ".. http" not in f.__doc__: data["description"] = f.__doc__ # else parse the sphinx httpdomain docstring with docutils # (except when building the swh-web documentation through autodoc # sphinx extension, not needed and raise errors with sphinx >= 1.7) - elif "SWH_WEB_DOC_BUILD" not in os.environ: + elif "SWH_DOC_BUILD" not in os.environ: _parse_httpdomain_doc(f.__doc__, data) # process input/returned object info for nicer html display inputs_list = "" returns_list = "" for inp in data["inputs"]: # special case for array of non object type, for instance # :jsonarr string -: an array of string if ret["name"] != "-": returns_list += "\t* **%s (%s)**: %s\n" % ( ret["name"], ret["type"], ret["doc"], ) data["inputs_list"] = inputs_list data["returns_list"] = returns_list return data DOC_COMMON_HEADERS = """ :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request""" DOC_RESHEADER_LINK = """ :resheader Link: indicates that a subsequent result page is available and contains the url pointing to it """ DEFAULT_SUBSTITUTIONS = { "common_headers": DOC_COMMON_HEADERS, "resheader_link": DOC_RESHEADER_LINK, } def format_docstring(**substitutions): def decorator(f): f.__doc__ = f.__doc__.format(**{**DEFAULT_SUBSTITUTIONS, **substitutions}) return f return decorator diff --git a/swh/web/doc_config.py b/swh/web/doc_config.py deleted file mode 100644 index e56e006a..00000000 --- a/swh/web/doc_config.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (C) 2017-2018 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU Affero General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import importlib.util -import os - -from sphinx.ext import autodoc -from sphinxcontrib import httpdomain - -# guard to avoid ImportError when running tests through sbuild -# as there is no Debian package built for swh-docs -if importlib.util.find_spec("swh.docs"): - from swh.docs.sphinx.conf import setup as orig_setup - - -class SimpleDocumenter(autodoc.FunctionDocumenter): - """ - Custom autodoc directive to display a docstring unindented - and without function signature header. - """ - - objtype = "simple" - # ensure the priority is lesser than the base FunctionDocumenter - # to avoid side effects with autodoc processing - priority = -1 - - # do not indent the content - content_indent = "" - - # do not add a header to the docstring - def add_directive_header(self, sig): - pass - - -_swh_web_base_url = "https://archive.softwareheritage.org" -_swh_web_api_endpoint = "api" -_swh_web_api_version = 1 -_swh_web_api_url = "%s/%s/%s/" % ( - _swh_web_base_url, - _swh_web_api_endpoint, - _swh_web_api_version, -) - -_swh_web_browse_endpoint = "browse" -_swh_web_browse_url = "%s/%s/" % (_swh_web_base_url, _swh_web_browse_endpoint) - - -def setup(app): - orig_setup(app) - app.add_autodocumenter(SimpleDocumenter) - # set an environment variable indicating we are currently - # building the swh-web documentation - os.environ["SWH_WEB_DOC_BUILD"] = "1" - - -def customize_sphinx_conf(sphinx_conf): - """ - Utility function used to customize the sphinx doc build for swh-web - globally (when building doc from swh-docs) or locally (when building doc - from swh-web). - - Args: - sphinx_conf (module): a reference to the sphinx conf.py module - used to build the doc. - """ - # fix for sphinxcontrib.httpdomain 1.3 - if "Link" not in httpdomain.HEADER_REFS: - httpdomain.HEADER_REFS["Link"] = httpdomain.IETFRef(5988, "5") - sphinx_conf.extlinks["swh_web"] = (_swh_web_base_url + "/%s", None) - sphinx_conf.extlinks["swh_web_api"] = (_swh_web_api_url + "%s", None) - sphinx_conf.extlinks["swh_web_browse"] = (_swh_web_browse_url + "%s", None) - sphinx_conf.setup = setup