diff --git a/swh/web/api/apidoc.py b/swh/web/api/apidoc.py --- a/swh/web/api/apidoc.py +++ b/swh/web/api/apidoc.py @@ -24,10 +24,11 @@ class _HTTPDomainDocVisitor(docutils.nodes.NodeVisitor): """ - docutils visitor for walking on a parsed rst document containing sphinx + docutils visitor for walking on a parsed docutils document containing sphinx httpdomain roles. Its purpose is to extract relevant info regarding swh api endpoints (for instance url arguments) from their docstring written - using sphinx httpdomain. + using sphinx httpdomain; and produce the main description back into a ReST + string """ # httpdomain roles we want to parse (based on sphinxcontrib.httpdomain 1.6) @@ -62,44 +63,35 @@ self.field_list_visited = False self.current_json_obj = None - def process_paragraph(self, par): - """ - Process extracted paragraph text before display. - Cleanup document model markups and transform the - paragraph into a valid raw rst string (as the apidoc - documentation transform rst to html when rendering). - """ - par = par.replace("\n", " ") - # keep emphasized, strong and literal text - par = par.replace("", "*") - par = par.replace("", "*") - par = par.replace("", "**") - par = par.replace("", "**") - par = par.replace("", "``") - par = par.replace("", "``") - # keep links to web pages - if "', - r"`\1 <\2>`_", - par, - ) - # remove parsed document markups but keep rst links - par = re.sub(r"<[^<]+?>(?!`_)", "", par) - # api urls cleanup to generate valid links afterwards - subs_made = 1 - while subs_made: - (par, subs_made) = re.subn(r"(:http:.*)(\(\w+\))", r"\1", par) - subs_made = 1 - while subs_made: - (par, subs_made) = re.subn(r"(:http:.*)(\[.*\])", r"\1", par) - par = re.sub(r"([^:])//", r"\1/", par) - # transform references to api endpoints doc into valid rst links - par = re.sub(":http:get:`([^,`]*)`", r"`\1 <\1doc/>`_", par) - # transform references to some elements into bold text - par = re.sub(":http:header:`(.*)`", r"**\1**", par) - par = re.sub(":func:`(.*)`", r"**\1**", par) - return par + def _default_visit(self, node: docutils.nodes.Element) -> str: + """Simply visits a text node, drops its start and end tags, visits + the children, and concatenates their results.""" + return "".join(map(self.dispatch_visit, node.children)) + + def visit_emphasis(self, node: docutils.nodes.emphasis) -> str: + return f"*{self._default_visit(node)}*" + + def visit_strong(self, node: docutils.nodes.emphasis) -> str: + return f"**{self._default_visit(node)}**" + + def visit_reference(self, node: docutils.nodes.reference) -> str: + text = self._default_visit(node) + refuri = node.attributes.get("refuri") + if refuri is not None: + return f"`{text} <{refuri}>`__" + else: + return f"`{text}`_" + + def visit_target(self, node: docutils.nodes.reference) -> str: + parts = ["\n"] + parts.extend( + f".. _{name}: {node.attributes['refuri']}" + for name in node.attributes["names"] + ) + return "\n".join(parts) + + def visit_literal(self, node: docutils.nodes.literal) -> str: + return f"``{self._default_visit(node)}``" def visit_field_list(self, node): """ @@ -108,12 +100,17 @@ """ self.field_list_visited = True for child in node.traverse(): + # TODO: instead of traversing recursively, we should inspect the children + # directly (they can be and directly, or + # a node containing both) + # get the parsed field name if isinstance(child, docutils.nodes.field_name): field_name = child.astext() # parse field text - elif isinstance(child, docutils.nodes.paragraph): - text = self.process_paragraph(str(child)) + elif isinstance(child, docutils.nodes.field_body): + text = self._default_visit(child).strip() + assert text, str(child) field_data = field_name.split(" ") # Parameters if field_data[0] in self.parameter_roles: @@ -191,57 +188,99 @@ ): self.data["return_type"] = "octet stream" - def visit_paragraph(self, node): + # Don't return anything in the description; these nodes only add text + # to other fields + return "" + + # visit_field_list collects and handles these with a more global view: + visit_field = visit_field_name = visit_field_body = _default_visit + + def visit_paragraph(self, node: docutils.nodes.paragraph) -> str: """ Visit relevant paragraphs to parse """ # only parsed top level paragraphs - if isinstance(node.parent, docutils.nodes.block_quote): - text = self.process_paragraph(str(node)) - # endpoint description - if not text.startswith("**") and text not in self.data["description"]: - self.data["description"] += "\n\n" if self.data["description"] else "" - self.data["description"] += text - - def visit_literal_block(self, node): + text = self._default_visit(node) + + return "\n\n" + text + + def visit_literal_block(self, node: docutils.nodes.literal_block) -> str: """ Visit literal blocks """ text = node.astext() - # literal block in endpoint description - if not self.field_list_visited: - self.data["description"] += ":\n\n%s\n" % textwrap.indent(text, "\t") + + return f"\n\n::\n\n{textwrap.indent(text, ' ')}\n" + + def visit_bullet_list(self, node: docutils.nodes.bullet_list) -> str: + parts = ["\n\n"] + for child in node.traverse(): + # process list item + if isinstance(child, docutils.nodes.paragraph): + line_text = self.dispatch_visit(child) + parts.append("\t* %s\n" % textwrap.indent(line_text, "\t ").strip()) + return "".join(parts) + + # visit_bullet_list collects and handles this with a more global view: + visit_list_item = _default_visit + + def visit_warning(self, node: docutils.nodes.warning) -> str: + text = self._default_visit(node) + return "\n\n.. warning::\n%s\n" % textwrap.indent(text, "\t") + + def visit_Text(self, node: docutils.nodes.Text) -> str: + """Leaf node""" + return str(node).replace("\n", " ") # Prettier in generated HTML + + def visit_problematic(self, node: docutils.nodes.problematic) -> str: + # api urls cleanup to generate valid links afterwards + text = self._default_visit(node) + subs_made = 1 + while subs_made: + (text, subs_made) = re.subn(r"(:http:.*)(\(\w+\))", r"\1", text) + subs_made = 1 + while subs_made: + (text, subs_made) = re.subn(r"(:http:.*)(\[.*\])", r"\1", text) + text = re.sub(r"([^:])//", r"\1/", text) + # transform references to api endpoints doc into valid rst links + text = re.sub(":http:get:`([^,`]*)`", r"`\1 <\1doc/>`_", text) + # transform references to some elements into bold text + text = re.sub(":http:header:`(.*)`", r"**\1**", text) + text = re.sub(":func:`(.*)`", r"**\1**", text) + # extract example urls if ":swh_web_api:" in text: - examples_str = re.sub(".*`(.+)`.*", r"/api/1/\1", text) + # Extract examples to their own section + examples_str = re.sub(":swh_web_api:`(.+)`.*", r"/api/1/\1", text) self.data["examples"] += examples_str.split("\n") + return text - def visit_bullet_list(self, node): - # bullet list in endpoint description - if not self.field_list_visited: - self.data["description"] += "\n\n" - for child in node.traverse(): - # process list item - if isinstance(child, docutils.nodes.paragraph): - line_text = self.process_paragraph(str(child)) - self.data["description"] += "\t* %s\n" % line_text - elif self.current_json_obj: - self.current_json_obj["doc"] += "\n\n" - for child in node.traverse(): - # process list item - if isinstance(child, docutils.nodes.paragraph): - line_text = self.process_paragraph(str(child)) - self.current_json_obj["doc"] += "\t\t* %s\n" % line_text - self.current_json_obj = None - - def visit_warning(self, node): - text = self.process_paragraph(str(node)) - rst_warning = "\n\n.. warning::\n%s\n" % textwrap.indent(text, "\t") - if rst_warning not in self.data["description"]: - self.data["description"] += rst_warning - - def unknown_visit(self, node): - pass + def visit_block_quote(self, node: docutils.nodes.block_quote) -> str: + return self._default_visit(node) + return ( + f".. code-block::\n" + f"{textwrap.indent(self._default_visit(node), ' ')}\n" + ) + + def visit_title_reference(self, node: docutils.nodes.title_reference) -> str: + text = self._default_visit(node) + raise Exception( + f"Unexpected title reference. " + f"Possible cause: you used `{text}` instead of ``{text}``" + ) + + def visit_document(self, node: docutils.nodes.document) -> None: + text = self._default_visit(node) + + # Strip examples; they are displayed separately + text = re.split("\n\\*\\*Examples?:\\*\\*\n", text)[0] + + self.data["description"] = text.strip() + + def unknown_visit(self, node) -> str: + raise NotImplementedError( + f"Unknown node type: {node.__class__.__name__}. Value: {node}" + ) def unknown_departure(self, node): pass @@ -316,10 +355,9 @@ if "hidden" not in tags_set: doc_data = get_doc_data(f, route, noargs) doc_desc = doc_data["description"] - first_dot_pos = doc_desc.find(".") APIUrls.add_doc_route( route, - doc_desc[: first_dot_pos + 1], + re.split(r"\.\s", doc_desc)[0], noargs=noargs, api_version=api_version, tags=tags_set, @@ -399,7 +437,7 @@ inputs_list += "\t* **%s (%s)**: %s\n" % ( inp["name"], inp["type"], - inp["doc"], + textwrap.indent(inp["doc"], "\t "), ) for ret in data["returns"]: # special case for array of non object type, for instance @@ -408,7 +446,7 @@ returns_list += "\t* **%s (%s)**: %s\n" % ( ret["name"], ret["type"], - ret["doc"], + textwrap.indent(ret["doc"], "\t "), ) data["inputs_list"] = inputs_list data["returns_list"] = returns_list diff --git a/swh/web/api/views/content.py b/swh/web/api/views/content.py --- a/swh/web/api/views/content.py +++ b/swh/web/api/views/content.py @@ -32,7 +32,7 @@ :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not - provided, it is assumed that the hashing algorithm used is `sha1`. + provided, it is assumed that the hashing algorithm used is ``sha1``. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. diff --git a/swh/web/api/views/metadata.py b/swh/web/api/views/metadata.py --- a/swh/web/api/views/metadata.py +++ b/swh/web/api/views/metadata.py @@ -31,13 +31,11 @@ """ .. http:get:: /api/1/raw-extrinsic-metadata/swhid/(target) - Returns raw `extrinsic metadata`_ collected on a given object. - - .. _extrinsic metadata: https://docs.softwareheritage.org/devel/glossary.html#term-extrinsic-metadata + Returns raw `extrinsic metadata `__ collected on a given object. :param string target: The SWHID of the object whose metadata should be returned :query string authority: A metadata authority identifier, formatted as - ` `. Required. + `` ``. Required. :query string after: An ISO representation of the minimum timestamp of metadata to fetch. Defaults to allowing all metadata. :query int limit: Maximum number of metadata objects to return. @@ -203,11 +201,9 @@ Returns a list of metadata authorities that provided metadata on the given target. - They can then be used to get the raw `extrinsic metadata`_ collected on + They can then be used to get the raw `extrinsic metadata `__ collected on that object from each of the authorities. - .. _extrinsic metadata: https://docs.softwareheritage.org/devel/glossary.html#term-extrinsic-metadata - :param string target: The SWHID of the object whose metadata-providing authorities should be returned diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py --- a/swh/web/api/views/origin.py +++ b/swh/web/api/views/origin.py @@ -63,7 +63,7 @@ .. warning:: - This endpoint used to provide an `origin_from` query parameter, + This endpoint used to provide an ``origin_from`` query parameter, and guarantee an order on results. This is no longer true, and only the Link header should be used for paginating through results. @@ -162,7 +162,7 @@ .. warning:: - This endpoint used to provide an `offset` query parameter, + This endpoint used to provide an ``offset`` query parameter, and guarantee an order on results. This is no longer true, and only the Link header should be used for paginating through results. diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -336,6 +336,8 @@ """ settings = { "initial_header_level": 2, + "halt_level": 4, + "traceback": True, } pp = publish_parts(rst, writer=_HTML_WRITER, settings_overrides=settings) return f'
{pp["html_body"]}
' diff --git a/swh/web/tests/api/test_apidoc.py b/swh/web/tests/api/test_apidoc.py --- a/swh/web/tests/api/test_apidoc.py +++ b/swh/web/tests/api/test_apidoc.py @@ -61,7 +61,7 @@ :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive - **Request:** + **Example:** .. parsed-literal:: @@ -402,7 +402,7 @@ " \n" " <swhid> (object)\n" " \n" - " : an object whose keys are input SWHIDs" + " : an object whose keys are input SWHIDs" " and values objects with the following keys:\n" "

\n" "
\n"