diff --git a/mypy.ini b/mypy.ini --- a/mypy.ini +++ b/mypy.ini @@ -33,9 +33,6 @@ [mypy-pygments.*] ignore_missing_imports = True -[mypy-pypandoc.*] -ignore_missing_imports = True - [mypy-pytest.*] ignore_missing_imports = True diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,6 @@ lxml prometheus_client pygments -pypandoc python-dateutil pyyaml requests diff --git a/swh/web/assets/src/bundles/webapp/webapp.css b/swh/web/assets/src/bundles/webapp/webapp.css --- a/swh/web/assets/src/bundles/webapp/webapp.css +++ b/swh/web/assets/src/bundles/webapp/webapp.css @@ -442,6 +442,21 @@ background-image: linear-gradient(to left, #f0f0f0, #8c8b8b, #f0f0f0); } +/* Ensure that section title with link is colored like standard section title */ +.swh-readme h1 a, +.swh-readme h2 a, +.swh-readme h3 a, +.swh-readme h4 a, +.swh-readme h5 a, +.swh-readme h6 a { + color: #e20026; +} + +/* Make list compact in reStructuredText rendering */ +.swh-rst li p { + margin-bottom: 0; +} + .swh-readme-txt pre { background: none; border: none; diff --git a/swh/web/browse/utils.py b/swh/web/browse/utils.py --- a/swh/web/browse/utils.py +++ b/swh/web/browse/utils.py @@ -5,7 +5,6 @@ import base64 import magic -import pypandoc import stat import textwrap @@ -23,7 +22,7 @@ from swh.web.common.origin_visits import get_origin_visit from swh.web.common.utils import ( reverse, format_utc_iso_date, get_swh_persistent_id, - swh_object_icons + swh_object_icons, rst_to_html ) from swh.web.config import get_config @@ -1049,8 +1048,7 @@ else: try: rst_doc = request_content(readme_sha1) - readme_html = pypandoc.convert_text(rst_doc['raw_data'], - 'html', format='rst') + readme_html = rst_to_html(rst_doc['raw_data']) cache.set(cache_entry_id, readme_html) except Exception as exc: sentry_sdk.capture_exception(exc) diff --git a/swh/web/common/swh_templatetags.py b/swh/web/common/swh_templatetags.py --- a/swh/web/common/swh_templatetags.py +++ b/swh/web/common/swh_templatetags.py @@ -3,53 +3,30 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -from inspect import cleandoc import json import re +from inspect import cleandoc + from django import template from django.core.serializers.json import DjangoJSONEncoder from django.utils.safestring import mark_safe -from docutils.core import publish_parts -from docutils.writers.html4css1 import Writer, HTMLTranslator - import sentry_sdk from swh.web.common.origin_save import get_savable_visit_types +from swh.web.common.utils import rst_to_html register = template.Library() -class NoHeaderHTMLTranslator(HTMLTranslator): - """ - Docutils translator subclass to customize the generation of HTML - from reST-formatted docstrings - """ - def __init__(self, document): - super().__init__(document) - self.body_prefix = [] - self.body_suffix = [] - - def visit_bullet_list(self, node): - self.context.append((self.compact_simple, self.compact_p)) - self.compact_p = None - self.compact_simple = self.is_compactable(node) - self.body.append(self.starttag(node, 'ul', CLASS='docstring')) - - -DOCSTRING_WRITER = Writer() -DOCSTRING_WRITER.translator_class = NoHeaderHTMLTranslator - - @register.filter def safe_docstring_display(docstring): """ Utility function to htmlize reST-formatted documentation in browsable api. """ - docstring = cleandoc(docstring) - return publish_parts(docstring, writer=DOCSTRING_WRITER)['html_body'] + return rst_to_html(cleandoc(docstring)) @register.filter diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -3,8 +3,6 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -import docutils.parsers.rst -import docutils.utils import re from datetime import datetime, timezone @@ -13,6 +11,12 @@ from typing import Optional, Dict, Any +import docutils.parsers.rst +import docutils.utils + +from docutils.core import publish_parts +from docutils.writers.html5_polyglot import Writer, HTMLTranslator + from django.urls import reverse as django_reverse from django.http import QueryDict, HttpRequest @@ -349,6 +353,7 @@ Helper class to enforce CSRF validation on a DRF view when a user is not authenticated. """ + def authenticate(self, request): user = getattr(request._request, 'user', None) self.enforce_csrf(request) @@ -424,12 +429,12 @@ not be parsed. """ pids_by_type = { - CONTENT: [], - DIRECTORY: [], - REVISION: [], - RELEASE: [], - SNAPSHOT: [] - } + CONTENT: [], + DIRECTORY: [], + REVISION: [], + RELEASE: [], + SNAPSHOT: [] + } for pid in persistent_ids: obj_id = pid.object_id @@ -437,3 +442,38 @@ pids_by_type[obj_type].append(hash_to_bytes(obj_id)) return pids_by_type + + +class _NoHeaderHTMLTranslator(HTMLTranslator): + """ + Docutils translator subclass to customize the generation of HTML + from reST-formatted docstrings + """ + + def __init__(self, document): + super().__init__(document) + self.body_prefix = [] + self.body_suffix = [] + + +_HTML_WRITER = Writer() +_HTML_WRITER.translator_class = _NoHeaderHTMLTranslator + + +def rst_to_html(rst: str) -> str: + """ + Convert reStructuredText document into HTML. + + Args: + rst: A string containing a reStructuredText document + + Returns: + Body content of the produced HTML conversion. + + """ + settings = { + 'initial_header_level': 2, + } + pp = publish_parts(rst, writer=_HTML_WRITER, + settings_overrides=settings) + return f'
{pp["html_body"]}
' diff --git a/swh/web/templates/includes/readme-display.html b/swh/web/templates/includes/readme-display.html --- a/swh/web/templates/includes/readme-display.html +++ b/swh/web/templates/includes/readme-display.html @@ -13,25 +13,25 @@

{{ readme_name }}

-
+
{% if readme_html %} {% elif readme_name.lower == 'readme' or readme_name.lower == 'readme.txt' %} {% elif readme_name.lower == 'readme.org' %} {% else %} {% endif %} diff --git a/swh/web/tests/common/test_templatetags.py b/swh/web/tests/common/test_templatetags.py --- a/swh/web/tests/common/test_templatetags.py +++ b/swh/web/tests/common/test_templatetags.py @@ -3,8 +3,6 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -import pytest - from swh.web.common.swh_templatetags import ( urlize_links_and_mails, urlize_header_links, safe_docstring_display ) @@ -38,9 +36,6 @@ assert urlize_header_links(content) == expected_content -# remove deprecation warnings related to docutils -@pytest.mark.filterwarnings( - 'ignore:.*U.*mode is deprecated:DeprecationWarning') def test_safe_docstring_display(): # update api link with html links content with links docstring = ( @@ -52,13 +47,15 @@ ) expected_docstring = ( + '
' '

This is my list header:

\n' - '
' ) assert safe_docstring_display(docstring) == expected_docstring diff --git a/swh/web/tests/common/test_utils.py b/swh/web/tests/common/test_utils.py --- a/swh/web/tests/common/test_utils.py +++ b/swh/web/tests/common/test_utils.py @@ -112,3 +112,44 @@ with pytest.raises(BadInputExc) as e: utils.get_swh_persistent_id(swh_object_type, 'not a valid id') assert e.match('Invalid object') + + +def test_rst_to_html(): + rst = ( + 'Section\n' + '=======\n\n' + '**Some strong text**\n\n' + 'Subsection\n' + '----------\n\n' + '* This is a bulleted list.\n' + '* It has two items, the second\n' + ' item uses two lines.\n' + '\n' + '1. This is a numbered list.\n' + '2. It has two items too.\n' + '\n' + '#. This is a numbered list.\n' + '#. It has two items too.\n' + ) + + expected_html = ( + '

Section

\n' + '

Some strong text

\n' + '
\n' + '

Subsection

\n' + '\n' + '
    \n' + '
  1. This is a numbered list.

  2. \n' + '
  3. It has two items too.

  4. \n' + '
  5. This is a numbered list.

  6. \n' + '
  7. It has two items too.

  8. \n' + '
\n' + '
\n' + '
' + ) + + assert utils.rst_to_html(rst) == expected_html