# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

"""Helpers mapping filenames and mime types to highlight.js language ids."""

import functools
import json
from typing import Any, Dict, List, Optional, Set

from pygments.lexers import get_all_lexers, get_lexer_for_filename
from pygments.util import ClassNotFound

from django.contrib.staticfiles.finders import find


@functools.lru_cache()
def _hljs_languages_data() -> Dict[str, Any]:
    """Load and cache the content of ``json/highlightjs-languages.json``.

    The file is located through the Django static files finders. The
    returned object is shared between all callers because of the cache,
    so it must be treated as read-only.
    """
    languages_path = str(find("json/highlightjs-languages.json"))
    with open(languages_path, "r", encoding="utf-8") as hljs_languages_file:
        return json.load(hljs_languages_file)


@functools.lru_cache()
def _hljs_languages() -> Set[str]:
    """Return the set of language ids that highlight.js can highlight."""
    return set(_hljs_languages_data()["languages"])


@functools.lru_cache()
def _hljs_languages_aliases() -> Dict[str, str]:
    """Return the mapping from language aliases to highlight.js language ids.

    The aliases declared in the highlight.js data file are completed with a
    few hard-coded ones, which take precedence on conflict.
    """
    # Build a filtered copy instead of popping from the cached data so that
    # the object owned by _hljs_languages_data() is never mutated.
    # NOTE(review): "robots.txt" is presumably dropped because it is handled
    # as a full filename in _filename_to_hljs_language -- confirm.
    language_aliases = {
        alias: language
        for alias, language in _hljs_languages_data()["languages_aliases"].items()
        if alias != "robots.txt"
    }
    return {
        **language_aliases,
        "ml": "ocaml",
        "bsl": "1c",
        "ep": "mojolicious",
        "lc": "livecode",
        "p": "parser3",
        "pde": "processing",
        "rsc": "routeros",
        "s": "armasm",
        "sl": "rsl",
        "4dm": "4d",
        "kaos": "chaos",
        "dfy": "dafny",
        "ejs": "eta",
        "nev": "never",
        "m": "octave",
        "shader": "hlsl",
        "fx": "hlsl",
        "prg": "xsharp",
        "xs": "xsharp",
    }


# dictionary mapping pygment lexers to hljs languages
_pygments_lexer_to_hljs_language: Dict[str, str] = {}


# dictionary mapping mime types to hljs languages
_mime_type_to_hljs_language = {
    "text/x-c": "c",
    "text/x-c++": "cpp",
    "text/x-msdos-batch": "dos",
    "text/x-lisp": "lisp",
    "text/x-shellscript": "bash",
}


# dictionary mapping filenames to hljs languages
_filename_to_hljs_language = {
    "cmakelists.txt": "cmake",
    ".htaccess": "apache",
    "httpd.conf": "apache",
    "access.log": "accesslog",
    "nginx.log": "accesslog",
    "resolv.conf": "dns",
    "dockerfile": "docker",
    "nginx.conf": "nginx",
    "pf.conf": "pf",
    "robots.txt": "robots-txt",
}


def _init_pygments_to_hljs_map() -> None:
    """Fill the lexer and mime type dictionaries from the Pygments lexers.

    For each Pygments lexer, the first of its aliases that is either an
    highlight.js language id or a known highlight.js alias selects the
    language. The lexer name and every mime type of the lexer not already
    present in ``_mime_type_to_hljs_language`` are then mapped to it.

    The work is skipped when ``_pygments_lexer_to_hljs_language`` is already
    populated.
    """
    if _pygments_lexer_to_hljs_language:
        return

    hljs_languages = _hljs_languages()
    hljs_languages_aliases = _hljs_languages_aliases()

    # get_all_lexers() yields (name, aliases, filenames, mimetypes) tuples
    for lexer_name, aliases, _patterns, mime_types in get_all_lexers():
        lang = None
        for lang_alias in aliases:
            if lang_alias in hljs_languages:
                lang = lang_alias
                break
            if lang_alias in hljs_languages_aliases:
                lang = hljs_languages_aliases[lang_alias]
                break
        if not lang:
            continue

        # Always store the resolved highlight.js id (not the raw Pygments
        # alias) so this map agrees with the mime type map filled below.
        _pygments_lexer_to_hljs_language[lexer_name] = lang
        for mime_type in mime_types:
            # hard-coded and previously registered mime types win
            _mime_type_to_hljs_language.setdefault(mime_type, lang)


def get_hljs_language_from_filename(filename: Optional[str]) -> Optional[str]:
    """Function that tries to associate a language supported by highlight.js
    from a filename.

    The lookup is case-insensitive and tries, in order: the full filename,
    the last two dot-separated components of the filename (so ``.ext.in``
    files are handled), then the Pygments lexer found for the filename.

    Args:
        filename: input filename

    Returns:
        highlight.js language id or None if no correspondence has been found
    """
    _init_pygments_to_hljs_map()

    # Nothing can be inferred from an empty or missing filename; returning
    # early also keeps such values away from the Pygments lookup below.
    if not filename:
        return None

    hljs_languages = _hljs_languages()
    hljs_languages_aliases = _hljs_languages_aliases()

    filename_lower = filename.lower()
    if filename_lower in _filename_to_hljs_language:
        return _filename_to_hljs_language[filename_lower]
    if filename_lower in hljs_languages:
        return filename_lower

    # check if file extension matches an hljs language
    # also handle .ext.in cases
    for ext in reversed(filename_lower.split(".")[-2:]):
        if ext in hljs_languages:
            return ext
        if ext in hljs_languages_aliases:
            return hljs_languages_aliases[ext]

    # otherwise use Pygments language database
    try:
        lexer = get_lexer_for_filename(filename)
    except ClassNotFound:
        # no Pygments lexer knows that filename
        return None

    # if there is a correspondence between the lexer and an hljs
    # language, return it
    if lexer.name in _pygments_lexer_to_hljs_language:
        return _pygments_lexer_to_hljs_language[lexer.name]

    # otherwise, try to find a match between the file extensions
    # associated to the lexer and the hljs language aliases
    for pattern in lexer.filenames:
        ext = pattern.replace("*.", "")
        if ext in hljs_languages_aliases:
            return hljs_languages_aliases[ext]

    return None


def get_hljs_language_from_mime_type(mime_type: Optional[str]) -> Optional[str]:
    """Function that tries to associate a language supported by highlight.js
    from a mime type.

    Args:
        mime_type: input mime type

    Returns:
        highlight.js language id or None if no correspondence has been found
    """
    _init_pygments_to_hljs_map()
    if not mime_type:
        return None
    return _mime_type_to_hljs_language.get(mime_type)


@functools.lru_cache()
def get_supported_languages() -> List[str]:
    """
    Return the list of programming languages that can be highlighted using the
    highlight.js library.

    The result is cached, so the same list object is handed to every caller
    and should not be modified in place.

    Returns:
        List[str]: the list of supported languages
    """
    return sorted(_hljs_languages())