diff --git a/swh/web/common/highlightjs.py b/swh/web/common/highlightjs.py index 90da42af..f365f5a5 100644 --- a/swh/web/common/highlightjs.py +++ b/swh/web/common/highlightjs.py @@ -1,301 +1,301 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from pygments.lexers import ( get_all_lexers, get_lexer_for_filename ) # set of languages ids that can be highlighted # by highlight.js library _hljs_languages = set([ '1c', 'abnf', 'accesslog', 'actionscript', 'ada', 'apache', 'applescript', 'arduino', 'armasm', 'asciidoc', 'aspectj', 'autohotkey', 'autoit', 'avrasm', 'awk', 'axapta', 'bash', 'basic', 'bnf', 'brainfuck', 'cal', 'capnproto', 'ceylon', 'clean', 'clojure', 'clojure-repl', 'cmake', 'coffeescript', 'coq', 'cos', 'cpp', 'crmsh', 'crystal', 'cs', 'csp', 'css', 'dart', 'delphi', 'diff', 'django', 'd', 'dns', 'dockerfile', 'dos', 'dsconfig', 'dts', 'dust', 'ebnf', 'elixir', 'elm', 'erb', 'erlang', 'erlang-repl', 'excel', 'fix', 'flix', 'fortran', 'fsharp', 'gams', 'gauss', 'gcode', 'gherkin', 'glsl', 'go', 'golo', 'gradle', 'groovy', 'haml', 'handlebars', 'haskell', 'haxe', 'hsp', 'htmlbars', 'http', 'hy', 'inform7', 'ini', 'irpf90', 'java', 'javascript', 'jboss-cli', 'json', 'julia', 'julia-repl', 'kotlin', 'lasso', 'ldif', 'leaf', 'less', 'lisp', 'livecodeserver', 'livescript', 'llvm', 'lsl', 'lua', 'makefile', 'markdown', 'mathematica', 'matlab', 'maxima', 'mel', 'mercury', 'mipsasm', 'mizar', 'mojolicious', 'monkey', 'moonscript', 'n1ql', 'nginx', 'nimrod', 'nix', 'nsis', 'objectivec', 'ocaml', 'openscad', 'oxygene', 'parser3', 'perl', 'pf', 'php', 'pony', 'powershell', 'processing', 'profile', 'prolog', 'protobuf', 'puppet', 'purebasic', 'python', 'q', 'qml', 'rib', 'r', 'roboconf', 'routeros', 'rsl', 'ruby', 'ruleslanguage', 'rust', 'scala', 'scheme', 'scilab', 'scss', 'shell', 'smali', 'smalltalk', 'sml', 'sqf', 'sql', 'stan', 'stata', 'step21', 'stylus', 'subunit', 'swift', 'taggerscript', 'tap', 'tcl', 'tex', 'thrift', 'tp', 'twig', 'typescript', 'vala', 'vbnet', 'vbscript-html', 'vbscript', 'verilog', 'vhdl', 'vim', 'x86asm', 'xl', 'xml', 'xquery', 'yaml', 'zephir', ]) # languages aliases defined in highlight.js _hljs_languages_aliases = { 'ado': 'stata', 'adoc': 'asciidoc', 'ahk': 'autohotkey', 'apacheconf': 'apache', 'arm': 'armasm', 'as': 'actionscript', 'atom': 'xml', 'bat': 'dos', 'bf': 'brainfuck', 'bind': 'dns', 'c': 'cpp', 'c++': 'cpp', 'capnp': 'capnproto', 'cc': 'cpp', 'clean': 'clean', 'clj': 'clojure', 'cls': 'cos', 'cmake.in': 'cmake', 'cmd': 'dos', 'coffee': 'coffeescript', 'console': 'shell', 'cos': 'cos', 'cr': 'crystal', 'craftcms': 'twig', 'crm': 'crmsh', 'csharp': 'cs', 'cson': 'coffeescript', 'dcl': 'clean', 'desktop': 'ini', 'dfm': 'delphi', 'do': 'stata', 'docker': 'dockerfile', 'dpr': 'delphi', 'dst': 'dust', 'el': 'lisp', 'erl': 'erlang', 'f90': 'fortran', 'f95': 'fortran', 'feature': 'gherkin', 'freepascal': 'delphi', 'fs': 'fsharp', 'gemspec': 'ruby', 'gms': 'gams', 'golang': 'go', 'graph': 'roboconf', 'gss': 'gauss', 'gyp': 'python', 'h': 'cpp', 'h++': 'cpp', 'hbs': 'handlebars', 'hpp': 'cpp', 'hs': 'haskell', 'html': 'xml', 'html.handlebars': 'handlebars', 'html.hbs': 'handlebars', 'https': 'http', 'hx': 'haxe', 'hylang': 'hy', 'i7': 'inform7', 'iced': 'coffeescript', 'icl': 'clean', 'instances': 'roboconf', 'ipynb': 'json', 'irb': 'ruby', 'jinja': 'django', 'js': 'javascript', 'jsp': 'java', 'jsx': 'javascript', 'k': 'q', 'kdb': 'q', 'lassoscript': 'lasso', 'lazarus': 'delphi', 'lfm': 'delphi', 'lpr': 'delphi', 'ls': 'livescript', 'm': 'objectivec', 'mak': 'makefile', 'md': 'markdown', 'mikrotik': 'routeros', 'mips': 'mipsasm', 'mk': 'makefile', 'mkd': 'markdown', 'mkdown': 'markdown', 'markdown': 'markdown', 'ml': 'ocaml', 'mm': 'objectivec', 'mma': 'mathematica', 'moo': 'mercury', 'moon': 'moonscript', 'nc': 'gcode', 'nginxconf': 'nginx', 'nim': 'nimrod', 'nixos': 'nix', 'obj-c': 'objectivec', 'objc': 'objectivec', 'osascript': 'applescript', 'p21': 'step21', 'pas': 'delphi', 'pascal': 'delphi', 'patch': 'diff', 'pb': 'purebasic', 'pbi': 'purebasic', 'pcmk': 'crmsh', 'pf.conf': 'pf', 'php3': 'php', 'php4': 'php', 'php5': 'php', 'php6': 'php', 'pl': 'perl', 'plist': 'xml', 'pm': 'perl', 'podspec': 'ruby', 'pp': 'puppet', 'ps': 'powershell', 'py': 'python', 'qrc': 'xml', 'qs': 'javascript', 'qt': 'qml', 'rb': 'ruby', 'routeros': 'routeros', 'rs': 'rust', 'rst': 'nohighlight-swh', 'rss': 'xml', 'ru': 'ruby', 'scad': 'openscad', 'sci': 'scilab', 'scpt': 'applescript', 'sh': 'bash', 'smali': 'smali', 'sqf': 'sqf', 'st': 'smalltalk', 'step': 'step21', 'stp': 'step21', 'styl': 'stylus', 'sv': 'verilog', 'svh': 'verilog', 'tao': 'xl', 'thor': 'ruby', 'tk': 'tcl', 'toml': 'ini', 'ui': 'xml', 'v': 'verilog', 'vb': 'vbnet', 'vbs': 'vbscript', 'wildfly-cli': 'jboss-cli', 'xhtml': 'xml', 'xjb': 'xml', 'xls': 'excel', 'xlsx': 'excel', 'xpath': 'xquery', 'xq': 'xquery', 'xsd': 'xml', 'xsl': 'xml', 'yaml': 'yaml', 'yml': 'yaml', 'zep': 'zephir', 'zone': 'dns', 'zsh': 'bash' } -# dictionnary mapping pygment lexers to hljs languages +# dictionary mapping pygment lexers to hljs languages _pygments_lexer_to_hljs_language = {} -# dictionnary mapping mime types to hljs languages +# dictionary mapping mime types to hljs languages _mime_type_to_hljs_language = { 'text/x-c': 'cpp', 'text/x-c++': 'cpp', 'text/x-msdos-batch': 'dos', 'text/x-lisp': 'lisp', 'text/x-shellscript': 'bash', } # function to fill the above dictionnaries def _init_pygments_to_hljs_map(): if len(_pygments_lexer_to_hljs_language) == 0: for lexer in get_all_lexers(): lexer_name = lexer[0] lang_aliases = lexer[1] lang_mime_types = lexer[3] lang = None for lang_alias in lang_aliases: if lang_alias in _hljs_languages: lang = lang_alias _pygments_lexer_to_hljs_language[lexer_name] = lang_alias break if lang: for lang_mime_type in lang_mime_types: _mime_type_to_hljs_language[lang_mime_type] = lang def get_hljs_language_from_filename(filename): """Function that tries to associate a language supported by highlight.js from a filename. Args: filename: input filename Returns: highlight.js language id or None if no correspondance has been found """ _init_pygments_to_hljs_map() if filename: exts = filename.lower().split('.') # check if file extension matches an hljs language # also handle .ext.in cases for ext in reversed(exts[-2:]): if ext in _hljs_languages: return ext if ext in _hljs_languages_aliases: return _hljs_languages_aliases[ext] # otherwise use Pygments language database lexer = None # try to find a Pygment lexer try: lexer = get_lexer_for_filename(filename) except: pass # if there is a correspondance between the lexer and an hljs # language, return it if lexer and lexer.name in _pygments_lexer_to_hljs_language: return _pygments_lexer_to_hljs_language[lexer.name] # otherwise, try to find a match between the file extensions # associated to the lexer and the hljs language aliases if lexer: exts = [ext.replace('*.', '') for ext in lexer.filenames] for ext in exts: if ext in _hljs_languages_aliases: return _hljs_languages_aliases[ext] return None def get_hljs_language_from_mime_type(mime_type): """Function that tries to associate a language supported by highlight.js from a mime type. Args: mime_type: input mime type Returns: highlight.js language id or None if no correspondance has been found """ _init_pygments_to_hljs_map() if mime_type and mime_type in _mime_type_to_hljs_language: return _mime_type_to_hljs_language[mime_type] return None diff --git a/swh/web/common/throttling.py b/swh/web/common/throttling.py index 1141745e..e19a70a5 100644 --- a/swh/web/common/throttling.py +++ b/swh/web/common/throttling.py @@ -1,130 +1,130 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import ipaddress from rest_framework.throttling import ScopedRateThrottle from swh.web.config import get_config class SwhWebRateThrottle(ScopedRateThrottle): """Custom request rate limiter for DRF enabling to exempt specific networks specified in swh-web configuration. Requests are grouped into scopes. It enables to apply different requests rate limiting based on the scope name but also the input HTTP request types. To associate a scope to requests, one must add a 'throttle_scope' attribute when using a class based view, or call the 'throttle_scope' decorator when using a function based view. By default, requests do not have an associated scope and are not rate limited. Rate limiting can also be configured according to the type of the input HTTP requests for fine grained tuning. For instance, the following YAML configuration section sets a rate of: - 1 per minute for POST requests - 60 per minute for other request types - for the 'swh_api' scope while exempting those comming from the + for the 'swh_api' scope while exempting those coming from the 127.0.0.0/8 ip network. .. code-block:: yaml throttling: scopes: swh_api: limiter_rate: default: 60/m POST: 1/m exempted_networks: - 127.0.0.0/8 """ scope = None def __init__(self): super().__init__() self.exempted_networks = None def get_exempted_networks(self, scope_name): if not self.exempted_networks: scopes = get_config()['throttling']['scopes'] scope = scopes.get(scope_name) if scope: networks = scope.get('exempted_networks') if networks: self.exempted_networks = [ipaddress.ip_network(network) for network in networks] return self.exempted_networks def allow_request(self, request, view): # class based view case if not self.scope: default_scope = getattr(view, self.scope_attr, None) # check if there is a specific rate limiting associated # to the request type try: request_scope = default_scope + '_' + request.method.lower() setattr(view, self.scope_attr, request_scope) request_allowed = \ super(SwhWebRateThrottle, self).allow_request(request, view) # noqa setattr(view, self.scope_attr, default_scope) # use default rate limiting otherwise except: setattr(view, self.scope_attr, default_scope) request_allowed = \ super(SwhWebRateThrottle, self).allow_request(request, view) # noqa # function based view case else: default_scope = self.scope # check if there is a specific rate limiting associated # to the request type try: self.scope = default_scope + '_' + request.method.lower() self.rate = self.get_rate() # use default rate limiting otherwise except: self.scope = default_scope self.rate = self.get_rate() self.num_requests, self.duration = self.parse_rate(self.rate) request_allowed = \ super(ScopedRateThrottle, self).allow_request(request, view) self.scope = default_scope exempted_networks = self.get_exempted_networks(default_scope) if exempted_networks: remote_address = ipaddress.ip_address(self.get_ident(request)) return any(remote_address in network for network in exempted_networks) or \ request_allowed return request_allowed def throttle_scope(scope): """Decorator that allows the throttle scope of a DRF function based view to be set:: @api_view(['GET', ]) @throttle_scope('scope') def view(request): ... """ def decorator(func): SwhScopeRateThrottle = type( 'CustomScopeRateThrottle', (SwhWebRateThrottle,), {'scope': scope} ) func.throttle_classes = (SwhScopeRateThrottle, ) return func return decorator diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py index 0c240966..32085769 100644 --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -1,182 +1,182 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import re from datetime import datetime, timezone from dateutil import parser as date_parser from dateutil import tz from swh.web.common.exc import BadInputExc from django.core import urlresolvers from django.http import QueryDict def reverse(viewname, args=None, kwargs=None, query_params=None, current_app=None, urlconf=None): """An override of django reverse function supporting query parameters. Args: viewname: the name of the django view from which to compute a url args: list of url arguments ordered according to their position it - kwargs: dictionnary of url arguments indexed by their names - query_params: dictionnary of query parameters to append to the + kwargs: dictionary of url arguments indexed by their names + query_params: dictionary of query parameters to append to the reversed url current_app: the name of the django app tighted to the view urlconf: url configuration module Returns: The url of the requested view with processed arguments and query parameters """ url = urlresolvers.reverse( viewname, urlconf=urlconf, args=args, kwargs=kwargs, current_app=current_app) if query_params: query_params = {k: v for k, v in query_params.items() if v is not None} if query_params and len(query_params) > 0: query_dict = QueryDict('', mutable=True) for k in sorted(query_params.keys()): query_dict[k] = query_params[k] url += ('?' + query_dict.urlencode(safe='/')) return url def fmap(f, data): """Map f to data at each level. This must keep the origin data structure type: - map -> map - dict -> dict - list -> list - None -> None Args: f: function that expects one argument. data: data to traverse to apply the f function. list, map, dict or bare value. Returns: The same data-structure with modified values by the f function. """ if data is None: return data if isinstance(data, map): return map(lambda y: fmap(f, y), (x for x in data)) if isinstance(data, list): return [fmap(f, x) for x in data] if isinstance(data, dict): return {k: fmap(f, v) for (k, v) in data.items()} return f(data) def datetime_to_utc(date): """Returns datetime in UTC without timezone info Args: date (datetime.datetime): input datetime with timezone info Returns: datetime.datime: datetime in UTC without timezone info """ if date.tzinfo: return date.astimezone(tz.gettz('UTC')).replace(tzinfo=timezone.utc) else: return date def parse_timestamp(timestamp): """Given a time or timestamp (as string), parse the result as UTC datetime. Returns: a timezone-aware datetime representing the parsed value. None if the parsing fails. Samples: - 2016-01-12 - 2016-01-12T09:19:12+0100 - Today is January 1, 2047 at 8:21:00AM - 1452591542 """ if not timestamp: return None try: date = date_parser.parse(timestamp, ignoretz=False, fuzzy=True) return datetime_to_utc(date) except: try: return datetime.utcfromtimestamp(float(timestamp)).replace( tzinfo=timezone.utc) except (ValueError, OverflowError) as e: raise BadInputExc(e) def shorten_path(path): """Shorten the given path: for each hash present, only return the first 8 characters followed by an ellipsis""" sha256_re = r'([0-9a-f]{8})[0-9a-z]{56}' sha1_re = r'([0-9a-f]{8})[0-9a-f]{32}' ret = re.sub(sha256_re, r'\1...', path) return re.sub(sha1_re, r'\1...', ret) def format_utc_iso_date(iso_date, fmt='%d %B %Y, %H:%M UTC'): """Turns a string reprensation of an ISO 8601 date string to UTC and format it into a more human readable one. For instance, from the following input string: '2017-05-04T13:27:13+02:00' the following one is returned: '04 May 2017, 11:27 UTC'. Custom format string may also be provided as parameter Args: iso_date (str): a string representation of an ISO 8601 date fmt (str): optional date formatting string Returns: A formatted string representation of the input iso date """ date = parse_timestamp(iso_date) return date.strftime(fmt) def gen_path_info(path): """Function to generate path data navigation for use with a breadcrumb in the swh web ui. For instance, from a path /folder1/folder2/folder3, it returns the following list:: [{'name': 'folder1', 'path': 'folder1'}, {'name': 'folder2', 'path': 'folder1/folder2'}, {'name': 'folder3', 'path': 'folder1/folder2/folder3'}] Args: path: a filesystem path Returns: A list of path data for navigation as illustrated above. """ path_info = [] if path: sub_paths = path.strip('/').split('/') path_from_root = '' for p in sub_paths: path_from_root += '/' + p path_info.append({'name': p, 'path': path_from_root.strip('/')}) return path_info