diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 000000000..bb75e3ad3 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,208 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python", + "type": "python", + "request": "launch", + "stopOnEntry": true, + "pythonPath": "${config:python.pythonPath}", + "program": "${file}", + "cwd": "${workspaceRoot}", + "env": {}, + "envFile": "${workspaceRoot}/.env", + "debugOptions": [ + "WaitOnAbnormalExit", + "WaitOnNormalExit", + "RedirectOutput" + ] + }, + { + "name": "PySpark", + "type": "python", + "request": "launch", + "stopOnEntry": true, + "osx": { + "pythonPath": "${env:SPARK_HOME}/bin/spark-submit" + }, + "windows": { + "pythonPath": "${env:SPARK_HOME}/bin/spark-submit.cmd" + }, + "linux": { + "pythonPath": "${env:SPARK_HOME}/bin/spark-submit" + }, + "program": "${file}", + "cwd": "${workspaceRoot}", + "env": {}, + "envFile": "${workspaceRoot}/.env", + "debugOptions": [ + "WaitOnAbnormalExit", + "WaitOnNormalExit", + "RedirectOutput" + ] + }, + { + "name": "Python Module", + "type": "python", + "request": "launch", + "stopOnEntry": true, + "pythonPath": "${config:python.pythonPath}", + "module": "module.name", + "cwd": "${workspaceRoot}", + "env": {}, + "envFile": "${workspaceRoot}/.env", + "debugOptions": [ + "WaitOnAbnormalExit", + "WaitOnNormalExit", + "RedirectOutput" + ] + }, + { + "name": "Integrated Terminal/Console", + "type": "python", + "request": "launch", + "stopOnEntry": true, + "pythonPath": "${config:python.pythonPath}", + "program": "${file}", + "cwd": "", + "console": "integratedTerminal", + "env": {}, + "envFile": "${workspaceRoot}/.env", + "debugOptions": [ + "WaitOnAbnormalExit", + "WaitOnNormalExit" + ] + }, + { + "name": "External Terminal/Console", + "type": 
"python", + "request": "launch", + "stopOnEntry": true, + "pythonPath": "${config:python.pythonPath}", + "program": "${file}", + "cwd": "", + "console": "externalTerminal", + "env": {}, + "envFile": "${workspaceRoot}/.env", + "debugOptions": [ + "WaitOnAbnormalExit", + "WaitOnNormalExit" + ] + }, + { + "name": "Django", + "type": "python", + "request": "launch", + "stopOnEntry": true, + "pythonPath": "${config:python.pythonPath}", + "program": "${workspaceRoot}/manage.py", + "cwd": "${workspaceRoot}", + "args": [ + "runserver", + "--noreload", + "--nothreading" + ], + "env": {}, + "envFile": "${workspaceRoot}/.env", + "debugOptions": [ + "WaitOnAbnormalExit", + "WaitOnNormalExit", + "RedirectOutput", + "DjangoDebugging" + ] + }, + { + "name": "Flask", + "type": "python", + "request": "launch", + "stopOnEntry": false, + "pythonPath": "${config:python.pythonPath}", + "program": "fully qualified path to 'flask' executable. Generally located along with python interpreter", + "cwd": "${workspaceRoot}", + "env": { + "FLASK_APP": "${workspaceRoot}/quickstart/app.py" + }, + "args": [ + "run", + "--no-debugger", + "--no-reload" + ], + "envFile": "${workspaceRoot}/.env", + "debugOptions": [ + "WaitOnAbnormalExit", + "WaitOnNormalExit", + "RedirectOutput" + ] + }, + { + "name": "Flask (old)", + "type": "python", + "request": "launch", + "stopOnEntry": false, + "pythonPath": "${config:python.pythonPath}", + "program": "${workspaceRoot}/run.py", + "cwd": "${workspaceRoot}", + "args": [], + "env": {}, + "envFile": "${workspaceRoot}/.env", + "debugOptions": [ + "WaitOnAbnormalExit", + "WaitOnNormalExit", + "RedirectOutput" + ] + }, + { + "name": "Pyramid", + "type": "python", + "request": "launch", + "stopOnEntry": true, + "pythonPath": "${config:python.pythonPath}", + "cwd": "${workspaceRoot}", + "env": {}, + "envFile": "${workspaceRoot}/.env", + "args": [ + "${workspaceRoot}/development.ini" + ], + "debugOptions": [ + "WaitOnAbnormalExit", + "WaitOnNormalExit", + 
"RedirectOutput", + "Pyramid" + ] + }, + { + "name": "Watson", + "type": "python", + "request": "launch", + "stopOnEntry": true, + "pythonPath": "${config:python.pythonPath}", + "program": "${workspaceRoot}/console.py", + "cwd": "${workspaceRoot}", + "args": [ + "dev", + "runserver", + "--noreload=True" + ], + "env": {}, + "envFile": "${workspaceRoot}/.env", + "debugOptions": [ + "WaitOnAbnormalExit", + "WaitOnNormalExit", + "RedirectOutput" + ] + }, + { + "name": "Attach (Remote Debug)", + "type": "python", + "request": "attach", + "localRoot": "${workspaceRoot}", + "remoteRoot": "${workspaceRoot}", + "port": 3000, + "secret": "my_secret", + "host": "localhost" + } + ] +} \ No newline at end of file diff --git a/swh/web/api/apidoc.py b/swh/web/api/apidoc.py index 2ba4a3073..8e6df10aa 100644 --- a/swh/web/api/apidoc.py +++ b/swh/web/api/apidoc.py @@ -1,314 +1,314 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import re from collections import defaultdict from functools import wraps from enum import Enum from rest_framework.decorators import api_view from swh.web.common.utils import reverse from swh.web.api.apiurls import APIUrls from swh.web.api.apiresponse import make_api_response, error_response class argtypes(Enum): # noqa: N801 """Class for centralizing argument type descriptions """ ts = 'timestamp' int = 'integer' str = 'string' path = 'path' sha1 = 'sha1' uuid = 'uuid' sha1_git = 'sha1_git' algo_and_hash = 'hash_type:hash' class rettypes(Enum): # noqa: N801 """Class for centralizing return type descriptions """ octet_stream = 'octet stream' list = 'list' dict = 'dict' class excs(Enum): # noqa: N801 """Class for centralizing exception type descriptions """ badinput = 'BadInputExc' notfound = 'NotFoundExc' class APIDocException(Exception): """ Custom 
exception to signal errors in the use of the APIDoc decorators """ class route(object): # noqa: N801 """Decorate an API method to register it in the API doc route index and create the corresponding Flask route. This decorator is responsible for bootstrapping the linking of subsequent decorators, as well as traversing the decorator stack to obtain the documentation data from it. Args: route: documentation page's route noargs: set to True if the route has no arguments, and its result should be displayed anytime its documentation is requested. Default to False hidden: set to True to remove the endpoint from being listed in the /api endpoints. Default to False. tags: Further information on api endpoints. Two values are possibly expected: - hidden: remove the entry points from the listing - upcoming: display the entry point but it is not followable """ def __init__(self, route, noargs=False, tags=[], handle_response=False, api_version='1'): super().__init__() self.route = route self.urlpattern = '^' + api_version + route + '$' self.noargs = noargs self.tags = set(tags) self.handle_response = handle_response # @apidoc.route() Decorator call def __call__(self, f): # If the route is not hidden, add it to the index if 'hidden' not in self.tags: - APIUrls.index_add_route(self.route, f.__doc__, tags=self.tags) + APIUrls.add_route(self.route, f.__doc__, tags=self.tags) # If the decorated route has arguments, we create a specific # documentation view if not self.noargs: @api_view(['GET', 'HEAD']) def doc_view(request): doc_data = self.get_doc_data(f) return make_api_response(request, None, doc_data) view_name = self.route[1:-1].replace('/', '-') - APIUrls.index_add_url_pattern(self.urlpattern, doc_view, view_name) + APIUrls.add_url_pattern(self.urlpattern, doc_view, view_name) @wraps(f) def documented_view(request, **kwargs): doc_data = self.get_doc_data(f) try: rv = f(request, **kwargs) except Exception as exc: return error_response(request, exc, doc_data) if 
self.handle_response: return rv else: return make_api_response(request, rv, doc_data) return documented_view def filter_api_url(self, endpoint, route_re, noargs): doc_methods = {'GET', 'HEAD', 'OPTIONS'} if re.match(route_re, endpoint['rule']): if endpoint['methods'] == doc_methods and not noargs: return False return True def build_examples(self, urls, args): """Build example documentation. Args: f: function urls: information relative to url for that function args: information relative to arguments for that function Yields: example based on default parameter value if any """ s = set() r = [] for data_url in urls: url = data_url['rule'] defaults = {arg['name']: arg['default'] for arg in args if arg['name'] in url} if defaults and None not in defaults.values(): url = reverse(data_url['name'], kwargs=defaults) if url in s: continue s.add(url) r.append(url) return r def get_doc_data(self, f): """Build documentation data for the decorated function""" data = { 'docstring': None, 'response_data': None, 'urls': None, 'args': None, 'params': None, 'headers': None, 'returns': None, 'excs': None, 'examples': None, 'route': self.route, 'noargs': self.noargs } data.update(getattr(f, 'doc_data', {})) if not f.__doc__: raise APIDocException('Apidoc %s: expected a docstring' ' for function %s' % (self.__class__.__name__, f.__name__)) data['docstring'] = f.__doc__ route_re = re.compile('.*%s$' % data['route']) endpoint_list = APIUrls.get_method_endpoints(f) data['urls'] = [url for url in endpoint_list if self.filter_api_url(url, route_re, data['noargs'])] if data['args']: data['examples'] = self.build_examples(data['urls'], data['args']) data['heading'] = '%s Documentation' % data['route'] return data class DocData(object): """Base description of optional input/output setup for a route. 
""" destination = None def __init__(self): self.doc_data = {} def __call__(self, f): if not hasattr(f, 'doc_data'): f.doc_data = defaultdict(list) f.doc_data[self.destination].append(self.doc_data) return f class arg(DocData): # noqa: N801 """ Decorate an API method to display an argument's information on the doc page specified by @route above. Args: name: the argument's name. MUST match the method argument's name to create the example request URL. default: the argument's default value argtype: the argument's type as an Enum value from apidoc.argtypes argdoc: the argument's documentation string """ destination = 'args' def __init__(self, name, default, argtype, argdoc): super().__init__() self.doc_data = { 'name': name, 'type': argtype.value, 'doc': argdoc, 'default': default } class header(DocData): # noqa: N801 """ Decorate an API method to display header information the api can potentially return in the response. Args: name: the header name doc: the information about that header """ destination = 'headers' def __init__(self, name, doc): super().__init__() self.doc_data = { 'name': name, 'doc': doc, } class param(DocData): # noqa: N801 """Decorate an API method to display query parameter information the api can potentially accept. Args: name: parameter's name default: parameter's default value argtype: parameter's type as an Enum value from apidoc.argtypes doc: the information about that header """ destination = 'params' def __init__(self, name, default, argtype, doc): super().__init__() self.doc_data = { 'name': name, 'type': argtype.value, 'default': default, 'doc': doc, } class raises(DocData): # noqa: N801 """Decorate an API method to display information pertaining to an exception that can be raised by this method. 
Args: exc: the exception name as an Enum value from apidoc.excs doc: the exception's documentation string """ destination = 'excs' def __init__(self, exc, doc): super().__init__() self.doc_data = { 'exc': exc.value, 'doc': doc } class returns(DocData): # noqa: N801 """Decorate an API method to display information about its return value. Args: rettype: the return value's type as an Enum value from apidoc.rettypes retdoc: the return value's documentation string """ destination = 'returns' def __init__(self, rettype=None, retdoc=None): super().__init__() self.doc_data = { 'type': rettype.value, 'doc': retdoc } diff --git a/swh/web/api/apiurls.py b/swh/web/api/apiurls.py index 14dd87799..c011b64f3 100644 --- a/swh/web/api/apiurls.py +++ b/swh/web/api/apiurls.py @@ -1,132 +1,124 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import re -from django.conf.urls import url from rest_framework.decorators import api_view +from swh.web.common.urlsindex import UrlsIndex from swh.web.common.throttling import throttle_scope -class APIUrls(object): +class APIUrls(UrlsIndex): """ Class to manage API documentation URLs. - Indexes all routes documented using apidoc's decorators. - Tracks endpoint/request processing method relationships for use in generating related urls in API documentation """ apidoc_routes = {} method_endpoints = {} - urlpatterns = [] + scope = 'api' @classmethod def get_app_endpoints(cls): return cls.apidoc_routes @classmethod def get_method_endpoints(cls, f): if f.__name__ not in cls.method_endpoints: cls.method_endpoints[f.__name__] = cls.group_routes_by_method(f) return cls.method_endpoints[f.__name__] @classmethod def group_routes_by_method(cls, f): """ Group URL endpoints according to their processing method. 
Returns: A dict where keys are the processing method names, and values are the routes that are bound to the key method. """ rules = [] - for urlp in cls.urlpatterns: + for urlp in cls.get_url_patterns(): endpoint = urlp.callback.__name__ if endpoint != f.__name__: continue method_names = urlp.callback.http_method_names url_rule = urlp.regex.pattern.replace('^', '/').replace('$', '') url_rule_params = re.findall('\([^)]+\)', url_rule) for param in url_rule_params: param_name = re.findall('<(.*)>', param) param_name = param_name[0] if len(param_name) > 0 else None if param_name and hasattr(f, 'doc_data') and f.doc_data['args']: # noqa param_index = \ next(i for (i, d) in enumerate(f.doc_data['args']) if d['name'] == param_name) if param_index is not None: url_rule = url_rule.replace( param, '<' + f.doc_data['args'][param_index]['name'] + ': ' + f.doc_data['args'][param_index]['type'] + '>').replace('.*', '') rule_dict = {'rule': '/api' + url_rule, 'name': urlp.name, 'methods': {method.upper() for method in method_names} } rules.append(rule_dict) return rules @classmethod - def index_add_route(cls, route, docstring, **kwargs): + def add_route(cls, route, docstring, **kwargs): """ Add a route to the self-documenting API reference """ route_view_name = route[1:-1].replace('/', '-') if route not in cls.apidoc_routes: d = {'docstring': docstring, 'route_view_name': route_view_name} for k, v in kwargs.items(): d[k] = v cls.apidoc_routes[route] = d - @classmethod - def index_add_url_pattern(cls, url_pattern, view, view_name): - cls.urlpatterns.append(url(url_pattern, view, name=view_name)) - - @classmethod - def get_url_patterns(cls): - return cls.urlpatterns - class api_route(object): # noqa: N801 """ Decorator to ease the registration of an API endpoint using the Django REST Framework. 
Args: url_pattern: the url pattern used by DRF to identify the API route view_name: the name of the API view associated to the route used to reverse the url methods: array of HTTP methods supported by the API route """ def __init__(self, url_pattern=None, view_name=None, methods=['GET', 'HEAD', 'OPTIONS'], api_version='1'): super().__init__() self.url_pattern = '^' + api_version + url_pattern + '$' self.view_name = view_name self.methods = methods def __call__(self, f): # create a DRF view from the wrapped function @api_view(self.methods) @throttle_scope('swh_api') def api_view_f(*args, **kwargs): return f(*args, **kwargs) # small hacks for correctly generating API endpoints index doc api_view_f.__name__ = f.__name__ api_view_f.http_method_names = self.methods # register the route and its view in the endpoints index - APIUrls.index_add_url_pattern(self.url_pattern, api_view_f, - self.view_name) + APIUrls.add_url_pattern(self.url_pattern, api_view_f, + self.view_name) return f diff --git a/swh/web/api/views/__init__.py b/swh/web/api/views/__init__.py index e48e38da0..7a267b9f0 100644 --- a/swh/web/api/views/__init__.py +++ b/swh/web/api/views/__init__.py @@ -1,92 +1,90 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -from django.conf.urls import url - from rest_framework.response import Response from rest_framework.decorators import api_view from types import GeneratorType from swh.web.common.exc import NotFoundExc from swh.web.api.apiurls import APIUrls, api_route # canned doc string snippets that are used in several doc strings _doc_arg_content_id = """A "[hash_type:]hash" content identifier, where hash_type is one of "sha1" (the default), "sha1_git", "sha256", and hash is a checksum obtained with the hash_type hashing algorithm.""" _doc_arg_last_elt = 
'element to start listing from, for pagination purposes' _doc_arg_per_page = 'number of elements to list, for pagination purposes' _doc_exc_bad_id = 'syntax error in the given identifier(s)' _doc_exc_id_not_found = 'no object matching the given criteria could be found' _doc_ret_revision_meta = 'metadata of the revision identified by sha1_git' _doc_ret_revision_log = """list of dictionaries representing the metadata of each revision found in the commit log heading to revision sha1_git. For each commit at least the following information are returned: author/committer, authoring/commit timestamps, revision id, commit message, parent (i.e., immediately preceding) commits, "root" directory id.""" _doc_header_link = """indicates that a subsequent result page is available, pointing to it""" def _api_lookup(lookup_fn, *args, notfound_msg='Object not found', enrich_fn=lambda x: x): """Capture a redundant behavior of: - looking up the backend with a criteria (be it an identifier or checksum) passed to the function lookup_fn - if nothing is found, raise an NotFoundExc exception with error message notfound_msg. - Otherwise if something is returned: - either as list, map or generator, map the enrich_fn function to it and return the resulting data structure as list. - either as dict and pass to enrich_fn and return the dict enriched. Args: - criteria: discriminating criteria to lookup - lookup_fn: function expects one criteria and optional supplementary *args. - notfound_msg: if nothing matching the criteria is found, raise NotFoundExc with this error message. - enrich_fn: Function to use to enrich the result returned by lookup_fn. Default to the identity function if not provided. - *args: supplementary arguments to pass to lookup_fn. Raises: NotFoundExp or whatever `lookup_fn` raises. 
""" res = lookup_fn(*args) if not res: raise NotFoundExc(notfound_msg) if isinstance(res, (map, list, GeneratorType)): return [enrich_fn(x) for x in res] return enrich_fn(res) @api_view(['GET', 'HEAD']) def api_home(request): return Response({}, template_name='api.html') -APIUrls.urlpatterns.append(url(r'^$', api_home, name='api_homepage')) +APIUrls.add_url_pattern(r'^$', api_home, view_name='api_homepage') @api_route(r'/', 'endpoints') def api_endpoints(request): """Display the list of opened api endpoints. """ routes = APIUrls.get_app_endpoints().copy() for route, doc in routes.items(): doc['doc_intro'] = doc['docstring'].split('\n\n')[0] # Return a list of routes with consistent ordering env = { 'doc_routes': sorted(routes.items()) } return Response(env, template_name="api-endpoints.html") diff --git a/swh/web/browse/browseurls.py b/swh/web/browse/browseurls.py new file mode 100644 index 000000000..8ef21d372 --- /dev/null +++ b/swh/web/browse/browseurls.py @@ -0,0 +1,38 @@ +# Copyright (C) 2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.web.common.urlsindex import UrlsIndex + + +class BrowseUrls(UrlsIndex): + """ + Class to manage SWH web browse application urls. 
+ """ + + scope = 'browse' + + +class browse_route(object): # noqa: N801 + """ + Decorator to ease the registration of a SWH web browse endpoint + + Args: + url_patterns: list of url patterns used by Django to identify the browse routes + view_name: the name of the Django view associated to the routes used to + reverse the url + """ # noqa + + def __init__(self, *url_patterns, view_name=None): + super().__init__() + self.url_patterns = [] + for url_pattern in url_patterns: + self.url_patterns.append('^' + url_pattern + '$') + self.view_name = view_name + + def __call__(self, f): + # register the route and its view in the browse endpoints index + for url_pattern in self.url_patterns: + BrowseUrls.add_url_pattern(url_pattern, f, self.view_name) + return f diff --git a/swh/web/browse/urls.py b/swh/web/browse/urls.py index 800089eed..5f076e287 100644 --- a/swh/web/browse/urls.py +++ b/swh/web/browse/urls.py @@ -1,82 +1,40 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.conf.urls import url from django.shortcuts import redirect from swh.web.common.utils import reverse -from swh.web.browse.views import ( - directory, content, origin -) + +import swh.web.browse.views.directory # noqa +import swh.web.browse.views.content # noqa +import swh.web.browse.views.origin # noqa + +from swh.web.browse.browseurls import BrowseUrls def default_browse_view(request): """Default django view used as an entry point for the swh browse ui web application. The url that point to it is /browse/. Currently, it points to the origin view for the linux kernel source tree github mirror. 
Args: request: input django http request """ linux_origin_id = '2' linux_origin_url = reverse('browse-origin', kwargs={'origin_id': linux_origin_id}) return redirect(linux_origin_url) urlpatterns = [ - url(r'^$', default_browse_view), - url(r'^directory/(?P[0-9a-f]+)/$', - directory.directory_browse, name='browse-directory'), - url(r'^directory/(?P[0-9a-f]+)/(?P.+)/$', - directory.directory_browse, name='browse-directory'), - - url(r'^content/(?P.+)/raw/$', - content.content_raw, name='browse-content-raw'), - url(r'^content/(?P.+)/$', - content.content_display, name='browse-content'), - - url(r'^origin/(?P[0-9]+)/$', origin.origin_browse, - name='browse-origin'), - url(r'^origin/(?P[a-z]+)/url/(?P.+)/$', - origin.origin_browse, name='browse-origin'), - - url(r'^origin/(?P[0-9]+)/directory/$', - origin.origin_directory_browse, - name='browse-origin-directory'), - url(r'^origin/(?P[0-9]+)/directory/(?P.+)/$', - origin.origin_directory_browse, - name='browse-origin-directory'), - url(r'^origin/(?P[0-9]+)/visit/(?P[0-9]+)/directory/$', # noqa - origin.origin_directory_browse, - name='browse-origin-directory'), - url(r'^origin/(?P[0-9]+)/visit/(?P[0-9]+)' - r'/directory/(?P.+)/$', - origin.origin_directory_browse, - name='browse-origin-directory'), - url(r'^origin/(?P[0-9]+)/ts/(?P[0-9]+)/directory/$', # noqa - origin.origin_directory_browse, - name='browse-origin-directory'), - url(r'^origin/(?P[0-9]+)/ts/(?P[0-9]+)' - r'/directory/(?P.+)/$', - origin.origin_directory_browse, - name='browse-origin-directory'), - - url(r'^origin/(?P[0-9]+)/content/(?P.+)/$', - origin.origin_content_display, - name='browse-origin-content'), - url(r'^origin/(?P[0-9]+)/visit/(?P[0-9]+)' - r'/content/(?P.+)/$', - origin.origin_content_display, - name='browse-origin-content'), - url(r'^origin/(?P[0-9]+)/ts/(?P[0-9]+)' - r'/content/(?P.+)/$', - origin.origin_content_display, - name='browse-origin-content'), + url(r'^$', default_browse_view) ] + +urlpatterns += BrowseUrls.get_url_patterns() 
diff --git a/swh/web/browse/views/content.py b/swh/web/browse/views/content.py index 14d4603a9..c941f1afd 100644 --- a/swh/web/browse/views/content.py +++ b/swh/web/browse/views/content.py @@ -1,131 +1,136 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.http import HttpResponse from django.shortcuts import render from swh.model.hashutil import hash_to_hex from swh.web.common import query from swh.web.common.utils import reverse from swh.web.common.exc import handle_view_exception from swh.web.browse.utils import ( gen_path_info, request_content, prepare_content_for_display ) +from swh.web.browse.browseurls import browse_route +@browse_route(r'content/(?P<query_string>.+)/raw/', + view_name='browse-content-raw') +def content_raw(request, query_string): + """Django view that produces a raw display of a SWH content identified + by its hash value. + + The url that points to it is :http:get:`/browse/content/[(algo_hash):](hash)/raw/` + + Args: + request: input django http request + query_string: a string of the form "[ALGO_HASH:]HASH" where + optional ALGO_HASH can be either *sha1*, *sha1_git*, *sha256*, + or *blake2s256* (default to *sha1*) and HASH the hexadecimal + representation of the hash value + + Returns: + The raw bytes of the content. 
+ + + """ # noqa + + try: + algo, checksum = query.parse_hash(query_string) + checksum = hash_to_hex(checksum) + content_data, mime_type = request_content(query_string) + except Exception as exc: + return handle_view_exception(exc) + + filename = request.GET.get('filename', None) + if not filename: + filename = '%s_%s' % (algo, checksum) + + if mime_type.startswith('text/'): + response = HttpResponse(content_data, content_type="text/plain") + response['Content-disposition'] = 'filename=%s' % filename + else: + response = HttpResponse(content_data, + content_type='application/octet-stream') + response['Content-disposition'] = 'attachment; filename=%s' % filename + return response + + +@browse_route(r'content/(?P<query_string>.+)/', + view_name='browse-content') def content_display(request, query_string): """Django view that produces an HTML display of a SWH content identified by its hash value. The url that points to it is :http:get:`/browse/content/[(algo_hash):](hash)/` Args: request: input django http request query_string: a string of the form "[ALGO_HASH:]HASH" where optional ALGO_HASH can be either *sha1*, *sha1_git*, *sha256*, or *blake2s256* (default to *sha1*) and HASH the hexadecimal representation of the hash value Returns: The HTML rendering of the requested content. 
""" # noqa try: algo, checksum = query.parse_hash(query_string) checksum = hash_to_hex(checksum) content_data, mime_type = request_content(query_string) except Exception as exc: return handle_view_exception(exc) path = request.GET.get('path', None) content_display_data = prepare_content_for_display(content_data, mime_type, path) root_dir = None filename = None path_info = None breadcrumbs = [] if path: split_path = path.split('/') root_dir = split_path[0] filename = split_path[-1] path = path.replace(root_dir + '/', '') path = path.replace(filename, '') path_info = gen_path_info(path) breadcrumbs.append({'name': root_dir[:7], 'url': reverse('browse-directory', kwargs={'sha1_git': root_dir})}) for pi in path_info: breadcrumbs.append({'name': pi['name'], 'url': reverse('browse-directory', kwargs={'sha1_git': root_dir, 'path': pi['path']})}) breadcrumbs.append({'name': filename, 'url': None}) query_params = None if filename: query_params = {'filename': filename} content_raw_url = reverse('browse-content-raw', kwargs={'query_string': query_string}, query_params=query_params) return render(request, 'content.html', {'content_hash_algo': algo, 'content_checksum': checksum, 'content': content_display_data['content_data'], 'content_raw_url': content_raw_url, 'mime_type': mime_type, 'language': content_display_data['language'], 'breadcrumbs': breadcrumbs, 'branches': None, 'branch': None}) - - -def content_raw(request, query_string): - """Django view that produces a raw display of a SWH content identified - by its hash value. - - The url that points to it is :http:get:`/browse/content/[(algo_hash):](hash)/raw/` - - Args: - request: input django http request - query_string: a string of the form "[ALGO_HASH:]HASH" where - optional ALGO_HASH can be either *sha1*, *sha1_git*, *sha256*, - or *blake2s256* (default to *sha1*) and HASH the hexadecimal - representation of the hash value - - Returns: - The raw bytes of the content. 
- - - """ # noqa - - try: - algo, checksum = query.parse_hash(query_string) - checksum = hash_to_hex(checksum) - content_data, mime_type = request_content(query_string) - except Exception as exc: - return handle_view_exception(exc) - - filename = request.GET.get('filename', None) - if not filename: - filename = '%s_%s' % (algo, checksum) - - if mime_type.startswith('text/'): - response = HttpResponse(content_data, content_type="text/plain") - response['Content-disposition'] = 'filename=%s' % filename - else: - response = HttpResponse(content_data, - content_type='application/octet-stream') - response['Content-disposition'] = 'attachment; filename=%s' % filename - return response diff --git a/swh/web/browse/views/directory.py b/swh/web/browse/views/directory.py index d58753612..ab722e3ea 100644 --- a/swh/web/browse/views/directory.py +++ b/swh/web/browse/views/directory.py @@ -1,77 +1,81 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render from swh.web.common import service from swh.web.common.utils import reverse from swh.web.common.exc import handle_view_exception from swh.web.browse.utils import ( gen_path_info, get_directory_entries ) +from swh.web.browse.browseurls import browse_route +@browse_route(r'directory/(?P<sha1_git>[0-9a-f]+)/', + r'directory/(?P<sha1_git>[0-9a-f]+)/(?P<path>.+)/', + view_name='browse-directory') def directory_browse(request, sha1_git, path=None): """Django view for browsing the content of a SWH directory identified by its sha1_git value. 
The url scheme that points to it is the following: * :http:get:`/browse/directory/(sha1_git)/` * :http:get:`/browse/directory/(sha1_git)/(path)/` Args: request: input django http request sha1_git: swh sha1_git identifer of the directory to browse path: optionnal path parameter used to navigate in directories reachable from the provided root one Returns: The HTML rendering for the content of the provided directory. """ root_sha1_git = sha1_git try: if path: dir_info = service.lookup_directory_with_path(sha1_git, path) sha1_git = dir_info['target'] dirs, files = get_directory_entries(sha1_git) except Exception as exc: return handle_view_exception(exc) path_info = gen_path_info(path) breadcrumbs = [] breadcrumbs.append({'name': root_sha1_git[:7], 'url': reverse('browse-directory', kwargs={'sha1_git': root_sha1_git})}) for pi in path_info: breadcrumbs.append({'name': pi['name'], 'url': reverse('browse-directory', kwargs={'sha1_git': root_sha1_git, 'path': pi['path']})}) path = '' if path is None else (path + '/') for d in dirs: d['url'] = reverse('browse-directory', kwargs={'sha1_git': root_sha1_git, 'path': path + d['name']}) for f in files: query_string = 'sha1_git:' + f['target'] f['url'] = reverse('browse-content', kwargs={'query_string': query_string}, query_params={'path': root_sha1_git + '/' + path + f['name']}) return render(request, 'directory.html', {'dir_sha1_git': sha1_git, 'dirs': dirs, 'files': files, 'breadcrumbs': breadcrumbs, 'branches': None, 'branch': None}) diff --git a/swh/web/browse/views/origin.py b/swh/web/browse/views/origin.py index 01184f7c3..69d36f6c3 100644 --- a/swh/web/browse/views/origin.py +++ b/swh/web/browse/views/origin.py @@ -1,313 +1,328 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import dateutil from django.shortcuts import render 
from swh.web.common import service from swh.web.common.utils import reverse from swh.web.common.exc import NotFoundExc, handle_view_exception from swh.web.browse.utils import ( get_origin_visits, get_origin_visit_branches, gen_path_info, get_directory_entries, request_content, prepare_content_for_display ) +from swh.web.browse.browseurls import browse_route +@browse_route(r'origin/(?P<origin_id>[0-9]+)/', + r'origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)/', + view_name='browse-origin') def origin_browse(request, origin_id=None, origin_type=None, origin_url=None): """Django view that produces an HTML display of a swh origin identified by its id or its url. The url scheme that points to it is the following: * :http:get:`/browse/origin/(origin_id)/` * :http:get:`/browse/origin/(origin_id)/directory/` Args: request: input django http request origin_id: a swh origin id origin_type: type of origin (git, svn, ...) origin_url: url of the origin (e.g. https://github.com//) Returns: The HTML rendering for the metadata of the provided origin. 
""" try: if origin_id: origin_request_params = { 'id': origin_id, } else: origin_request_params = { 'type': origin_type, 'url': origin_url } origin_info = service.lookup_origin(origin_request_params) origin_id = origin_info['id'] origin_visits = get_origin_visits(origin_id) except Exception as exc: return handle_view_exception(exc) origin_info['last swh visit browse url'] = \ reverse('browse-origin-directory', kwargs={'origin_id': origin_id}) origin_visits_data = [] for visit in origin_visits: visit_date = dateutil.parser.parse(visit['date']) visit['date'] = visit_date.strftime('%d %B %Y, %H:%M UTC') visit['browse_url'] = reverse('browse-origin-directory', kwargs={'origin_id': origin_id, 'visit_id': visit['visit']}) origin_visits_data.append( {'date': visit_date.timestamp()}) return render(request, 'origin.html', {'origin': origin_info, 'origin_visits_data': origin_visits_data, 'visits': origin_visits, 'browse_url_base': '/browse/revision/origin/%s/' % origin_id}) +@browse_route(r'origin/(?P<origin_id>[0-9]+)/directory/', + r'origin/(?P<origin_id>[0-9]+)/directory/(?P<path>.+)/', + r'origin/(?P<origin_id>[0-9]+)/visit/(?P<visit_id>[0-9]+)/directory/', # noqa + r'origin/(?P<origin_id>[0-9]+)/visit/(?P<visit_id>[0-9]+)/directory/(?P<path>.+)/', # noqa + r'origin/(?P<origin_id>[0-9]+)/ts/(?P<ts>[0-9]+)/directory/', + r'origin/(?P<origin_id>[0-9]+)/ts/(?P<ts>[0-9]+)/directory/(?P<path>.+)/', # noqa + view_name='browse-origin-directory') def origin_directory_browse(request, origin_id, visit_id=None, ts=None, path=None): """Django view for browsing the content of a swh directory associated to an origin for a given visit. 
The url scheme that points to it is the following: * :http:get:`/browse/origin/(origin_id)/directory/` * :http:get:`/browse/origin/(origin_id)/directory/(path)/` * :http:get:`/browse/origin/(origin_id)/visit/(visit_id)/directory/` * :http:get:`/browse/origin/(origin_id)/visit/(visit_id)/directory/(path)/` * :http:get:`/browse/origin/(origin_id)/ts/(ts)/directory/` * :http:get:`/browse/origin/(origin_id)/ts/(ts)/directory/(path)/` Args: request: input django http request origin_id: a swh origin id visit_id: optional visit id parameter (the last one will be used by default) ts: optional visit timestamp parameter (the last one will be used by default) path: optional path parameter used to navigate in directories reachable from the origin root one Returns: The HTML rendering for the content of the directory associated to the provided origin and visit. """ # noqa try: if not visit_id and not ts: origin_visits = get_origin_visits(origin_id) return origin_directory_browse(request, origin_id, origin_visits[-1]['visit'], path=path) if not visit_id and ts: branches = get_origin_visit_branches(origin_id, visit_ts=ts) url_args = {'origin_id': origin_id, 'ts': ts} else: branches = get_origin_visit_branches(origin_id, visit_id) url_args = {'origin_id': origin_id, 'visit_id': visit_id} branch = request.GET.get('branch', 'master') filtered_branches = [b for b in branches if branch in b['name']] if len(filtered_branches) > 0: root_sha1_git = filtered_branches[0]['directory'] branch = filtered_branches[0]['name'] else: if visit_id: raise NotFoundExc('Branch %s associated to visit with' ' id %s for origin with id %s' ' not found!' % (branch, visit_id, origin_id)) else: raise NotFoundExc('Branch %s associated to visit with' ' timestamp %s for origin with id %s' ' not found!' 
% (branch, ts, origin_id)) sha1_git = root_sha1_git if path: dir_info = service.lookup_directory_with_path(root_sha1_git, path) sha1_git = dir_info['target'] dirs, files = get_directory_entries(sha1_git) except Exception as exc: return handle_view_exception(exc) for b in branches: branch_url_args = dict(url_args) if path: b['path'] = path branch_url_args['path'] = path b['url'] = reverse('browse-origin-directory', kwargs=branch_url_args, query_params={'branch': b['name']}) path_info = gen_path_info(path) breadcrumbs = [] breadcrumbs.append({'name': root_sha1_git[:7], 'url': reverse('browse-origin-directory', kwargs=url_args, query_params={'branch': branch})}) for pi in path_info: bc_url_args = dict(url_args) bc_url_args['path'] = pi['path'] breadcrumbs.append({'name': pi['name'], 'url': reverse('browse-origin-directory', kwargs=bc_url_args, query_params={'branch': branch})}) path = '' if path is None else (path + '/') for d in dirs: bc_url_args = dict(url_args) bc_url_args['path'] = path + d['name'] d['url'] = reverse('browse-origin-directory', kwargs=bc_url_args, query_params={'branch': branch}) for f in files: bc_url_args = dict(url_args) bc_url_args['path'] = path + f['name'] f['url'] = reverse('browse-origin-content', kwargs=bc_url_args, query_params={'branch': branch}) return render(request, 'directory.html', {'dir_sha1_git': sha1_git, 'dirs': dirs, 'files': files, 'breadcrumbs': breadcrumbs, 'branches': branches, 'branch': branch}) +@browse_route(r'origin/(?P<origin_id>[0-9]+)/content/(?P<path>.+)/', + r'origin/(?P<origin_id>[0-9]+)/visit/(?P<visit_id>[0-9]+)/content/(?P<path>.+)/', # noqa + r'origin/(?P<origin_id>[0-9]+)/ts/(?P<ts>[0-9]+)/content/(?P<path>.+)/', # noqa + view_name='browse-origin-content') def origin_content_display(request, origin_id, path, visit_id=None, ts=None): """Django view that produces an HTML display of a swh content associated to an origin for a given visit. 
The url scheme that points to it is the following: * :http:get:`/browse/origin/(origin_id)/content/(path)/` * :http:get:`/browse/origin/(origin_id)/visit/(visit_id)/content/(path)/` * :http:get:`/browse/origin/(origin_id)/ts/(ts)/content/(path)/` Args: request: input django http request origin_id: id of a swh origin path: path of the content relative to the origin root directory visit_id: optional visit id parameter (the last one will be used by default) ts: optional visit timestamp parameter (the last one will be used by default) branch: optional query parameter that specifies the origin branch from which to retrieve the content Returns: The HTML rendering of the requested content associated to the provided origin and visit. """ # noqa try: if not visit_id and not ts: origin_visits = get_origin_visits(origin_id) return origin_content_display(request, origin_id, path, origin_visits[-1]['visit']) if not visit_id and ts: branches = get_origin_visit_branches(origin_id, visit_ts=ts) kwargs = {'origin_id': origin_id, 'ts': ts} else: branches = get_origin_visit_branches(origin_id, visit_id) kwargs = {'origin_id': origin_id, 'visit_id': visit_id} for b in branches: bc_kwargs = dict(kwargs) bc_kwargs['path'] = path b['url'] = reverse('browse-origin-content', kwargs=bc_kwargs, query_params={'branch': b['name']}) branch = request.GET.get('branch', 'master') filtered_branches = [b for b in branches if branch in b['name']] if len(filtered_branches) > 0: root_sha1_git = filtered_branches[0]['directory'] branch = filtered_branches[0]['name'] else: if visit_id: raise NotFoundExc('Branch %s associated to visit with' ' id %s for origin with id %s' ' not found!' % (branch, visit_id, origin_id)) else: raise NotFoundExc('Branch %s associated to visit with' ' timestamp %s for origin with id %s' ' not found!' 
% (branch, ts, origin_id)) content_info = service.lookup_directory_with_path(root_sha1_git, path) sha1_git = content_info['target'] query_string = 'sha1_git:' + sha1_git content_data, mime_type = request_content(query_string) except Exception as exc: return handle_view_exception(exc) content_display_data = prepare_content_for_display(content_data, mime_type, path) filename = None path_info = None breadcrumbs = [] split_path = path.split('/') filename = split_path[-1] path = path.replace(filename, '') path_info = gen_path_info(path) breadcrumbs.append({'name': root_sha1_git[:7], 'url': reverse('browse-origin-directory', kwargs=kwargs, query_params={'branch': branch})}) for pi in path_info: bc_kwargs = dict(kwargs) bc_kwargs['path'] = pi['path'] breadcrumbs.append({'name': pi['name'], 'url': reverse('browse-origin-directory', kwargs=bc_kwargs, query_params={'branch': branch})}) breadcrumbs.append({'name': filename, 'url': None}) content_raw_url = reverse('browse-content-raw', kwargs={'query_string': query_string}, query_params={'filename': filename}) return render(request, 'content.html', {'content_hash_algo': 'sha1_git', 'content_checksum': sha1_git, 'content': content_display_data['content_data'], 'content_raw_url': content_raw_url, 'mime_type': mime_type, 'language': content_display_data['language'], 'breadcrumbs': breadcrumbs, 'branches': branches, 'branch': branch}) diff --git a/swh/web/common/urlsindex.py b/swh/web/common/urlsindex.py new file mode 100644 index 000000000..43686ee7f --- /dev/null +++ b/swh/web/common/urlsindex.py @@ -0,0 +1,45 @@ +# Copyright (C) 2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from django.conf.urls import url + + +class UrlsIndex(object): + """ + Simple helper class for centralizing url patterns of a Django + web application. 
+ + Derived classes should override the 'scope' class attribute otherwise + all declared patterns will be grouped under the default one. + """ + + urlpatterns = {} + scope = 'default' + + @classmethod + def add_url_pattern(cls, url_pattern, view, view_name): + """ + Class method that adds an url pattern to the current scope. + + Args: + url_pattern: regex describing a Django url + view: function implementing the Django view + view_name: name of the view used to reverse the url + """ + if cls.scope not in cls.urlpatterns: + cls.urlpatterns[cls.scope] = [] + cls.urlpatterns[cls.scope].append(url(url_pattern, view, + name=view_name)) + + @classmethod + def get_url_patterns(cls): + """ + Class method that returns the list of url pattern associated to + the current scope. + + Returns: + The list of url patterns associated to the current scope + """ + return cls.urlpatterns[cls.scope] diff --git a/swh/web/tests/browse/views/test_content.py b/swh/web/tests/browse/views/test_content.py index c2aff0922..7566e8d45 100644 --- a/swh/web/tests/browse/views/test_content.py +++ b/swh/web/tests/browse/views/test_content.py @@ -1,207 +1,207 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 from unittest.mock import patch from nose.tools import istest from django.test import TestCase from django.utils.html import escape from swh.web.common.utils import reverse from swh.web.browse.utils import ( gen_path_info ) from .data.content_test_data import ( stub_content_text_data, stub_content_text_sha1, stub_content_text_path_with_root_dir, stub_content_bin_data, stub_content_bin_sha1, stub_content_bin_filename, stub_content_text_no_highlight_sha1, stub_content_text_no_highlight_data ) class SwhBrowseContentTest(TestCase): @patch('swh.web.browse.views.content.request_content') 
@istest def content_view_text(self, mock_request_content): mock_request_content.return_value =\ stub_content_text_data, 'text/x-c++' url = reverse('browse-content', kwargs={'query_string': stub_content_text_sha1}) url_raw = reverse('browse-content-raw', kwargs={'query_string': stub_content_text_sha1}) resp = self.client.get(url) self.assertEquals(resp.status_code, 200) self.assertTemplateUsed('content.html') self.assertContains(resp, '') self.assertContains(resp, escape(stub_content_text_data)) self.assertContains(resp, url_raw) @patch('swh.web.browse.views.content.request_content') @istest def content_view_text_no_highlight(self, mock_request_content): mock_request_content.return_value =\ stub_content_text_no_highlight_data, 'text/plain' url = reverse('browse-content', kwargs={'query_string': stub_content_text_no_highlight_sha1}) # noqa url_raw = reverse('browse-content-raw', kwargs={'query_string': stub_content_text_no_highlight_sha1}) # noqa resp = self.client.get(url) self.assertEquals(resp.status_code, 200) self.assertTemplateUsed('content.html') self.assertContains(resp, '') self.assertContains(resp, escape(stub_content_text_no_highlight_data)) # noqa self.assertContains(resp, url_raw) @patch('swh.web.browse.views.content.request_content') @istest def content_view_image(self, mock_request_content): mime_type = 'image/png' mock_request_content.return_value =\ stub_content_bin_data, mime_type url = reverse('browse-content', kwargs={'query_string': stub_content_bin_sha1}) url_raw = reverse('browse-content-raw', kwargs={'query_string': stub_content_bin_sha1}) resp = self.client.get(url) self.assertEquals(resp.status_code, 200) self.assertTemplateUsed('content.html') pngEncoded = base64.b64encode(stub_content_bin_data) \ .decode('utf-8') self.assertContains(resp, '' % (mime_type, pngEncoded)) self.assertContains(resp, url_raw) @patch('swh.web.browse.views.content.request_content') @istest def content_view_with_path(self, mock_request_content): 
mock_request_content.return_value =\ stub_content_text_data, 'text/x-c++' url = reverse('browse-content', kwargs={'query_string': stub_content_text_sha1}, query_params={'path': stub_content_text_path_with_root_dir}) # noqa resp = self.client.get(url) self.assertEquals(resp.status_code, 200) self.assertTemplateUsed('content.html') self.assertContains(resp, '') self.assertContains(resp, escape(stub_content_text_data)) split_path = stub_content_text_path_with_root_dir.split('/') root_dir_sha1 = split_path[0] filename = split_path[-1] path = stub_content_text_path_with_root_dir \ .replace(root_dir_sha1 + '/', '') \ .replace(filename, '') path_info = gen_path_info(path) root_dir_url = reverse('browse-directory', kwargs={'sha1_git': root_dir_sha1}) self.assertContains(resp, '
  • ', count=len(path_info)+1) self.assertContains(resp, '' + root_dir_sha1[:7] + '') for p in path_info: dir_url = reverse('browse-directory', kwargs={'sha1_git': root_dir_sha1, 'path': p['path']}) self.assertContains(resp, '' + p['name'] + '') self.assertContains(resp, '
  • ' + filename + '
  • ') url_raw = reverse('browse-content-raw', kwargs={'query_string': stub_content_text_sha1}, query_params={'filename': filename}) self.assertContains(resp, url_raw) @patch('swh.web.browse.views.content.request_content') @istest - def content_raw_text(self, mock_request_content): + def test_content_raw_text(self, mock_request_content): mock_request_content.return_value =\ stub_content_text_data, 'text/plain' url = reverse('browse-content-raw', kwargs={'query_string': stub_content_text_sha1}) resp = self.client.get(url) self.assertEquals(resp.status_code, 200) self.assertEqual(resp['Content-Type'], 'text/plain') self.assertEqual(resp['Content-disposition'], 'filename=%s_%s' % ('sha1', stub_content_text_sha1)) self.assertEqual(resp.content, stub_content_text_data) filename = stub_content_text_path_with_root_dir.split('/')[-1] url = reverse('browse-content-raw', kwargs={'query_string': stub_content_text_sha1}, query_params={'filename': filename}) resp = self.client.get(url) self.assertEquals(resp.status_code, 200) self.assertEqual(resp['Content-Type'], 'text/plain') self.assertEqual(resp['Content-disposition'], 'filename=%s' % filename) self.assertEqual(resp.content, stub_content_text_data) @patch('swh.web.browse.views.content.request_content') @istest def content_raw_bin(self, mock_request_content): mock_request_content.return_value =\ stub_content_bin_data, 'image/png' url = reverse('browse-content-raw', kwargs={'query_string': stub_content_bin_sha1}) resp = self.client.get(url) self.assertEquals(resp.status_code, 200) self.assertEqual(resp['Content-Type'], 'application/octet-stream') self.assertEqual(resp['Content-disposition'], 'attachment; filename=%s_%s' % ('sha1', stub_content_bin_sha1)) self.assertEqual(resp.content, stub_content_bin_data) url = reverse('browse-content-raw', kwargs={'query_string': stub_content_bin_sha1}, query_params={'filename': stub_content_bin_filename}) resp = self.client.get(url) self.assertEquals(resp.status_code, 200) 
self.assertEqual(resp['Content-Type'], 'application/octet-stream') self.assertEqual(resp['Content-disposition'], 'attachment; filename=%s' % stub_content_bin_filename) self.assertEqual(resp.content, stub_content_bin_data)