diff --git a/swh/web/api/apiurls.py b/swh/web/api/apiurls.py index 2b661e70..e4b29fec 100644 --- a/swh/web/api/apiurls.py +++ b/swh/web/api/apiurls.py @@ -1,79 +1,86 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework.decorators import api_view from swh.web.common.urlsindex import UrlsIndex from swh.web.common.throttling import throttle_scope class APIUrls(UrlsIndex): """ Class to manage API documentation URLs. - Indexes all routes documented using apidoc's decorators. - Tracks endpoint/request processing method relationships for use in generating related urls in API documentation """ _apidoc_routes = {} _method_endpoints = {} scope = 'api' @classmethod def get_app_endpoints(cls): return cls._apidoc_routes @classmethod def add_route(cls, route, docstring, **kwargs): """ Add a route to the self-documenting API reference """ route_view_name = 'api-%s' % route[1:-1].replace('/', '-') if route not in cls._apidoc_routes: d = {'docstring': docstring, 'route_view_name': route_view_name} for k, v in kwargs.items(): d[k] = v cls._apidoc_routes[route] = d class api_route(object): # noqa: N801 """ Decorator to ease the registration of an API endpoint using the Django REST Framework. Args: url_pattern: the url pattern used by DRF to identify the API route view_name: the name of the API view associated to the route used to reverse the url methods: array of HTTP methods supported by the API route """ def __init__(self, url_pattern=None, view_name=None, methods=['GET', 'HEAD', 'OPTIONS'], throttle_scope='swh_api', - api_version='1'): + api_version='1', + checksum_args=None): super().__init__() self.url_pattern = '^' + api_version + url_pattern + '$' self.view_name = view_name self.methods = methods self.throttle_scope = throttle_scope + self.checksum_args = checksum_args def __call__(self, f): # create a DRF view from the wrapped function @api_view(self.methods) @throttle_scope(self.throttle_scope) def api_view_f(*args, **kwargs): return f(*args, **kwargs) # small hacks for correctly generating API endpoints index doc api_view_f.__name__ = f.__name__ api_view_f.http_method_names = self.methods # register the route and its view in the endpoints index APIUrls.add_url_pattern(self.url_pattern, api_view_f, self.view_name) + + if self.checksum_args: + APIUrls.add_redirect_for_checksum_args(self.view_name, + [self.url_pattern], + self.checksum_args) return f diff --git a/swh/web/api/views/directory.py b/swh/web/api/views/directory.py index c17902ee..a5273542 100644 --- a/swh/web/api/views/directory.py +++ b/swh/web/api/views/directory.py @@ -1,75 +1,77 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service from swh.web.api import utils from swh.web.api.apidoc import api_doc from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup -@api_route(r'/directory/(?P[0-9a-f]+)/', 'api-directory') +@api_route(r'/directory/(?P[0-9a-f]+)/', 'api-directory', + checksum_args=['sha1_git']) @api_route(r'/directory/(?P[0-9a-f]+)/(?P.+)/', - 'api-directory') + 'api-directory', + checksum_args=['sha1_git']) @api_doc('/directory/') def api_directory(request, sha1_git, path=None): """ .. http:get:: /api/1/directory/(sha1_git)/[(path)/] Get information about directory objects. Directories are identified by **sha1** checksums, compatible with Git directory identifiers. See :func:`swh.model.identifiers.directory_identifier` in our data model module for details about how they are computed. When given only a directory identifier, this endpoint returns information about the directory itself, returning its content (usually a list of directory entries). When given a directory identifier and a path, this endpoint returns information about the directory entry pointed by the relative path, starting path resolution from the given directory. :param string sha1_git: hexadecimal representation of the directory **sha1_git** identifier :param string path: optional parameter to get information about the directory entry pointed by that relative path :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request :>jsonarr object checksums: object holding the computed checksum values for a directory entry (only for file entries) :>jsonarr string dir_id: **sha1_git** identifier of the requested directory :>jsonarr number length: length of a directory entry in bytes (only for file entries) for getting information about the content MIME type :>jsonarr string name: the directory entry name :>jsonarr number perms: permissions for the directory entry :>jsonarr string target: **sha1_git** identifier of the directory entry :>jsonarr string target_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` or :http:get:`/api/1/directory/(sha1_git)/[(path)/]` depending on the directory entry type :>jsonarr string type: the type of the directory entry, can be either ``dir``, ``file`` or ``rev`` **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested directory can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`directory/977fc4b98c0e85816348cebd3b12026407c368b6/` """ # noqa if path: error_msg_path = ('Entry with path %s relative to directory ' 'with sha1_git %s not found.') % (path, sha1_git) return api_lookup( service.lookup_directory_with_path, sha1_git, path, notfound_msg=error_msg_path, enrich_fn=utils.enrich_directory) else: error_msg_nopath = 'Directory with sha1_git %s not found.' % sha1_git return api_lookup( service.lookup_directory, sha1_git, notfound_msg=error_msg_nopath, enrich_fn=utils.enrich_directory) diff --git a/swh/web/browse/browseurls.py b/swh/web/browse/browseurls.py index d1d4547b..a488ff36 100644 --- a/swh/web/browse/browseurls.py +++ b/swh/web/browse/browseurls.py @@ -1,38 +1,45 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common.urlsindex import UrlsIndex class BrowseUrls(UrlsIndex): """ Class to manage swh-web browse application urls. """ scope = 'browse' class browse_route(object): # noqa: N801 """ Decorator to ease the registration of a swh-web browse endpoint Args: url_patterns: list of url patterns used by Django to identify the browse routes view_name: the name of the Django view associated to the routes used to reverse the url """ # noqa - def __init__(self, *url_patterns, view_name=None): + def __init__(self, *url_patterns, view_name=None, checksum_args=None): super().__init__() self.url_patterns = [] + self.checksum_args = checksum_args for url_pattern in url_patterns: self.url_patterns.append('^' + url_pattern + '$') self.view_name = view_name def __call__(self, f): # register the route and its view in the browse endpoints index for url_pattern in self.url_patterns: BrowseUrls.add_url_pattern(url_pattern, f, self.view_name) + + if self.checksum_args: + BrowseUrls.add_redirect_for_checksum_args(self.view_name, + self.url_patterns, + self.checksum_args) + return f diff --git a/swh/web/browse/views/directory.py b/swh/web/browse/views/directory.py index 723fd2a6..f9ef43ee 100644 --- a/swh/web/browse/views/directory.py +++ b/swh/web/browse/views/directory.py @@ -1,160 +1,161 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render, redirect from django.template.defaultfilters import filesizeformat from swh.web.common import service from swh.web.common.utils import ( reverse, gen_path_info ) from swh.web.common.exc import handle_view_exception, NotFoundExc from swh.web.browse.utils import ( get_directory_entries, get_snapshot_context, get_readme_to_display, get_swh_persistent_ids, gen_link ) from swh.web.browse.browseurls import browse_route @browse_route(r'directory/(?P[0-9a-f]+)/', r'directory/(?P[0-9a-f]+)/(?P.+)/', - view_name='browse-directory') + view_name='browse-directory', + checksum_args=['sha1_git']) def directory_browse(request, sha1_git, path=None): """Django view for browsing the content of a directory identified by its sha1_git value. The url that points to it is :http:get:`/browse/directory/(sha1_git)/[(path)/]` """ # noqa root_sha1_git = sha1_git try: if path: dir_info = service.lookup_directory_with_path(sha1_git, path) # some readme files can reference assets reachable from the # browsed directory, handle that special case in order to # correctly displayed them if dir_info and dir_info['type'] == 'file': file_raw_url = reverse( 'browse-content-raw', url_args={'query_string': dir_info['checksums']['sha1']}) return redirect(file_raw_url) sha1_git = dir_info['target'] dirs, files = get_directory_entries(sha1_git) origin_type = request.GET.get('origin_type', None) origin_url = request.GET.get('origin_url', None) if not origin_url: origin_url = request.GET.get('origin', None) snapshot_context = None if origin_url: try: snapshot_context = get_snapshot_context(None, origin_type, origin_url) except Exception: raw_dir_url = reverse('browse-directory', url_args={'sha1_git': sha1_git}) error_message = \ ('The Software Heritage archive has a directory ' 'with the hash you provided but the origin ' 'mentioned in your request appears broken: %s. ' 'Please check the URL and try again.\n\n' 'Nevertheless, you can still browse the directory ' 'without origin information: %s' % (gen_link(origin_url), gen_link(raw_dir_url))) raise NotFoundExc(error_message) if snapshot_context: snapshot_context['visit_info'] = None except Exception as exc: return handle_view_exception(request, exc) path_info = gen_path_info(path) query_params = {'origin': origin_url} breadcrumbs = [] breadcrumbs.append({'name': root_sha1_git[:7], 'url': reverse('browse-directory', url_args={'sha1_git': root_sha1_git}, query_params=query_params)}) for pi in path_info: breadcrumbs.append({'name': pi['name'], 'url': reverse('browse-directory', url_args={'sha1_git': root_sha1_git, 'path': pi['path']}, query_params=query_params)}) path = '' if path is None else (path + '/') for d in dirs: if d['type'] == 'rev': d['url'] = reverse('browse-revision', url_args={'sha1_git': d['target']}, query_params=query_params) else: d['url'] = reverse('browse-directory', url_args={'sha1_git': root_sha1_git, 'path': path + d['name']}, query_params=query_params) sum_file_sizes = 0 readmes = {} for f in files: query_string = 'sha1_git:' + f['target'] f['url'] = reverse('browse-content', url_args={'query_string': query_string}, query_params={'path': root_sha1_git + '/' + path + f['name'], 'origin': origin_url}) if f['length'] is not None: sum_file_sizes += f['length'] f['length'] = filesizeformat(f['length']) if f['name'].lower().startswith('readme'): readmes[f['name']] = f['checksums']['sha1'] readme_name, readme_url, readme_html = get_readme_to_display(readmes) sum_file_sizes = filesizeformat(sum_file_sizes) dir_metadata = {'id': sha1_git, 'number of regular files': len(files), 'number of subdirectories': len(dirs), 'sum of regular file sizes': sum_file_sizes} vault_cooking = { 'directory_context': True, 'directory_id': sha1_git, 'revision_context': False, 'revision_id': None } swh_ids = get_swh_persistent_ids([{'type': 'directory', 'id': sha1_git}]) heading = 'Directory - %s' % sha1_git if breadcrumbs: dir_path = '/'.join([bc['name'] for bc in breadcrumbs]) + '/' heading += ' - %s' % dir_path return render(request, 'browse/directory.html', {'heading': heading, 'swh_object_id': swh_ids[0]['swh_id'], 'swh_object_name': 'Directory', 'swh_object_metadata': dir_metadata, 'dirs': dirs, 'files': files, 'breadcrumbs': breadcrumbs, 'top_right_link': None, 'readme_name': readme_name, 'readme_url': readme_url, 'readme_html': readme_html, 'snapshot_context': snapshot_context, 'vault_cooking': vault_cooking, 'show_actions_menu': True, 'swh_ids': swh_ids}) diff --git a/swh/web/common/urlsindex.py b/swh/web/common/urlsindex.py index 6e44db7a..8134f015 100644 --- a/swh/web/common/urlsindex.py +++ b/swh/web/common/urlsindex.py @@ -1,48 +1,66 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.conf.urls import url +from django.shortcuts import redirect class UrlsIndex(object): """ Simple helper class for centralizing url patterns of a Django web application. Derived classes should override the 'scope' class attribute otherwise all declared patterns will be grouped under the default one. """ _urlpatterns = {} scope = 'default' @classmethod - def add_url_pattern(cls, url_pattern, view, view_name): + def add_url_pattern(cls, url_pattern, view, view_name=None): """ Class method that adds an url pattern to the current scope. Args: url_pattern: regex describing a Django url view: function implementing the Django view view_name: name of the view used to reverse the url """ if cls.scope not in cls._urlpatterns: cls._urlpatterns[cls.scope] = [] if view_name: cls._urlpatterns[cls.scope].append(url(url_pattern, view, name=view_name)) else: cls._urlpatterns[cls.scope].append(url(url_pattern, view)) + @classmethod + def add_redirect_for_checksum_args(cls, view_name, url_patterns, + checksum_args): + new_view_name = view_name+'-uppercase-checksum' + for url_pattern in url_patterns: + url_pattern_upper = url_pattern.replace('[0-9a-f]', + '[0-9a-fA-F]') + + def view_redirect(request, *args, **kwargs): + for checksum_arg in checksum_args: + checksum_upper = kwargs[checksum_arg] + kwargs[checksum_arg] = checksum_upper.lower() + return redirect(view_name, *args, **kwargs) + + cls.add_url_pattern(url_pattern_upper, view_redirect, + new_view_name) + @classmethod def get_url_patterns(cls): """ Class method that returns the list of url pattern associated to the current scope. Returns: The list of url patterns associated to the current scope """ return cls._urlpatterns[cls.scope] diff --git a/swh/web/tests/api/views/test_directory.py b/swh/web/tests/api/views/test_directory.py index d0371d24..61d804a1 100644 --- a/swh/web/tests/api/views/test_directory.py +++ b/swh/web/tests/api/views/test_directory.py @@ -1,92 +1,105 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import random from hypothesis import given from rest_framework.test import APITestCase from swh.web.common.utils import reverse from swh.web.tests.strategies import directory, unknown_directory from swh.web.tests.testcase import WebTestCase class DirectoryApiTestCase(WebTestCase, APITestCase): @given(directory()) def test_api_directory(self, directory): url = reverse('api-directory', url_args={'sha1_git': directory}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') expected_data = list(map(self._enrich_dir_data, self.directory_ls(directory))) self.assertEqual(rv.data, expected_data) @given(unknown_directory()) def test_api_directory_not_found(self, unknown_directory): url = reverse('api-directory', url_args={'sha1_git': unknown_directory}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Directory with sha1_git %s not found' % unknown_directory}) @given(directory()) def test_api_directory_with_path_found(self, directory): directory_content = self.directory_ls(directory) path = random.choice(directory_content) url = reverse('api-directory', url_args={'sha1_git': directory, 'path': path['name']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, self._enrich_dir_data(path)) @given(directory()) def test_api_directory_with_path_not_found(self, directory): path = 'some/path/to/nonexistent/dir/' url = reverse('api-directory', url_args={'sha1_git': directory, 'path': path}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': ('Directory entry with path %s from %s not found' % (path, directory))}) + @given(directory()) + def test_api_directory_uppercase(self, directory): + url = reverse('api-directory-uppercase-checksum', + url_args={'sha1_git': directory.upper()}) + + resp = self.client.get(url) + self.assertEqual(resp.status_code, 302) + + redirect_url = reverse('api-directory', + url_args={'sha1_git': directory}) + + self.assertEqual(resp['location'], redirect_url) + @classmethod def _enrich_dir_data(cls, dir_data): if dir_data['type'] == 'file': dir_data['target_url'] = \ reverse('api-content', url_args={'q': 'sha1_git:%s' % dir_data['target']}) elif dir_data['type'] == 'dir': dir_data['target_url'] = \ reverse('api-directory', url_args={'sha1_git': dir_data['target']}) elif dir_data['type'] == 'rev': dir_data['target_url'] = \ reverse('api-revision', url_args={'sha1_git': dir_data['target']}) return dir_data diff --git a/swh/web/tests/browse/views/test_directory.py b/swh/web/tests/browse/views/test_directory.py index 98c22627..b70eb04c 100644 --- a/swh/web/tests/browse/views/test_directory.py +++ b/swh/web/tests/browse/views/test_directory.py @@ -1,118 +1,131 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import random from hypothesis import given from swh.web.common.utils import reverse, get_swh_persistent_id from swh.web.common.utils import gen_path_info from swh.web.tests.strategies import ( directory, directory_with_subdirs, invalid_sha1, unknown_directory ) from swh.web.tests.testcase import WebTestCase class SwhBrowseDirectoryTest(WebTestCase): def directory_view(self, root_directory_sha1, directory_entries, path=None): dirs = [e for e in directory_entries if e['type'] in ('dir', 'rev')] files = [e for e in directory_entries if e['type'] == 'file'] url_args = {'sha1_git': root_directory_sha1} if path: url_args['path'] = path url = reverse('browse-directory', url_args=url_args) root_dir_url = reverse('browse-directory', url_args={'sha1_git': root_directory_sha1}) resp = self.client.get(url) self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('browse/directory.html') self.assertContains(resp, '' + root_directory_sha1[:7] + '') self.assertContains(resp, '', count=len(dirs)) self.assertContains(resp, '', count=len(files)) for d in dirs: if d['type'] == 'rev': dir_url = reverse('browse-revision', url_args={'sha1_git': d['target']}) else: dir_path = d['name'] if path: dir_path = "%s/%s" % (path, d['name']) dir_url = reverse('browse-directory', url_args={'sha1_git': root_directory_sha1, 'path': dir_path}) self.assertContains(resp, dir_url) for f in files: file_path = "%s/%s" % (root_directory_sha1, f['name']) if path: file_path = "%s/%s/%s" % (root_directory_sha1, path, f['name']) query_string = 'sha1_git:' + f['target'] file_url = reverse('browse-content', url_args={'query_string': query_string}, query_params={'path': file_path}) self.assertContains(resp, file_url) path_info = gen_path_info(path) self.assertContains(resp, '
  • ', count=len(path_info)+1) self.assertContains(resp, '%s' % (root_dir_url, root_directory_sha1[:7])) for p in path_info: dir_url = reverse('browse-directory', url_args={'sha1_git': root_directory_sha1, 'path': p['path']}) self.assertContains(resp, '%s' % (dir_url, p['name'])) self.assertContains(resp, 'vault-cook-directory') swh_dir_id = get_swh_persistent_id('directory', directory_entries[0]['dir_id']) # noqa swh_dir_id_url = reverse('browse-swh-id', url_args={'swh_id': swh_dir_id}) self.assertContains(resp, swh_dir_id) self.assertContains(resp, swh_dir_id_url) @given(directory()) def test_root_directory_view(self, directory): self.directory_view(directory, self.directory_ls(directory)) @given(directory_with_subdirs()) def test_sub_directory_view(self, directory): dir_content = self.directory_ls(directory) subdir = random.choice([e for e in dir_content if e['type'] == 'dir']) subdir_content = self.directory_ls(subdir['target']) self.directory_view(directory, subdir_content, subdir['name']) @given(invalid_sha1(), unknown_directory()) def test_directory_request_errors(self, invalid_sha1, unknown_directory): dir_url = reverse('browse-directory', url_args={'sha1_git': invalid_sha1}) resp = self.client.get(dir_url) self.assertEqual(resp.status_code, 400) self.assertTemplateUsed('browse/error.html') dir_url = reverse('browse-directory', url_args={'sha1_git': unknown_directory}) resp = self.client.get(dir_url) self.assertEqual(resp.status_code, 404) self.assertTemplateUsed('browse/error.html') + + @given(directory()) + def test_directory_uppercase(self, directory): + url = reverse('browse-directory-uppercase-checksum', + url_args={'sha1_git': directory.upper()}) + + resp = self.client.get(url) + self.assertEqual(resp.status_code, 302) + + redirect_url = reverse('browse-directory', + url_args={'sha1_git': directory}) + + self.assertEqual(resp['location'], redirect_url)