diff --git a/swh/web/api/apiurls.py b/swh/web/api/apiurls.py
index 2b661e70..e4b29fec 100644
--- a/swh/web/api/apiurls.py
+++ b/swh/web/api/apiurls.py
@@ -1,79 +1,86 @@
 # Copyright (C) 2017-2018 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from rest_framework.decorators import api_view
 
 from swh.web.common.urlsindex import UrlsIndex
 from swh.web.common.throttling import throttle_scope
 
 
 class APIUrls(UrlsIndex):
     """
     Class to manage API documentation URLs.
 
       - Indexes all routes documented using apidoc's decorators.
       - Tracks endpoint/request processing method relationships for use in
         generating related urls in API documentation
     """
     _apidoc_routes = {}
     _method_endpoints = {}
     scope = 'api'
 
     @classmethod
     def get_app_endpoints(cls):
         return cls._apidoc_routes
 
     @classmethod
     def add_route(cls, route, docstring, **kwargs):
         """
         Add a route to the self-documenting API reference
         """
         route_view_name = 'api-%s' % route[1:-1].replace('/', '-')
         if route not in cls._apidoc_routes:
             d = {'docstring': docstring,
                  'route_view_name': route_view_name}
             for k, v in kwargs.items():
                 d[k] = v
             cls._apidoc_routes[route] = d
 
 
 class api_route(object):  # noqa: N801
     """
     Decorator to ease the registration of an API endpoint
     using the Django REST Framework.
 
     Args:
         url_pattern: the url pattern used by DRF to identify the API route
         view_name: the name of the API view associated to the route used to
             reverse the url
         methods: array of HTTP methods supported by the API route
 
     """
     def __init__(self, url_pattern=None, view_name=None,
                  methods=['GET', 'HEAD', 'OPTIONS'],
                  throttle_scope='swh_api',
-                 api_version='1'):
+                 api_version='1',
+                 checksum_args=None):
 
         super().__init__()
         self.url_pattern = '^' + api_version + url_pattern + '$'
         self.view_name = view_name
         self.methods = methods
         self.throttle_scope = throttle_scope
+        self.checksum_args = checksum_args
 
     def __call__(self, f):
         # create a DRF view from the wrapped function
         @api_view(self.methods)
         @throttle_scope(self.throttle_scope)
         def api_view_f(*args, **kwargs):
             return f(*args, **kwargs)
         # small hacks for correctly generating API endpoints index doc
         api_view_f.__name__ = f.__name__
         api_view_f.http_method_names = self.methods
 
         # register the route and its view in the endpoints index
         APIUrls.add_url_pattern(self.url_pattern, api_view_f,
                                 self.view_name)
+
+        if self.checksum_args:
+            APIUrls.add_redirect_for_checksum_args(self.view_name,
+                                                   [self.url_pattern],
+                                                   self.checksum_args)
         return f
diff --git a/swh/web/api/views/content.py b/swh/web/api/views/content.py
index d213a495..c327303d 100644
--- a/swh/web/api/views/content.py
+++ b/swh/web/api/views/content.py
@@ -1,374 +1,379 @@
 # Copyright (C) 2015-2018 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import functools
 
 from django.http import HttpResponse
 
 from swh.web.common import service
 from swh.web.common.utils import reverse
 from swh.web.common.exc import NotFoundExc
 from swh.web.api.apidoc import api_doc
 from swh.web.api import utils
 from swh.web.api.apiurls import api_route
 from swh.web.api.views.utils import api_lookup
 
 
-@api_route(r'/content/(?P<q>.+)/filetype/', 'api-content-filetype')
+@api_route(r'/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/filetype/',
+           'api-content-filetype',
+           checksum_args=['q'])
 @api_doc('/content/filetype/')
 def api_content_filetype(request, q):
     """
     .. http:get:: /api/1/content/[(hash_type):](hash)/filetype/
 
         Get information about the detected MIME type of a content object.
 
         :param string hash_type: optional parameter specifying which hashing
             algorithm has been used to compute the content checksum.
             It can be either ``sha1``, ``sha1_git``, ``sha256`` or
             ``blake2s256``. If that parameter is not provided, it is assumed
             that the hashing algorithm used is `sha1`.
         :param string hash: hexadecimal representation of the checksum value
             computed with the specified hashing algorithm.
 
         :>json object content_url: link to
             :http:get:`/api/1/content/[(hash_type):](hash)/` for getting
             information about the content
         :>json string encoding: the detected content encoding
         :>json string id: the **sha1** identifier of the content
         :>json string mimetype: the detected MIME type of the content
         :>json object tool: information about the tool used to detect the
             content filetype
 
         :reqheader Accept: the requested response content type, either
             ``application/json`` (default) or ``application/yaml``
         :resheader Content-Type: this depends on :http:header:`Accept`
             header of request
 
         **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`,
         :http:method:`options`
 
         :statuscode 200: no error
         :statuscode 400: an invalid **hash_type** or **hash** has been
             provided
         :statuscode 404: requested content can not be found in the archive
 
         **Example:**
 
         .. parsed-literal::
 
             :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/filetype/`
 
     """ # noqa
     return api_lookup(
         service.lookup_content_filetype, q,
         notfound_msg='No filetype information found for content {}.'.format(q),
         enrich_fn=utils.enrich_metadata_endpoint)
 
 
-@api_route(r'/content/(?P<q>.+)/language/', 'api-content-language')
+@api_route(r'/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/language/',
+           'api-content-language', checksum_args=['q'])
 @api_doc('/content/language/')
 def api_content_language(request, q):
     """
     .. http:get:: /api/1/content/[(hash_type):](hash)/language/
 
         Get information about the programming language used in a content
         object.
 
         :param string hash_type: optional parameter specifying which hashing
             algorithm has been used to compute the content checksum.
             It can be either ``sha1``, ``sha1_git``, ``sha256`` or
             ``blake2s256``. If that parameter is not provided, it is assumed
             that the hashing algorithm used is ``sha1``.
         :param string hash: hexadecimal representation of the checksum value
             computed with the specified hashing algorithm.
 
         :>json object content_url: link to
             :http:get:`/api/1/content/[(hash_type):](hash)/` for getting
             information about the content
         :>json string id: the **sha1** identifier of the content
         :>json string lang: the detected programming language if any
         :>json object tool: information about the tool used to detect the
             programming language
 
         :reqheader Accept: the requested response content type, either
             ``application/json`` (default) or ``application/yaml``
         :resheader Content-Type: this depends on :http:header:`Accept`
             header of request
 
         **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`,
         :http:method:`options`
 
         :statuscode 200: no error
         :statuscode 400: an invalid **hash_type** or **hash** has been
             provided
         :statuscode 404: requested content can not be found in the archive
 
         **Example:**
 
         ..
parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/language/` """ # noqa return api_lookup( service.lookup_content_language, q, notfound_msg='No language information found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) -@api_route(r'/content/(?P.+)/license/', 'api-content-license') +@api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/license/', + 'api-content-license', checksum_args=['q']) @api_doc('/content/license/') def api_content_license(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/license/ Get information about the license of a content object. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is ``sha1``. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. :>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for getting information about the content :>json string id: the **sha1** identifier of the content :>json array licenses: array of strings containing the detected license names if any :>json object tool: information about the tool used to detect the license :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/license/` """ # noqa return api_lookup( service.lookup_content_license, q, notfound_msg='No license information found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) -@api_route(r'/content/(?P.+)/ctags/', 'api-content-ctags') +@api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/ctags/', 'api-content-ctags') @api_doc('/content/ctags/', tags=['hidden']) def api_content_ctags(request, q): """ Get information about all `Ctags `_-style symbols defined in a content object. """ return api_lookup( service.lookup_content_ctags, q, notfound_msg='No ctags symbol found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) -@api_route(r'/content/(?P.+)/raw/', 'api-content-raw') +@api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/raw/', 'api-content-raw', + checksum_args=['q']) @api_doc('/content/raw/', handle_response=True) def api_content_raw(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/raw/ Get the raw content of a content object (aka a "blob"), as a byte sequence. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is ``sha1``. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. 
:query string filename: if provided, the downloaded content will get that filename :resheader Content-Type: application/octet-stream **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/raw/` """ # noqa def generate(content): yield content['data'] content_raw = service.lookup_content_raw(q) if not content_raw: raise NotFoundExc('Content %s is not found.' % q) filename = request.query_params.get('filename') if not filename: filename = 'content_%s_raw' % q.replace(':', '_') response = HttpResponse(generate(content_raw), content_type='application/octet-stream') response['Content-disposition'] = 'attachment; filename=%s' % filename return response @api_route(r'/content/symbol/(?P.+)/', 'api-content-symbol') @api_doc('/content/symbol/', tags=['hidden']) def api_content_symbol(request, q=None): """Search content objects by `Ctags `_-style symbol (e.g., function name, data type, method, ...). """ result = {} last_sha1 = request.query_params.get('last_sha1', None) per_page = int(request.query_params.get('per_page', '10')) def lookup_exp(exp, last_sha1=last_sha1, per_page=per_page): exp = list(service.lookup_expression(exp, last_sha1, per_page)) return exp if exp else None symbols = api_lookup( lookup_exp, q, notfound_msg="No indexed raw content match expression '{}'.".format(q), enrich_fn=functools.partial(utils.enrich_content, top_url=True)) if symbols: nb_symbols = len(symbols) if nb_symbols == per_page: query_params = {} new_last_sha1 = symbols[-1]['sha1'] query_params['last_sha1'] = new_last_sha1 if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('api-content-symbol', url_args={'q': q}, query_params=query_params) } result.update({ 'results': symbols }) return result @api_route(r'/content/known/search/', 'api-content-known', methods=['POST']) @api_route(r'/content/known/(?P(?!search).*)/', 'api-content-known') @api_doc('/content/known/', tags=['hidden']) def api_check_content_known(request, q=None): """ .. http:get:: /api/1/content/known/(sha1)[,(sha1), ...,(sha1)]/ Check whether some content(s) (aka "blob(s)") is present in the archive based on its **sha1** checksum. :param string sha1: hexadecimal representation of the **sha1** checksum value for the content to check existence. Multiple values can be provided separated by ','. :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request :>json array search_res: array holding the search result for each provided **sha1** :>json object search_stats: some statistics regarding the number of **sha1** provided and the percentage of those found in the archive **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **sha1** has been provided **Example:** .. 
parsed-literal:: :swh_web_api:`content/known/dc2830a9e72f23c1dfebef4413003221baa5fb62,0c3f19cb47ebfbe643fb19fa94c874d18fa62d12/` """ # noqa response = {'search_res': None, 'search_stats': None} search_stats = {'nbfiles': 0, 'pct': 0} search_res = None queries = [] # GET: Many hash separated values request if q: hashes = q.split(',') for v in hashes: queries.append({'filename': None, 'sha1': v}) # POST: Many hash requests in post form submission elif request.method == 'POST': data = request.data # Remove potential inputs with no associated value for k, v in data.items(): if v is not None: if k == 'q' and len(v) > 0: queries.append({'filename': None, 'sha1': v}) elif v != '': queries.append({'filename': k, 'sha1': v}) if queries: lookup = service.lookup_multiple_hashes(queries) result = [] nb_queries = len(queries) for el in lookup: res_d = {'sha1': el['sha1'], 'found': el['found']} if 'filename' in el and el['filename']: res_d['filename'] = el['filename'] result.append(res_d) search_res = result nbfound = len([x for x in lookup if x['found']]) search_stats['nbfiles'] = nb_queries search_stats['pct'] = (nbfound / nb_queries) * 100 response['search_res'] = search_res response['search_stats'] = search_stats return response -@api_route(r'/content/(?P.+)/', 'api-content') +@api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/', 'api-content', + checksum_args=['q']) @api_doc('/content/') def api_content_metadata(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/ Get information about a content (aka a "blob") object. In the archive, a content object is identified based on checksum values computed using various hashing algorithms. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is ``sha1``. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request :>json object checksums: object holding the computed checksum values for the requested content :>json string data_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/raw/` for downloading the content raw bytes :>json string filetype_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/filetype/` for getting information about the content MIME type :>json string language_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/language/` for getting information about the programming language used in the content :>json number length: length of the content in bytes :>json string license_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/license/` for getting information about the license of the content **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. 
parsed-literal:: curl -i :swh_web_api:`content/sha1_git:fe95a46679d128ff167b7c55df5d02356c5a1ae1/` """ # noqa return api_lookup( service.lookup_content, q, notfound_msg='Content with {} not found.'.format(q), enrich_fn=functools.partial(utils.enrich_content, query_string=q)) diff --git a/swh/web/api/views/directory.py b/swh/web/api/views/directory.py index c17902ee..a5273542 100644 --- a/swh/web/api/views/directory.py +++ b/swh/web/api/views/directory.py @@ -1,75 +1,77 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service from swh.web.api import utils from swh.web.api.apidoc import api_doc from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup -@api_route(r'/directory/(?P[0-9a-f]+)/', 'api-directory') +@api_route(r'/directory/(?P[0-9a-f]+)/', 'api-directory', + checksum_args=['sha1_git']) @api_route(r'/directory/(?P[0-9a-f]+)/(?P.+)/', - 'api-directory') + 'api-directory', + checksum_args=['sha1_git']) @api_doc('/directory/') def api_directory(request, sha1_git, path=None): """ .. http:get:: /api/1/directory/(sha1_git)/[(path)/] Get information about directory objects. Directories are identified by **sha1** checksums, compatible with Git directory identifiers. See :func:`swh.model.identifiers.directory_identifier` in our data model module for details about how they are computed. When given only a directory identifier, this endpoint returns information about the directory itself, returning its content (usually a list of directory entries). When given a directory identifier and a path, this endpoint returns information about the directory entry pointed by the relative path, starting path resolution from the given directory. :param string sha1_git: hexadecimal representation of the directory **sha1_git** identifier :param string path: optional parameter to get information about the directory entry pointed by that relative path :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request :>jsonarr object checksums: object holding the computed checksum values for a directory entry (only for file entries) :>jsonarr string dir_id: **sha1_git** identifier of the requested directory :>jsonarr number length: length of a directory entry in bytes (only for file entries) for getting information about the content MIME type :>jsonarr string name: the directory entry name :>jsonarr number perms: permissions for the directory entry :>jsonarr string target: **sha1_git** identifier of the directory entry :>jsonarr string target_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` or :http:get:`/api/1/directory/(sha1_git)/[(path)/]` depending on the directory entry type :>jsonarr string type: the type of the directory entry, can be either ``dir``, ``file`` or ``rev`` **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested directory can not be found in the archive **Example:** .. 
parsed-literal:: :swh_web_api:`directory/977fc4b98c0e85816348cebd3b12026407c368b6/` """ # noqa if path: error_msg_path = ('Entry with path %s relative to directory ' 'with sha1_git %s not found.') % (path, sha1_git) return api_lookup( service.lookup_directory_with_path, sha1_git, path, notfound_msg=error_msg_path, enrich_fn=utils.enrich_directory) else: error_msg_nopath = 'Directory with sha1_git %s not found.' % sha1_git return api_lookup( service.lookup_directory, sha1_git, notfound_msg=error_msg_nopath, enrich_fn=utils.enrich_directory) diff --git a/swh/web/api/views/release.py b/swh/web/api/views/release.py index c0e168ec..46301edf 100644 --- a/swh/web/api/views/release.py +++ b/swh/web/api/views/release.py @@ -1,58 +1,59 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service from swh.web.api import utils from swh.web.api.apidoc import api_doc from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup -@api_route(r'/release/(?P[0-9a-f]+)/', 'api-release') +@api_route(r'/release/(?P[0-9a-f]+)/', 'api-release', + checksum_args=['sha1_git']) @api_doc('/release/') def api_release(request, sha1_git): """ .. http:get:: /api/1/release/(sha1_git)/ Get information about a release in the archive. Releases are identified by **sha1** checksums, compatible with Git tag identifiers. See :func:`swh.model.identifiers.release_identifier` in our data model module for details about how they are computed. :param string sha1_git: hexadecimal representation of the release **sha1_git** identifier :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request :>json object author: information about the author of the release :>json string author_url: link to :http:get:`/api/1/person/(person_id)/` to get information about the author of the release :>json string date: ISO representation of the release date (in UTC) :>json string id: the release unique identifier :>json string message: the message associated to the release :>json string name: the name of the release :>json string target: the target identifier of the release :>json string target_type: the type of the target, can be either **release**, **revision**, **content**, **directory** :>json string target_url: a link to the adequate api url based on the target type **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested release can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`release/208f61cc7a5dbc9879ae6e5c2f95891e270f09ef/` """ # noqa error_msg = 'Release with sha1_git %s not found.' 
% sha1_git return api_lookup( service.lookup_release, sha1_git, notfound_msg=error_msg, enrich_fn=utils.enrich_release) diff --git a/swh/web/api/views/revision.py b/swh/web/api/views/revision.py index 73461e78..10b3ec86 100644 --- a/swh/web/api/views/revision.py +++ b/swh/web/api/views/revision.py @@ -1,487 +1,489 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.http import HttpResponse from swh.web.common import service from swh.web.common.utils import reverse from swh.web.common.utils import parse_timestamp from swh.web.api import utils from swh.web.api.apidoc import api_doc from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup def _revision_directory_by(revision, path, request_path, limit=100, with_data=False): """ Compute the revision matching criterion's directory or content data. Args: revision: dictionary of criterions representing a revision to lookup path: directory's path to lookup request_path: request path which holds the original context to limit: optional query parameter to limit the revisions log (default to 100). For now, note that this limit could impede the transitivity conclusion about sha1_git not being an ancestor of with_data: indicate to retrieve the content's raw data if path resolves to a content. """ def enrich_directory_local(dir, context_url=request_path): return utils.enrich_directory(dir, context_url) rev_id, result = service.lookup_directory_through_revision( revision, path, limit=limit, with_data=with_data) content = result['content'] if result['type'] == 'dir': # dir_entries result['content'] = list(map(enrich_directory_local, content)) elif result['type'] == 'file': # content result['content'] = utils.enrich_content(content) elif result['type'] == 'rev': # revision result['content'] = utils.enrich_revision(content) return result @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/log/', 'api-revision-origin-log') @api_route(r'/revision/origin/(?P[0-9]+)/log/', 'api-revision-origin-log') @api_route(r'/revision/origin/(?P[0-9]+)' r'/ts/(?P.+)/log/', 'api-revision-origin-log') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)' r'/ts/(?P.+)/log/', 'api-revision-origin-log') @api_doc('/revision/origin/log/') def api_revision_log_by(request, origin_id, branch_name='HEAD', ts=None): """ .. http:get:: /api/1/revision/origin/(origin_id)[/branch/(branch_name)][/ts/(timestamp)]/log Show the commit log for a revision, searching for it based on software origin, branch name, and/or visit timestamp. This endpoint behaves like :http:get:`/api/1/revision/(sha1_git)[/prev/(prev_sha1s)]/log/`, but operates on the revision that has been found at a given software origin, close to a given point in time, pointed by a given branch. :param int origin_id: a software origin identifier :param string branch_name: optional parameter specifying a fully-qualified branch name associated to the software origin, e.g., "refs/heads/master". Defaults to the HEAD branch. :param string timestamp: optional parameter specifying a timestamp close to which the revision pointed by the given branch should be looked up. The timestamp can be expressed either as an ISO date or as a Unix one (in UTC). Defaults to now. 
:reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request :>jsonarr object author: information about the author of the revision :>jsonarr string author_url: link to :http:get:`/api/1/person/(person_id)/` to get information about the author of the revision :>jsonarr object committer: information about the committer of the revision :>jsonarr string committer_url: link to :http:get:`/api/1/person/(person_id)/` to get information about the committer of the revision :>jsonarr string committer_date: ISO representation of the commit date (in UTC) :>jsonarr string date: ISO representation of the revision date (in UTC) :>jsonarr string directory: the unique identifier that revision points to :>jsonarr string directory_url: link to :http:get:`/api/1/directory/(sha1_git)/[(path)/]` to get information about the directory associated to the revision :>jsonarr string id: the revision unique identifier :>jsonarr boolean merge: whether or not the revision corresponds to a merge commit :>jsonarr string message: the message associated to the revision :>jsonarr array parents: the parents of the revision, i.e. the previous revisions that head directly to it, each entry of that array contains an unique parent revision identifier but also a link to :http:get:`/api/1/revision/(sha1_git)/` to get more information about it :>jsonarr string type: the type of the revision **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 404: no revision matching the given criteria could be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/origin/723566/ts/2016-01-17T00:00:00+00:00/log/` """ # noqa result = {} per_page = int(request.query_params.get('per_page', '10')) def lookup_revision_log_by_with_limit(o_id, br, ts, limit=per_page+1): return service.lookup_revision_log_by(o_id, br, ts, limit) error_msg = 'No revision matching origin %s ' % origin_id error_msg += ', branch name %s' % branch_name error_msg += (' and time stamp %s.' % ts) if ts else '.' 
rev_get = api_lookup( lookup_revision_log_by_with_limit, int(origin_id), branch_name, ts, notfound_msg=error_msg, enrich_fn=utils.enrich_revision) nb_rev = len(rev_get) if nb_rev == per_page+1: revisions = rev_get[:-1] last_sha1_git = rev_get[-1]['id'] params = {k: v for k, v in {'origin_id': origin_id, 'branch_name': branch_name, 'ts': ts, }.items() if v is not None} query_params = {} query_params['sha1_git'] = last_sha1_git if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('api-revision-origin-log', url_args=params, query_params=query_params) } else: revisions = rev_get result.update({'results': revisions}) return result @api_route(r'/revision/origin/(?P[0-9]+)/directory/', 'api-revision-origin-directory') @api_route(r'/revision/origin/(?P[0-9]+)/directory/(?P.+)/', 'api-revision-origin-directory') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/directory/', 'api-revision-origin-directory') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/ts/(?P.+)/directory/', 'api-revision-origin-directory') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/directory/(?P.+)/', 'api-revision-origin-directory') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/ts/(?P.+)' r'/directory/(?P.+)/', 'api-revision-origin-directory') @api_doc('/revision/origin/directory/', tags=['hidden']) def api_directory_through_revision_origin(request, origin_id, branch_name='HEAD', ts=None, path=None, with_data=False): """ Display directory or content information through a revision identified by origin/branch/timestamp. """ if ts: ts = parse_timestamp(ts) return _revision_directory_by({'origin_id': int(origin_id), 'branch_name': branch_name, 'ts': ts }, path, request.path, with_data=with_data) @api_route(r'/revision/origin/(?P[0-9]+)/', 'api-revision-origin') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/', 'api-revision-origin') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/ts/(?P.+)/', 'api-revision-origin') @api_route(r'/revision/origin/(?P[0-9]+)/ts/(?P.+)/', 'api-revision-origin') @api_doc('/revision/origin/') def api_revision_with_origin(request, origin_id, branch_name='HEAD', ts=None): """ .. http:get:: /api/1/revision/origin/(origin_id)/[branch/(branch_name)/][ts/(timestamp)/] Get information about a revision, searching for it based on software origin, branch name, and/or visit timestamp. This endpoint behaves like :http:get:`/api/1/revision/(sha1_git)/`, but operates on the revision that has been found at a given software origin, close to a given point in time, pointed by a given branch. :param int origin_id: a software origin identifier :param string branch_name: optional parameter specifying a fully-qualified branch name associated to the software origin, e.g., "refs/heads/master". Defaults to the HEAD branch. :param string timestamp: optional parameter specifying a timestamp close to which the revision pointed by the given branch should be looked up. The timestamp can be expressed either as an ISO date or as a Unix one (in UTC). Defaults to now. 
:reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request :>json object author: information about the author of the revision :>json string author_url: link to :http:get:`/api/1/person/(person_id)/` to get information about the author of the revision :>json object committer: information about the committer of the revision :>json string committer_url: link to :http:get:`/api/1/person/(person_id)/` to get information about the committer of the revision :>json string committer_date: ISO representation of the commit date (in UTC) :>json string date: ISO representation of the revision date (in UTC) :>json string directory: the unique identifier that revision points to :>json string directory_url: link to :http:get:`/api/1/directory/(sha1_git)/[(path)/]` to get information about the directory associated to the revision :>json string id: the revision unique identifier :>json boolean merge: whether or not the revision corresponds to a merge commit :>json string message: the message associated to the revision :>json array parents: the parents of the revision, i.e. the previous revisions that head directly to it, each entry of that array contains an unique parent revision identifier but also a link to :http:get:`/api/1/revision/(sha1_git)/` to get more information about it :>json string type: the type of the revision **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 404: no revision matching the given criteria could be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/origin/13706355/branch/refs/heads/2.7/` """ # noqa return api_lookup( service.lookup_revision_by, int(origin_id), branch_name, ts, notfound_msg=('Revision with (origin_id: {}, branch_name: {}' ', ts: {}) not found.'.format(origin_id, branch_name, ts)), enrich_fn=utils.enrich_revision) -@api_route(r'/revision/(?P[0-9a-f]+)/', 'api-revision') +@api_route(r'/revision/(?P[0-9a-f]+)/', 'api-revision', + checksum_args=['sha1_git']) @api_doc('/revision/') def api_revision(request, sha1_git): """ .. http:get:: /api/1/revision/(sha1_git)/ Get information about a revision in the archive. Revisions are identified by **sha1** checksums, compatible with Git commit identifiers. See :func:`swh.model.identifiers.revision_identifier` in our data model module for details about how they are computed. 
:param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request :>json object author: information about the author of the revision :>json string author_url: link to :http:get:`/api/1/person/(person_id)/` to get information about the author of the revision :>json object committer: information about the committer of the revision :>json string committer_url: link to :http:get:`/api/1/person/(person_id)/` to get information about the committer of the revision :>json string committer_date: ISO representation of the commit date (in UTC) :>json string date: ISO representation of the revision date (in UTC) :>json string directory: the unique identifier that revision points to :>json string directory_url: link to :http:get:`/api/1/directory/(sha1_git)/[(path)/]` to get information about the directory associated to the revision :>json string id: the revision unique identifier :>json boolean merge: whether or not the revision corresponds to a merge commit :>json string message: the message associated to the revision :>json array parents: the parents of the revision, i.e. the previous revisions that head directly to it, each entry of that array contains an unique parent revision identifier but also a link to :http:get:`/api/1/revision/(sha1_git)/` to get more information about it :>json string type: the type of the revision **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/aafb16d69fd30ff58afdd69036a26047f3aebdc6/` """ # noqa return api_lookup( service.lookup_revision, sha1_git, notfound_msg='Revision with sha1_git {} not found.'.format(sha1_git), enrich_fn=utils.enrich_revision) @api_route(r'/revision/(?P[0-9a-f]+)/raw/', - 'api-revision-raw-message') + 'api-revision-raw-message', checksum_args=['sha1_git']) @api_doc('/revision/raw/', tags=['hidden'], handle_response=True) def api_revision_raw_message(request, sha1_git): """Return the raw data of the message of revision identified by sha1_git """ raw = service.lookup_revision_message(sha1_git) response = HttpResponse(raw['message'], content_type='application/octet-stream') response['Content-disposition'] = \ 'attachment;filename=rev_%s_raw' % sha1_git return response @api_route(r'/revision/(?P[0-9a-f]+)/directory/', - 'api-revision-directory') + 'api-revision-directory', checksum_args=['sha1_git']) @api_route(r'/revision/(?P[0-9a-f]+)/directory/(?P.+)/', - 'api-revision-directory') + 'api-revision-directory', checksum_args=['sha1_git']) @api_doc('/revision/directory/') def api_revision_directory(request, sha1_git, dir_path=None, with_data=False): """ .. http:get:: /api/1/revision/(sha1_git)/directory/[(path)/] Get information about directory (entry) objects associated to revisions. Each revision is associated to a single "root" directory. This endpoint behaves like :http:get:`/api/1/directory/(sha1_git)/[(path)/]`, but operates on the root directory associated to a given revision. 
:param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier :param string path: optional parameter to get information about the directory entry pointed by that relative path :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request :>json array content: directory entries as returned by :http:get:`/api/1/directory/(sha1_git)/[(path)/]` :>json string path: path of directory from the revision root one :>json string revision: the unique revision identifier :>json string type: the type of the directory **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/f1b94134a4b879bc55c3dacdb496690c8ebdc03f/directory/` """ # noqa return _revision_directory_by({'sha1_git': sha1_git}, dir_path, request.path, with_data=with_data) -@api_route(r'/revision/(?P[0-9a-f]+)/log/', 'api-revision-log') +@api_route(r'/revision/(?P[0-9a-f]+)/log/', 'api-revision-log', + checksum_args=['sha1_git']) @api_route(r'/revision/(?P[0-9a-f]+)' - r'/prev/(?P[0-9a-f/]+)/log/', - 'api-revision-log') + r'/prev/(?P[0-9a-f]*/*)/log/', + 'api-revision-log', checksum_args=['sha1_git', 'prev_sha1s']) @api_doc('/revision/log/') def api_revision_log(request, sha1_git, prev_sha1s=None): """ .. http:get:: /api/1/revision/(sha1_git)[/prev/(prev_sha1s)]/log/ Get a list of all revisions heading to a given one, in other words show the commit log. :param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier :param string prev_sha1s: optional parameter representing the navigation breadcrumbs (descendant revisions previously visited). If multiple values, use / as delimiter. If provided, revisions information will be added at the beginning of the returned list. :query int per_page: number of elements in the returned list, for pagination purpose :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request :resheader Link: indicates that a subsequent result page is available and contains the url pointing to it :>jsonarr object author: information about the author of the revision :>jsonarr string author_url: link to :http:get:`/api/1/person/(person_id)/` to get information about the author of the revision :>jsonarr object committer: information about the committer of the revision :>jsonarr string committer_url: link to :http:get:`/api/1/person/(person_id)/` to get information about the committer of the revision :>jsonarr string committer_date: ISO representation of the commit date (in UTC) :>jsonarr string date: ISO representation of the revision date (in UTC) :>jsonarr string directory: the unique identifier that revision points to :>jsonarr string directory_url: link to :http:get:`/api/1/directory/(sha1_git)/[(path)/]` to get information about the directory associated to the revision :>jsonarr string id: the revision unique identifier :>jsonarr boolean merge: whether or not the revision corresponds to a merge commit :>jsonarr string message: the message associated to the revision :>jsonarr array parents: the parents of the revision, i.e. 
the previous revisions that head directly to it, each entry of that array contains an unique parent revision identifier but also a link to :http:get:`/api/1/revision/(sha1_git)/` to get more information about it :>jsonarr string type: the type of the revision **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/e1a315fa3fa734e2a6154ed7b5b9ae0eb8987aad/log/` """ # noqa result = {} per_page = int(request.query_params.get('per_page', '10')) def lookup_revision_log_with_limit(s, limit=per_page+1): return service.lookup_revision_log(s, limit) error_msg = 'Revision with sha1_git %s not found.' % sha1_git rev_get = api_lookup(lookup_revision_log_with_limit, sha1_git, notfound_msg=error_msg, enrich_fn=utils.enrich_revision) nb_rev = len(rev_get) if nb_rev == per_page+1: rev_backward = rev_get[:-1] new_last_sha1 = rev_get[-1]['id'] query_params = {} if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('api-revision-log', url_args={'sha1_git': new_last_sha1}, query_params=query_params) } else: rev_backward = rev_get if not prev_sha1s: # no nav breadcrumbs, so we're done revisions = rev_backward else: rev_forward_ids = prev_sha1s.split('/') rev_forward = api_lookup( service.lookup_revision_multiple, rev_forward_ids, notfound_msg=error_msg, enrich_fn=utils.enrich_revision) revisions = rev_forward + rev_backward result.update({ 'results': revisions }) return result diff --git a/swh/web/api/views/snapshot.py b/swh/web/api/views/snapshot.py index 9a7ce395..cfeb3286 100644 --- a/swh/web/api/views/snapshot.py +++ b/swh/web/api/views/snapshot.py @@ -1,116 +1,117 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service from swh.web.common.utils import reverse from swh.web.config import get_config from swh.web.api.apidoc import api_doc from swh.web.api import utils from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup -@api_route(r'/snapshot/(?P[0-9a-f]+)/', 'api-snapshot') +@api_route(r'/snapshot/(?P[0-9a-f]+)/', 'api-snapshot', + checksum_args=['snapshot_id']) @api_doc('/snapshot/') def api_snapshot(request, snapshot_id): """ .. http:get:: /api/1/snapshot/(snapshot_id)/ Get information about a snapshot in the archive. A snapshot is a set of named branches, which are pointers to objects at any level of the Software Heritage DAG. It represents a full picture of an origin at a given time. As well as pointing to other objects in the Software Heritage DAG, branches can also be aliases, in which case their target is the name of another branch in the same snapshot, or dangling, in which case the target is unknown. A snapshot identifier is a salted sha1. See :func:`swh.model.identifiers.snapshot_identifier` in our data model module for details about how they are computed. 
:param sha1 snapshot_id: a snapshot identifier :query str branches_from: optional parameter used to skip branches whose name is lesser than it before returning them :query int branches_count: optional parameter used to restrain the amount of returned branches (default to 1000) :query str target_types: optional comma separated list parameter used to filter the target types of branch to return (possible values that can be contained in that list are ``content``, ``directory``, ``revision``, ``release``, ``snapshot`` or ``alias``) :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request :resheader Link: indicates that a subsequent result page is available and contains the url pointing to it :>json object branches: object containing all branches associated to the snapshot, for each of them the associated target type and id are given but also a link to get information about that target :>json string id: the unique identifier of the snapshot **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid snapshot identifier has been provided :statuscode 404: requested snapshot can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`snapshot/6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a/` """ # noqa def _enrich_snapshot(snapshot): s = snapshot.copy() if 'branches' in s: s['branches'] = { k: utils.enrich_object(v) if v else None for k, v in s['branches'].items() } for k, v in s['branches'].items(): if v and v['target_type'] == 'alias': if v['target'] in s['branches']: branch_alias = s['branches'][v['target']] v['target_url'] = branch_alias['target_url'] else: snp = \ service.lookup_snapshot(s['id'], branches_from=v['target'], branches_count=1) if snp and v['target'] in snp['branches']: branch = snp['branches'][v['target']] branch = utils.enrich_object(branch) v['target_url'] = branch['target_url'] return s snapshot_content_max_size = get_config()['snapshot_content_max_size'] branches_from = request.GET.get('branches_from', '') branches_count = int(request.GET.get('branches_count', snapshot_content_max_size)) target_types = request.GET.get('target_types', None) target_types = target_types.split(',') if target_types else None results = api_lookup( service.lookup_snapshot, snapshot_id, branches_from, branches_count, target_types, notfound_msg='Snapshot with id {} not found.'.format(snapshot_id), enrich_fn=_enrich_snapshot) response = {'results': results, 'headers': {}} if results['next_branch'] is not None: response['headers']['link-next'] = \ reverse('api-snapshot', url_args={'snapshot_id': snapshot_id}, query_params={'branches_from': results['next_branch'], 'branches_count': branches_count, 'target_types': target_types}) return response diff --git a/swh/web/browse/browseurls.py b/swh/web/browse/browseurls.py index d1d4547b..a488ff36 100644 --- a/swh/web/browse/browseurls.py +++ b/swh/web/browse/browseurls.py @@ -1,38 +1,45 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common.urlsindex import UrlsIndex class BrowseUrls(UrlsIndex): """ Class to manage swh-web browse application urls. 
""" scope = 'browse' class browse_route(object): # noqa: N801 """ Decorator to ease the registration of a swh-web browse endpoint Args: url_patterns: list of url patterns used by Django to identify the browse routes view_name: the name of the Django view associated to the routes used to reverse the url """ # noqa - def __init__(self, *url_patterns, view_name=None): + def __init__(self, *url_patterns, view_name=None, checksum_args=None): super().__init__() self.url_patterns = [] + self.checksum_args = checksum_args for url_pattern in url_patterns: self.url_patterns.append('^' + url_pattern + '$') self.view_name = view_name def __call__(self, f): # register the route and its view in the browse endpoints index for url_pattern in self.url_patterns: BrowseUrls.add_url_pattern(url_pattern, f, self.view_name) + + if self.checksum_args: + BrowseUrls.add_redirect_for_checksum_args(self.view_name, + self.url_patterns, + self.checksum_args) + return f diff --git a/swh/web/browse/views/content.py b/swh/web/browse/views/content.py index e2fe724b..5ffadb8d 100644 --- a/swh/web/browse/views/content.py +++ b/swh/web/browse/views/content.py @@ -1,302 +1,304 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import difflib import json from distutils.util import strtobool from django.http import HttpResponse from django.shortcuts import render from django.template.defaultfilters import filesizeformat from swh.model.hashutil import hash_to_hex from swh.web.common import query from swh.web.common.utils import ( reverse, gen_path_info, swh_object_icons ) from swh.web.common.exc import NotFoundExc, handle_view_exception from swh.web.browse.utils import ( request_content, prepare_content_for_display, content_display_max_size, get_snapshot_context, get_swh_persistent_ids, gen_link ) from swh.web.browse.browseurls import browse_route -@browse_route(r'content/(?P.+)/raw/', - view_name='browse-content-raw') +@browse_route(r'content/(?P[0-9a-z_:]*[0-9a-f]+.)/raw/', + view_name='browse-content-raw', + checksum_args=['query_string']) def content_raw(request, query_string): """Django view that produces a raw display of a content identified by its hash value. The url that points to it is :http:get:`/browse/content/[(algo_hash):](hash)/raw/` """ # noqa try: reencode = bool(strtobool(request.GET.get('reencode', 'false'))) algo, checksum = query.parse_hash(query_string) checksum = hash_to_hex(checksum) content_data = request_content(query_string, max_size=None, reencode=reencode) except Exception as exc: return handle_view_exception(request, exc) filename = request.GET.get('filename', None) if not filename: filename = '%s_%s' % (algo, checksum) if content_data['mimetype'].startswith('text/') or \ content_data['mimetype'] == 'inode/x-empty': response = HttpResponse(content_data['raw_data'], content_type="text/plain") response['Content-disposition'] = 'filename=%s' % filename else: response = HttpResponse(content_data['raw_data'], content_type='application/octet-stream') response['Content-disposition'] = 'attachment; filename=%s' % filename return response _auto_diff_size_limit = 20000 @browse_route(r'content/(?P.*)/diff/(?P.*)', # noqa view_name='diff-contents') def _contents_diff(request, from_query_string, to_query_string): """ Browse endpoint used to compute unified diffs between two contents. 
Diffs are generated only if the two contents are textual. By default, diffs whose size are greater than 20 kB will not be generated. To force the generation of large diffs, the 'force' boolean query parameter must be used. Args: request: input django http request from_query_string: a string of the form "[ALGO_HASH:]HASH" where optional ALGO_HASH can be either ``sha1``, ``sha1_git``, ``sha256``, or ``blake2s256`` (default to ``sha1``) and HASH the hexadecimal representation of the hash value identifying the first content to_query_string: same as above for identifying the second content Returns: A JSON object containing the unified diff. """ diff_data = {} content_from = None content_to = None content_from_size = 0 content_to_size = 0 content_from_lines = [] content_to_lines = [] force = request.GET.get('force', 'false') path = request.GET.get('path', None) language = 'nohighlight' force = bool(strtobool(force)) if from_query_string == to_query_string: diff_str = 'File renamed without changes' else: try: text_diff = True if from_query_string: content_from = \ request_content(from_query_string, max_size=None) content_from_display_data = \ prepare_content_for_display(content_from['raw_data'], content_from['mimetype'], path) language = content_from_display_data['language'] content_from_size = content_from['length'] if not (content_from['mimetype'].startswith('text/') or content_from['mimetype'] == 'inode/x-empty'): text_diff = False if text_diff and to_query_string: content_to = request_content(to_query_string, max_size=None) content_to_display_data = prepare_content_for_display( content_to['raw_data'], content_to['mimetype'], path) language = content_to_display_data['language'] content_to_size = content_to['length'] if not (content_to['mimetype'].startswith('text/') or content_to['mimetype'] == 'inode/x-empty'): text_diff = False diff_size = abs(content_to_size - content_from_size) if not text_diff: diff_str = 'Diffs are not generated for non textual content' language = 'nohighlight' elif not force and diff_size > _auto_diff_size_limit: diff_str = 'Large diffs are not automatically computed' language = 'nohighlight' else: if content_from: content_from_lines = \ content_from['raw_data'].decode('utf-8')\ .splitlines(True) if content_from_lines and \ content_from_lines[-1][-1] != '\n': content_from_lines[-1] += '[swh-no-nl-marker]\n' if content_to: content_to_lines = content_to['raw_data'].decode('utf-8')\ .splitlines(True) if content_to_lines and content_to_lines[-1][-1] != '\n': content_to_lines[-1] += '[swh-no-nl-marker]\n' diff_lines = difflib.unified_diff(content_from_lines, content_to_lines) diff_str = ''.join(list(diff_lines)[2:]) except Exception as e: diff_str = str(e) diff_data['diff_str'] = diff_str diff_data['language'] = language diff_data_json = json.dumps(diff_data, separators=(',', ': ')) return HttpResponse(diff_data_json, content_type='application/json') -@browse_route(r'content/(?P.+)/', - view_name='browse-content') +@browse_route(r'content/(?P[0-9a-z_:]*[0-9a-f]+.)/', + view_name='browse-content', + checksum_args=['query_string']) def content_display(request, query_string): """Django view that produces an HTML display of a content identified by its hash value. 
The url that points to it is :http:get:`/browse/content/[(algo_hash):](hash)/` """ # noqa try: algo, checksum = query.parse_hash(query_string) checksum = hash_to_hex(checksum) content_data = request_content(query_string, raise_if_unavailable=False) origin_type = request.GET.get('origin_type', None) origin_url = request.GET.get('origin_url', None) if not origin_url: origin_url = request.GET.get('origin', None) snapshot_context = None if origin_url: try: snapshot_context = get_snapshot_context(None, origin_type, origin_url) except Exception: raw_cnt_url = reverse('browse-content', url_args={'query_string': query_string}) error_message = \ ('The Software Heritage archive has a content ' 'with the hash you provided but the origin ' 'mentioned in your request appears broken: %s. ' 'Please check the URL and try again.\n\n' 'Nevertheless, you can still browse the content ' 'without origin information: %s' % (gen_link(origin_url), gen_link(raw_cnt_url))) raise NotFoundExc(error_message) if snapshot_context: snapshot_context['visit_info'] = None except Exception as exc: return handle_view_exception(request, exc) path = request.GET.get('path', None) content = None language = None mimetype = None if content_data['raw_data'] is not None: content_display_data = prepare_content_for_display( content_data['raw_data'], content_data['mimetype'], path) content = content_display_data['content_data'] language = content_display_data['language'] mimetype = content_display_data['mimetype'] root_dir = None filename = None path_info = None query_params = {'origin': origin_url} breadcrumbs = [] if path: split_path = path.split('/') root_dir = split_path[0] filename = split_path[-1] if root_dir != path: path = path.replace(root_dir + '/', '') path = path[:-len(filename)] path_info = gen_path_info(path) dir_url = reverse('browse-directory', url_args={'sha1_git': root_dir}, query_params=query_params) breadcrumbs.append({'name': root_dir[:7], 'url': dir_url}) for pi in path_info: dir_url = reverse('browse-directory', url_args={'sha1_git': root_dir, 'path': pi['path']}, query_params=query_params) breadcrumbs.append({'name': pi['name'], 'url': dir_url}) breadcrumbs.append({'name': filename, 'url': None}) query_params = {'filename': filename} content_raw_url = reverse('browse-content-raw', url_args={'query_string': query_string}, query_params=query_params) content_metadata = { 'sha1 checksum': content_data['checksums']['sha1'], 'sha1_git checksum': content_data['checksums']['sha1_git'], 'sha256 checksum': content_data['checksums']['sha256'], 'blake2s256 checksum': content_data['checksums']['blake2s256'], 'mime type': content_data['mimetype'], 'encoding': content_data['encoding'], 'size': filesizeformat(content_data['length']), 'language': content_data['language'], 'licenses': content_data['licenses'], 'filename': filename } if filename: content_metadata['filename'] = filename sha1_git = content_data['checksums']['sha1_git'] swh_ids = get_swh_persistent_ids([{'type': 'content', 'id': sha1_git}]) heading = 'Content - %s' % sha1_git if breadcrumbs: content_path = '/'.join([bc['name'] for bc in breadcrumbs]) heading += ' - %s' % content_path return render(request, 'browse/content.html', {'heading': heading, 'swh_object_id': swh_ids[0]['swh_id'], 'swh_object_name': 'Content', 'swh_object_metadata': content_metadata, 'content': content, 'content_size': content_data['length'], 'max_content_size': content_display_max_size, 'mimetype': mimetype, 'language': language, 'breadcrumbs': breadcrumbs, 'top_right_link': { 'url': 
content_raw_url, 'icon': swh_object_icons['content'], 'text': 'Raw File' }, 'snapshot_context': snapshot_context, 'vault_cooking': None, 'show_actions_menu': True, 'swh_ids': swh_ids, 'error_code': content_data['error_code'], 'error_message': content_data['error_message'], 'error_description': content_data['error_description']}, status=content_data['error_code']) diff --git a/swh/web/browse/views/directory.py b/swh/web/browse/views/directory.py index 723fd2a6..f9ef43ee 100644 --- a/swh/web/browse/views/directory.py +++ b/swh/web/browse/views/directory.py @@ -1,160 +1,161 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render, redirect from django.template.defaultfilters import filesizeformat from swh.web.common import service from swh.web.common.utils import ( reverse, gen_path_info ) from swh.web.common.exc import handle_view_exception, NotFoundExc from swh.web.browse.utils import ( get_directory_entries, get_snapshot_context, get_readme_to_display, get_swh_persistent_ids, gen_link ) from swh.web.browse.browseurls import browse_route @browse_route(r'directory/(?P[0-9a-f]+)/', r'directory/(?P[0-9a-f]+)/(?P.+)/', - view_name='browse-directory') + view_name='browse-directory', + checksum_args=['sha1_git']) def directory_browse(request, sha1_git, path=None): """Django view for browsing the content of a directory identified by its sha1_git value. The url that points to it is :http:get:`/browse/directory/(sha1_git)/[(path)/]` """ # noqa root_sha1_git = sha1_git try: if path: dir_info = service.lookup_directory_with_path(sha1_git, path) # some readme files can reference assets reachable from the # browsed directory, handle that special case in order to # correctly displayed them if dir_info and dir_info['type'] == 'file': file_raw_url = reverse( 'browse-content-raw', url_args={'query_string': dir_info['checksums']['sha1']}) return redirect(file_raw_url) sha1_git = dir_info['target'] dirs, files = get_directory_entries(sha1_git) origin_type = request.GET.get('origin_type', None) origin_url = request.GET.get('origin_url', None) if not origin_url: origin_url = request.GET.get('origin', None) snapshot_context = None if origin_url: try: snapshot_context = get_snapshot_context(None, origin_type, origin_url) except Exception: raw_dir_url = reverse('browse-directory', url_args={'sha1_git': sha1_git}) error_message = \ ('The Software Heritage archive has a directory ' 'with the hash you provided but the origin ' 'mentioned in your request appears broken: %s. 
' 'Please check the URL and try again.\n\n' 'Nevertheless, you can still browse the directory ' 'without origin information: %s' % (gen_link(origin_url), gen_link(raw_dir_url))) raise NotFoundExc(error_message) if snapshot_context: snapshot_context['visit_info'] = None except Exception as exc: return handle_view_exception(request, exc) path_info = gen_path_info(path) query_params = {'origin': origin_url} breadcrumbs = [] breadcrumbs.append({'name': root_sha1_git[:7], 'url': reverse('browse-directory', url_args={'sha1_git': root_sha1_git}, query_params=query_params)}) for pi in path_info: breadcrumbs.append({'name': pi['name'], 'url': reverse('browse-directory', url_args={'sha1_git': root_sha1_git, 'path': pi['path']}, query_params=query_params)}) path = '' if path is None else (path + '/') for d in dirs: if d['type'] == 'rev': d['url'] = reverse('browse-revision', url_args={'sha1_git': d['target']}, query_params=query_params) else: d['url'] = reverse('browse-directory', url_args={'sha1_git': root_sha1_git, 'path': path + d['name']}, query_params=query_params) sum_file_sizes = 0 readmes = {} for f in files: query_string = 'sha1_git:' + f['target'] f['url'] = reverse('browse-content', url_args={'query_string': query_string}, query_params={'path': root_sha1_git + '/' + path + f['name'], 'origin': origin_url}) if f['length'] is not None: sum_file_sizes += f['length'] f['length'] = filesizeformat(f['length']) if f['name'].lower().startswith('readme'): readmes[f['name']] = f['checksums']['sha1'] readme_name, readme_url, readme_html = get_readme_to_display(readmes) sum_file_sizes = filesizeformat(sum_file_sizes) dir_metadata = {'id': sha1_git, 'number of regular files': len(files), 'number of subdirectories': len(dirs), 'sum of regular file sizes': sum_file_sizes} vault_cooking = { 'directory_context': True, 'directory_id': sha1_git, 'revision_context': False, 'revision_id': None } swh_ids = get_swh_persistent_ids([{'type': 'directory', 'id': sha1_git}]) heading = 'Directory - %s' % sha1_git if breadcrumbs: dir_path = '/'.join([bc['name'] for bc in breadcrumbs]) + '/' heading += ' - %s' % dir_path return render(request, 'browse/directory.html', {'heading': heading, 'swh_object_id': swh_ids[0]['swh_id'], 'swh_object_name': 'Directory', 'swh_object_metadata': dir_metadata, 'dirs': dirs, 'files': files, 'breadcrumbs': breadcrumbs, 'top_right_link': None, 'readme_name': readme_name, 'readme_url': readme_url, 'readme_html': readme_html, 'snapshot_context': snapshot_context, 'vault_cooking': vault_cooking, 'show_actions_menu': True, 'swh_ids': swh_ids}) diff --git a/swh/web/browse/views/release.py b/swh/web/browse/views/release.py index b9645151..33dd61ae 100644 --- a/swh/web/browse/views/release.py +++ b/swh/web/browse/views/release.py @@ -1,212 +1,213 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render from swh.web.common import service from swh.web.common.utils import ( reverse, format_utc_iso_date ) from swh.web.common.exc import NotFoundExc, handle_view_exception from swh.web.browse.browseurls import browse_route from swh.web.browse.utils import ( gen_person_link, gen_revision_link, get_snapshot_context, gen_link, gen_snapshot_link, get_swh_persistent_ids ) @browse_route(r'release/(?P[0-9a-f]+)/', - view_name='browse-release') + view_name='browse-release', + 
checksum_args=['sha1_git']) def release_browse(request, sha1_git): """ Django view that produces an HTML display of a release identified by its id. The url that points to it is :http:get:`/browse/release/(sha1_git)/`. """ try: release = service.lookup_release(sha1_git) snapshot_context = None origin_info = None snapshot_id = request.GET.get('snapshot_id', None) origin_type = request.GET.get('origin_type', None) origin_url = request.GET.get('origin_url', None) if not origin_url: origin_url = request.GET.get('origin', None) timestamp = request.GET.get('timestamp', None) visit_id = request.GET.get('visit_id', None) if origin_url: try: snapshot_context = \ get_snapshot_context(snapshot_id, origin_type, origin_url, timestamp, visit_id) except Exception: raw_rel_url = reverse('browse-release', url_args={'sha1_git': sha1_git}) error_message = \ ('The Software Heritage archive has a release ' 'with the hash you provided but the origin ' 'mentioned in your request appears broken: %s. ' 'Please check the URL and try again.\n\n' 'Nevertheless, you can still browse the release ' 'without origin information: %s' % (gen_link(origin_url), gen_link(raw_rel_url))) raise NotFoundExc(error_message) origin_info = snapshot_context['origin_info'] elif snapshot_id: snapshot_context = get_snapshot_context(snapshot_id) except Exception as exc: return handle_view_exception(request, exc) release_data = {} author_name = 'None' release_data['author'] = 'None' if release['author']: author_name = release['author']['name'] or \ release['author']['fullname'] release_data['author'] = \ gen_person_link(release['author']['id'], author_name, snapshot_context) release_data['date'] = format_utc_iso_date(release['date']) release_data['id'] = sha1_git release_data['name'] = release['name'] release_data['synthetic'] = release['synthetic'] release_data['target type'] = release['target_type'] if release['target_type'] == 'revision': release_data['target'] = \ gen_revision_link(release['target'], snapshot_context=snapshot_context) elif release['target_type'] == 'content': content_url = \ reverse('browse-content', url_args={'query_string': 'sha1_git:' + release['target']}) release_data['target'] = gen_link(content_url, release['target']) elif release['target_type'] == 'directory': directory_url = \ reverse('browse-directory', url_args={'sha1_git': release['target']}) release_data['target'] = gen_link(directory_url, release['target']) elif release['target_type'] == 'release': release_url = \ reverse('browse-release', url_args={'sha1_git': release['target']}) release_data['target'] = gen_link(release_url, release['target']) release_note_lines = [] if release['message']: release_note_lines = release['message'].split('\n') vault_cooking = None query_params = {} if snapshot_id: query_params = {'snapshot_id': snapshot_id} elif origin_info: query_params = {'origin': origin_info['url']} target_url = '' if release['target_type'] == 'revision': target_url = reverse('browse-revision', url_args={'sha1_git': release['target']}, query_params=query_params) try: revision = service.lookup_revision(release['target']) vault_cooking = { 'directory_context': True, 'directory_id': revision['directory'], 'revision_context': True, 'revision_id': release['target'] } except Exception: pass elif release['target_type'] == 'directory': target_url = reverse('browse-directory', url_args={'sha1_git': release['target']}, query_params=query_params) try: revision = service.lookup_directory(release['target']) vault_cooking = { 'directory_context': True, 'directory_id': 
revision['directory'], 'revision_context': False, 'revision_id': None } except Exception: pass elif release['target_type'] == 'content': target_url = reverse('browse-content', url_args={'query_string': release['target']}, query_params=query_params) elif release['target_type'] == 'release': target_url = reverse('browse-release', url_args={'sha1_git': release['target']}, query_params=query_params) release['target_url'] = target_url if snapshot_context: release_data['snapshot id'] = snapshot_context['snapshot_id'] if origin_info: release_url = reverse('browse-release', url_args={'sha1_git': release['id']}) release_data['context-independent release'] = \ gen_link(release_url, link_text='Browse', link_attrs={'class': 'btn btn-default btn-sm', 'role': 'button'}) release_data['origin id'] = origin_info['id'] release_data['origin type'] = origin_info['type'] release_data['origin url'] = gen_link(origin_info['url'], origin_info['url']) browse_snapshot_link = \ gen_snapshot_link(snapshot_context['snapshot_id'], link_text='Browse', link_attrs={'class': 'btn btn-default btn-sm', 'role': 'button'}) release_data['snapshot'] = browse_snapshot_link swh_objects = [{'type': 'release', 'id': sha1_git}] if snapshot_context: snapshot_id = snapshot_context['snapshot_id'] if snapshot_id: swh_objects.append({'type': 'snapshot', 'id': snapshot_id}) swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context) note_header = 'None' if len(release_note_lines) > 0: note_header = release_note_lines[0] release['note_header'] = note_header release['note_body'] = '\n'.join(release_note_lines[1:]) heading = 'Release - %s' % release['name'] if snapshot_context: context_found = 'snapshot: %s' % snapshot_context['snapshot_id'] if origin_info: context_found = 'origin: %s' % origin_info['url'] heading += ' - %s' % context_found return render(request, 'browse/release.html', {'heading': heading, 'swh_object_id': swh_ids[0]['swh_id'], 'swh_object_name': 'Release', 'swh_object_metadata': release_data, 'release': release, 'snapshot_context': snapshot_context, 'show_actions_menu': True, 'breadcrumbs': None, 'vault_cooking': vault_cooking, 'top_right_link': None, 'swh_ids': swh_ids}) diff --git a/swh/web/browse/views/revision.py b/swh/web/browse/views/revision.py index 3f68ddff..ae41c8ff 100644 --- a/swh/web/browse/views/revision.py +++ b/swh/web/browse/views/revision.py @@ -1,555 +1,558 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib import json import textwrap from django.http import HttpResponse from django.shortcuts import render, redirect from django.template.defaultfilters import filesizeformat from django.utils.html import escape from django.utils.safestring import mark_safe from swh.model.identifiers import persistent_identifier from swh.web.common import service from swh.web.common.utils import ( reverse, format_utc_iso_date, gen_path_info, swh_object_icons ) from swh.web.common.exc import NotFoundExc, handle_view_exception from swh.web.browse.browseurls import browse_route from swh.web.browse.utils import ( gen_link, gen_person_link, gen_revision_link, gen_revision_url, get_snapshot_context, gen_snapshot_directory_link, get_revision_log_url, get_directory_entries, gen_directory_link, request_content, prepare_content_for_display, content_display_max_size, gen_snapshot_link, get_readme_to_display, 
get_swh_persistent_ids, format_log_entries ) def _gen_content_url(revision, query_string, path, snapshot_context): if snapshot_context: url_args = snapshot_context['url_args'] url_args['path'] = path query_params = snapshot_context['query_params'] query_params['revision'] = revision['id'] content_url = reverse('browse-origin-content', url_args=url_args, query_params=query_params) else: content_path = '%s/%s' % (revision['directory'], path) content_url = reverse('browse-content', url_args={'query_string': query_string}, query_params={'path': content_path}) return content_url def _gen_diff_link(idx, diff_anchor, link_text): if idx < _max_displayed_file_diffs: return gen_link(diff_anchor, link_text) else: return link_text # TODO: put in conf _max_displayed_file_diffs = 1000 def _gen_revision_changes_list(revision, changes, snapshot_context): """ Returns a HTML string describing the file changes introduced in a revision. As this string will be displayed in the browse revision view, links to adequate file diffs are also generated. Args: revision (str): hexadecimal representation of a revision identifier changes (list): list of file changes in the revision snapshot_context (dict): optional origin context used to reverse the content urls Returns: A string to insert in a revision HTML view. """ changes_msg = [] for i, change in enumerate(changes): hasher = hashlib.sha1() from_query_string = '' to_query_string = '' diff_id = 'diff-' if change['from']: from_query_string = 'sha1_git:' + change['from']['target'] diff_id += change['from']['target'] + '-' + change['from_path'] diff_id += '-' if change['to']: to_query_string = 'sha1_git:' + change['to']['target'] diff_id += change['to']['target'] + change['to_path'] change['path'] = change['to_path'] or change['from_path'] url_args = {'from_query_string': from_query_string, 'to_query_string': to_query_string} query_params = {'path': change['path']} change['diff_url'] = reverse('diff-contents', url_args=url_args, query_params=query_params) hasher.update(diff_id.encode('utf-8')) diff_id = hasher.hexdigest() change['id'] = diff_id panel_diff_link = '#panel_' + diff_id if change['type'] == 'modify': change['content_url'] = \ _gen_content_url(revision, to_query_string, change['to_path'], snapshot_context) changes_msg.append('modified: %s' % _gen_diff_link(i, panel_diff_link, change['to_path'])) elif change['type'] == 'insert': change['content_url'] = \ _gen_content_url(revision, to_query_string, change['to_path'], snapshot_context) changes_msg.append('new file: %s' % _gen_diff_link(i, panel_diff_link, change['to_path'])) elif change['type'] == 'delete': parent = service.lookup_revision(revision['parents'][0]) change['content_url'] = \ _gen_content_url(parent, from_query_string, change['from_path'], snapshot_context) changes_msg.append('deleted: %s' % _gen_diff_link(i, panel_diff_link, change['from_path'])) elif change['type'] == 'rename': change['content_url'] = \ _gen_content_url(revision, to_query_string, change['to_path'], snapshot_context) link_text = change['from_path'] + ' → ' + change['to_path'] changes_msg.append('renamed: %s' % _gen_diff_link(i, panel_diff_link, link_text)) if not changes: changes_msg.append('No changes') return mark_safe('\n'.join(changes_msg)) @browse_route(r'revision/(?P[0-9a-f]+)/diff/', - view_name='diff-revision') + view_name='diff-revision', + checksum_args=['sha1_git']) def _revision_diff(request, sha1_git): """ Browse internal endpoint to compute revision diff """ try: revision = service.lookup_revision(sha1_git) 
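The changes list built above links every file change to its diff panel through a stable anchor id derived by hashing the change's targets and paths. The sketch below shows that hashing step in isolation; the change-dict field names follow the code above, but this is an illustration rather than the exact helper.

import hashlib


def change_panel_id(change):
    # concatenate the 'from'/'to' targets and paths, then hash the result so
    # the same change always yields the same '#panel_<id>' anchor
    diff_id = 'diff-'
    if change.get('from'):
        diff_id += change['from']['target'] + '-' + change['from_path'] + '-'
    if change.get('to'):
        diff_id += change['to']['target'] + change['to_path']
    return 'panel_' + hashlib.sha1(diff_id.encode('utf-8')).hexdigest()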
snapshot_context = None origin_type = request.GET.get('origin_type', None) origin_url = request.GET.get('origin_url', None) if not origin_url: origin_url = request.GET.get('origin', None) timestamp = request.GET.get('timestamp', None) visit_id = request.GET.get('visit_id', None) if origin_url: snapshot_context = get_snapshot_context(None, origin_type, origin_url, timestamp, visit_id) except Exception as exc: return handle_view_exception(request, exc) changes = service.diff_revision(sha1_git) changes_msg = _gen_revision_changes_list(revision, changes, snapshot_context) diff_data = { 'total_nb_changes': len(changes), 'changes': changes[:_max_displayed_file_diffs], 'changes_msg': changes_msg } diff_data_json = json.dumps(diff_data, separators=(',', ': ')) return HttpResponse(diff_data_json, content_type='application/json') NB_LOG_ENTRIES = 100 @browse_route(r'revision/(?P[0-9a-f]+)/log/', - view_name='browse-revision-log') + view_name='browse-revision-log', + checksum_args=['sha1_git']) def revision_log_browse(request, sha1_git): """ Django view that produces an HTML display of the history log for a revision identified by its id. The url that points to it is :http:get:`/browse/revision/(sha1_git)/log/` """ # noqa try: per_page = int(request.GET.get('per_page', NB_LOG_ENTRIES)) offset = int(request.GET.get('offset', 0)) revs_ordering = request.GET.get('revs_ordering', 'committer_date') session_key = 'rev_%s_log_ordering_%s' % (sha1_git, revs_ordering) rev_log_session = request.session.get(session_key, None) rev_log = [] revs_walker_state = None if rev_log_session: rev_log = rev_log_session['rev_log'] revs_walker_state = rev_log_session['revs_walker_state'] if len(rev_log) < offset+per_page: revs_walker = \ service.get_revisions_walker(revs_ordering, sha1_git, max_revs=offset+per_page+1, state=revs_walker_state) rev_log += list(revs_walker) revs_walker_state = revs_walker.export_state() revision_log = rev_log[offset:offset+per_page] request.session[session_key] = { 'rev_log': rev_log, 'revs_walker_state': revs_walker_state } except Exception as exc: return handle_view_exception(request, exc) revs_ordering = request.GET.get('revs_ordering', '') prev_log_url = None if len(rev_log) > offset + per_page: prev_log_url = reverse('browse-revision-log', url_args={'sha1_git': sha1_git}, query_params={'per_page': per_page, 'offset': offset + per_page, 'revs_ordering': revs_ordering}) next_log_url = None if offset != 0: next_log_url = reverse('browse-revision-log', url_args={'sha1_git': sha1_git}, query_params={'per_page': per_page, 'offset': offset - per_page, 'revs_ordering': revs_ordering}) revision_log_data = format_log_entries(revision_log, per_page) swh_rev_id = persistent_identifier('revision', sha1_git) return render(request, 'browse/revision-log.html', {'heading': 'Revision history', 'swh_object_id': swh_rev_id, 'swh_object_name': 'Revisions history', 'swh_object_metadata': None, 'revision_log': revision_log_data, 'revs_ordering': revs_ordering, 'next_log_url': next_log_url, 'prev_log_url': prev_log_url, 'breadcrumbs': None, 'top_right_link': None, 'snapshot_context': None, 'vault_cooking': None, 'show_actions_menu': True, 'swh_ids': None}) @browse_route(r'revision/(?P[0-9a-f]+)/', r'revision/(?P[0-9a-f]+)/(?P.+)/', - view_name='browse-revision') + view_name='browse-revision', + checksum_args=['sha1_git']) def revision_browse(request, sha1_git, extra_path=None): """ Django view that produces an HTML display of a revision identified by its id. 
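The revision log view above pages through history with an offset/per_page window and keeps the revisions already walked in the Django session, resuming the walk only when the requested window runs past that cache. A minimal sketch of the windowing logic follows; fetch_more stands in for the service/revisions-walker pair and is an assumption, not a swh.web API.

def log_window(cached_log, offset, per_page, fetch_more):
    # fetch_more(n) is assumed to resume the walk and return the revisions
    # still missing from the cache, up to a total of n entries; one extra
    # entry is requested so we can tell whether an older page exists
    if len(cached_log) < offset + per_page:
        cached_log = cached_log + fetch_more(offset + per_page + 1)
    window = cached_log[offset:offset + per_page]
    older_offset = offset + per_page if len(cached_log) > offset + per_page else None
    newer_offset = offset - per_page if offset != 0 else None
    return window, older_offset, newer_offset, cached_log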
The url that points to it is :http:get:`/browse/revision/(sha1_git)/`. """ try: revision = service.lookup_revision(sha1_git) # some readme files can reference assets reachable from the # browsed directory, handle that special case in order to # correctly displayed them if extra_path: dir_info = \ service.lookup_directory_with_path(revision['directory'], extra_path) if dir_info and dir_info['type'] == 'file': file_raw_url = reverse( 'browse-content-raw', url_args={'query_string': dir_info['checksums']['sha1']}) return redirect(file_raw_url) origin_info = None snapshot_context = None origin_type = request.GET.get('origin_type', None) origin_url = request.GET.get('origin_url', None) if not origin_url: origin_url = request.GET.get('origin', None) timestamp = request.GET.get('timestamp', None) visit_id = request.GET.get('visit_id', None) snapshot_id = request.GET.get('snapshot_id', None) path = request.GET.get('path', None) dir_id = None dirs, files = None, None content_data = None if origin_url: try: snapshot_context = get_snapshot_context(None, origin_type, origin_url, timestamp, visit_id) except Exception: raw_rev_url = reverse('browse-revision', url_args={'sha1_git': sha1_git}) error_message = \ ('The Software Heritage archive has a revision ' 'with the hash you provided but the origin ' 'mentioned in your request appears broken: %s. ' 'Please check the URL and try again.\n\n' 'Nevertheless, you can still browse the revision ' 'without origin information: %s' % (gen_link(origin_url), gen_link(raw_rev_url))) raise NotFoundExc(error_message) origin_info = snapshot_context['origin_info'] snapshot_id = snapshot_context['snapshot_id'] elif snapshot_id: snapshot_context = get_snapshot_context(snapshot_id) if path: file_info = \ service.lookup_directory_with_path(revision['directory'], path) if file_info['type'] == 'dir': dir_id = file_info['target'] else: query_string = 'sha1_git:' + file_info['target'] content_data = request_content(query_string, raise_if_unavailable=False) else: dir_id = revision['directory'] if dir_id: path = '' if path is None else (path + '/') dirs, files = get_directory_entries(dir_id) except Exception as exc: return handle_view_exception(request, exc) revision_data = {} author_name = 'None' revision_data['author'] = 'None' if revision['author']: author_name = revision['author']['name'] or \ revision['author']['fullname'] revision_data['author'] = \ gen_person_link(revision['author']['id'], author_name, snapshot_context) revision_data['committer'] = 'None' if revision['committer']: revision_data['committer'] = \ gen_person_link(revision['committer']['id'], revision['committer']['name'], snapshot_context) revision_data['committer date'] = \ format_utc_iso_date(revision['committer_date']) revision_data['date'] = format_utc_iso_date(revision['date']) if snapshot_context: revision_data['snapshot id'] = snapshot_id revision_data['directory'] = \ gen_snapshot_directory_link(snapshot_context, sha1_git, link_text='Browse', link_attrs={'class': 'btn btn-default btn-sm', # noqa 'role': 'button'}) else: revision_data['directory'] = \ gen_directory_link(revision['directory'], link_text='Browse', link_attrs={'class': 'btn btn-default btn-sm', 'role': 'button'}) revision_data['id'] = sha1_git revision_data['merge'] = revision['merge'] revision_data['metadata'] = escape(json.dumps(revision['metadata'], sort_keys=True, indent=4, separators=(',', ': '))) if origin_info: revision_data['context-independent revision'] = \ gen_revision_link(sha1_git, link_text='Browse', link_attrs={'class': 
'btn btn-default btn-sm', 'role': 'button'}) revision_data['origin id'] = origin_info['id'] revision_data['origin type'] = origin_info['type'] revision_data['origin url'] = gen_link(origin_info['url'], origin_info['url']) browse_snapshot_link = \ gen_snapshot_link(snapshot_id, link_text='Browse', link_attrs={'class': 'btn btn-default btn-sm', 'role': 'button'}) revision_data['snapshot'] = browse_snapshot_link parents = '' for p in revision['parents']: parent_link = gen_revision_link(p, snapshot_context=snapshot_context) parents += parent_link + '
' revision_data['parents'] = mark_safe(parents) revision_data['synthetic'] = revision['synthetic'] revision_data['type'] = revision['type'] message_lines = ['None'] if revision['message']: message_lines = revision['message'].split('\n') parents = [] for p in revision['parents']: parent_url = gen_revision_url(p, snapshot_context) parents.append({'id': p, 'url': parent_url}) path_info = gen_path_info(path) query_params = {'snapshot_id': snapshot_id, 'origin_type': origin_type, 'origin': origin_url, 'timestamp': timestamp, 'visit_id': visit_id} breadcrumbs = [] breadcrumbs.append({'name': revision['directory'][:7], 'url': reverse('browse-revision', url_args={'sha1_git': sha1_git}, query_params=query_params)}) for pi in path_info: query_params['path'] = pi['path'] breadcrumbs.append({'name': pi['name'], 'url': reverse('browse-revision', url_args={'sha1_git': sha1_git}, query_params=query_params)}) vault_cooking = { 'directory_context': False, 'directory_id': None, 'revision_context': True, 'revision_id': sha1_git } swh_objects = [{'type': 'revision', 'id': sha1_git}] content = None content_size = None mimetype = None language = None readme_name = None readme_url = None readme_html = None readmes = {} error_code = 200 error_message = '' error_description = '' if content_data: breadcrumbs[-1]['url'] = None content_size = content_data['length'] mimetype = content_data['mimetype'] if content_data['raw_data']: content_display_data = prepare_content_for_display( content_data['raw_data'], content_data['mimetype'], path) content = content_display_data['content_data'] language = content_display_data['language'] mimetype = content_display_data['mimetype'] query_params = {} if path: filename = path_info[-1]['name'] query_params['filename'] = path_info[-1]['name'] revision_data['filename'] = filename top_right_link = { 'url': reverse('browse-content-raw', url_args={'query_string': query_string}, query_params=query_params), 'icon': swh_object_icons['content'], 'text': 'Raw File' } swh_objects.append({'type': 'content', 'id': file_info['target']}) error_code = content_data['error_code'] error_message = content_data['error_message'] error_description = content_data['error_description'] else: for d in dirs: if d['type'] == 'rev': d['url'] = reverse('browse-revision', url_args={'sha1_git': d['target']}) else: query_params['path'] = path + d['name'] d['url'] = reverse('browse-revision', url_args={'sha1_git': sha1_git}, query_params=query_params) for f in files: query_params['path'] = path + f['name'] f['url'] = reverse('browse-revision', url_args={'sha1_git': sha1_git}, query_params=query_params) if f['length'] is not None: f['length'] = filesizeformat(f['length']) if f['name'].lower().startswith('readme'): readmes[f['name']] = f['checksums']['sha1'] readme_name, readme_url, readme_html = get_readme_to_display(readmes) top_right_link = { 'url': get_revision_log_url(sha1_git, snapshot_context), 'icon': swh_object_icons['revisions history'], 'text': 'History' } vault_cooking['directory_context'] = True vault_cooking['directory_id'] = dir_id swh_objects.append({'type': 'directory', 'id': dir_id}) diff_revision_url = reverse('diff-revision', url_args={'sha1_git': sha1_git}, query_params={'origin_type': origin_type, 'origin': origin_url, 'timestamp': timestamp, 'visit_id': visit_id}) if snapshot_id: swh_objects.append({'type': 'snapshot', 'id': snapshot_id}) swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context) heading = 'Revision - %s - %s' %\ (sha1_git[:7], textwrap.shorten(message_lines[0], width=70)) 
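The persistent identifiers shown in the side panel come from swh.model, following the import used by the revision views above. A short usage sketch, with a made-up hash; the result is expected to be a string of the form 'swh:1:rev:<sha1_git>'.

from swh.model.identifiers import persistent_identifier

swh_rev_id = persistent_identifier(
    'revision', 'aafb16d69fd30ff58afdd69036a26047f3aebdc6')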
if snapshot_context: context_found = 'snapshot: %s' % snapshot_context['snapshot_id'] if origin_info: context_found = 'origin: %s' % origin_info['url'] heading += ' - %s' % context_found return render(request, 'browse/revision.html', {'heading': heading, 'swh_object_id': swh_ids[0]['swh_id'], 'swh_object_name': 'Revision', 'swh_object_metadata': revision_data, 'message_header': message_lines[0], 'message_body': '\n'.join(message_lines[1:]), 'parents': parents, 'snapshot_context': snapshot_context, 'dirs': dirs, 'files': files, 'content': content, 'content_size': content_size, 'max_content_size': content_display_max_size, 'mimetype': mimetype, 'language': language, 'readme_name': readme_name, 'readme_url': readme_url, 'readme_html': readme_html, 'breadcrumbs': breadcrumbs, 'top_right_link': top_right_link, 'vault_cooking': vault_cooking, 'diff_revision_url': diff_revision_url, 'show_actions_menu': True, 'swh_ids': swh_ids, 'error_code': error_code, 'error_message': error_message, 'error_description': error_description}, status=error_code) diff --git a/swh/web/browse/views/snapshot.py b/swh/web/browse/views/snapshot.py index f79afe65..61dc015a 100644 --- a/swh/web/browse/views/snapshot.py +++ b/swh/web/browse/views/snapshot.py @@ -1,91 +1,97 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import redirect from swh.web.browse.browseurls import browse_route from swh.web.common.utils import reverse from .utils.snapshot_context import ( browse_snapshot_directory, browse_snapshot_content, browse_snapshot_log, browse_snapshot_branches, browse_snapshot_releases ) @browse_route(r'snapshot/(?P[0-9a-f]+)/', - view_name='browse-snapshot') + view_name='browse-snapshot', + checksum_args=['snapshot_id']) def snapshot_browse(request, snapshot_id): """Django view for browsing the content of a snapshot. The url that points to it is :http:get:`/browse/snapshot/(snapshot_id)/` """ browse_snapshot_url = reverse('browse-snapshot-directory', url_args={'snapshot_id': snapshot_id}, query_params=request.GET) return redirect(browse_snapshot_url) @browse_route(r'snapshot/(?P[0-9a-f]+)/directory/', r'snapshot/(?P[0-9a-f]+)/directory/(?P.+)/', - view_name='browse-snapshot-directory') + view_name='browse-snapshot-directory', + checksum_args=['snapshot_id']) def snapshot_directory_browse(request, snapshot_id, path=None): """Django view for browsing the content of a directory collected in a snapshot. The url that points to it is :http:get:`/browse/snapshot/(snapshot_id)/directory/[(path)/]` """ # noqa origin_type = request.GET.get('origin_type', None) origin_url = request.GET.get('origin_url', None) if not origin_url: origin_url = request.GET.get('origin', None) return browse_snapshot_directory(request, snapshot_id=snapshot_id, path=path, origin_type=origin_type, origin_url=origin_url) @browse_route(r'snapshot/(?P[0-9a-f]+)/content/(?P.+)/', - view_name='browse-snapshot-content') + view_name='browse-snapshot-content', + checksum_args=['snapshot_id']) def snapshot_content_browse(request, snapshot_id, path): """Django view that produces an HTML display of a content collected in a snapshot. 
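Several browse views above read the origin from the query string, preferring 'origin_url' and falling back to the shorter 'origin' parameter. A tiny sketch of that lookup, applicable to any Django HttpRequest and not specific to swh.web beyond the parameter names:

def get_origin_url(request):
    # prefer the explicit 'origin_url' parameter, fall back to 'origin'
    origin_url = request.GET.get('origin_url', None)
    if not origin_url:
        origin_url = request.GET.get('origin', None)
    return origin_url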
The url that points to it is :http:get:`/browse/snapshot/(snapshot_id)/content/(path)/` """ # noqa return browse_snapshot_content(request, snapshot_id=snapshot_id, path=path) @browse_route(r'snapshot/(?P[0-9a-f]+)/log/', - view_name='browse-snapshot-log') + view_name='browse-snapshot-log', + checksum_args=['snapshot_id']) def snapshot_log_browse(request, snapshot_id): """Django view that produces an HTML display of revisions history (aka the commit log) collected in a snapshot. The url that points to it is :http:get:`/browse/snapshot/(snapshot_id)/log/` """ # noqa return browse_snapshot_log(request, snapshot_id=snapshot_id) @browse_route(r'snapshot/(?P[0-9a-f]+)/branches/', - view_name='browse-snapshot-branches') + view_name='browse-snapshot-branches', + checksum_args=['snapshot_id']) def snapshot_branches_browse(request, snapshot_id): """Django view that produces an HTML display of the list of releases collected in a snapshot. The url that points to it is :http:get:`/browse/snapshot/(snapshot_id)/branches/` """ # noqa return browse_snapshot_branches(request, snapshot_id=snapshot_id) @browse_route(r'snapshot/(?P[0-9a-f]+)/releases/', - view_name='browse-snapshot-releases') + view_name='browse-snapshot-releases', + checksum_args=['snapshot_id']) def snapshot_releases_browse(request, snapshot_id): """Django view that produces an HTML display of the list of releases collected in a snapshot. The url that points to it is :http:get:`/browse/snapshot/(snapshot_id)/releases/` """ # noqa return browse_snapshot_releases(request, snapshot_id=snapshot_id) diff --git a/swh/web/common/urlsindex.py b/swh/web/common/urlsindex.py index 6e44db7a..047314e4 100644 --- a/swh/web/common/urlsindex.py +++ b/swh/web/common/urlsindex.py @@ -1,48 +1,76 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.conf.urls import url +from django.shortcuts import redirect class UrlsIndex(object): """ Simple helper class for centralizing url patterns of a Django web application. Derived classes should override the 'scope' class attribute otherwise all declared patterns will be grouped under the default one. """ _urlpatterns = {} scope = 'default' @classmethod - def add_url_pattern(cls, url_pattern, view, view_name): + def add_url_pattern(cls, url_pattern, view, view_name=None): """ Class method that adds an url pattern to the current scope. Args: url_pattern: regex describing a Django url view: function implementing the Django view view_name: name of the view used to reverse the url """ if cls.scope not in cls._urlpatterns: cls._urlpatterns[cls.scope] = [] if view_name: cls._urlpatterns[cls.scope].append(url(url_pattern, view, name=view_name)) else: cls._urlpatterns[cls.scope].append(url(url_pattern, view)) + @classmethod + def add_redirect_for_checksum_args(cls, view_name, url_patterns, + checksum_args): + """ + Class method that redirects to view with lowercase checksums + when upper/mixed case checksums are passed as url arguments. 
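Below is a minimal sketch of the redirect mechanism this method implements, assuming plain Django: for each registered pattern a second, case-insensitive pattern is derived, and its view lowercases the checksum keyword arguments before redirecting to the canonical view. This is a simplified illustration of the wiring added by this patch, not a drop-in replacement.

from django.shortcuts import redirect


def uppercase_pattern(url_pattern):
    # accept upper/mixed case hex digits where the original pattern only
    # accepted lowercase ones
    return url_pattern.replace('[0-9a-f]', '[0-9a-fA-F]')


def make_checksum_redirect(view_name, checksum_args):
    def view_redirect(request, *args, **kwargs):
        # normalize the checksum url arguments, then redirect to the
        # canonical (lowercase) view by name
        for checksum_arg in checksum_args:
            kwargs[checksum_arg] = kwargs[checksum_arg].lower()
        return redirect(view_name, *args, **kwargs)
    return view_redirect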
+ + Args: + view_name (str): name of the view to redirect requests + url_patterns (List[str]): regexps describing the view urls + checksum_args (List[str]): url argument names corresponding + to checksum values + """ + new_view_name = view_name+'-uppercase-checksum' + for url_pattern in url_patterns: + url_pattern_upper = url_pattern.replace('[0-9a-f]', + '[0-9a-fA-F]') + + def view_redirect(request, *args, **kwargs): + for checksum_arg in checksum_args: + checksum_upper = kwargs[checksum_arg] + kwargs[checksum_arg] = checksum_upper.lower() + return redirect(view_name, *args, **kwargs) + + cls.add_url_pattern(url_pattern_upper, view_redirect, + new_view_name) + @classmethod def get_url_patterns(cls): """ Class method that returns the list of url pattern associated to the current scope. Returns: The list of url patterns associated to the current scope """ return cls._urlpatterns[cls.scope] diff --git a/swh/web/tests/api/views/test_content.py b/swh/web/tests/api/views/test_content.py index 96e253bd..ae402069 100644 --- a/swh/web/tests/api/views/test_content.py +++ b/swh/web/tests/api/views/test_content.py @@ -1,375 +1,388 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from hypothesis import given from rest_framework.test import APITestCase from swh.web.common.utils import reverse from swh.web.tests.strategies import ( content, unknown_content, contents_with_ctags ) from swh.web.tests.testcase import ( WebTestCase, ctags_json_missing, fossology_missing ) class ContentApiTestCase(WebTestCase, APITestCase): @given(content()) def test_api_content_filetype(self, content): self.content_add_mimetype(content['sha1']) url = reverse('api-content-filetype', url_args={'q': 'sha1_git:%s' % content['sha1_git']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') content_url = reverse('api-content', url_args={'q': 'sha1:%s' % content['sha1']}) expected_data = self.content_get_mimetype(content['sha1']) expected_data['content_url'] = content_url self.assertEqual(rv.data, expected_data) @given(unknown_content()) def test_api_content_filetype_sha_not_found(self, unknown_content): url = reverse('api-content-filetype', url_args={'q': 'sha1:%s' % unknown_content['sha1']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'No filetype information found for content ' 'sha1:%s.' 
% unknown_content['sha1'] }) @given(content()) def test_api_content_language(self, content): self.content_add_language(content['sha1']) url = reverse('api-content-language', url_args={'q': 'sha1_git:%s' % content['sha1_git']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') content_url = reverse('api-content', url_args={'q': 'sha1:%s' % content['sha1']}) expected_data = self.content_get_language(content['sha1']) expected_data['content_url'] = content_url self.assertEqual(rv.data, expected_data) @given(unknown_content()) def test_api_content_language_sha_not_found(self, unknown_content): url = reverse('api-content-language', url_args={'q': 'sha1:%s' % unknown_content['sha1']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'No language information found for content ' 'sha1:%s.' % unknown_content['sha1'] }) @pytest.mark.skipif(ctags_json_missing, reason="requires ctags with json output support") @given(contents_with_ctags()) def test_api_content_symbol(self, contents_with_ctags): expected_data = {} for content_sha1 in contents_with_ctags['sha1s']: self.content_add_ctags(content_sha1) for ctag in self.content_get_ctags(content_sha1): if ctag['name'] == contents_with_ctags['symbol_name']: expected_data[content_sha1] = ctag break url = reverse('api-content-symbol', url_args={'q': contents_with_ctags['symbol_name']}, query_params={'per_page': 100}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') for entry in rv.data: content_sha1 = entry['sha1'] expected_entry = expected_data[content_sha1] for key, view_name in (('content_url', 'api-content'), ('data_url', 'api-content-raw'), ('license_url', 'api-content-license'), ('language_url', 'api-content-language'), ('filetype_url', 'api-content-filetype')): expected_entry[key] = reverse(view_name, url_args={'q': 'sha1:%s' % content_sha1}) expected_entry['sha1'] = content_sha1 del expected_entry['id'] self.assertEqual(entry, expected_entry) self.assertFalse('Link' in rv) url = reverse('api-content-symbol', url_args={'q': contents_with_ctags['symbol_name']}, query_params={'per_page': 2}) rv = self.client.get(url) next_url = reverse('api-content-symbol', url_args={'q': contents_with_ctags['symbol_name']}, query_params={'last_sha1': rv.data[1]['sha1'], 'per_page': 2}) self.assertEqual(rv['Link'], '<%s>; rel="next"' % next_url) def test_api_content_symbol_not_found(self): url = reverse('api-content-symbol', url_args={'q': 'bar'}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'No indexed raw content match expression \'bar\'.' 
}) self.assertFalse('Link' in rv) @pytest.mark.skipif(ctags_json_missing, reason="requires ctags with json output support") @given(content()) def test_api_content_ctags(self, content): self.content_add_ctags(content['sha1']) url = reverse('api-content-ctags', url_args={'q': 'sha1_git:%s' % content['sha1_git']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') content_url = reverse('api-content', url_args={'q': 'sha1:%s' % content['sha1']}) expected_data = list(self.content_get_ctags(content['sha1'])) for e in expected_data: e['content_url'] = content_url self.assertEqual(rv.data, expected_data) @pytest.mark.skipif(fossology_missing, reason="requires fossology-nomossa installed") @given(content()) def test_api_content_license(self, content): self.content_add_license(content['sha1']) url = reverse('api-content-license', url_args={'q': 'sha1_git:%s' % content['sha1_git']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') content_url = reverse('api-content', url_args={'q': 'sha1:%s' % content['sha1']}) expected_data = self.content_get_license(content['sha1']) expected_data['content_url'] = content_url self.assertEqual(rv.data, expected_data) @given(unknown_content()) def test_api_content_license_sha_not_found(self, unknown_content): url = reverse('api-content-license', url_args={'q': 'sha1:%s' % unknown_content['sha1']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'No license information found for content ' 'sha1:%s.' % unknown_content['sha1'] }) @given(content()) def test_api_content_metadata(self, content): url = reverse('api-content', {'q': 'sha1:%s' % content['sha1']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') expected_data = self.content_get_metadata(content['sha1']) for key, view_name in (('data_url', 'api-content-raw'), ('license_url', 'api-content-license'), ('language_url', 'api-content-language'), ('filetype_url', 'api-content-filetype')): expected_data[key] = reverse(view_name, url_args={'q': 'sha1:%s' % content['sha1']}) self.assertEqual(rv.data, expected_data) @given(unknown_content()) def test_api_content_not_found_as_json(self, unknown_content): url = reverse('api-content', url_args={'q': 'sha1:%s' % unknown_content['sha1']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Content with sha1 checksum equals to %s not found!' % unknown_content['sha1'] }) @given(unknown_content()) def test_api_content_not_found_as_yaml(self, unknown_content): url = reverse('api-content', url_args={'q': 'sha256:%s' % unknown_content['sha256']}) rv = self.client.get(url, HTTP_ACCEPT='application/yaml') self.assertEqual(rv.status_code, 404) self.assertTrue('application/yaml' in rv['Content-Type']) self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Content with sha256 checksum equals to %s not found!' 
% unknown_content['sha256'] }) @given(unknown_content()) def test_api_content_raw_ko_not_found(self, unknown_content): url = reverse('api-content-raw', url_args={'q': 'sha1:%s' % unknown_content['sha1']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Content with sha1 checksum equals to %s not found!' % unknown_content['sha1'] }) @given(content()) def test_api_content_raw_text(self, content): url = reverse('api-content-raw', url_args={'q': 'sha1:%s' % content['sha1']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/octet-stream') self.assertEqual( rv['Content-disposition'], 'attachment; filename=content_sha1_%s_raw' % content['sha1']) self.assertEqual( rv['Content-Type'], 'application/octet-stream') expected_data = self.content_get(content['sha1']) self.assertEqual(rv.content, expected_data['data']) @given(content()) def test_api_content_raw_text_with_filename(self, content): url = reverse('api-content-raw', url_args={'q': 'sha1:%s' % content['sha1']}, query_params={'filename': 'filename.txt'}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/octet-stream') self.assertEqual( rv['Content-disposition'], 'attachment; filename=filename.txt') self.assertEqual( rv['Content-Type'], 'application/octet-stream') expected_data = self.content_get(content['sha1']) self.assertEqual(rv.content, expected_data['data']) @given(content()) def test_api_check_content_known(self, content): url = reverse('api-content-known', url_args={'q': content['sha1']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'search_res': [ { 'found': True, 'sha1': content['sha1'] } ], 'search_stats': {'nbfiles': 1, 'pct': 100.0} }) @given(content()) def test_api_check_content_known_as_yaml(self, content): url = reverse('api-content-known', url_args={'q': content['sha1']}) rv = self.client.get(url, HTTP_ACCEPT='application/yaml') self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/yaml') self.assertEqual(rv.data, { 'search_res': [ { 'found': True, 'sha1': content['sha1'] } ], 'search_stats': {'nbfiles': 1, 'pct': 100.0} }) @given(content()) def test_api_check_content_known_post_as_yaml(self, content): url = reverse('api-content-known') rv = self.client.post( url, data={ 'q': content['sha1'] }, HTTP_ACCEPT='application/yaml' ) self.assertEqual(rv.status_code, 200) self.assertTrue('application/yaml' in rv['Content-Type']) self.assertEqual(rv.data, { 'search_res': [ { 'found': True, 'sha1': content['sha1'] } ], 'search_stats': {'nbfiles': 1, 'pct': 100.0} }) @given(unknown_content()) def test_api_check_content_known_not_found(self, unknown_content): url = reverse('api-content-known', url_args={'q': unknown_content['sha1']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'search_res': [ { 'found': False, 'sha1': unknown_content['sha1'] } ], 'search_stats': {'nbfiles': 1, 'pct': 0.0} }) + + @given(content()) + def test_api_content_uppercase(self, content): + url = reverse('api-content-uppercase-checksum', + url_args={'q': content['sha1'].upper()}) + + resp = self.client.get(url) + self.assertEqual(resp.status_code, 302) + + 
redirect_url = reverse('api-content', + url_args={'q': content['sha1']}) + + self.assertEqual(resp['location'], redirect_url) diff --git a/swh/web/tests/api/views/test_directory.py b/swh/web/tests/api/views/test_directory.py index d0371d24..61d804a1 100644 --- a/swh/web/tests/api/views/test_directory.py +++ b/swh/web/tests/api/views/test_directory.py @@ -1,92 +1,105 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import random from hypothesis import given from rest_framework.test import APITestCase from swh.web.common.utils import reverse from swh.web.tests.strategies import directory, unknown_directory from swh.web.tests.testcase import WebTestCase class DirectoryApiTestCase(WebTestCase, APITestCase): @given(directory()) def test_api_directory(self, directory): url = reverse('api-directory', url_args={'sha1_git': directory}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') expected_data = list(map(self._enrich_dir_data, self.directory_ls(directory))) self.assertEqual(rv.data, expected_data) @given(unknown_directory()) def test_api_directory_not_found(self, unknown_directory): url = reverse('api-directory', url_args={'sha1_git': unknown_directory}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Directory with sha1_git %s not found' % unknown_directory}) @given(directory()) def test_api_directory_with_path_found(self, directory): directory_content = self.directory_ls(directory) path = random.choice(directory_content) url = reverse('api-directory', url_args={'sha1_git': directory, 'path': path['name']}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, self._enrich_dir_data(path)) @given(directory()) def test_api_directory_with_path_not_found(self, directory): path = 'some/path/to/nonexistent/dir/' url = reverse('api-directory', url_args={'sha1_git': directory, 'path': path}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': ('Directory entry with path %s from %s not found' % (path, directory))}) + @given(directory()) + def test_api_directory_uppercase(self, directory): + url = reverse('api-directory-uppercase-checksum', + url_args={'sha1_git': directory.upper()}) + + resp = self.client.get(url) + self.assertEqual(resp.status_code, 302) + + redirect_url = reverse('api-directory', + url_args={'sha1_git': directory}) + + self.assertEqual(resp['location'], redirect_url) + @classmethod def _enrich_dir_data(cls, dir_data): if dir_data['type'] == 'file': dir_data['target_url'] = \ reverse('api-content', url_args={'q': 'sha1_git:%s' % dir_data['target']}) elif dir_data['type'] == 'dir': dir_data['target_url'] = \ reverse('api-directory', url_args={'sha1_git': dir_data['target']}) elif dir_data['type'] == 'rev': dir_data['target_url'] = \ reverse('api-revision', url_args={'sha1_git': dir_data['target']}) return dir_data diff --git a/swh/web/tests/api/views/test_release.py b/swh/web/tests/api/views/test_release.py index 6c64772e..86608346 100644 --- 
a/swh/web/tests/api/views/test_release.py +++ b/swh/web/tests/api/views/test_release.py @@ -1,111 +1,124 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime from hypothesis import given from rest_framework.test import APITestCase from swh.model.hashutil import hash_to_bytes from swh.web.common.utils import reverse from swh.web.tests.strategies import ( release, unknown_release, sha1, content, directory ) from swh.web.tests.testcase import WebTestCase class ReleaseApiTestCase(WebTestCase, APITestCase): @given(release()) def test_api_release(self, release): url = reverse('api-release', url_args={'sha1_git': release}) rv = self.client.get(url) expected_release = self.release_get(release) author_id = expected_release['author']['id'] target_revision = expected_release['target'] author_url = reverse('api-person', url_args={'person_id': author_id}) target_url = reverse('api-revision', url_args={'sha1_git': target_revision}) expected_release['author_url'] = author_url expected_release['target_url'] = target_url self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_release) @given(sha1(), sha1(), sha1(), content(), directory(), release()) def test_api_release_target_type_not_a_revision(self, new_rel1, new_rel2, new_rel3, content, directory, release): for new_rel_id, target_type, target in ( (new_rel1, 'content', content), (new_rel2, 'directory', directory), (new_rel3, 'release', release)): if target_type == 'content': target = target['sha1_git'] sample_release = { 'author': { 'email': b'author@company.org', 'fullname': b'author ', 'name': b'author' }, 'date': { 'timestamp': int(datetime.now().timestamp()), 'offset': 0, 'negative_utc': False, }, 'id': hash_to_bytes(new_rel_id), 'message': b'sample release message', 'name': b'sample release', 'synthetic': False, 'target': hash_to_bytes(target), 'target_type': target_type } self.storage.release_add([sample_release]) url = reverse('api-release', url_args={'sha1_git': new_rel_id}) rv = self.client.get(url) expected_release = self.release_get(new_rel_id) author_id = expected_release['author']['id'] author_url = reverse('api-person', url_args={'person_id': author_id}) if target_type == 'content': url_args = {'q': 'sha1_git:%s' % target} else: url_args = {'sha1_git': target} target_url = reverse('api-%s' % target_type, url_args=url_args) expected_release['author_url'] = author_url expected_release['target_url'] = target_url self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_release) @given(unknown_release()) def test_api_release_not_found(self, unknown_release): url = reverse('api-release', url_args={'sha1_git': unknown_release}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Release with sha1_git %s not found.' 
% unknown_release }) + + @given(release()) + def test_api_release_uppercase(self, release): + url = reverse('api-release-uppercase-checksum', + url_args={'sha1_git': release.upper()}) + + resp = self.client.get(url) + self.assertEqual(resp.status_code, 302) + + redirect_url = reverse('api-release-uppercase-checksum', + url_args={'sha1_git': release}) + + self.assertEqual(resp['location'], redirect_url) diff --git a/swh/web/tests/api/views/test_revision.py b/swh/web/tests/api/views/test_revision.py index 0c3fee09..c37e535e 100644 --- a/swh/web/tests/api/views/test_revision.py +++ b/swh/web/tests/api/views/test_revision.py @@ -1,526 +1,539 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import random from hypothesis import given from rest_framework.test import APITestCase from unittest.mock import patch from swh.model.hashutil import hash_to_hex from swh.web.common.exc import NotFoundExc from swh.web.common.utils import reverse, parse_timestamp from swh.web.tests.strategies import ( revision, unknown_revision, new_revision, unknown_origin_id, origin, origin_with_multiple_visits ) from swh.web.tests.testcase import WebTestCase class RevisionApiTestCase(WebTestCase, APITestCase): @given(revision()) def test_api_revision(self, revision): url = reverse('api-revision', url_args={'sha1_git': revision}) rv = self.client.get(url) expected_revision = self.revision_get(revision) self._enrich_revision(expected_revision) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_revision) @given(unknown_revision()) def test_api_revision_not_found(self, unknown_revision): url = reverse('api-revision', url_args={'sha1_git': unknown_revision}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Revision with sha1_git %s not found.' % unknown_revision}) @given(revision()) def test_api_revision_raw_ok(self, revision): url = reverse('api-revision-raw-message', url_args={'sha1_git': revision}) rv = self.client.get(url) expected_message = self.revision_get(revision)['message'] self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/octet-stream') self.assertEqual(rv.content, expected_message.encode()) @given(new_revision()) def test_api_revision_raw_ok_no_msg(self, new_revision): del new_revision['message'] self.storage.revision_add([new_revision]) new_revision_id = hash_to_hex(new_revision['id']) url = reverse('api-revision-raw-message', url_args={'sha1_git': new_revision_id}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'No message for revision with sha1_git %s.' % new_revision_id}) @given(unknown_revision()) def test_api_revision_raw_ko_no_rev(self, unknown_revision): url = reverse('api-revision-raw-message', url_args={'sha1_git': unknown_revision}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Revision with sha1_git %s not found.' 
% unknown_revision}) @given(unknown_origin_id()) def test_api_revision_with_origin_not_found(self, unknown_origin_id): url = reverse('api-revision-origin', url_args={'origin_id': unknown_origin_id}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Origin with id %s not found!' % unknown_origin_id}) @given(origin()) def test_api_revision_with_origin(self, origin): url = reverse('api-revision-origin', url_args={'origin_id': origin['id']}) rv = self.client.get(url) snapshot = self.snapshot_get_latest(origin['id']) expected_revision = self.revision_get( snapshot['branches']['HEAD']['target']) self._enrich_revision(expected_revision) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_revision) @given(origin()) def test_api_revision_with_origin_and_branch_name(self, origin): snapshot = self.snapshot_get_latest(origin['id']) branch_name = random.choice( list(b for b in snapshot['branches'].keys() if snapshot['branches'][b]['target_type'] == 'revision')) url = reverse('api-revision-origin', url_args={'origin_id': origin['id'], 'branch_name': branch_name}) rv = self.client.get(url) expected_revision = self.revision_get( snapshot['branches'][branch_name]['target']) self._enrich_revision(expected_revision) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_revision) @given(origin_with_multiple_visits()) def test_api_revision_with_origin_and_branch_name_and_ts(self, origin): visit = random.choice(self.origin_visit_get(origin['id'])) snapshot = self.snapshot_get(visit['snapshot']) branch_name = random.choice( list(b for b in snapshot['branches'].keys() if snapshot['branches'][b]['target_type'] == 'revision')) url = reverse('api-revision-origin', url_args={'origin_id': origin['id'], 'branch_name': branch_name, 'ts': visit['date']}) rv = self.client.get(url) expected_revision = self.revision_get( snapshot['branches'][branch_name]['target']) self._enrich_revision(expected_revision) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_revision) @given(origin_with_multiple_visits()) def test_api_revision_with_origin_and_branch_name_and_ts_escapes(self, origin): visit = random.choice(self.origin_visit_get(origin['id'])) snapshot = self.snapshot_get(visit['snapshot']) branch_name = random.choice( list(b for b in snapshot['branches'].keys() if snapshot['branches'][b]['target_type'] == 'revision')) date = parse_timestamp(visit['date']) formatted_date = date.strftime('Today is %B %d, %Y at %X') url = reverse('api-revision-origin', url_args={'origin_id': origin['id'], 'branch_name': branch_name, 'ts': formatted_date}) rv = self.client.get(url) expected_revision = self.revision_get( snapshot['branches'][branch_name]['target']) self._enrich_revision(expected_revision) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_revision) @given(unknown_origin_id()) def test_api_directory_through_revision_origin_ko(self, unknown_origin_id): url = reverse('api-revision-origin-directory', url_args={'origin_id': unknown_origin_id}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 
'exception': 'NotFoundExc', 'reason': 'Origin with id %s not found!' % unknown_origin_id }) @given(origin()) def test_api_directory_through_revision_origin(self, origin): url = reverse('api-revision-origin-directory', url_args={'origin_id': origin['id']}) rv = self.client.get(url) snapshot = self.snapshot_get_latest(origin['id']) revision_id = snapshot['branches']['HEAD']['target'] revision = self.revision_get(revision_id) directory = self.directory_ls(revision['directory']) for entry in directory: if entry['type'] == 'dir': entry['target_url'] = reverse( 'api-directory', url_args={'sha1_git': entry['target']} ) entry['dir_url'] = reverse( 'api-revision-origin-directory', url_args={'origin_id': origin['id'], 'path': entry['name']}) elif entry['type'] == 'file': entry['target_url'] = reverse( 'api-content', url_args={'q': 'sha1_git:%s' % entry['target']} ) entry['file_url'] = reverse( 'api-revision-origin-directory', url_args={'origin_id': origin['id'], 'path': entry['name']}) elif entry['type'] == 'rev': entry['target_url'] = reverse( 'api-revision', url_args={'sha1_git': entry['target']} ) entry['rev_url'] = reverse( 'api-revision-origin-directory', url_args={'origin_id': origin['id'], 'path': entry['name']}) expected_result = { 'content': directory, 'path': '.', 'revision': revision_id, 'type': 'dir' } self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_result) @given(revision()) def test_api_revision_log(self, revision): per_page = 10 url = reverse('api-revision-log', url_args={'sha1_git': revision}, query_params={'per_page': per_page}) rv = self.client.get(url) expected_log = self.revision_log(revision, limit=per_page+1) expected_log = list(map(self._enrich_revision, expected_log)) has_next = len(expected_log) > per_page self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_log[:-1] if has_next else expected_log) if has_next: self.assertIn('Link', rv) next_log_url = reverse( 'api-revision-log', url_args={'sha1_git': expected_log[-1]['id']}, query_params={'per_page': per_page}) self.assertIn(next_log_url, rv['Link']) @given(unknown_revision()) def test_api_revision_log_not_found(self, unknown_revision): url = reverse('api-revision-log', url_args={'sha1_git': unknown_revision}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Revision with sha1_git %s not found.' 
% unknown_revision}) self.assertFalse(rv.has_header('Link')) @given(revision()) def test_api_revision_log_context(self, revision): revisions = self.revision_log(revision, limit=4) prev_rev = revisions[0]['id'] rev = revisions[-1]['id'] per_page = 10 url = reverse('api-revision-log', url_args={'sha1_git': rev, 'prev_sha1s': prev_rev}, query_params={'per_page': per_page}) rv = self.client.get(url) expected_log = self.revision_log(rev, limit=per_page) prev_revision = self.revision_get(prev_rev) expected_log.insert(0, prev_revision) expected_log = list(map(self._enrich_revision, expected_log)) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_log) @given(origin()) def test_api_revision_log_by(self, origin): per_page = 10 url = reverse('api-revision-origin-log', url_args={'origin_id': origin['id']}, query_params={'per_page': per_page}) rv = self.client.get(url) snapshot = self.snapshot_get_latest(origin['id']) expected_log = self.revision_log( snapshot['branches']['HEAD']['target'], limit=per_page+1) expected_log = list(map(self._enrich_revision, expected_log)) has_next = len(expected_log) > per_page self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_log[:-1] if has_next else expected_log) if has_next: self.assertIn('Link', rv) next_log_url = reverse( 'api-revision-origin-log', url_args={'origin_id': origin['id'], 'branch_name': 'HEAD'}, query_params={'per_page': per_page, 'sha1_git': expected_log[-1]['id']}) self.assertIn(next_log_url, rv['Link']) @given(origin()) def test_api_revision_log_by_ko(self, origin): invalid_branch_name = 'foobar' url = reverse('api-revision-origin-log', url_args={'origin_id': origin['id'], 'branch_name': invalid_branch_name}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertFalse(rv.has_header('Link')) self.assertEqual( rv.data, {'exception': 'NotFoundExc', 'reason': 'Revision for origin %s and branch %s not found.' 
% (origin['id'], invalid_branch_name)}) @patch('swh.web.api.views.revision._revision_directory_by') def test_api_revision_directory_ko_not_found(self, mock_rev_dir): # given mock_rev_dir.side_effect = NotFoundExc('Not found') # then rv = self.client.get('/api/1/revision/999/directory/some/path/to/dir/') self.assertEqual(rv.status_code, 404) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, { 'exception': 'NotFoundExc', 'reason': 'Not found'}) mock_rev_dir.assert_called_once_with( {'sha1_git': '999'}, 'some/path/to/dir', '/api/1/revision/999/directory/some/path/to/dir/', with_data=False) @patch('swh.web.api.views.revision._revision_directory_by') def test_api_revision_directory_ok_returns_dir_entries(self, mock_rev_dir): stub_dir = { 'type': 'dir', 'revision': '999', 'content': [ { 'sha1_git': '789', 'type': 'file', 'target': '101', 'target_url': '/api/1/content/sha1_git:101/', 'name': 'somefile', 'file_url': '/api/1/revision/999/directory/some/path/' 'somefile/' }, { 'sha1_git': '123', 'type': 'dir', 'target': '456', 'target_url': '/api/1/directory/456/', 'name': 'to-subdir', 'dir_url': '/api/1/revision/999/directory/some/path/' 'to-subdir/', }] } # given mock_rev_dir.return_value = stub_dir # then rv = self.client.get('/api/1/revision/999/directory/some/path/') self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, stub_dir) mock_rev_dir.assert_called_once_with( {'sha1_git': '999'}, 'some/path', '/api/1/revision/999/directory/some/path/', with_data=False) @patch('swh.web.api.views.revision._revision_directory_by') def test_api_revision_directory_ok_returns_content(self, mock_rev_dir): stub_content = { 'type': 'file', 'revision': '999', 'content': { 'sha1_git': '789', 'sha1': '101', 'data_url': '/api/1/content/101/raw/', } } # given mock_rev_dir.return_value = stub_content # then url = '/api/1/revision/666/directory/some/other/path/' rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, stub_content) mock_rev_dir.assert_called_once_with( {'sha1_git': '666'}, 'some/other/path', url, with_data=False) def _enrich_revision(self, revision): author_url = reverse( 'api-person', url_args={'person_id': revision['author']['id']}) committer_url = reverse( 'api-person', url_args={'person_id': revision['committer']['id']}) directory_url = reverse( 'api-directory', url_args={'sha1_git': revision['directory']}) history_url = reverse('api-revision-log', url_args={'sha1_git': revision['id']}) parents_id_url = [] for p in revision['parents']: parents_id_url.append({ 'id': p, 'url': reverse('api-revision', url_args={'sha1_git': p}) }) revision_url = reverse('api-revision', url_args={'sha1_git': revision['id']}) revision['author_url'] = author_url revision['committer_url'] = committer_url revision['directory_url'] = directory_url revision['history_url'] = history_url revision['url'] = revision_url revision['parents'] = parents_id_url return revision + + @given(revision()) + def test_api_revision_uppercase(self, revision): + url = reverse('api-revision-uppercase-checksum', + url_args={'sha1_git': revision.upper()}) + + resp = self.client.get(url) + self.assertEqual(resp.status_code, 302) + + redirect_url = reverse('api-revision', + url_args={'sha1_git': revision}) + + self.assertEqual(resp['location'], redirect_url) diff --git a/swh/web/tests/api/views/test_snapshot.py b/swh/web/tests/api/views/test_snapshot.py index 
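Note: the test_api_release_uppercase and test_api_revision_uppercase additions above only verify the behaviour from the outside (a GET on an '*-uppercase-checksum' route answers with a 302 whose Location is the lowercase URL). As a minimal, hypothetical sketch of the kind of redirect view such a route could resolve to (make_checksum_lowercase_redirect, view_name and checksum_args are illustrative names, not the code under review):

# Hypothetical sketch, not part of this diff: lowercase the checksum URL
# arguments and redirect to the canonical route, which is the contract the
# *_uppercase tests assert (302 status + lowercase Location header).
from django.http import HttpResponseRedirect
from django.urls import reverse as django_reverse


def make_checksum_lowercase_redirect(view_name, checksum_args):
    """Build a view redirecting uppercase checksum URLs to the
    lowercase URL of the route named ``view_name``."""
    def redirect_view(request, **kwargs):
        for arg in checksum_args:
            if arg in kwargs:
                kwargs[arg] = kwargs[arg].lower()
        return HttpResponseRedirect(django_reverse(view_name, kwargs=kwargs))
    return redirect_view

Registered for, e.g., the release endpoint, a request to /api/1/release/<UPPERCASE_SHA1_GIT>/ would then be answered with a 302 to the lowercase form, which is the URL the tests build with reverse() and compare against.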
7901204e..4934900c 100644 --- a/swh/web/tests/api/views/test_snapshot.py +++ b/swh/web/tests/api/views/test_snapshot.py @@ -1,160 +1,173 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import random from hypothesis import given from rest_framework.test import APITestCase from swh.web.common.utils import reverse from swh.web.tests.strategies import snapshot, unknown_snapshot from swh.web.tests.testcase import WebTestCase class SnapshotApiTestCase(WebTestCase, APITestCase): @given(snapshot()) def test_api_snapshot(self, snapshot): url = reverse('api-snapshot', url_args={'snapshot_id': snapshot}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') expected_data = self.snapshot_get(snapshot) expected_data = self._enrich_snapshot(expected_data) self.assertEqual(rv.data, expected_data) @given(snapshot()) def test_api_snapshot_paginated(self, snapshot): branches_offset = 0 branches_count = 2 snapshot_branches = [] for k, v in sorted(self.snapshot_get(snapshot)['branches'].items()): snapshot_branches.append({ 'name': k, 'target_type': v['target_type'], 'target': v['target'] }) whole_snapshot = {'id': snapshot, 'branches': {}, 'next_branch': None} while branches_offset < len(snapshot_branches): branches_from = snapshot_branches[branches_offset]['name'] url = reverse('api-snapshot', url_args={'snapshot_id': snapshot}, query_params={'branches_from': branches_from, 'branches_count': branches_count}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') expected_data = self.snapshot_get_branches(snapshot, branches_from, branches_count) expected_data = self._enrich_snapshot(expected_data) branches_offset += branches_count if branches_offset < len(snapshot_branches): next_branch = snapshot_branches[branches_offset]['name'] expected_data['next_branch'] = next_branch else: expected_data['next_branch'] = None self.assertEqual(rv.data, expected_data) whole_snapshot['branches'].update(expected_data['branches']) if branches_offset < len(snapshot_branches): next_url = reverse( 'api-snapshot', url_args={'snapshot_id': snapshot}, query_params={'branches_from': next_branch, 'branches_count': branches_count}) self.assertEqual(rv['Link'], '<%s>; rel="next"' % next_url) else: self.assertFalse(rv.has_header('Link')) url = reverse('api-snapshot', url_args={'snapshot_id': snapshot}) rv = self.client.get(url) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, whole_snapshot) @given(snapshot()) def test_api_snapshot_filtered(self, snapshot): snapshot_branches = [] for k, v in sorted(self.snapshot_get(snapshot)['branches'].items()): snapshot_branches.append({ 'name': k, 'target_type': v['target_type'], 'target': v['target'] }) target_type = random.choice(snapshot_branches)['target_type'] url = reverse('api-snapshot', url_args={'snapshot_id': snapshot}, query_params={'target_types': target_type}) rv = self.client.get(url) expected_data = self.snapshot_get_branches( snapshot, target_types=target_type) expected_data = self._enrich_snapshot(expected_data) self.assertEqual(rv.status_code, 200) self.assertEqual(rv['Content-Type'], 'application/json') self.assertEqual(rv.data, expected_data) @given(unknown_snapshot()) def 
test_api_snapshot_errors(self, unknown_snapshot): url = reverse('api-snapshot', url_args={'snapshot_id': '63ce369'}) rv = self.client.get(url) self.assertEqual(rv.status_code, 400) url = reverse('api-snapshot', url_args={'snapshot_id': unknown_snapshot}) rv = self.client.get(url) self.assertEqual(rv.status_code, 404) def _enrich_snapshot(self, snapshot): def _get_branch_url(target_type, target): url = None if target_type == 'revision': url = reverse('api-revision', url_args={'sha1_git': target}) if target_type == 'release': url = reverse('api-release', url_args={'sha1_git': target}) return url for branch in snapshot['branches'].keys(): target = snapshot['branches'][branch]['target'] target_type = snapshot['branches'][branch]['target_type'] snapshot['branches'][branch]['target_url'] = \ _get_branch_url(target_type, target) for branch in snapshot['branches'].keys(): target = snapshot['branches'][branch]['target'] target_type = snapshot['branches'][branch]['target_type'] if target_type == 'alias': if target in snapshot['branches']: snapshot['branches'][branch]['target_url'] = \ snapshot['branches'][target]['target_url'] else: snp = self.snapshot_get_branches(snapshot['id'], branches_from=target, branches_count=1) alias_target = snp['branches'][target]['target'] alias_target_type = snp['branches'][target]['target_type'] snapshot['branches'][branch]['target_url'] = \ _get_branch_url(alias_target_type, alias_target) return snapshot + + @given(snapshot()) + def test_api_snapshot_uppercase(self, snapshot): + url = reverse('api-snapshot-uppercase-checksum', + url_args={'snapshot_id': snapshot.upper()}) + + resp = self.client.get(url) + self.assertEqual(resp.status_code, 302) + + redirect_url = reverse('api-snapshot-uppercase-checksum', + url_args={'snapshot_id': snapshot}) + + self.assertEqual(resp['location'], redirect_url) diff --git a/swh/web/tests/browse/views/test_content.py b/swh/web/tests/browse/views/test_content.py index 47e3edd2..3c3766d6 100644 --- a/swh/web/tests/browse/views/test_content.py +++ b/swh/web/tests/browse/views/test_content.py @@ -1,350 +1,362 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from unittest.mock import patch from django.utils.html import escape from hypothesis import given from swh.web.browse.utils import ( get_mimetype_and_encoding_for_content, prepare_content_for_display, _reencode_content ) from swh.web.common.exc import NotFoundExc from swh.web.common.utils import reverse, get_swh_persistent_id from swh.web.common.utils import gen_path_info from swh.web.tests.strategies import ( content, content_text_non_utf8, content_text_no_highlight, content_image_type, content_text, invalid_sha1, unknown_content ) from swh.web.tests.testcase import WebTestCase class SwhBrowseContentTest(WebTestCase): @given(content()) def test_content_view_text(self, content): sha1_git = content['sha1_git'] url = reverse('browse-content', url_args={'query_string': content['sha1']}, query_params={'path': content['path']}) url_raw = reverse('browse-content-raw', url_args={'query_string': content['sha1']}) resp = self.client.get(url) content_display = self._process_content_for_display(content) mimetype = content_display['mimetype'] self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('browse/content.html') if mimetype.startswith('text/'): self.assertContains(resp, '<code class="%s">' %
content_display['language']) self.assertContains(resp, escape(content_display['content_data'])) self.assertContains(resp, url_raw) swh_cnt_id = get_swh_persistent_id('content', sha1_git) swh_cnt_id_url = reverse('browse-swh-id', url_args={'swh_id': swh_cnt_id}) self.assertContains(resp, swh_cnt_id) self.assertContains(resp, swh_cnt_id_url) @given(content_text_no_highlight()) def test_content_view_text_no_highlight(self, content): sha1_git = content['sha1_git'] url = reverse('browse-content', url_args={'query_string': content['sha1']}) url_raw = reverse('browse-content-raw', url_args={'query_string': content['sha1']}) resp = self.client.get(url) content_display = self._process_content_for_display(content) self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('browse/content.html') self.assertContains(resp, '<code class="nohighlight">') self.assertContains(resp, escape(content_display['content_data'])) # noqa self.assertContains(resp, url_raw) swh_cnt_id = get_swh_persistent_id('content', sha1_git) swh_cnt_id_url = reverse('browse-swh-id', url_args={'swh_id': swh_cnt_id}) self.assertContains(resp, swh_cnt_id) self.assertContains(resp, swh_cnt_id_url) @given(content_text_non_utf8()) def test_content_view_no_utf8_text(self, content): sha1_git = content['sha1_git'] url = reverse('browse-content', url_args={'query_string': content['sha1']}) resp = self.client.get(url) content_display = self._process_content_for_display(content) self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('browse/content.html') swh_cnt_id = get_swh_persistent_id('content', sha1_git) swh_cnt_id_url = reverse('browse-swh-id', url_args={'swh_id': swh_cnt_id}) self.assertContains(resp, swh_cnt_id_url) self.assertContains(resp, escape(content_display['content_data'])) @given(content_image_type()) def test_content_view_image(self, content): url = reverse('browse-content', url_args={'query_string': content['sha1']}) url_raw = reverse('browse-content-raw', url_args={'query_string': content['sha1']}) resp = self.client.get(url) content_display = self._process_content_for_display(content) mimetype = content_display['mimetype'] content_data = content_display['content_data'] self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('browse/content.html') self.assertContains(resp, '<img src="data:%s;base64,%s"/>' % (mimetype, content_data.decode('utf-8'))) self.assertContains(resp, url_raw) @given(content()) def test_content_view_with_path(self, content): path = content['path'] url = reverse('browse-content', url_args={'query_string': content['sha1']}, query_params={'path': path}) resp = self.client.get(url) self.assertEqual(resp.status_code, 200) self.assertTemplateUsed('browse/content.html') self.assertContains(resp, '