diff --git a/debian/control b/debian/control index 55801f46..802f2b0d 100644 --- a/debian/control +++ b/debian/control @@ -1,32 +1,32 @@ Source: swh-web Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python (>= 2), python3-all, python3-docutils, python3-nose, - python3-django, - python3-djangorestframework, + python3-django (>= 1.10.7~), + python3-djangorestframework (>= 3.4.0~), python3-pygments, python3-setuptools, python3-yaml, - python3-magic, + python3-magic (>= 0.3.0~), python3-sphinx, python3-sphinxcontrib.httpdomain, python3-swh.core (>= 0.0.20~), python3-swh.model (>= 0.0.15~), python3-swh.storage (>= 0.0.83~) Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/DWUI/ Package: python3-swh.web Architecture: all Depends: python3-swh.core (>= 0.0.20~), python3-swh.model (>= 0.0.15~), python3-swh.storage (>= 0.0.83~), python3-swh.vault (>= 0.0.1~), ${misc:Depends}, ${python3:Depends} Description: Software Heritage Web Applications diff --git a/requirements.txt b/requirements.txt index 33fc1e18..d379cd47 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,19 +1,19 @@ # Add here external Python modules dependencies, one per line. Module names # should match https://pypi.python.org/pypi names. For the full spec or # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html # Runtime dependencies -django -djangorestframework +django >= 1.10.7 +djangorestframework >= 3.4.0 docutils -file_magic +file_magic >= 0.3.0 pygments python-dateutil pyyaml #Doc dependencies sphinx sphinxcontrib-httpdomain # Test dependencies diff --git a/swh/web/api/apiresponse.py b/swh/web/api/apiresponse.py index 03e97611..e88348cf 100644 --- a/swh/web/api/apiresponse.py +++ b/swh/web/api/apiresponse.py @@ -1,173 +1,173 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from rest_framework.response import Response from swh.storage.exc import StorageDBError, StorageAPIError from swh.web.common.exc import NotFoundExc, ForbiddenExc from swh.web.common.utils import shorten_path from swh.web.api import utils def compute_link_header(rv, options): """Add Link header in returned value results. Args: rv (dict): dictionary with keys: - headers: potential headers with 'link-next' and 'link-prev' keys - results: containing the result to return options (dict): the initial dict to update with result if any Returns: dict: dictionary with optional keys 'link-next' and 'link-prev' """ link_headers = [] if 'headers' not in rv: return {} rv_headers = rv['headers'] if 'link-next' in rv_headers: link_headers.append('<%s>; rel="next"' % ( rv_headers['link-next'])) if 'link-prev' in rv_headers: link_headers.append('<%s>; rel="previous"' % ( rv_headers['link-prev'])) if link_headers: link_header_str = ','.join(link_headers) headers = options.get('headers', {}) headers.update({ 'Link': link_header_str }) return headers return {} def filter_by_fields(request, data): """Extract a request parameter 'fields' if it exists to permit the filtering on he data dict's keys. If such field is not provided, returns the data as is. 
""" - fields = utils.get_query_params(request).get('fields') + fields = request.query_params.get('fields') if fields: fields = set(fields.split(',')) data = utils.filter_field_keys(data, fields) return data def transform(rv): """Transform an eventual returned value with multiple layer of information with only what's necessary. If the returned value rv contains the 'results' key, this is the associated value which is returned. Otherwise, return the initial dict without the potential 'headers' key. """ if 'results' in rv: return rv['results'] if 'headers' in rv: rv.pop('headers') return rv def make_api_response(request, data, doc_data={}, options={}): """Generates an API response based on the requested mimetype. Args: request: a DRF Request object data: raw data to return in the API response doc_data: documentation data for HTML response options: optionnal data that can be used to generate the response Returns: a DRF Response a object """ if data: options['headers'] = compute_link_header(data, options) data = transform(data) data = filter_by_fields(request, data) doc_env = doc_data headers = {} if 'headers' in options: doc_env['headers_data'] = options['headers'] headers = options['headers'] # get request status code doc_env['status_code'] = options.get('status', 200) response_args = {'status': doc_env['status_code'], 'headers': headers, 'content_type': request.accepted_media_type} # when requesting HTML, typically when browsing the API through its # documented views, we need to enrich the input data with documentation # related ones and inform DRF that we request HTML template rendering if request.accepted_media_type == 'text/html': if data: data = json.dumps(data, sort_keys=True, indent=4, separators=(',', ': ')) doc_env['response_data'] = data doc_env['request'] = request doc_env['heading'] = shorten_path(str(request.path)) response_args['data'] = doc_env response_args['template_name'] = 'apidoc.html' # otherwise simply return the raw data and let DRF picks # the correct renderer (JSON or YAML) else: response_args['data'] = data return Response(**response_args) def error_response(request, error, doc_data): """Private function to create a custom error response. Args: request: a DRF Request object error: the exception that caused the error doc_data: documentation data for HTML response """ error_code = 400 if isinstance(error, NotFoundExc): error_code = 404 elif isinstance(error, ForbiddenExc): error_code = 403 elif isinstance(error, StorageDBError): error_code = 503 elif isinstance(error, StorageAPIError): error_code = 503 error_opts = {'status': error_code} error_data = { 'exception': error.__class__.__name__, 'reason': str(error), } return make_api_response(request, error_data, doc_data, options=error_opts) diff --git a/swh/web/api/utils.py b/swh/web/api/utils.py index 3117afbd..db1ab093 100644 --- a/swh/web/api/utils.py +++ b/swh/web/api/utils.py @@ -1,364 +1,353 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import re from swh.web.common.utils import reverse, fmap from swh.web.common.query import parse_hash def filter_endpoints(url_map, prefix_url_rule, blacklist=[]): """Filter endpoints by prefix url rule. 
Args: - url_map: Url Werkzeug.Map of rules - prefix_url_rule: prefix url string - blacklist: blacklist of some url Returns: Dictionary of url_rule with values methods and endpoint. The key is the url, the associated value is a dictionary of 'methods' (possible http methods) and 'endpoint' (python function) """ out = {} for r in url_map: rule = r['rule'] if rule == prefix_url_rule or rule in blacklist: continue if rule.startswith(prefix_url_rule): out[rule] = {'methods': sorted(map(str, r['methods'])), 'endpoint': r['endpoint']} return out def prepare_data_for_view(data, encoding='utf-8'): def prepare_data(s): # Note: can only be 'data' key with bytes of raw content if isinstance(s, bytes): try: return s.decode(encoding) except: return "Cannot decode the data bytes, try and set another " \ "encoding in the url (e.g. ?encoding=utf8) or " \ "download directly the " \ "content's raw data." if isinstance(s, str): return re.sub(r'/api/1/', r'/browse/', s) return s return fmap(prepare_data, data) def filter_field_keys(data, field_keys): """Given an object instance (directory or list), and a csv field keys to filter on. Return the object instance with filtered keys. Note: Returns obj as is if it's an instance of types not in (dictionary, list) Args: - data: one object (dictionary, list...) to filter. - field_keys: csv or set of keys to filter the object on Returns: obj filtered on field_keys """ if isinstance(data, map): return map(lambda x: filter_field_keys(x, field_keys), data) if isinstance(data, list): return [filter_field_keys(x, field_keys) for x in data] if isinstance(data, dict): return {k: v for (k, v) in data.items() if k in field_keys} return data def person_to_string(person): """Map a person (person, committer, tagger, etc...) to a string. """ return ''.join([person['name'], ' <', person['email'], '>']) def enrich_object(object): """Enrich an object (revision, release) with link to the 'target' of type 'target_type'. Args: object: An object with target and target_type keys (e.g. release, revision) Returns: Object enriched with target_url pointing to the right swh.web.ui.api urls for the pointing object (revision, release, content, directory) """ obj = object.copy() if 'target' in obj and 'target_type' in obj: if obj['target_type'] == 'revision': obj['target_url'] = reverse('revision', kwargs={'sha1_git': obj['target']}) elif obj['target_type'] == 'release': obj['target_url'] = reverse('release', kwargs={'sha1_git': obj['target']}) elif obj['target_type'] == 'content': obj['target_url'] = \ reverse('content', kwargs={'q': 'sha1_git:' + obj['target']}) elif obj['target_type'] == 'directory': obj['target_url'] = reverse('directory', kwargs={'sha1_git': obj['target']}) if 'author' in obj: author = obj['author'] obj['author_url'] = reverse('person', kwargs={'person_id': author['id']}) return obj enrich_release = enrich_object def enrich_directory(directory, context_url=None): """Enrich directory with url to content or directory. 
""" if 'type' in directory: target_type = directory['type'] target = directory['target'] if target_type == 'file': directory['target_url'] = \ reverse('content', kwargs={'q': 'sha1_git:%s' % target}) if context_url: directory['file_url'] = context_url + directory['name'] + '/' elif target_type == 'dir': directory['target_url'] = reverse('directory', kwargs={'sha1_git': target}) if context_url: directory['dir_url'] = context_url + directory['name'] + '/' else: directory['target_url'] = reverse('revision', kwargs={'sha1_git': target}) if context_url: directory['rev_url'] = context_url + directory['name'] + '/' return directory def enrich_metadata_endpoint(content): """Enrich metadata endpoint with link to the upper metadata endpoint. """ c = content.copy() c['content_url'] = reverse('content', args=['sha1:%s' % c['id']]) return c def enrich_content(content, top_url=False, query_string=None): """Enrich content with links to: - data_url: its raw data - filetype_url: its filetype information - language_url: its programming language information - license_url: its licensing information Args: content: dict of data associated to a swh content object top_url: whether or not to include the content url in the enriched data query_string: optional query string of type ':' used when requesting the content, it acts as a hint for picking the same hash method when computing the url listed above Returns: An enriched content dict filled with additional urls """ checksums = content if 'checksums' in content: checksums = content['checksums'] hash_algo = 'sha1' if query_string: hash_algo = parse_hash(query_string)[0] if hash_algo in checksums: q = '%s:%s' % (hash_algo, checksums[hash_algo]) if top_url: content['content_url'] = reverse('content', kwargs={'q': q}) content['data_url'] = reverse('content-raw', kwargs={'q': q}) content['filetype_url'] = reverse('content-filetype', kwargs={'q': q}) content['language_url'] = reverse('content-language', kwargs={'q': q}) content['license_url'] = reverse('content-license', kwargs={'q': q}) return content def enrich_entity(entity): """Enrich entity with """ if 'uuid' in entity: entity['uuid_url'] = reverse('entity', kwargs={'uuid': entity['uuid']}) if 'parent' in entity and entity['parent']: entity['parent_url'] = reverse('entity', kwargs={'uuid': entity['parent']}) return entity def _get_path_list(path_string): """Helper for enrich_revision: get a list of the sha1 id of the navigation breadcrumbs, ordered from the oldest to the most recent. Args: path_string: the path as a '/'-separated string Returns: The navigation context as a list of sha1 revision ids """ return path_string.split('/') def _get_revision_contexts(rev_id, context): """Helper for enrich_revision: retrieve for the revision id and potentially the navigation breadcrumbs the context to pass to parents and children of of the revision. Args: rev_id: the revision's sha1 id context: the current navigation context Returns: The context for parents, children and the url of the direct child as a tuple in that order. 
""" context_for_parents = None context_for_children = None url_direct_child = None if not context: return (rev_id, None, None) path_list = _get_path_list(context) context_for_parents = '%s/%s' % (context, rev_id) prev_for_children = path_list[:-1] if len(prev_for_children) > 0: context_for_children = '/'.join(prev_for_children) child_id = path_list[-1] # This commit is not the first commit in the path if context_for_children: url_direct_child = reverse('revision-context', kwargs={'sha1_git': child_id, 'context': context_for_children}) # This commit is the first commit in the path else: url_direct_child = reverse('revision', kwargs={'sha1_git': child_id}) return (context_for_parents, context_for_children, url_direct_child) def _make_child_url(rev_children, context): """Helper for enrich_revision: retrieve the list of urls corresponding to the children of the current revision according to the navigation breadcrumbs. Args: rev_children: a list of revision id context: the '/'-separated navigation breadcrumbs Returns: the list of the children urls according to the context """ children = [] for child in rev_children: if context and child != _get_path_list(context)[-1]: children.append(reverse('revision', kwargs={'sha1_git': child})) elif not context: children.append(reverse('revision', kwargs={'sha1_git': child})) return children def enrich_revision(revision, context=None): """Enrich revision with links where it makes sense (directory, parents). Keep track of the navigation breadcrumbs if they are specified. Args: revision: the revision as a dict context: the navigation breadcrumbs as a /-separated string of revision sha1_git """ ctx_parents, ctx_children, url_direct_child = _get_revision_contexts( revision['id'], context) revision['url'] = reverse('revision', kwargs={'sha1_git': revision['id']}) revision['history_url'] = reverse('revision-log', kwargs={'sha1_git': revision['id']}) if context: revision['history_context_url'] = reverse( 'revision-log', kwargs={'sha1_git': revision['id'], 'prev_sha1s': context}) if 'author' in revision: author = revision['author'] revision['author_url'] = reverse('person', kwargs={'person_id': author['id']}) if 'committer' in revision: committer = revision['committer'] revision['committer_url'] = \ reverse('person', kwargs={'person_id': committer['id']}) if 'directory' in revision: revision['directory_url'] = \ reverse('directory', kwargs={'sha1_git': revision['directory']}) if 'parents' in revision: parents = [] for parent in revision['parents']: parents.append({ 'id': parent, 'url': reverse('revision', kwargs={'sha1_git': parent}) }) revision['parents'] = parents if 'children' in revision: children = _make_child_url(revision['children'], context) if url_direct_child: children.append(url_direct_child) revision['children_urls'] = children else: if url_direct_child: revision['children_urls'] = [url_direct_child] if 'message_decoding_failed' in revision: revision['message_url'] = reverse('revision-raw-message', kwargs={'sha1_git': revision['id']}) return revision - - -def get_query_params(request): - """Utility functions for retrieving query parameters from a DRF request - object. 
Its purpose is to handle multiple versions of DRF.""" - if hasattr(request, 'query_params'): - # DRF >= 3.0 uses query_params attribute - return request.query_params - else: - # while DRF < 3.0 uses QUERY_PARAMS attribute - return request.QUERY_PARAMS diff --git a/swh/web/api/views/content.py b/swh/web/api/views/content.py index 989db1d5..699a0253 100644 --- a/swh/web/api/views/content.py +++ b/swh/web/api/views/content.py @@ -1,340 +1,340 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import functools from django.http import HttpResponse from swh.web.common import service from swh.web.common.utils import reverse from swh.web.common.exc import NotFoundExc, ForbiddenExc from swh.web.api import apidoc as api_doc from swh.web.api import utils from swh.web.api.apiurls import api_route from swh.web.api.views.utils import ( api_lookup, doc_exc_id_not_found, doc_header_link, doc_arg_last_elt, doc_arg_per_page, doc_exc_bad_id, doc_arg_content_id ) @api_route(r'/content/(?P.+)/provenance/', 'content-provenance') @api_doc.route('/content/provenance/', tags=['hidden']) @api_doc.arg('q', default='sha1_git:88b9b366facda0b5ff8d8640ee9279bed346f242', argtype=api_doc.argtypes.algo_and_hash, argdoc=doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""List of provenance information (dict) for the matched content.""") def api_content_provenance(request, q): """Return content's provenance information if any. """ def _enrich_revision(provenance): p = provenance.copy() p['revision_url'] = \ reverse('revision', kwargs={'sha1_git': provenance['revision']}) p['content_url'] = \ reverse('content', kwargs={'q': 'sha1_git:%s' % provenance['content']}) p['origin_url'] = \ reverse('origin', kwargs={'origin_id': provenance['origin']}) p['origin_visits_url'] = \ reverse('origin-visits', kwargs={'origin_id': provenance['origin']}) p['origin_visit_url'] = \ reverse('origin-visit', kwargs={'origin_id': provenance['origin'], 'visit_id': provenance['visit']}) return p return api_lookup( service.lookup_content_provenance, q, notfound_msg='Content with {} not found.'.format(q), enrich_fn=_enrich_revision) @api_route(r'/content/(?P.+)/filetype/', 'content-filetype') @api_doc.route('/content/filetype/') @api_doc.arg('q', default='sha1:1fc6129a692e7a87b5450e2ba56e7669d0c5775d', argtype=api_doc.argtypes.algo_and_hash, argdoc=doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""Filetype information (dict) for the matched content.""") def api_content_filetype(request, q): """Get information about the detected MIME type of a content object. 
""" return api_lookup( service.lookup_content_filetype, q, notfound_msg='No filetype information found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) @api_route(r'/content/(?P.+)/language/', 'content-language') @api_doc.route('/content/language/') @api_doc.arg('q', default='sha1:1fc6129a692e7a87b5450e2ba56e7669d0c5775d', argtype=api_doc.argtypes.algo_and_hash, argdoc=doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""Language information (dict) for the matched content.""") def api_content_language(request, q): """Get information about the detected (programming) language of a content object. """ return api_lookup( service.lookup_content_language, q, notfound_msg='No language information found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) @api_route(r'/content/(?P.+)/license/', 'content-license') @api_doc.route('/content/license/') @api_doc.arg('q', default='sha1:1fc6129a692e7a87b5450e2ba56e7669d0c5775d', argtype=api_doc.argtypes.algo_and_hash, argdoc=doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""License information (dict) for the matched content.""") def api_content_license(request, q): """Get information about the detected license of a content object. """ return api_lookup( service.lookup_content_license, q, notfound_msg='No license information found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) @api_route(r'/content/(?P.+)/ctags/', 'content-ctags') @api_doc.route('/content/ctags/', tags=['upcoming']) @api_doc.arg('q', default='sha1:1fc6129a692e7a87b5450e2ba56e7669d0c5775d', argtype=api_doc.argtypes.algo_and_hash, argdoc=doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""Ctags symbol (dict) for the matched content.""") def api_content_ctags(request, q): """Get information about all `Ctags `_-style symbols defined in a content object. """ return api_lookup( service.lookup_content_ctags, q, notfound_msg='No ctags symbol found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) @api_route(r'/content/(?P.+)/raw/', 'content-raw') @api_doc.route('/content/raw/', handle_response=True) @api_doc.arg('q', default='adc83b19e793491b1c6ea0fd8b46cd9f32e592fc', argtype=api_doc.argtypes.algo_and_hash, argdoc=doc_arg_content_id) @api_doc.param('filename', default=None, argtype=api_doc.argtypes.str, doc='User\'s desired filename. If provided, the downloaded' ' content will get that filename.') @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.octet_stream, retdoc='The raw content data as an octet stream') def api_content_raw(request, q): """Get the raw content of a content object (AKA "blob"), as a byte sequence. """ def generate(content): yield content['data'] content_raw = service.lookup_content_raw(q) if not content_raw: raise NotFoundExc('Content %s is not found.' % q) content_filetype = service.lookup_content_filetype(q) if not content_filetype: raise NotFoundExc('Content %s is not available for download.' 
% q) mimetype = content_filetype['mimetype'] if 'text/' not in mimetype: raise ForbiddenExc('Only textual content is available for download. ' 'Actual content mimetype is %s.' % mimetype) - filename = utils.get_query_params(request).get('filename') + filename = request.query_params.get('filename') if not filename: filename = 'content_%s_raw' % q.replace(':', '_') response = HttpResponse(generate(content_raw), content_type='application/octet-stream') response['Content-disposition'] = 'attachment; filename=%s' % filename return response @api_route(r'/content/symbol/(?P.+)/', 'content-symbol') @api_doc.route('/content/symbol/', tags=['upcoming']) @api_doc.arg('q', default='hello', argtype=api_doc.argtypes.str, argdoc="""An expression string to lookup in swh's raw content""") @api_doc.header('Link', doc=doc_header_link) @api_doc.param('last_sha1', default=None, argtype=api_doc.argtypes.str, doc=doc_arg_last_elt) @api_doc.param('per_page', default=10, argtype=api_doc.argtypes.int, doc=doc_arg_per_page) @api_doc.returns(rettype=api_doc.rettypes.list, retdoc="""A list of dict whose content matches the expression. Each dict has the following keys: - id (bytes): identifier of the content - name (text): symbol whose content match the expression - kind (text): kind of the symbol that matched - lang (text): Language for that entry - line (int): Number line for the symbol """) def api_content_symbol(request, q=None): """Search content objects by `Ctags `_-style symbol (e.g., function name, data type, method, ...). """ result = {} - last_sha1 = utils.get_query_params(request).get('last_sha1', None) - per_page = int(utils.get_query_params(request).get('per_page', '10')) + last_sha1 = request.query_params.get('last_sha1', None) + per_page = int(request.query_params.get('per_page', '10')) def lookup_exp(exp, last_sha1=last_sha1, per_page=per_page): return service.lookup_expression(exp, last_sha1, per_page) symbols = api_lookup( lookup_exp, q, notfound_msg="No indexed raw content match expression '{}'.".format(q), enrich_fn=functools.partial(utils.enrich_content, top_url=True)) if symbols: l = len(symbols) if l == per_page: query_params = {} new_last_sha1 = symbols[-1]['sha1'] query_params['last_sha1'] = new_last_sha1 - if utils.get_query_params(request).get('per_page'): + if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('content-symbol', kwargs={'q': q}, query_params=query_params) } result.update({ 'results': symbols }) return result @api_route(r'/content/known/search/', 'content-known', methods=['POST']) @api_route(r'/content/known/(?P(?!search).*)/', 'content-known') @api_doc.route('/content/known/', tags=['hidden']) @api_doc.arg('q', default='adc83b19e793491b1c6ea0fd8b46cd9f32e592fc', argtype=api_doc.argtypes.sha1, argdoc='content identifier as a sha1 checksum') @api_doc.param('q', default=None, argtype=api_doc.argtypes.str, doc="""(POST request) An algo_hash:hash string, where algo_hash is one of sha1, sha1_git or sha256 and hash is the hash to search for in SWH""") @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""a dictionary with results (found/not found for each given identifier) and statistics about how many identifiers were found""") def api_check_content_known(request, q=None): """Check whether some content (AKA "blob") is present in the archive. 
Lookup can be performed by various means: - a GET request with one or several hashes, separated by ',' - a POST request with one or several hashes, passed as (multiple) values for parameter 'q' """ response = {'search_res': None, 'search_stats': None} search_stats = {'nbfiles': 0, 'pct': 0} search_res = None queries = [] # GET: Many hash separated values request if q: hashes = q.split(',') for v in hashes: queries.append({'filename': None, 'sha1': v}) # POST: Many hash requests in post form submission elif request.method == 'POST': - data = request.data if hasattr(request, 'data') else request.DATA + data = request.data # Remove potential inputs with no associated value for k, v in data.items(): if v is not None: if k == 'q' and len(v) > 0: queries.append({'filename': None, 'sha1': v}) elif v != '': queries.append({'filename': k, 'sha1': v}) if queries: lookup = service.lookup_multiple_hashes(queries) result = [] l = len(queries) for el in lookup: res_d = {'sha1': el['sha1'], 'found': el['found']} if 'filename' in el and el['filename']: res_d['filename'] = el['filename'] result.append(res_d) search_res = result nbfound = len([x for x in lookup if x['found']]) search_stats['nbfiles'] = l search_stats['pct'] = (nbfound / l) * 100 response['search_res'] = search_res response['search_stats'] = search_stats return response @api_route(r'/content/(?P.+)/', 'content') @api_doc.route('/content/') @api_doc.arg('q', default='dc2830a9e72f23c1dfebef4413003221baa5fb62', argtype=api_doc.argtypes.algo_and_hash, argdoc=doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""known metadata for content identified by q""") def api_content_metadata(request, q): """Get information about a content (AKA "blob") object. 
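# --- Illustrative sketch, not part of the patch ---
# In api_check_content_known() above, the hasattr() fallback between
# request.DATA (DRF < 3.0) and request.data is dropped: with
# djangorestframework >= 3.4.0, POSTed form or JSON payloads are always
# available as request.data. Hypothetical minimal view, not part of swh-web:
from rest_framework.decorators import api_view
from rest_framework.response import Response


@api_view(['POST'])
def example_known_view(request):
    # request.data is a dict-like object holding the parsed request body
    hashes = [v for k, v in request.data.items() if k == 'q' and v]
    return Response({'count': len(hashes)})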
""" return api_lookup( service.lookup_content, q, notfound_msg='Content with {} not found.'.format(q), enrich_fn=functools.partial(utils.enrich_content, query_string=q)) diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py index 0299abe1..34ec532e 100644 --- a/swh/web/api/views/origin.py +++ b/swh/web/api/views/origin.py @@ -1,179 +1,179 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service from swh.web.common.utils import reverse from swh.web.api import utils from swh.web.api import apidoc as api_doc from swh.web.api.apiurls import api_route from swh.web.api.views.utils import ( api_lookup, doc_exc_id_not_found, doc_header_link, doc_arg_last_elt, doc_arg_per_page ) @api_route(r'/origin/(?P[0-9]+)/', 'origin') @api_route(r'/origin/(?P[a-z]+)/url/(?P.+)', 'origin') @api_doc.route('/origin/') @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc='origin identifier (when looking up by ID)') @api_doc.arg('origin_type', default='git', argtype=api_doc.argtypes.str, argdoc='origin type (when looking up by type+URL)') @api_doc.arg('origin_url', default='https://github.com/hylang/hy', argtype=api_doc.argtypes.path, argdoc='origin URL (when looking up by type+URL)') @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""The metadata of the origin corresponding to the given criteria""") def api_origin(request, origin_id=None, origin_type=None, origin_url=None): """Get information about a software origin. Software origins might be looked up by origin type and canonical URL (e.g., "git" + a "git clone" URL), or by their unique (but otherwise meaningless) identifier. """ ori_dict = { 'id': origin_id, 'type': origin_type, 'url': origin_url } ori_dict = {k: v for k, v in ori_dict.items() if ori_dict[k]} if 'id' in ori_dict: error_msg = 'Origin with id %s not found.' % ori_dict['id'] else: error_msg = 'Origin with type %s and URL %s not found' % ( ori_dict['type'], ori_dict['url']) def _enrich_origin(origin): if 'id' in origin: o = origin.copy() o['origin_visits_url'] = \ reverse('origin-visits', kwargs={'origin_id': origin['id']}) return o return origin return api_lookup( service.lookup_origin, ori_dict, notfound_msg=error_msg, enrich_fn=_enrich_origin) @api_route(r'/origin/(?P[0-9]+)/visits/', 'origin-visits') @api_doc.route('/origin/visits/') @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc='software origin identifier') @api_doc.header('Link', doc=doc_header_link) @api_doc.param('last_visit', default=None, argtype=api_doc.argtypes.int, doc=doc_arg_last_elt) @api_doc.param('per_page', default=10, argtype=api_doc.argtypes.int, doc=doc_arg_per_page) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.list, retdoc="""a list of dictionaries describing individual visits. For each visit, its identifier, timestamp (as UNIX time), outcome, and visit-specific URL for more information are given.""") def api_origin_visits(request, origin_id): """Get information about all visits of a given software origin. 
""" result = {} - per_page = int(utils.get_query_params(request).get('per_page', '10')) - last_visit = utils.get_query_params(request).get('last_visit') + per_page = int(request.query_params.get('per_page', '10')) + last_visit = request.query_params.get('last_visit') if last_visit: last_visit = int(last_visit) def _lookup_origin_visits( origin_id, last_visit=last_visit, per_page=per_page): return service.lookup_origin_visits( origin_id, last_visit=last_visit, per_page=per_page) def _enrich_origin_visit(origin_visit): ov = origin_visit.copy() ov['origin_visit_url'] = reverse('origin-visit', kwargs={'origin_id': origin_id, 'visit_id': ov['visit']}) return ov r = api_lookup( _lookup_origin_visits, origin_id, notfound_msg='No origin {} found'.format(origin_id), enrich_fn=_enrich_origin_visit) if r: l = len(r) if l == per_page: new_last_visit = r[-1]['visit'] query_params = {} query_params['last_visit'] = new_last_visit - if utils.get_query_params(request).get('per_page'): + if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('origin-visits', kwargs={'origin_id': origin_id}, query_params=query_params) } result.update({ 'results': r }) return result @api_route(r'/origin/(?P[0-9]+)/visit/(?P[0-9]+)/', 'origin-visit') @api_doc.route('/origin/visit/') @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc='software origin identifier') @api_doc.arg('visit_id', default=1, argtype=api_doc.argtypes.int, argdoc="""visit identifier, relative to the origin identified by origin_id""") @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""dictionary containing both metadata for the entire visit (e.g., timestamp as UNIX time, visit outcome, etc.) and what was at the software origin during the visit (i.e., a mapping from branches to other archive objects)""") def api_origin_visit(request, origin_id, visit_id): """Get information about a specific visit of a software origin. """ def _enrich_origin_visit(origin_visit): ov = origin_visit.copy() ov['origin_url'] = reverse('origin', kwargs={'origin_id': ov['origin']}) if 'occurrences' in ov: ov['occurrences'] = { k: utils.enrich_object(v) for k, v in ov['occurrences'].items() } return ov return api_lookup( service.lookup_origin_visit, origin_id, visit_id, notfound_msg=('No visit {} for origin {} found' .format(visit_id, origin_id)), enrich_fn=_enrich_origin_visit) diff --git a/swh/web/api/views/revision.py b/swh/web/api/views/revision.py index e8c02088..017ace52 100644 --- a/swh/web/api/views/revision.py +++ b/swh/web/api/views/revision.py @@ -1,421 +1,421 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.http import HttpResponse from swh.web.common import service from swh.web.common.utils import reverse from swh.web.common.utils import parse_timestamp from swh.web.api import utils from swh.web.api import apidoc as api_doc from swh.web.api.apiurls import api_route from swh.web.api.views.utils import ( api_lookup, doc_exc_id_not_found, doc_header_link, doc_arg_per_page, doc_exc_bad_id, doc_ret_revision_log, doc_ret_revision_meta ) def _revision_directory_by(revision, path, request_path, limit=100, with_data=False): """Compute the revision matching criterion's directory or content data. 
Args: revision: dictionary of criterions representing a revision to lookup path: directory's path to lookup request_path: request path which holds the original context to limit: optional query parameter to limit the revisions log (default to 100). For now, note that this limit could impede the transitivity conclusion about sha1_git not being an ancestor of with_data: indicate to retrieve the content's raw data if path resolves to a content. """ def enrich_directory_local(dir, context_url=request_path): return utils.enrich_directory(dir, context_url) rev_id, result = service.lookup_directory_through_revision( revision, path, limit=limit, with_data=with_data) content = result['content'] if result['type'] == 'dir': # dir_entries result['content'] = list(map(enrich_directory_local, content)) else: # content result['content'] = utils.enrich_content(content) return result @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/log/', 'revision-origin-log') @api_route(r'/revision/origin/(?P[0-9]+)/log/', 'revision-origin-log') @api_route(r'/revision/origin/(?P[0-9]+)' r'/ts/(?P.+)/log/', 'revision-origin-log') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)' r'/ts/(?P.+)/log/', 'revision-origin-log') @api_doc.route('/revision/origin/log/') @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc="The revision's SWH origin identifier") @api_doc.arg('branch_name', default='refs/heads/master', argtype=api_doc.argtypes.path, argdoc="""(Optional) The revision's branch name within the origin specified. Defaults to 'refs/heads/master'.""") @api_doc.arg('ts', default='2000-01-17T11:23:54+00:00', argtype=api_doc.argtypes.ts, argdoc="""(Optional) A time or timestamp string to parse""") @api_doc.header('Link', doc=doc_header_link) @api_doc.param('per_page', default=10, argtype=api_doc.argtypes.int, doc=doc_arg_per_page) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc=doc_ret_revision_log) def api_revision_log_by(request, origin_id, branch_name='refs/heads/master', ts=None): """Show the commit log for a revision, searching for it based on software origin, branch name, and/or visit timestamp. This endpoint behaves like ``/log``, but operates on the revision that has been found at a given software origin, close to a given point in time, pointed by a given branch. """ result = {} - per_page = int(utils.get_query_params(request).get('per_page', '10')) + per_page = int(request.query_params.get('per_page', '10')) if ts: ts = parse_timestamp(ts) def lookup_revision_log_by_with_limit(o_id, br, ts, limit=per_page+1): return service.lookup_revision_log_by(o_id, br, ts, limit) error_msg = 'No revision matching origin %s ' % origin_id error_msg += ', branch name %s' % branch_name error_msg += (' and time stamp %s.' % ts) if ts else '.' 
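# --- Illustrative sketch, not part of the patch ---
# The paginated endpoints above (content-symbol, origin-visits, and the revision
# log views in this file) put a 'link-next' URL under rv['headers'];
# compute_link_header() in apiresponse.py then turns it into an HTTP Link
# response header. Roughly:
rv = {
    'results': [],  # the current page of results
    'headers': {'link-next': '/api/1/origin/1/visits/?last_visit=10&per_page=10'},
}
# compute_link_header(rv, {}) returns headers along the lines of:
#   {'Link': '</api/1/origin/1/visits/?last_visit=10&per_page=10>; rel="next"'}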
rev_get = api_lookup( lookup_revision_log_by_with_limit, origin_id, branch_name, ts, notfound_msg=error_msg, enrich_fn=utils.enrich_revision) l = len(rev_get) if l == per_page+1: revisions = rev_get[:-1] last_sha1_git = rev_get[-1]['id'] params = {k: v for k, v in {'origin_id': origin_id, 'branch_name': branch_name, 'ts': ts, }.items() if v is not None} query_params = {} query_params['sha1_git'] = last_sha1_git - if utils.get_query_params(request).get('per_page'): + if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('revision-origin-log', kwargs=params, query_params=query_params) } else: revisions = rev_get result.update({'results': revisions}) return result @api_route(r'/revision/origin/(?P[0-9]+)/directory/', 'revision-directory') @api_route(r'/revision/origin/(?P[0-9]+)/directory/(?P.+)/', 'revision-directory') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/directory/', 'revision-directory') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/ts/(?P.+)/directory/', 'revision-directory') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/directory/(?P.+)/', 'revision-directory') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/ts/(?P.+)' r'/directory/(?P.+)/', 'revision-directory') @api_doc.route('/revision/origin/directory/', tags=['hidden']) @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc="The revision's origin's SWH identifier") @api_doc.arg('branch_name', default='refs/heads/master', argtype=api_doc.argtypes.path, argdoc="""The optional branch for the given origin (default to master""") @api_doc.arg('ts', default='2000-01-17T11:23:54+00:00', argtype=api_doc.argtypes.ts, argdoc="""Optional timestamp (default to the nearest time crawl of timestamp)""") @api_doc.arg('path', default='Dockerfile', argtype=api_doc.argtypes.path, argdoc='The path to the directory or file to display') @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""The metadata of the revision corresponding to the given criteria""") def api_directory_through_revision_origin(request, origin_id, branch_name="refs/heads/master", ts=None, path=None, with_data=False): """Display directory or content information through a revision identified by origin/branch/timestamp. """ if ts: ts = parse_timestamp(ts) return _revision_directory_by({'origin_id': origin_id, 'branch_name': branch_name, 'ts': ts }, path, request.path, with_data=with_data) @api_route(r'/revision/origin/(?P[0-9]+)/', 'revision-origin') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/', 'revision-origin') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/ts/(?P.+)/', 'revision-origin') @api_route(r'/revision/origin/(?P[0-9]+)/ts/(?P.+)/', 'revision-origin') @api_doc.route('/revision/origin/') @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc='software origin identifier') @api_doc.arg('branch_name', default='refs/heads/master', argtype=api_doc.argtypes.path, argdoc="""(optional) fully-qualified branch name, e.g., "refs/heads/master". Defaults to the master branch.""") @api_doc.arg('ts', default=None, argtype=api_doc.argtypes.ts, argdoc="""(optional) timestamp close to which the revision pointed by the given branch should be looked up. 
Defaults to now.""") @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc=doc_ret_revision_meta) def api_revision_with_origin(request, origin_id, branch_name="refs/heads/master", ts=None): """Get information about a revision, searching for it based on software origin, branch name, and/or visit timestamp. This endpoint behaves like ``/revision``, but operates on the revision that has been found at a given software origin, close to a given point in time, pointed by a given branch. """ ts = parse_timestamp(ts) return api_lookup( service.lookup_revision_by, origin_id, branch_name, ts, notfound_msg=('Revision with (origin_id: {}, branch_name: {}' ', ts: {}) not found.'.format(origin_id, branch_name, ts)), enrich_fn=utils.enrich_revision) @api_route(r'/revision/(?P[0-9a-f]+)/prev/(?P[0-9a-f/]+)/', 'revision-context') @api_doc.route('/revision/prev/', tags=['hidden']) @api_doc.arg('sha1_git', default='ec72c666fb345ea5f21359b7bc063710ce558e39', argtype=api_doc.argtypes.sha1_git, argdoc="The revision's sha1_git identifier") @api_doc.arg('context', default='6adc4a22f20bbf3bbc754f1ec8c82be5dfb5c71a', argtype=api_doc.argtypes.path, argdoc='The navigation breadcrumbs -- use at your own risk') @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc='The metadata of the revision identified by sha1_git') def api_revision_with_context(request, sha1_git, context): """Return information about revision with id sha1_git. """ def _enrich_revision(revision, context=context): return utils.enrich_revision(revision, context) return api_lookup( service.lookup_revision, sha1_git, notfound_msg='Revision with sha1_git %s not found.' % sha1_git, enrich_fn=_enrich_revision) @api_route(r'/revision/(?P[0-9a-f]+)/', 'revision') @api_doc.route('/revision/') @api_doc.arg('sha1_git', default='aafb16d69fd30ff58afdd69036a26047f3aebdc6', argtype=api_doc.argtypes.sha1_git, argdoc="revision identifier") @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc=doc_ret_revision_meta) def api_revision(request, sha1_git): """Get information about a revision. Revisions are identified by SHA1 checksums, compatible with Git commit identifiers. See ``revision_identifier`` in our `data model module `_ for details about how they are computed. 
""" return api_lookup( service.lookup_revision, sha1_git, notfound_msg='Revision with sha1_git {} not found.'.format(sha1_git), enrich_fn=utils.enrich_revision) @api_route(r'/revision/(?P[0-9a-f]+)/raw/', 'revision-raw-message') @api_doc.route('/revision/raw/', tags=['hidden'], handle_response=True) @api_doc.arg('sha1_git', default='ec72c666fb345ea5f21359b7bc063710ce558e39', argtype=api_doc.argtypes.sha1_git, argdoc="The queried revision's sha1_git identifier") @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.octet_stream, retdoc="""The message of the revision identified by sha1_git as a downloadable octet stream""") def api_revision_raw_message(request, sha1_git): """Return the raw data of the message of revision identified by sha1_git """ raw = service.lookup_revision_message(sha1_git) response = HttpResponse(raw['message'], content_type='application/octet-stream') response['Content-disposition'] = \ 'attachment;filename=rev_%s_raw' % sha1_git return response @api_route(r'/revision/(?P[0-9a-f]+)/directory/', 'revision-directory') @api_route(r'/revision/(?P[0-9a-f]+)/directory/(?P.+)/', 'revision-directory') @api_doc.route('/revision/directory/') @api_doc.arg('sha1_git', default='ec72c666fb345ea5f21359b7bc063710ce558e39', argtype=api_doc.argtypes.sha1_git, argdoc='revision identifier') @api_doc.arg('dir_path', default='Documentation/BUG-HUNTING', argtype=api_doc.argtypes.path, argdoc="""path relative to the root directory of revision identifier by sha1_git""") @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""either a list of directory entries with their metadata, or the metadata of a single directory entry""") def api_revision_directory(request, sha1_git, dir_path=None, with_data=False): """Get information about directory (entry) objects associated to revisions. Each revision is associated to a single "root" directory. This endpoint behaves like ``/directory/``, but operates on the root directory associated to a given revision. """ return _revision_directory_by({'sha1_git': sha1_git}, dir_path, request.path, with_data=with_data) @api_route(r'/revision/(?P[0-9a-f]+)/log/', 'revision-log') @api_route(r'/revision/(?P[0-9a-f]+)' r'/prev/(?P[0-9a-f/]+)/log/', 'revision-log') @api_doc.route('/revision/log/') @api_doc.arg('sha1_git', default='37fc9e08d0c4b71807a4f1ecb06112e78d91c283', argtype=api_doc.argtypes.sha1_git, argdoc='revision identifier') @api_doc.arg('prev_sha1s', default='6adc4a22f20bbf3bbc754f1ec8c82be5dfb5c71a', argtype=api_doc.argtypes.path, argdoc="""(Optional) Navigation breadcrumbs (descendant revisions previously visited). If multiple values, use / as delimiter. """) @api_doc.header('Link', doc=doc_header_link) @api_doc.param('per_page', default=10, argtype=api_doc.argtypes.int, doc=doc_arg_per_page) @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc=doc_ret_revision_log) def api_revision_log(request, sha1_git, prev_sha1s=None): """Get a list of all revisions heading to a given one, i.e., show the commit log. 
""" result = {} - per_page = int(utils.get_query_params(request).get('per_page', '10')) + per_page = int(request.query_params.get('per_page', '10')) def lookup_revision_log_with_limit(s, limit=per_page+1): return service.lookup_revision_log(s, limit) error_msg = 'Revision with sha1_git %s not found.' % sha1_git rev_get = api_lookup(lookup_revision_log_with_limit, sha1_git, notfound_msg=error_msg, enrich_fn=utils.enrich_revision) l = len(rev_get) if l == per_page+1: rev_backward = rev_get[:-1] new_last_sha1 = rev_get[-1]['id'] query_params = {} - if utils.get_query_params(request).get('per_page'): + if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('revision-log', kwargs={'sha1_git': new_last_sha1}, query_params=query_params) } else: rev_backward = rev_get if not prev_sha1s: # no nav breadcrumbs, so we're done revisions = rev_backward else: rev_forward_ids = prev_sha1s.split('/') rev_forward = api_lookup( service.lookup_revision_multiple, rev_forward_ids, notfound_msg=error_msg, enrich_fn=utils.enrich_revision) revisions = rev_forward + rev_backward result.update({ 'results': revisions }) return result diff --git a/swh/web/browse/utils.py b/swh/web/browse/utils.py index 0089292c..8c2df7ff 100644 --- a/swh/web/browse/utils.py +++ b/swh/web/browse/utils.py @@ -1,473 +1,464 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 import magic import math import stat import textwrap from django.core.cache import cache from django.utils.safestring import mark_safe from swh.web.common import highlightjs, service from swh.web.common.exc import NotFoundExc from swh.web.common.utils import ( reverse, format_utc_iso_date, parse_timestamp ) def get_directory_entries(sha1_git): """Function that retrieves the content of a SWH directory from the SWH archive. The directories entries are first sorted in lexicographical order. Sub-directories and regular files are then extracted. Args: sha1_git: sha1_git identifier of the directory Returns: A tuple whose first member corresponds to the sub-directories list and second member the regular files list Raises: NotFoundExc if the directory is not found """ cache_entry_id = 'directory_entries_%s' % sha1_git cache_entry = cache.get(cache_entry_id) if cache_entry: return cache_entry entries = list(service.lookup_directory(sha1_git)) entries = sorted(entries, key=lambda e: e['name']) for entry in entries: entry['perms'] = stat.filemode(entry['perms']) dirs = [e for e in entries if e['type'] == 'dir'] files = [e for e in entries if e['type'] == 'file'] cache.set(cache_entry_id, (dirs, files)) return dirs, files def gen_path_info(path): """Function to generate path data navigation for use with a breadcrumb in the swh web ui. For instance, from a path /folder1/folder2/folder3, it returns the following list:: [{'name': 'folder1', 'path': 'folder1'}, {'name': 'folder2', 'path': 'folder1/folder2'}, {'name': 'folder3', 'path': 'folder1/folder2/folder3'}] Args: path: a filesystem path Returns: A list of path data for navigation as illustrated above. 
""" path_info = [] if path: sub_paths = path.strip('/').split('/') path_from_root = '' for p in sub_paths: path_from_root += '/' + p path_info.append({'name': p, 'path': path_from_root.strip('/')}) return path_info def get_mimetype_and_encoding_for_content(content): """Function that returns the mime type and the encoding associated to a content buffer using the magic module under the hood. Args: content (bytes): a content buffer Returns: A tuple (mimetype, encoding), for instance ('text/plain', 'us-ascii'), associated to the provided content. """ - if hasattr(magic, 'detect_from_content'): - magic_result = magic.detect_from_content(content) - return magic_result.mime_type, magic_result.encoding - # for old api version of magic module (debian jessie) - else: - m = magic.open(magic.MAGIC_MIME) - m.load() - magic_result = m.buffer(content).split(';') - mimetype = magic_result[0] - encoding = magic_result[1].split('=')[1] - return mimetype, encoding + magic_result = magic.detect_from_content(content) + return magic_result.mime_type, magic_result.encoding def request_content(query_string): """Function that retrieves a SWH content from the SWH archive. Raw bytes content is first retrieved, then the content mime type. If the mime type is not stored in the archive, it will be computed using Python magic module. Args: query_string: a string of the form "[ALGO_HASH:]HASH" where optional ALGO_HASH can be either *sha1*, *sha1_git*, *sha256*, or *blake2s256* (default to *sha1*) and HASH the hexadecimal representation of the hash value Returns: A tuple whose first member corresponds to the content raw bytes and second member the content mime type Raises: NotFoundExc if the content is not found """ content_data = service.lookup_content(query_string) content_raw = service.lookup_content_raw(query_string) content_data['raw_data'] = content_raw['data'] filetype = service.lookup_content_filetype(query_string) language = service.lookup_content_language(query_string) license = service.lookup_content_license(query_string) if filetype: mimetype = filetype['mimetype'] encoding = filetype['encoding'] else: mimetype, encoding = \ get_mimetype_and_encoding_for_content(content_data['raw_data']) content_data['mimetype'] = mimetype content_data['encoding'] = encoding # encode textual content to utf-8 if needed if mimetype.startswith('text/') and 'ascii' not in encoding \ and 'utf-8' not in encoding: content_data['raw_data'] = \ content_data['raw_data'].decode(encoding).encode('utf-8') if language: content_data['language'] = language['lang'] else: content_data['language'] = 'not detected' if license: content_data['licenses'] = ', '.join(license['licenses']) else: content_data['licenses'] = 'not detected' return content_data _browsers_supported_image_mimes = set(['image/gif', 'image/png', 'image/jpeg', 'image/bmp', 'image/webp']) def prepare_content_for_display(content_data, mime_type, path): """Function that prepares a content for HTML display. The function tries to associate a programming language to a content in order to perform syntax highlighting client-side using highlightjs. The language is determined using either the content filename or its mime type. If the mime type corresponds to an image format supported by web browsers, the content will be encoded in base64 for displaying the image. 
Args: content_data (bytes): raw bytes of the content mime_type (string): mime type of the content path (string): path of the content including filename Returns: A dict containing the content bytes (possibly different from the one provided as parameter if it is an image) under the key 'content_data and the corresponding highlightjs language class under the key 'language'. """ language = highlightjs.get_hljs_language_from_filename(path) if not language: language = highlightjs.get_hljs_language_from_mime_type(mime_type) if not language: language = 'nohighlight-swh' elif mime_type.startswith('application/'): mime_type = mime_type.replace('application/', 'text/') if mime_type.startswith('image/'): if mime_type in _browsers_supported_image_mimes: content_data = base64.b64encode(content_data) else: content_data = None return {'content_data': content_data, 'language': language} def get_origin_visits(origin_id): """Function that returns the list of visits for a swh origin. That list is put in cache in order to speedup the navigation in the swh web browse ui. Args: origin_id (int): the id of the swh origin to fetch visits from Returns: A list of dict describing the origin visits:: [{'date': , 'origin': , 'status': <'full' | 'partial'>, 'visit': }, ... ] Raises: NotFoundExc if the origin is not found """ cache_entry_id = 'origin_%s_visits' % origin_id cache_entry = cache.get(cache_entry_id) if cache_entry: return cache_entry origin_visits = [] per_page = service.MAX_LIMIT last_visit = None while 1: visits = list(service.lookup_origin_visits(origin_id, last_visit=last_visit, per_page=per_page)) origin_visits += visits if len(visits) < per_page: break else: if not last_visit: last_visit = per_page else: last_visit += per_page cache.set(cache_entry_id, origin_visits) return origin_visits def get_origin_visit_branches(origin_id, visit_id=None, visit_ts=None): """Function that returns the list of branches associated to a swh origin for a given visit. The visit can be expressed by its id or a timestamp. In the latter case, the closest visit from the provided timestamp will be used. If no visit parameter is provided, it returns the list of branches found for the latest visit. That list is put in cache in order to speedup the navigation in the swh web browse ui. Args: origin_id (int): the id of the swh origin to fetch branches from visit_id (int): the id of the origin visit visit_ts (int or str): an ISO date string or Unix timestamp to parse Returns: A list of dict describing the origin branches for the given visit:: [{'name': , 'revision': , 'directory': }, ... ] Raises: NotFoundExc if the origin or its visit are not found """ if not visit_id and visit_ts: parsed_visit_ts = math.floor(parse_timestamp(visit_ts).timestamp()) visits = get_origin_visits(origin_id) for i, visit in enumerate(visits): ts = math.floor(parse_timestamp(visit['date']).timestamp()) if i == 0: if parsed_visit_ts <= ts: return get_origin_visit_branches(origin_id, visit['visit']) elif i == len(visits) - 1: if parsed_visit_ts >= ts: return get_origin_visit_branches(origin_id, visit['visit']) else: next_ts = math.floor( parse_timestamp(visits[i+1]['date']).timestamp()) if parsed_visit_ts >= ts and parsed_visit_ts < next_ts: if (parsed_visit_ts - ts) < (next_ts - parsed_visit_ts): return get_origin_visit_branches(origin_id, visit['visit']) else: return get_origin_visit_branches(origin_id, visits[i+1]['visit']) raise NotFoundExc( 'Visit with timestamp %s for origin with id %s not found!' 
% (visit_ts, origin_id)) cache_entry_id = 'origin_%s_visit_%s_branches' % (origin_id, visit_id) cache_entry = cache.get(cache_entry_id) if cache_entry: return cache_entry origin_visit_data = service.lookup_origin_visit(origin_id, visit_id) branches = [] revision_ids = [] occurrences = origin_visit_data['occurrences'] for key in sorted(occurrences.keys()): if occurrences[key]['target_type'] == 'revision': branches.append({'name': key, 'revision': occurrences[key]['target']}) revision_ids.append(occurrences[key]['target']) revisions = service.lookup_revision_multiple(revision_ids) branches_to_remove = [] for idx, revision in enumerate(revisions): if revision: branches[idx]['directory'] = revision['directory'] else: branches_to_remove.append(branches[idx]) for b in branches_to_remove: branches.remove(b) cache.set(cache_entry_id, branches) return branches def gen_link(url, link_text): """ Utility function for generating an HTML link to insert in Django templates. Args: url (str): an url link_text (str): the text for the produced link Returns: An HTML link in the form 'link_text' """ link = '%s' % (url, link_text) return mark_safe(link) def gen_person_link(person_id, person_name): """ Utility function for generating a link to a SWH person HTML view to insert in Django templates. Args: person_id (int): a SWH person id person_name (str): the associated person name Returns: An HTML link in the form 'person_name' """ person_url = reverse('browse-person', kwargs={'person_id': person_id}) return gen_link(person_url, person_name) def gen_revision_link(revision_id, shorten_id=False): """ Utility function for generating a link to a SWH revision HTML view to insert in Django templates. Args: revision_id (int): a SWH revision id shorten_id (boolean): wheter to shorten the revision id to 7 characters for the link text Returns: An HTML link in the form 'revision_id' """ revision_url = reverse('browse-revision', kwargs={'sha1_git': revision_id}) if shorten_id: return gen_link(revision_url, revision_id[:7]) else: return gen_link(revision_url, revision_id) def _format_log_entries(revision_log, per_page): revision_log_data = [] for i, log in enumerate(revision_log): if i == per_page: break revision_log_data.append( {'author': gen_person_link(log['author']['id'], log['author']['name']), 'revision': gen_revision_link(log['id'], True), 'message': log['message'], 'message_shorten': textwrap.shorten(log['message'], width=80, placeholder='...'), 'date': format_utc_iso_date(log['date']), 'directory': log['directory']}) return revision_log_data def prepare_revision_log_for_display(revision_log, per_page, revs_breadcrumb, origin_context=False): """ Utility functions that process raw revision log data for HTML display. Its purpose is to: * add links to relevant SWH browse views * format date in human readable format * truncate the message log It also computes the data needed to generate the links for navigating back and forth in the history log. Args: revision_log (list): raw revision log as returned by the SWH web api per_page (int): number of log entries per page revs_breadcrumb (str): breadcrumbs of revisions navigated so far, in the form 'rev1[/rev2/../revN]'. Each revision corresponds to the first one displayed in the HTML view for history log. origin_context (boolean): wheter or not the revision log is browsed from an origin view. 
""" current_rev = revision_log[0]['id'] next_rev = None prev_rev = None next_revs_breadcrumb = None prev_revs_breadcrumb = None if len(revision_log) == per_page + 1: prev_rev = revision_log[-1]['id'] prev_rev_bc = current_rev if origin_context: prev_rev_bc = prev_rev if revs_breadcrumb: revs = revs_breadcrumb.split('/') next_rev = revs[-1] if len(revs) > 1: next_revs_breadcrumb = '/'.join(revs[:-1]) if len(revision_log) == per_page + 1: prev_revs_breadcrumb = revs_breadcrumb + '/' + prev_rev_bc else: prev_revs_breadcrumb = prev_rev_bc return {'revision_log_data': _format_log_entries(revision_log, per_page), 'prev_rev': prev_rev, 'prev_revs_breadcrumb': prev_revs_breadcrumb, 'next_rev': next_rev, 'next_revs_breadcrumb': next_revs_breadcrumb} diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py index b1148e94..834c5707 100644 --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -1,143 +1,133 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import re from datetime import datetime, timezone from dateutil import parser as date_parser from swh.web.common.exc import BadInputExc -import urllib - from django.core import urlresolvers from django.http import QueryDict -# override django reverse function in order to get -# the same result on debian jessie and stretch -# (see https://code.djangoproject.com/ticket/22223) def reverse(viewname, args=None, kwargs=None, query_params=None, current_app=None, urlconf=None): - """An override of django reverse function supporting multiple - django versions (from 1.7 to current) and query parameters. + """An override of django reverse function supporting query parameters. Args: - viewname: the name of the django view from which to compute - a url + viewname: the name of the django view from which to compute a url args: list of url arguments ordered according to their position it kwargs: dictionnary of url arguments indexed by their names query_params: dictionnary of query parameters to append to the - reversed url + reversed url current_app: the name of the django app tighted to the view urlconf: url configuration module Returns: The url of the requested view with processed arguments and query parameters """ - url = urllib.parse.unquote( - urlresolvers.reverse( + url = urlresolvers.reverse( viewname, urlconf=urlconf, args=args, - kwargs=kwargs, current_app=current_app - ) - ) + kwargs=kwargs, current_app=current_app) if query_params: query_params = {k: v for k, v in query_params.items() if v is not None} if query_params and len(query_params) > 0: query_dict = QueryDict('', mutable=True) for k, v in query_params.items(): query_dict[k] = v url += ('?' + query_dict.urlencode(safe='/')) return url def fmap(f, data): """Map f to data at each level. This must keep the origin data structure type: - map -> map - dict -> dict - list -> list - None -> None Args: f: function that expects one argument. data: data to traverse to apply the f function. list, map, dict or bare value. Returns: The same data-structure with modified values by the f function. 
""" if data is None: return data if isinstance(data, map): return map(lambda y: fmap(f, y), (x for x in data)) if isinstance(data, list): return [fmap(f, x) for x in data] if isinstance(data, dict): return {k: fmap(f, v) for (k, v) in data.items()} return f(data) def parse_timestamp(timestamp): """Given a time or timestamp (as string), parse the result as datetime. Returns: a timezone-aware datetime representing the parsed value. None if the parsing fails. Samples: - 2016-01-12 - 2016-01-12T09:19:12+0100 - Today is January 1, 2047 at 8:21:00AM - 1452591542 """ if not timestamp: return None try: return date_parser.parse(timestamp, ignoretz=False, fuzzy=True) except: try: return datetime.utcfromtimestamp(float(timestamp)).replace( tzinfo=timezone.utc) except (ValueError, OverflowError) as e: raise BadInputExc(e) def shorten_path(path): """Shorten the given path: for each hash present, only return the first 8 characters followed by an ellipsis""" sha256_re = r'([0-9a-f]{8})[0-9a-z]{56}' sha1_re = r'([0-9a-f]{8})[0-9a-f]{32}' ret = re.sub(sha256_re, r'\1...', path) return re.sub(sha1_re, r'\1...', ret) def format_utc_iso_date(iso_date): """Turns a string reprensation of an UTC iso date into a more human readable one. More precisely, from the following input string: '2017-05-04T13:27:13+02:00' the following one is returned: '04 May 2017, 13:27 UTC'. Args: iso_date (str): a string representation of an UTC iso date Returns: A human readable string representation of the input iso date """ date = date_parser.parse(iso_date) return date.strftime('%d %B %Y, %H:%M UTC') diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py index 128a64bd..57cc078d 100644 --- a/swh/web/settings/common.py +++ b/swh/web/settings/common.py @@ -1,193 +1,189 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ Django settings for swhweb project. Generated by 'django-admin startproject' using Django 1.11.3. For more information on this file, see https://docs.djangoproject.com/en/1.11/topics/settings/ For the full list of settings and their values, see https://docs.djangoproject.com/en/1.11/ref/settings/ """ import os from swh.web.config import get_config swh_web_config = get_config() # Build paths inside the project like this: os.path.join(BASE_DIR, ...) PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! SECRET_KEY = swh_web_config['secret_key'] # SECURITY WARNING: don't run with debug turned on in production! 
DEBUG = swh_web_config['debug'] DEBUG_PROPAGATE_EXCEPTIONS = swh_web_config['debug'] ALLOWED_HOSTS = ['127.0.0.1', 'localhost'] + swh_web_config['allowed_hosts'] # Application definition INSTALLED_APPS = [ 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', 'rest_framework', 'swh.web.api', 'swh.web.browse' ] MIDDLEWARE = [ 'django.middleware.security.SecurityMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.common.CommonMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware', ] ROOT_URLCONF = 'swh.web.urls' TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', 'DIRS': [os.path.join(PROJECT_DIR, "../templates")], 'APP_DIRS': True, 'OPTIONS': { 'context_processors': [ 'django.template.context_processors.debug', 'django.template.context_processors.request', 'django.contrib.auth.context_processors.auth', 'django.contrib.messages.context_processors.messages', ], 'libraries': { 'swh_templatetags': 'swh.web.common.swh_templatetags', }, }, }, ] -TEMPLATE_DIRS = TEMPLATES[0]['DIRS'] - WSGI_APPLICATION = 'swh.web.wsgi.application' # Database # https://docs.djangoproject.com/en/1.11/ref/settings/#databases DATABASES = { 'default': { 'ENGINE': 'django.db.backends.sqlite3', 'NAME': os.path.join(PROJECT_DIR, 'db.sqlite3'), } } # Password validation # https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators AUTH_PASSWORD_VALIDATORS = [ { 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', # noqa }, { 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', # noqa }, { 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', # noqa }, { 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', # noqa }, ] # Internationalization # https://docs.djangoproject.com/en/1.11/topics/i18n/ LANGUAGE_CODE = 'en-us' TIME_ZONE = 'UTC' USE_I18N = True USE_L10N = True USE_TZ = True # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/1.11/howto/static-files/ STATIC_URL = '/static/' STATICFILES_DIRS = [ os.path.join(PROJECT_DIR, "../static") ] INTERNAL_IPS = ['127.0.0.1'] throttle_rates = {} throttling = swh_web_config['throttling'] for limiter_scope, limiter_conf in throttling['scopes'].items(): throttle_rates[limiter_scope] = limiter_conf['limiter_rate'] REST_FRAMEWORK = { 'DEFAULT_RENDERER_CLASSES': ( 'rest_framework.renderers.JSONRenderer', 'swh.web.api.renderers.YAMLRenderer', 'rest_framework.renderers.TemplateHTMLRenderer' ), 'DEFAULT_THROTTLE_CLASSES': ( 'swh.web.common.throttling.SwhWebRateThrottle', ), 'DEFAULT_THROTTLE_RATES': throttle_rates } LOGGING = { 'version': 1, 'disable_existing_loggers': False, 'filters': { 'require_debug_false': { '()': 'django.utils.log.RequireDebugFalse', }, 'require_debug_true': { '()': 'django.utils.log.RequireDebugTrue', }, }, 'handlers': { 'console': { 'level': 'DEBUG', 'filters': ['require_debug_true'], 'class': 'logging.StreamHandler', }, 'file': { 'level': 'INFO', 'filters': ['require_debug_false'], 'class': 'logging.FileHandler', 'filename': os.path.join(swh_web_config['log_dir'], 'swh-web.log'), }, }, 'loggers': { 'django': { 'handlers': ['console', 'file'], 'level': 'DEBUG' if DEBUG else 'INFO', 
'propagate': True, } }, } - -SILENCED_SYSTEM_CHECKS = ['1_7.W001', '1_8.W001'] diff --git a/swh/web/urls.py b/swh/web/urls.py index 5c6a766e..07556784 100644 --- a/swh/web/urls.py +++ b/swh/web/urls.py @@ -1,48 +1,37 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import django - from django.conf.urls import ( url, include, handler400, handler403, handler404, handler500 ) from django.contrib.staticfiles.urls import staticfiles_urlpatterns from django.shortcuts import redirect from django.views.generic.base import RedirectView from swh.web.common.exc import ( swh_handle400, swh_handle403, swh_handle404, swh_handle500 ) favicon_view = RedirectView.as_view(url='/static/img/icons/swh-logo-32x32.png', permanent=True) def default_view(request): return redirect('api_homepage') urlpatterns = [ url(r'^favicon\.ico$', favicon_view), url(r'^api/', include('swh.web.api.urls')), url(r'^browse/', include('swh.web.browse.urls')), url(r'^$', default_view, name='swh-web-homepage'), ] urlpatterns += staticfiles_urlpatterns() handler400 = swh_handle400 # noqa handler403 = swh_handle403 # noqa handler404 = swh_handle404 # noqa handler500 = swh_handle500 # noqa - -# hack in order for our custom template tag library -# to load on django 1.7 (debian jessie version) -if django.VERSION < (1, 8): - from django.template.base import templatetags_modules # noqa - templatetags_modules += ['django.templatetags', - 'django.contrib.admin.templatetags', - 'django.contrib.staticfiles.templatetags', - 'rest_framework.templatetags', 'swh.web.common']
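
Note on get_mimetype_and_encoding_for_content(): with the jessie-era fallback removed, the helper relies solely on the file_magic >= 0.3.0 API. A minimal sketch of that API, with an illustrative input and output:

    import magic

    def sniff(content):
        # detect_from_content() returns a result object exposing mime_type
        # and encoding attributes, so there is no longer any need to parse
        # a "mimetype; charset=encoding" string by hand.
        result = magic.detect_from_content(content)
        return result.mime_type, result.encoding

    sniff(b'#!/usr/bin/env python3\nprint("hello")\n')
    # -> e.g. ('text/x-python', 'us-ascii')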
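
Note on get_origin_visits(): the visit list is built with a fetch-all-pages-then-cache loop. A condensed sketch of that pattern, where fetch_page, cache and cache_key stand in for service.lookup_origin_visits, the Django cache and the per-origin cache entry id:

    def fetch_all(fetch_page, cache, cache_key, per_page=1000):
        cached = cache.get(cache_key)
        if cached:
            return cached
        results = []
        last = None
        while True:
            page = list(fetch_page(last_visit=last, per_page=per_page))
            results += page
            if len(page) < per_page:
                break  # a short page means the last one was reached
            # mirror the offset bookkeeping used in get_origin_visits()
            last = per_page if last is None else last + per_page
        cache.set(cache_key, results)
        return results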
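
Note on gen_link(): the anchor markup in the function body and its docstring appears to have been stripped by the HTML rendering of this diff. The intended pattern is presumably the usual mark_safe-wrapped anchor:

    from django.utils.safestring import mark_safe

    def gen_link(url, link_text):
        # Build an <a href="...">...</a> element and mark it safe so Django
        # templates render it as markup instead of escaping it.
        link = '<a href="%s">%s</a>' % (url, link_text)
        return mark_safe(link)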
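
Note on the simplified reverse() in swh/web/common/utils.py: None-valued query parameters are dropped and the remainder is urlencoded (with '/' kept unescaped) onto the reversed URL. A usage sketch with a hypothetical view name and argument value:

    url = reverse('browse-directory',                # hypothetical view name
                  kwargs={'sha1_git': 'deadbeef'},   # hypothetical argument
                  query_params={'path': 'src/main.c', 'revision': None})
    # 'revision' is None, so only 'path' survives; thanks to
    # urlencode(safe='/') the slash in the path is not percent-encoded,
    # giving roughly '.../directory/deadbeef/?path=src/main.c'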
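
Note on fmap() and shorten_path() in swh/web/common/utils.py, illustrated with made-up input values:

    fmap(lambda v: v * 2, {'a': [1, 2], 'b': {'c': 3}, 'd': None})
    # -> {'a': [2, 4], 'b': {'c': 6}, 'd': None}: the function is applied at
    #    each leaf while dict/list/map/None structure is preserved

    shorten_path('/api/1/content/sha1_git:' + 40 * 'a' + '/')
    # -> '/api/1/content/sha1_git:aaaaaaaa.../': any sha1/sha256 hex digest
    #    in the path is cut down to its first 8 digits plus an ellipsis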
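
Note on the throttling settings in swh/web/settings/common.py: DEFAULT_THROTTLE_RATES is derived from swh_web_config['throttling']. A hypothetical configuration fragment with the shape the loop expects (scope names and rates are made up; only the 'scopes' -> <scope> -> 'limiter_rate' nesting comes from the code):

    throttling = {
        'scopes': {
            'swh_api': {'limiter_rate': '120/h'},     # hypothetical scope
            'swh_search': {'limiter_rate': '10/m'},   # hypothetical scope
        },
    }

    throttle_rates = {scope: conf['limiter_rate']
                      for scope, conf in throttling['scopes'].items()}
    # -> {'swh_api': '120/h', 'swh_search': '10/m'}, handed to DRF via
    #    REST_FRAMEWORK['DEFAULT_THROTTLE_RATES']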