diff --git a/swh/web/api/utils.py b/swh/web/api/utils.py index 770932f1..33a23450 100644 --- a/swh/web/api/utils.py +++ b/swh/web/api/utils.py @@ -1,353 +1,353 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import re from swh.web.common.utils import reverse, fmap from swh.web.common.query import parse_hash def filter_endpoints(url_map, prefix_url_rule, blacklist=[]): """Filter endpoints by prefix url rule. Args: - url_map: Url Werkzeug.Map of rules - prefix_url_rule: prefix url string - blacklist: blacklist of some url Returns: Dictionary of url_rule with values methods and endpoint. The key is the url, the associated value is a dictionary of 'methods' (possible http methods) and 'endpoint' (python function) """ out = {} for r in url_map: rule = r['rule'] if rule == prefix_url_rule or rule in blacklist: continue if rule.startswith(prefix_url_rule): out[rule] = {'methods': sorted(map(str, r['methods'])), 'endpoint': r['endpoint']} return out def prepare_data_for_view(data, encoding='utf-8'): def prepare_data(s): # Note: can only be 'data' key with bytes of raw content if isinstance(s, bytes): try: return s.decode(encoding) - except: + except Exception: return "Cannot decode the data bytes, try and set another " \ "encoding in the url (e.g. ?encoding=utf8) or " \ "download directly the " \ "content's raw data." if isinstance(s, str): return re.sub(r'/api/1/', r'/browse/', s) return s return fmap(prepare_data, data) def filter_field_keys(data, field_keys): """Given an object instance (directory or list), and a csv field keys to filter on. Return the object instance with filtered keys. Note: Returns obj as is if it's an instance of types not in (dictionary, list) Args: - data: one object (dictionary, list...) to filter. 
- field_keys: csv or set of keys to filter the object on Returns: obj filtered on field_keys """ if isinstance(data, map): return map(lambda x: filter_field_keys(x, field_keys), data) if isinstance(data, list): return [filter_field_keys(x, field_keys) for x in data] if isinstance(data, dict): return {k: v for (k, v) in data.items() if k in field_keys} return data def person_to_string(person): """Map a person (person, committer, tagger, etc...) to a string. """ return ''.join([person['name'], ' <', person['email'], '>']) def enrich_object(object): """Enrich an object (revision, release) with link to the 'target' of type 'target_type'. Args: object: An object with target and target_type keys (e.g. release, revision) Returns: Object enriched with target_url pointing to the right swh.web.ui.api urls for the pointing object (revision, release, content, directory) """ obj = object.copy() if 'target' in obj and 'target_type' in obj: if obj['target_type'] == 'revision': obj['target_url'] = reverse('revision', kwargs={'sha1_git': obj['target']}) elif obj['target_type'] == 'release': obj['target_url'] = reverse('release', kwargs={'sha1_git': obj['target']}) elif obj['target_type'] == 'content': obj['target_url'] = \ reverse('content', kwargs={'q': 'sha1_git:' + obj['target']}) elif obj['target_type'] == 'directory': obj['target_url'] = reverse('directory', kwargs={'sha1_git': obj['target']}) if 'author' in obj: author = obj['author'] obj['author_url'] = reverse('person', kwargs={'person_id': author['id']}) return obj enrich_release = enrich_object def enrich_directory(directory, context_url=None): """Enrich directory with url to content or directory. 
""" if 'type' in directory: target_type = directory['type'] target = directory['target'] if target_type == 'file': directory['target_url'] = \ reverse('content', kwargs={'q': 'sha1_git:%s' % target}) if context_url: directory['file_url'] = context_url + directory['name'] + '/' elif target_type == 'dir': directory['target_url'] = reverse('directory', kwargs={'sha1_git': target}) if context_url: directory['dir_url'] = context_url + directory['name'] + '/' else: directory['target_url'] = reverse('revision', kwargs={'sha1_git': target}) if context_url: directory['rev_url'] = context_url + directory['name'] + '/' return directory def enrich_metadata_endpoint(content): """Enrich metadata endpoint with link to the upper metadata endpoint. """ c = content.copy() c['content_url'] = reverse('content', args=['sha1:%s' % c['id']]) return c def enrich_content(content, top_url=False, query_string=None): """Enrich content with links to: - data_url: its raw data - filetype_url: its filetype information - language_url: its programming language information - license_url: its licensing information Args: content: dict of data associated to a swh content object top_url: whether or not to include the content url in the enriched data query_string: optional query string of type ':' used when requesting the content, it acts as a hint for picking the same hash method when computing the url listed above Returns: An enriched content dict filled with additional urls """ checksums = content if 'checksums' in content: checksums = content['checksums'] hash_algo = 'sha1' if query_string: hash_algo = parse_hash(query_string)[0] if hash_algo in checksums: q = '%s:%s' % (hash_algo, checksums[hash_algo]) if top_url: content['content_url'] = reverse('content', kwargs={'q': q}) content['data_url'] = reverse('content-raw', kwargs={'q': q}) content['filetype_url'] = reverse('content-filetype', kwargs={'q': q}) content['language_url'] = reverse('content-language', kwargs={'q': q}) content['license_url'] = 
reverse('content-license', kwargs={'q': q}) return content def enrich_entity(entity): """Enrich entity with """ if 'uuid' in entity: entity['uuid_url'] = reverse('entity', kwargs={'uuid': entity['uuid']}) if 'parent' in entity and entity['parent']: entity['parent_url'] = reverse('entity', kwargs={'uuid': entity['parent']}) return entity def _get_path_list(path_string): """Helper for enrich_revision: get a list of the sha1 id of the navigation breadcrumbs, ordered from the oldest to the most recent. Args: path_string: the path as a '/'-separated string Returns: The navigation context as a list of sha1 revision ids """ return path_string.split('/') def _get_revision_contexts(rev_id, context): """Helper for enrich_revision: retrieve for the revision id and potentially the navigation breadcrumbs the context to pass to parents and children of of the revision. Args: rev_id: the revision's sha1 id context: the current navigation context Returns: The context for parents, children and the url of the direct child as a tuple in that order. """ context_for_parents = None context_for_children = None url_direct_child = None if not context: return (rev_id, None, None) path_list = _get_path_list(context) context_for_parents = '%s/%s' % (context, rev_id) prev_for_children = path_list[:-1] if len(prev_for_children) > 0: context_for_children = '/'.join(prev_for_children) child_id = path_list[-1] # This commit is not the first commit in the path if context_for_children: url_direct_child = reverse('revision-context', kwargs={'sha1_git': child_id, 'context': context_for_children}) # This commit is the first commit in the path else: url_direct_child = reverse('revision', kwargs={'sha1_git': child_id}) return (context_for_parents, context_for_children, url_direct_child) def _make_child_url(rev_children, context): """Helper for enrich_revision: retrieve the list of urls corresponding to the children of the current revision according to the navigation breadcrumbs. 
Args: rev_children: a list of revision id context: the '/'-separated navigation breadcrumbs Returns: the list of the children urls according to the context """ children = [] for child in rev_children: if context and child != _get_path_list(context)[-1]: children.append(reverse('revision', kwargs={'sha1_git': child})) elif not context: children.append(reverse('revision', kwargs={'sha1_git': child})) return children def enrich_revision(revision, context=None): """Enrich revision with links where it makes sense (directory, parents). Keep track of the navigation breadcrumbs if they are specified. Args: revision: the revision as a dict context: the navigation breadcrumbs as a /-separated string of revision sha1_git """ ctx_parents, ctx_children, url_direct_child = _get_revision_contexts( revision['id'], context) revision['url'] = reverse('revision', kwargs={'sha1_git': revision['id']}) revision['history_url'] = reverse('revision-log', kwargs={'sha1_git': revision['id']}) if context: revision['history_context_url'] = reverse( 'revision-log', kwargs={'sha1_git': revision['id'], 'prev_sha1s': context}) if 'author' in revision: author = revision['author'] revision['author_url'] = reverse('person', kwargs={'person_id': author['id']}) if 'committer' in revision: committer = revision['committer'] revision['committer_url'] = \ reverse('person', kwargs={'person_id': committer['id']}) if 'directory' in revision: revision['directory_url'] = \ reverse('directory', kwargs={'sha1_git': revision['directory']}) if 'parents' in revision: parents = [] for parent in revision['parents']: parents.append({ 'id': parent, 'url': reverse('revision', kwargs={'sha1_git': parent}) }) revision['parents'] = parents if 'children' in revision: children = _make_child_url(revision['children'], context) if url_direct_child: children.append(url_direct_child) revision['children_urls'] = children else: if url_direct_child: revision['children_urls'] = [url_direct_child] if 'message_decoding_failed' in 
revision: revision['message_url'] = reverse('revision-raw-message', kwargs={'sha1_git': revision['id']}) return revision diff --git a/swh/web/api/views/content.py b/swh/web/api/views/content.py index df404b29..6769f66b 100644 --- a/swh/web/api/views/content.py +++ b/swh/web/api/views/content.py @@ -1,340 +1,340 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import functools from django.http import HttpResponse from swh.web.common import service from swh.web.common.utils import reverse from swh.web.common.exc import NotFoundExc, ForbiddenExc from swh.web.api import apidoc as api_doc from swh.web.api import utils from swh.web.api.apiurls import api_route from swh.web.api.views.utils import ( api_lookup, doc_exc_id_not_found, doc_header_link, doc_arg_last_elt, doc_arg_per_page, doc_exc_bad_id, doc_arg_content_id ) @api_route(r'/content/(?P.+)/provenance/', 'content-provenance') @api_doc.route('/content/provenance/', tags=['hidden']) @api_doc.arg('q', default='sha1_git:88b9b366facda0b5ff8d8640ee9279bed346f242', argtype=api_doc.argtypes.algo_and_hash, argdoc=doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""List of provenance information (dict) for the matched content.""") def api_content_provenance(request, q): """Return content's provenance information if any. 
""" def _enrich_revision(provenance): p = provenance.copy() p['revision_url'] = \ reverse('revision', kwargs={'sha1_git': provenance['revision']}) p['content_url'] = \ reverse('content', kwargs={'q': 'sha1_git:%s' % provenance['content']}) p['origin_url'] = \ reverse('origin', kwargs={'origin_id': provenance['origin']}) p['origin_visits_url'] = \ reverse('origin-visits', kwargs={'origin_id': provenance['origin']}) p['origin_visit_url'] = \ reverse('origin-visit', kwargs={'origin_id': provenance['origin'], 'visit_id': provenance['visit']}) return p return api_lookup( service.lookup_content_provenance, q, notfound_msg='Content with {} not found.'.format(q), enrich_fn=_enrich_revision) @api_route(r'/content/(?P.+)/filetype/', 'content-filetype') @api_doc.route('/content/filetype/') @api_doc.arg('q', default='sha1:1fc6129a692e7a87b5450e2ba56e7669d0c5775d', argtype=api_doc.argtypes.algo_and_hash, argdoc=doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""Filetype information (dict) for the matched content.""") def api_content_filetype(request, q): """Get information about the detected MIME type of a content object. 
""" return api_lookup( service.lookup_content_filetype, q, notfound_msg='No filetype information found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) @api_route(r'/content/(?P.+)/language/', 'content-language') @api_doc.route('/content/language/') @api_doc.arg('q', default='sha1:1fc6129a692e7a87b5450e2ba56e7669d0c5775d', argtype=api_doc.argtypes.algo_and_hash, argdoc=doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""Language information (dict) for the matched content.""") def api_content_language(request, q): """Get information about the detected (programming) language of a content object. """ return api_lookup( service.lookup_content_language, q, notfound_msg='No language information found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) @api_route(r'/content/(?P.+)/license/', 'content-license') @api_doc.route('/content/license/') @api_doc.arg('q', default='sha1:1fc6129a692e7a87b5450e2ba56e7669d0c5775d', argtype=api_doc.argtypes.algo_and_hash, argdoc=doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""License information (dict) for the matched content.""") def api_content_license(request, q): """Get information about the detected license of a content object. 
""" return api_lookup( service.lookup_content_license, q, notfound_msg='No license information found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) @api_route(r'/content/(?P.+)/ctags/', 'content-ctags') @api_doc.route('/content/ctags/', tags=['upcoming']) @api_doc.arg('q', default='sha1:1fc6129a692e7a87b5450e2ba56e7669d0c5775d', argtype=api_doc.argtypes.algo_and_hash, argdoc=doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""Ctags symbol (dict) for the matched content.""") def api_content_ctags(request, q): """Get information about all `Ctags `_-style symbols defined in a content object. """ return api_lookup( service.lookup_content_ctags, q, notfound_msg='No ctags symbol found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) @api_route(r'/content/(?P.+)/raw/', 'content-raw') @api_doc.route('/content/raw/', handle_response=True) @api_doc.arg('q', default='adc83b19e793491b1c6ea0fd8b46cd9f32e592fc', argtype=api_doc.argtypes.algo_and_hash, argdoc=doc_arg_content_id) @api_doc.param('filename', default=None, argtype=api_doc.argtypes.str, doc='User\'s desired filename. If provided, the downloaded' ' content will get that filename.') @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.octet_stream, retdoc='The raw content data as an octet stream') def api_content_raw(request, q): """Get the raw content of a content object (AKA "blob"), as a byte sequence. """ def generate(content): yield content['data'] content_raw = service.lookup_content_raw(q) if not content_raw: raise NotFoundExc('Content %s is not found.' % q) content_filetype = service.lookup_content_filetype(q) if not content_filetype: raise NotFoundExc('Content %s is not available for download.' 
% q) mimetype = content_filetype['mimetype'] if 'text/' not in mimetype: raise ForbiddenExc('Only textual content is available for download. ' 'Actual content mimetype is %s.' % mimetype) filename = request.query_params.get('filename') if not filename: filename = 'content_%s_raw' % q.replace(':', '_') response = HttpResponse(generate(content_raw), content_type='application/octet-stream') response['Content-disposition'] = 'attachment; filename=%s' % filename return response @api_route(r'/content/symbol/(?P.+)/', 'content-symbol') @api_doc.route('/content/symbol/', tags=['upcoming']) @api_doc.arg('q', default='hello', argtype=api_doc.argtypes.str, argdoc="""An expression string to lookup in swh's raw content""") @api_doc.header('Link', doc=doc_header_link) @api_doc.param('last_sha1', default=None, argtype=api_doc.argtypes.str, doc=doc_arg_last_elt) @api_doc.param('per_page', default=10, argtype=api_doc.argtypes.int, doc=doc_arg_per_page) @api_doc.returns(rettype=api_doc.rettypes.list, retdoc="""A list of dict whose content matches the expression. Each dict has the following keys: - id (bytes): identifier of the content - name (text): symbol whose content match the expression - kind (text): kind of the symbol that matched - lang (text): Language for that entry - line (int): Number line for the symbol """) def api_content_symbol(request, q=None): """Search content objects by `Ctags `_-style symbol (e.g., function name, data type, method, ...). 
""" result = {} last_sha1 = request.query_params.get('last_sha1', None) per_page = int(request.query_params.get('per_page', '10')) def lookup_exp(exp, last_sha1=last_sha1, per_page=per_page): return service.lookup_expression(exp, last_sha1, per_page) symbols = api_lookup( lookup_exp, q, notfound_msg="No indexed raw content match expression '{}'.".format(q), enrich_fn=functools.partial(utils.enrich_content, top_url=True)) if symbols: - l = len(symbols) + nb_symbols = len(symbols) - if l == per_page: + if nb_symbols == per_page: query_params = {} new_last_sha1 = symbols[-1]['sha1'] query_params['last_sha1'] = new_last_sha1 if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('content-symbol', kwargs={'q': q}, query_params=query_params) } result.update({ 'results': symbols }) return result @api_route(r'/content/known/search/', 'content-known', methods=['POST']) @api_route(r'/content/known/(?P(?!search).*)/', 'content-known') @api_doc.route('/content/known/', tags=['hidden']) @api_doc.arg('q', default='adc83b19e793491b1c6ea0fd8b46cd9f32e592fc', argtype=api_doc.argtypes.sha1, argdoc='content identifier as a sha1 checksum') @api_doc.param('q', default=None, argtype=api_doc.argtypes.str, doc="""(POST request) An algo_hash:hash string, where algo_hash is one of sha1, sha1_git or sha256 and hash is the hash to search for in SWH""") @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""a dictionary with results (found/not found for each given identifier) and statistics about how many identifiers were found""") def api_check_content_known(request, q=None): """Check whether some content (AKA "blob") is present in the archive. 
Lookup can be performed by various means: - a GET request with one or several hashes, separated by ',' - a POST request with one or several hashes, passed as (multiple) values for parameter 'q' """ response = {'search_res': None, 'search_stats': None} search_stats = {'nbfiles': 0, 'pct': 0} search_res = None queries = [] # GET: Many hash separated values request if q: hashes = q.split(',') for v in hashes: queries.append({'filename': None, 'sha1': v}) # POST: Many hash requests in post form submission elif request.method == 'POST': data = request.data # Remove potential inputs with no associated value for k, v in data.items(): if v is not None: if k == 'q' and len(v) > 0: queries.append({'filename': None, 'sha1': v}) elif v != '': queries.append({'filename': k, 'sha1': v}) if queries: lookup = service.lookup_multiple_hashes(queries) result = [] - l = len(queries) + nb_queries = len(queries) for el in lookup: res_d = {'sha1': el['sha1'], 'found': el['found']} if 'filename' in el and el['filename']: res_d['filename'] = el['filename'] result.append(res_d) search_res = result nbfound = len([x for x in lookup if x['found']]) - search_stats['nbfiles'] = l - search_stats['pct'] = (nbfound / l) * 100 + search_stats['nbfiles'] = nb_queries + search_stats['pct'] = (nbfound / nb_queries) * 100 response['search_res'] = search_res response['search_stats'] = search_stats return response @api_route(r'/content/(?P.+)/', 'content') @api_doc.route('/content/') @api_doc.arg('q', default='dc2830a9e72f23c1dfebef4413003221baa5fb62', argtype=api_doc.argtypes.algo_and_hash, argdoc=doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""known metadata for content identified by q""") def api_content_metadata(request, q): """Get information about a content (AKA "blob") object. 
""" return api_lookup( service.lookup_content, q, notfound_msg='Content with {} not found.'.format(q), enrich_fn=functools.partial(utils.enrich_content, query_string=q)) diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py index 75db3a27..dce5d436 100644 --- a/swh/web/api/views/origin.py +++ b/swh/web/api/views/origin.py @@ -1,255 +1,254 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from distutils.util import strtobool from swh.web.common import service from swh.web.common.utils import reverse from swh.web.api import utils from swh.web.api import apidoc as api_doc from swh.web.api.apiurls import api_route from swh.web.api.views.utils import ( api_lookup, doc_exc_id_not_found, doc_header_link, doc_arg_last_elt, doc_arg_per_page ) def _enrich_origin(origin): if 'id' in origin: o = origin.copy() o['origin_visits_url'] = \ reverse('origin-visits', kwargs={'origin_id': origin['id']}) return o return origin @api_route(r'/origin/(?P[0-9]+)/', 'origin') @api_route(r'/origin/(?P[a-z]+)/url/(?P.+)', 'origin') @api_doc.route('/origin/') @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc='origin identifier (when looking up by ID)') @api_doc.arg('origin_type', default='git', argtype=api_doc.argtypes.str, argdoc='origin type (when looking up by type+URL)') @api_doc.arg('origin_url', default='https://github.com/hylang/hy', argtype=api_doc.argtypes.path, argdoc='origin URL (when looking up by type+URL)') @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""The metadata of the origin corresponding to the given criteria""") def api_origin(request, origin_id=None, origin_type=None, origin_url=None): """Get information about a software origin. 
Software origins might be looked up by origin type and canonical URL (e.g., "git" + a "git clone" URL), or by their unique (but otherwise meaningless) identifier. """ ori_dict = { 'id': origin_id, 'type': origin_type, 'url': origin_url } ori_dict = {k: v for k, v in ori_dict.items() if ori_dict[k]} if 'id' in ori_dict: error_msg = 'Origin with id %s not found.' % ori_dict['id'] else: error_msg = 'Origin with type %s and URL %s not found' % ( ori_dict['type'], ori_dict['url']) return api_lookup( service.lookup_origin, ori_dict, notfound_msg=error_msg, enrich_fn=_enrich_origin) @api_route(r'/origin/search/(?P.+)/', 'origin-search') @api_doc.route('/origin/search/') @api_doc.arg('url_pattern', default='python', argtype=api_doc.argtypes.str, argdoc='string pattern to search for in origin urls') @api_doc.header('Link', doc=doc_header_link) @api_doc.param('offset', default=0, argtype=api_doc.argtypes.int, doc='number of found origins to skip before returning results') # noqa @api_doc.param('limit', default=70, argtype=api_doc.argtypes.int, doc='the maximum number of found origins to return') @api_doc.param('regexp', default='false', argtype=api_doc.argtypes.str, doc="""if that query parameter is set to 'true', consider provided pattern as a regular expression and search origins whose urls match it""") @api_doc.returns(rettype=api_doc.rettypes.list, retdoc="""The metadata of the origins whose urls match the provided string pattern""") def api_origin_search(request, url_pattern): """Search for origins whose urls contain a provided string pattern or match a provided regular expression. The search is performed in a case insensitive way. 
""" result = {} offset = int(request.query_params.get('offset', '0')) limit = int(request.query_params.get('limit', '70')) regexp = request.query_params.get('regexp', 'false') - r = api_lookup(service.search_origin, url_pattern, offset, limit, - bool(strtobool(regexp)), enrich_fn=_enrich_origin) + results = api_lookup(service.search_origin, url_pattern, offset, limit, + bool(strtobool(regexp)), enrich_fn=_enrich_origin) - l = len(r) - if l == limit: + nb_results = len(results) + if nb_results == limit: query_params = {} query_params['offset'] = offset + limit query_params['limit'] = limit query_params['regexp'] = regexp result['headers'] = { 'link-next': reverse('origin-search', kwargs={'url_pattern': url_pattern}, query_params=query_params) } result.update({ - 'results': r + 'results': results }) return result @api_route(r'/origin/(?P[0-9]+)/visits/', 'origin-visits') @api_doc.route('/origin/visits/') @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc='software origin identifier') @api_doc.header('Link', doc=doc_header_link) @api_doc.param('last_visit', default=None, argtype=api_doc.argtypes.int, doc=doc_arg_last_elt) @api_doc.param('per_page', default=10, argtype=api_doc.argtypes.int, doc=doc_arg_per_page) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.list, retdoc="""a list of dictionaries describing individual visits. For each visit, its identifier, timestamp (as UNIX time), outcome, and visit-specific URL for more information are given.""") def api_origin_visits(request, origin_id): """Get information about all visits of a given software origin. 
""" result = {} per_page = int(request.query_params.get('per_page', '10')) last_visit = request.query_params.get('last_visit') if last_visit: last_visit = int(last_visit) def _lookup_origin_visits( origin_id, last_visit=last_visit, per_page=per_page): return service.lookup_origin_visits( origin_id, last_visit=last_visit, per_page=per_page) def _enrich_origin_visit(origin_visit): ov = origin_visit.copy() ov['origin_visit_url'] = reverse('origin-visit', kwargs={'origin_id': origin_id, 'visit_id': ov['visit']}) snapshot = ov['snapshot'] if snapshot: ov['snapshot_url'] = reverse('snapshot', kwargs={'snapshot_id': snapshot}) else: ov['snapshot_url'] = None return ov - r = api_lookup( - _lookup_origin_visits, origin_id, - notfound_msg='No origin {} found'.format(origin_id), - enrich_fn=_enrich_origin_visit) + results = api_lookup(_lookup_origin_visits, origin_id, + notfound_msg='No origin {} found'.format(origin_id), + enrich_fn=_enrich_origin_visit) - if r: - l = len(r) - if l == per_page: - new_last_visit = r[-1]['visit'] + if results: + nb_results = len(results) + if nb_results == per_page: + new_last_visit = results[-1]['visit'] query_params = {} query_params['last_visit'] = new_last_visit if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('origin-visits', kwargs={'origin_id': origin_id}, query_params=query_params) } result.update({ - 'results': r + 'results': results }) return result @api_route(r'/origin/(?P[0-9]+)/visit/(?P[0-9]+)/', 'origin-visit') @api_doc.route('/origin/visit/') @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc='software origin identifier') @api_doc.arg('visit_id', default=1, argtype=api_doc.argtypes.int, argdoc="""visit identifier, relative to the origin identified by origin_id""") @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""dictionary containing both metadata for the 
entire visit (e.g., timestamp as UNIX time, visit outcome, etc.) and what was at the software origin during the visit (i.e., a mapping from branches to other archive objects)""") def api_origin_visit(request, origin_id, visit_id): """Get information about a specific visit of a software origin. """ def _enrich_origin_visit(origin_visit): ov = origin_visit.copy() ov['origin_url'] = reverse('origin', kwargs={'origin_id': ov['origin']}) snapshot = ov['snapshot'] if snapshot: ov['snapshot_url'] = reverse('snapshot', kwargs={'snapshot_id': snapshot}) else: ov['snapshot_url'] = None # TODO: remove that piece of code once the snapshot migration # is totally effective in storage (no more occurrences) if 'occurrences' in ov: ov['occurrences'] = { k: utils.enrich_object(v) if v else None for k, v in ov['occurrences'].items() } return ov return api_lookup( service.lookup_origin_visit, origin_id, visit_id, notfound_msg=('No visit {} for origin {} found' .format(visit_id, origin_id)), enrich_fn=_enrich_origin_visit) diff --git a/swh/web/api/views/revision.py b/swh/web/api/views/revision.py index 5edda3fa..62d6f1a9 100644 --- a/swh/web/api/views/revision.py +++ b/swh/web/api/views/revision.py @@ -1,421 +1,422 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.http import HttpResponse from swh.web.common import service from swh.web.common.utils import reverse from swh.web.common.utils import parse_timestamp from swh.web.api import utils from swh.web.api import apidoc as api_doc from swh.web.api.apiurls import api_route from swh.web.api.views.utils import ( api_lookup, doc_exc_id_not_found, doc_header_link, doc_arg_per_page, doc_exc_bad_id, doc_ret_revision_log, doc_ret_revision_meta ) def _revision_directory_by(revision, path, request_path, limit=100, 
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
           r'/branch/(?P<branch_name>.+)/log/',
           'revision-origin-log')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)/log/',
           'revision-origin-log')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
           r'/ts/(?P<ts>.+)/log/',
           'revision-origin-log')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
           r'/branch/(?P<branch_name>.+)'
           r'/ts/(?P<ts>.+)/log/',
           'revision-origin-log')
@api_doc.route('/revision/origin/log/')
@api_doc.arg('origin_id',
             default=1,
             argtype=api_doc.argtypes.int,
             argdoc="The revision's SWH origin identifier")
@api_doc.arg('branch_name',
             default='refs/heads/master',
             argtype=api_doc.argtypes.path,
             argdoc="""(Optional) The revision's branch name within the origin specified.
             Defaults to 'refs/heads/master'.""")
@api_doc.arg('ts',
             default='2000-01-17T11:23:54+00:00',
             argtype=api_doc.argtypes.ts,
             argdoc="""(Optional) A time or timestamp string to parse""")
@api_doc.header('Link', doc=doc_header_link)
@api_doc.param('per_page', default=10,
               argtype=api_doc.argtypes.int,
               doc=doc_arg_per_page)
@api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found)
@api_doc.returns(rettype=api_doc.rettypes.dict, retdoc=doc_ret_revision_log)
def api_revision_log_by(request, origin_id,
                        branch_name='refs/heads/master',
                        ts=None):
    """Show the commit log for a revision, searching for it based on software
    origin, branch name, and/or visit timestamp.

    This endpoint behaves like ``/log``, but operates on the revision that
    has been found at a given software origin, close to a given point in time,
    pointed by a given branch.

    Args:
        request: the incoming request; its ``per_page`` query parameter
            (default ``'10'``) sets the page size
        origin_id: origin identifier captured from the url
        branch_name: branch name captured from the url
        ts: optional timestamp string captured from the url; parsed with
            ``parse_timestamp`` when provided

    Returns:
        A dictionary with a ``results`` key holding at most ``per_page``
        revisions and, when more revisions are available, a ``headers`` key
        holding the ``link-next`` url.
    """
    result = {}
    per_page = int(request.query_params.get('per_page', '10'))

    if ts:
        ts = parse_timestamp(ts)

    # one extra revision is requested so that the presence of a next page
    # can be detected without a second lookup
    def lookup_revision_log_by_with_limit(o_id, br, ts, limit=per_page+1):
        return service.lookup_revision_log_by(o_id, br, ts, limit)

    error_msg = 'No revision matching origin %s ' % origin_id
    error_msg += ', branch name %s' % branch_name
    error_msg += (' and time stamp %s.' % ts) if ts else '.'

    rev_get = api_lookup(
        lookup_revision_log_by_with_limit, origin_id, branch_name, ts,
        notfound_msg=error_msg,
        enrich_fn=utils.enrich_revision)

    nb_rev = len(rev_get)
    if nb_rev == per_page+1:
        # the extra revision is not returned: its id only seeds the next link
        revisions = rev_get[:-1]
        last_sha1_git = rev_get[-1]['id']

        params = {k: v for k, v in {'origin_id': origin_id,
                                    'branch_name': branch_name,
                                    'ts': ts,
                                    }.items() if v is not None}

        query_params = {}
        query_params['sha1_git'] = last_sha1_git

        # only propagate per_page when the client explicitly asked for it
        if request.query_params.get('per_page'):
            query_params['per_page'] = per_page

        result['headers'] = {
            'link-next': reverse('revision-origin-log', kwargs=params,
                                 query_params=query_params)
        }

    else:
        revisions = rev_get

    result.update({'results': revisions})

    return result
doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""The metadata of the revision corresponding to the given criteria""") def api_directory_through_revision_origin(request, origin_id, branch_name="refs/heads/master", ts=None, path=None, with_data=False): """Display directory or content information through a revision identified by origin/branch/timestamp. """ if ts: ts = parse_timestamp(ts) return _revision_directory_by({'origin_id': origin_id, 'branch_name': branch_name, 'ts': ts }, path, request.path, with_data=with_data) @api_route(r'/revision/origin/(?P[0-9]+)/', 'revision-origin') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/', 'revision-origin') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/ts/(?P.+)/', 'revision-origin') @api_route(r'/revision/origin/(?P[0-9]+)/ts/(?P.+)/', 'revision-origin') @api_doc.route('/revision/origin/') @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc='software origin identifier') @api_doc.arg('branch_name', default='refs/heads/master', argtype=api_doc.argtypes.path, argdoc="""(optional) fully-qualified branch name, e.g., "refs/heads/master". Defaults to the master branch.""") @api_doc.arg('ts', default=None, argtype=api_doc.argtypes.ts, argdoc="""(optional) timestamp close to which the revision pointed by the given branch should be looked up. Defaults to now.""") @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc=doc_ret_revision_meta) def api_revision_with_origin(request, origin_id, branch_name="refs/heads/master", ts=None): """Get information about a revision, searching for it based on software origin, branch name, and/or visit timestamp. This endpoint behaves like ``/revision``, but operates on the revision that has been found at a given software origin, close to a given point in time, pointed by a given branch. 
@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/prev/(?P<context>[0-9a-f/]+)/',
           'revision-context')
@api_doc.route('/revision/prev/', tags=['hidden'])
@api_doc.arg('sha1_git',
             default='ec72c666fb345ea5f21359b7bc063710ce558e39',
             argtype=api_doc.argtypes.sha1_git,
             argdoc="The revision's sha1_git identifier")
@api_doc.arg('context',
             default='6adc4a22f20bbf3bbc754f1ec8c82be5dfb5c71a',
             argtype=api_doc.argtypes.path,
             argdoc='The navigation breadcrumbs -- use at your own risk')
@api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id)
@api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found)
@api_doc.returns(rettype=api_doc.rettypes.dict,
                 retdoc='The metadata of the revision identified by sha1_git')
def api_revision_with_context(request, sha1_git, context):
    """Return information about revision with id sha1_git.

    Args:
        request: the incoming request (unused here)
        sha1_git: revision identifier captured from the url
        context: navigation breadcrumbs captured from the url; forwarded
            as second argument to ``utils.enrich_revision``

    Returns:
        The result of ``api_lookup`` on ``service.lookup_revision``.
    """
    # bind the url context so that api_lookup can call the enrich function
    # with the revision as single argument
    def _enrich_revision(revision, context=context):
        return utils.enrich_revision(revision, context)

    return api_lookup(
        service.lookup_revision, sha1_git,
        notfound_msg='Revision with sha1_git %s not found.' % sha1_git,
        enrich_fn=_enrich_revision)
""" return api_lookup( service.lookup_revision, sha1_git, notfound_msg='Revision with sha1_git {} not found.'.format(sha1_git), enrich_fn=utils.enrich_revision) @api_route(r'/revision/(?P[0-9a-f]+)/raw/', 'revision-raw-message') @api_doc.route('/revision/raw/', tags=['hidden'], handle_response=True) @api_doc.arg('sha1_git', default='ec72c666fb345ea5f21359b7bc063710ce558e39', argtype=api_doc.argtypes.sha1_git, argdoc="The queried revision's sha1_git identifier") @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.octet_stream, retdoc="""The message of the revision identified by sha1_git as a downloadable octet stream""") def api_revision_raw_message(request, sha1_git): """Return the raw data of the message of revision identified by sha1_git """ raw = service.lookup_revision_message(sha1_git) response = HttpResponse(raw['message'], content_type='application/octet-stream') response['Content-disposition'] = \ 'attachment;filename=rev_%s_raw' % sha1_git return response @api_route(r'/revision/(?P[0-9a-f]+)/directory/', 'revision-directory') @api_route(r'/revision/(?P[0-9a-f]+)/directory/(?P.+)/', 'revision-directory') @api_doc.route('/revision/directory/') @api_doc.arg('sha1_git', default='ec72c666fb345ea5f21359b7bc063710ce558e39', argtype=api_doc.argtypes.sha1_git, argdoc='revision identifier') @api_doc.arg('dir_path', default='Documentation/BUG-HUNTING', argtype=api_doc.argtypes.path, argdoc="""path relative to the root directory of revision identifier by sha1_git""") @api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""either a list of directory entries with their metadata, or the metadata of a single directory entry""") def api_revision_directory(request, sha1_git, dir_path=None, with_data=False): """Get 
@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/log/', 'revision-log')
@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)'
           r'/prev/(?P<prev_sha1s>[0-9a-f/]+)/log/',
           'revision-log')
@api_doc.route('/revision/log/')
@api_doc.arg('sha1_git',
             default='37fc9e08d0c4b71807a4f1ecb06112e78d91c283',
             argtype=api_doc.argtypes.sha1_git,
             argdoc='revision identifier')
@api_doc.arg('prev_sha1s',
             default='6adc4a22f20bbf3bbc754f1ec8c82be5dfb5c71a',
             argtype=api_doc.argtypes.path,
             argdoc="""(Optional) Navigation breadcrumbs (descendant revisions
previously visited).  If multiple values, use / as delimiter.  """)
@api_doc.header('Link', doc=doc_header_link)
@api_doc.param('per_page', default=10,
               argtype=api_doc.argtypes.int,
               doc=doc_arg_per_page)
@api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id)
@api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found)
@api_doc.returns(rettype=api_doc.rettypes.dict, retdoc=doc_ret_revision_log)
def api_revision_log(request, sha1_git, prev_sha1s=None):
    """Get a list of all revisions heading to a given one, i.e., show the
    commit log.

    Args:
        request: the incoming request; its ``per_page`` query parameter
            (default ``'10'``) sets the page size
        sha1_git: revision identifier captured from the url
        prev_sha1s: optional ``/``-separated revision identifiers; when
            given, these revisions are looked up and put in front of the log

    Returns:
        A dictionary with a ``results`` key holding the revisions and, when
        more revisions are available, a ``headers`` key holding the
        ``link-next`` url.
    """
    result = {}
    per_page = int(request.query_params.get('per_page', '10'))

    # one extra revision is requested so that the presence of a next page
    # can be detected without a second lookup
    def lookup_revision_log_with_limit(s, limit=per_page+1):
        return service.lookup_revision_log(s, limit)

    error_msg = 'Revision with sha1_git %s not found.' % sha1_git
    rev_get = api_lookup(lookup_revision_log_with_limit, sha1_git,
                         notfound_msg=error_msg,
                         enrich_fn=utils.enrich_revision)

    nb_rev = len(rev_get)
    if nb_rev == per_page+1:
        # the extra revision is not returned: its id only seeds the next link
        rev_backward = rev_get[:-1]
        new_last_sha1 = rev_get[-1]['id']
        query_params = {}

        # only propagate per_page when the client explicitly asked for it
        if request.query_params.get('per_page'):
            query_params['per_page'] = per_page

        result['headers'] = {
            'link-next': reverse('revision-log',
                                 kwargs={'sha1_git': new_last_sha1},
                                 query_params=query_params)
        }

    else:
        rev_backward = rev_get

    if not prev_sha1s:  # no nav breadcrumbs, so we're done
        revisions = rev_backward

    else:
        rev_forward_ids = prev_sha1s.split('/')
        rev_forward = api_lookup(
            service.lookup_revision_multiple, rev_forward_ids,
            notfound_msg=error_msg,
            enrich_fn=utils.enrich_revision)
        revisions = rev_forward + rev_backward

    result.update({
        'results': revisions
    })
    return result
'erlang', 'erlang-repl', 'excel', 'fix', 'flix', 'fortran', 'fsharp', 'gams', 'gauss', 'gcode', 'gherkin', 'glsl', 'go', 'golo', 'gradle', 'groovy', 'haml', 'handlebars', 'haskell', 'haxe', 'hsp', 'htmlbars', 'http', 'hy', 'inform7', 'ini', 'irpf90', 'java', 'javascript', 'jboss-cli', 'json', 'julia', 'julia-repl', 'kotlin', 'lasso', 'ldif', 'leaf', 'less', 'lisp', 'livecodeserver', 'livescript', 'llvm', 'lsl', 'lua', 'makefile', 'markdown', 'mathematica', 'matlab', 'maxima', 'mel', 'mercury', 'mipsasm', 'mizar', 'mojolicious', 'monkey', 'moonscript', 'n1ql', 'nginx', 'nimrod', 'nix', 'nsis', 'objectivec', 'ocaml', 'openscad', 'oxygene', 'parser3', 'perl', 'pf', 'php', 'pony', 'powershell', 'processing', 'profile', 'prolog', 'protobuf', 'puppet', 'purebasic', 'python', 'q', 'qml', 'rib', 'r', 'roboconf', 'routeros', 'rsl', 'ruby', 'ruleslanguage', 'rust', 'scala', 'scheme', 'scilab', 'scss', 'shell', 'smali', 'smalltalk', 'sml', 'sqf', 'sql', 'stan', 'stata', 'step21', 'stylus', 'subunit', 'swift', 'taggerscript', 'tap', 'tcl', 'tex', 'thrift', 'tp', 'twig', 'typescript', 'vala', 'vbnet', 'vbscript-html', 'vbscript', 'verilog', 'vhdl', 'vim', 'x86asm', 'xl', 'xml', 'xquery', 'yaml', 'zephir', ]) # languages aliases defined in highlight.js _hljs_languages_aliases = { 'ado': 'stata', 'adoc': 'asciidoc', 'ahk': 'autohotkey', 'apacheconf': 'apache', 'arm': 'armasm', 'as': 'actionscript', 'atom': 'xml', 'bat': 'dos', 'bf': 'brainfuck', 'bind': 'dns', 'c': 'cpp', 'c++': 'cpp', 'capnp': 'capnproto', 'cc': 'cpp', 'clean': 'clean', 'clj': 'clojure', 'cls': 'cos', 'cmake.in': 'cmake', 'cmd': 'dos', 'coffee': 'coffeescript', 'console': 'shell', 'cos': 'cos', 'cr': 'crystal', 'craftcms': 'twig', 'crm': 'crmsh', 'csharp': 'cs', 'cson': 'coffeescript', 'dcl': 'clean', 'desktop': 'ini', 'dfm': 'delphi', 'do': 'stata', 'docker': 'dockerfile', 'dpr': 'delphi', 'dst': 'dust', 'el': 'lisp', 'erl': 'erlang', 'f90': 'fortran', 'f95': 'fortran', 'feature': 'gherkin', 'freepascal': 
'delphi', 'fs': 'fsharp', 'gemspec': 'ruby', 'gms': 'gams', 'golang': 'go', 'graph': 'roboconf', 'gss': 'gauss', 'gyp': 'python', 'h': 'cpp', 'h++': 'cpp', 'hbs': 'handlebars', 'hpp': 'cpp', 'hs': 'haskell', 'html': 'xml', 'html.handlebars': 'handlebars', 'html.hbs': 'handlebars', 'https': 'http', 'hx': 'haxe', 'hylang': 'hy', 'i7': 'inform7', 'iced': 'coffeescript', 'icl': 'clean', 'instances': 'roboconf', 'ipynb': 'json', 'irb': 'ruby', 'jinja': 'django', 'js': 'javascript', 'jsp': 'java', 'jsx': 'javascript', 'k': 'q', 'kdb': 'q', 'lassoscript': 'lasso', 'lazarus': 'delphi', 'lfm': 'delphi', 'lpr': 'delphi', 'ls': 'livescript', 'm': 'objectivec', 'mak': 'makefile', 'md': 'markdown', 'mikrotik': 'routeros', 'mips': 'mipsasm', 'mk': 'makefile', 'mkd': 'markdown', 'mkdown': 'markdown', 'markdown': 'markdown', 'ml': 'ocaml', 'mm': 'objectivec', 'mma': 'mathematica', 'moo': 'mercury', 'moon': 'moonscript', 'nc': 'gcode', 'nginxconf': 'nginx', 'nim': 'nimrod', 'nixos': 'nix', 'obj-c': 'objectivec', 'objc': 'objectivec', 'osascript': 'applescript', 'p21': 'step21', 'pas': 'delphi', 'pascal': 'delphi', 'patch': 'diff', 'pb': 'purebasic', 'pbi': 'purebasic', 'pcmk': 'crmsh', 'pf.conf': 'pf', 'php3': 'php', 'php4': 'php', 'php5': 'php', 'php6': 'php', 'pl': 'perl', 'plist': 'xml', 'pm': 'perl', 'podspec': 'ruby', 'pp': 'puppet', 'ps': 'powershell', 'py': 'python', 'qrc': 'xml', 'qs': 'javascript', 'qt': 'qml', 'rb': 'ruby', 'routeros': 'routeros', 'rs': 'rust', 'rst': 'nohighlight-swh', 'rss': 'xml', 'ru': 'ruby', 'scad': 'openscad', 'sci': 'scilab', 'scpt': 'applescript', 'sh': 'bash', 'smali': 'smali', 'sqf': 'sqf', 'st': 'smalltalk', 'step': 'step21', 'stp': 'step21', 'styl': 'stylus', 'sv': 'verilog', 'svh': 'verilog', 'tao': 'xl', 'thor': 'ruby', 'tk': 'tcl', 'toml': 'ini', 'ui': 'xml', 'v': 'verilog', 'vb': 'vbnet', 'vbs': 'vbscript', 'wildfly-cli': 'jboss-cli', 'xhtml': 'xml', 'xjb': 'xml', 'xls': 'excel', 'xlsx': 'excel', 'xpath': 'xquery', 'xq': 'xquery', 'xsd': 
def _init_pygments_to_hljs_map():
    """Fill the Pygments lexer and mime type lookup dictionaries.

    Nothing is done when the lexer dictionary already has entries. For each
    Pygments lexer, the first of its aliases that is also an hljs language id
    is recorded for the lexer name and for every mime type of that lexer.
    """
    if len(_pygments_lexer_to_hljs_language) != 0:
        return

    for lexer_info in get_all_lexers():
        name, aliases, mime_types = lexer_info[0], lexer_info[1], lexer_info[3]
        # first alias known to highlight.js, None when there is none
        hljs_lang = next((alias for alias in aliases
                          if alias in _hljs_languages), None)
        if not hljs_lang:
            continue
        _pygments_lexer_to_hljs_language[name] = hljs_lang
        for mime_type in mime_types:
            _mime_type_to_hljs_language[mime_type] = hljs_lang
def get_hljs_language_from_mime_type(mime_type):
    """Look up the highlight.js language associated to a mime type.

    Args:
        mime_type: input mime type

    Returns:
        highlight.js language id or None if the mime type is empty or
        no correspondence has been found
    """
    _init_pygments_to_hljs_map()
    if not mime_type:
        return None
    # dict.get yields None for unknown mime types
    return _mime_type_to_hljs_language.get(mime_type)
def throttle_scope(scope):
    """Build a decorator setting the throttle scope of a DRF function
    based view::

        @api_view(['GET', ])
        @throttle_scope('scope')
        def view(request):
            ...

    The decorated function gets a ``throttle_classes`` attribute holding a
    single dynamically created subclass of SwhWebRateThrottle whose
    ``scope`` class attribute is the given scope.
    """
    def _set_throttle_classes(view_func):
        scoped_throttle_cls = type('CustomScopeRateThrottle',
                                   (SwhWebRateThrottle,),
                                   {'scope': scope})
        view_func.throttle_classes = (scoped_throttle_cls, )
        return view_func

    return _set_throttle_classes
def fmap(f, data):
    """Map f to data at each level.

    This must keep the origin data structure type:
    - map -> map
    - dict -> dict
    - list -> list
    - None -> None

    Args:
        f: function that expects one argument.
        data: data to traverse to apply the f function.
              list, map, dict or bare value.

    Returns:
        The same data-structure with modified values by the f function.
        A map input yields a lazy map: f is only applied when the result
        is consumed. Dictionary keys are left untouched.

    """
    if data is None:
        return data
    if isinstance(data, map):
        # a map object is already an iterator, no need to wrap it
        return map(lambda y: fmap(f, y), data)
    if isinstance(data, list):
        return [fmap(f, x) for x in data]
    if isinstance(data, dict):
        return {k: fmap(f, v) for (k, v) in data.items()}
    return f(data)
def shorten_path(path):
    """Shorten the given path: for each hash present, only return the first
    8 characters followed by an ellipsis.

    Args:
        path (str): a path possibly containing hexadecimal sha256
            (64 characters) or sha1 (40 characters) hashes

    Returns:
        The path where each hash found is replaced by its first 8
        characters followed by '...'
    """
    # sha256 hashes are processed first: a sha256 would otherwise be
    # partially consumed by the shorter sha1 pattern
    sha256_re = r'([0-9a-f]{8})[0-9a-f]{56}'
    sha1_re = r'([0-9a-f]{8})[0-9a-f]{32}'

    ret = re.sub(sha256_re, r'\1...', path)
    return re.sub(sha1_re, r'\1...', ret)
""" path_info = [] if path: sub_paths = path.strip('/').split('/') path_from_root = '' for p in sub_paths: path_from_root += '/' + p path_info.append({'name': p, 'path': path_from_root.strip('/')}) return path_info diff --git a/swh/web/manage.py b/swh/web/manage.py index fdb3ae44..182bc12c 100755 --- a/swh/web/manage.py +++ b/swh/web/manage.py @@ -1,45 +1,45 @@ #!/usr/bin/env python # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import sys from swh.web import config if __name__ == "__main__": os.environ.setdefault("DJANGO_SETTINGS_MODULE", "swh.web.settings.development") # import root urls module for swh-web before running the django dev server # in order to ensure it will be automatically reloaded when source files # are modified (as django autoreload feature only works if the modules are # in sys.modules) try: from swh.web import urls # noqa - except: + except Exception: pass try: from django.core.management.commands.runserver import ( Command as runserver ) from django.core.management import execute_from_command_line except ImportError: # The above import may fail for some other reason. Ensure that the # issue is really that Django is missing to avoid masking other # exceptions on Python 2. try: import django # noqa except ImportError: raise ImportError( "Couldn't import Django. Are you sure it's installed and " "available on your PYTHONPATH environment variable? Did you " "forget to activate a virtual environment?" ) raise swh_web_config = config.get_config() runserver.default_port = swh_web_config['port'] runserver.default_addr = swh_web_config['host'] execute_from_command_line(sys.argv)