diff --git a/swh/web/api/utils.py b/swh/web/api/utils.py
index c8362fac..b24509b1 100644
--- a/swh/web/api/utils.py
+++ b/swh/web/api/utils.py
@@ -1,425 +1,432 @@
 # Copyright (C) 2015-2017 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import re
 import urllib
 
 from django.core import urlresolvers
+from django.http import QueryDict
 from datetime import datetime, timezone
 from dateutil import parser
 
 from .exc import BadInputExc
 
 
 # override django reverse function in order to get
 # the same result on debian jessie and stretch
 # (see https://code.djangoproject.com/ticket/22223)
 def reverse(viewname, urlconf=None, args=None,
-            kwargs=None, current_app=None):
-    return urllib.parse.unquote(
+            kwargs=None, current_app=None, query_params=None):
+    url = urllib.parse.unquote(
         urlresolvers.reverse(
             viewname, urlconf=urlconf, args=args,
             kwargs=kwargs, current_app=current_app
         )
     )
+    if query_params and len(query_params) > 0:
+        query_dict = QueryDict('', mutable=True)
+        for k, v in query_params.items():
+            query_dict[k] = v
+        url += ('?' + query_dict.urlencode())
+    return url
 
 
 def filter_endpoints(url_map, prefix_url_rule, blacklist=[]):
     """Filter endpoints by prefix url rule.
 
     Args:
         - url_map: Url Werkzeug.Map of rules
         - prefix_url_rule: prefix url string
         - blacklist: blacklist of some url
 
     Returns:
         Dictionary of url_rule with values methods and endpoint.
 
         The key is the url, the associated value is a dictionary of
         'methods' (possible http methods) and 'endpoint' (python function)
 
     """
     out = {}
     for r in url_map:
         rule = r['rule']
         if rule == prefix_url_rule or rule in blacklist:
             continue
 
         if rule.startswith(prefix_url_rule):
             out[rule] = {'methods': sorted(map(str, r['methods'])),
                          'endpoint': r['endpoint']}
     return out
 
 
 def fmap(f, data):
     """Map f to data at each level.
 
     This must keep the origin data structure type:
     - map -> map
     - dict -> dict
     - list -> list
     - None -> None
 
     Args:
         f: function that expects one argument.
         data: data to traverse to apply the f function.
               list, map, dict or bare value.
 
     Returns:
         The same data-structure with modified values by the f function.
 
     """
     if data is None:
         return data
     if isinstance(data, map):
         return map(lambda y: fmap(f, y), (x for x in data))
     if isinstance(data, list):
         return [fmap(f, x) for x in data]
     if isinstance(data, dict):
         return {k: fmap(f, v) for (k, v) in data.items()}
     return f(data)
 
 
 def prepare_data_for_view(data, encoding='utf-8'):
     def prepare_data(s):
         # Note: can only be 'data' key with bytes of raw content
         if isinstance(s, bytes):
             try:
                 return s.decode(encoding)
             except:
                 return "Cannot decode the data bytes, try and set another " \
                        "encoding in the url (e.g. ?encoding=utf8) or " \
                        "download directly the " \
                        "content's raw data."
         if isinstance(s, str):
             return re.sub(r'/api/1/', r'/browse/', s)
 
         return s
 
     return fmap(prepare_data, data)
 
 
 def filter_field_keys(data, field_keys):
     """Given an object instance (directory or list), and a csv field keys
     to filter on.
 
     Return the object instance with filtered keys.
 
     Note: Returns obj as is if it's an instance of types not in (dictionary,
     list)
 
     Args:
         - data: one object (dictionary, list...) to filter.
- field_keys: csv or set of keys to filter the object on Returns: obj filtered on field_keys """ if isinstance(data, map): return map(lambda x: filter_field_keys(x, field_keys), data) if isinstance(data, list): return [filter_field_keys(x, field_keys) for x in data] if isinstance(data, dict): return {k: v for (k, v) in data.items() if k in field_keys} return data def person_to_string(person): """Map a person (person, committer, tagger, etc...) to a string. """ return ''.join([person['name'], ' <', person['email'], '>']) def parse_timestamp(timestamp): """Given a time or timestamp (as string), parse the result as datetime. Returns: a timezone-aware datetime representing the parsed value. None if the parsing fails. Samples: - 2016-01-12 - 2016-01-12T09:19:12+0100 - Today is January 1, 2047 at 8:21:00AM - 1452591542 """ if not timestamp: return None try: return parser.parse(timestamp, ignoretz=False, fuzzy=True) except: try: return datetime.utcfromtimestamp(float(timestamp)).replace( tzinfo=timezone.utc) except (ValueError, OverflowError) as e: raise BadInputExc(e) def enrich_object(object): """Enrich an object (revision, release) with link to the 'target' of type 'target_type'. Args: object: An object with target and target_type keys (e.g. release, revision) Returns: Object enriched with target_url pointing to the right swh.web.ui.api urls for the pointing object (revision, release, content, directory) """ obj = object.copy() if 'target' in obj and 'target_type' in obj: if obj['target_type'] == 'revision': obj['target_url'] = reverse('revision', kwargs={'sha1_git': obj['target']}) elif obj['target_type'] == 'release': obj['target_url'] = reverse('release', kwargs={'sha1_git': obj['target']}) elif obj['target_type'] == 'content': obj['target_url'] = \ reverse('content', kwargs={'q': 'sha1_git:' + obj['target']}) elif obj['target_type'] == 'directory': obj['target_url'] = reverse('directory', kwargs={'sha1_git': obj['target']}) if 'author' in obj: author = obj['author'] obj['author_url'] = reverse('person', kwargs={'person_id': author['id']}) return obj enrich_release = enrich_object def enrich_directory(directory, context_url=None): """Enrich directory with url to content or directory. """ if 'type' in directory: target_type = directory['type'] target = directory['target'] if target_type == 'file': directory['target_url'] = \ reverse('content', kwargs={'q': 'sha1_git:%s' % target}) if context_url: directory['file_url'] = context_url + directory['name'] + '/' else: directory['target_url'] = reverse('directory', kwargs={'sha1_git': target}) if context_url: directory['dir_url'] = context_url + directory['name'] + '/' return directory def enrich_metadata_endpoint(content): """Enrich metadata endpoint with link to the upper metadata endpoint. 
""" c = content.copy() c['content_url'] = reverse('content', args=['sha1:%s' % c['id']]) return c def enrich_content(content, top_url=False): """Enrich content with links to: - data_url: its raw data - filetype_url: its filetype information """ for h in ['sha1', 'sha1_git', 'sha256']: if h in content: q = '%s:%s' % (h, content[h]) if top_url: content['content_url'] = reverse('content', kwargs={'q': q}) content['data_url'] = reverse('content-raw', kwargs={'q': q}) content['filetype_url'] = reverse('content-filetype', kwargs={'q': q}) content['language_url'] = reverse('content-language', kwargs={'q': q}) content['license_url'] = reverse('content-license', kwargs={'q': q}) break return content def enrich_entity(entity): """Enrich entity with """ if 'uuid' in entity: entity['uuid_url'] = reverse('entity', kwargs={'uuid': entity['uuid']}) if 'parent' in entity and entity['parent']: entity['parent_url'] = reverse('entity', kwargs={'uuid': entity['parent']}) return entity def _get_path_list(path_string): """Helper for enrich_revision: get a list of the sha1 id of the navigation breadcrumbs, ordered from the oldest to the most recent. Args: path_string: the path as a '/'-separated string Returns: The navigation context as a list of sha1 revision ids """ return path_string.split('/') def _get_revision_contexts(rev_id, context): """Helper for enrich_revision: retrieve for the revision id and potentially the navigation breadcrumbs the context to pass to parents and children of of the revision. Args: rev_id: the revision's sha1 id context: the current navigation context Returns: The context for parents, children and the url of the direct child as a tuple in that order. """ context_for_parents = None context_for_children = None url_direct_child = None if not context: return (rev_id, None, None) path_list = _get_path_list(context) context_for_parents = '%s/%s' % (context, rev_id) prev_for_children = path_list[:-1] if len(prev_for_children) > 0: context_for_children = '/'.join(prev_for_children) child_id = path_list[-1] # This commit is not the first commit in the path if context_for_children: url_direct_child = reverse('revision-context', kwargs={'sha1_git': child_id, 'context': context_for_children}) # This commit is the first commit in the path else: url_direct_child = reverse('revision', kwargs={'sha1_git': child_id}) return (context_for_parents, context_for_children, url_direct_child) def _make_child_url(rev_children, context): """Helper for enrich_revision: retrieve the list of urls corresponding to the children of the current revision according to the navigation breadcrumbs. Args: rev_children: a list of revision id context: the '/'-separated navigation breadcrumbs Returns: the list of the children urls according to the context """ children = [] for child in rev_children: if context and child != _get_path_list(context)[-1]: children.append(reverse('revision', kwargs={'sha1_git': child})) elif not context: children.append(reverse('revision', kwargs={'sha1_git': child})) return children def enrich_revision(revision, context=None): """Enrich revision with links where it makes sense (directory, parents). Keep track of the navigation breadcrumbs if they are specified. 
Args: revision: the revision as a dict context: the navigation breadcrumbs as a /-separated string of revision sha1_git """ ctx_parents, ctx_children, url_direct_child = _get_revision_contexts( revision['id'], context) revision['url'] = reverse('revision', kwargs={'sha1_git': revision['id']}) revision['history_url'] = reverse('revision-log', kwargs={'sha1_git': revision['id']}) if context: revision['history_context_url'] = reverse( 'revision-log', kwargs={'sha1_git': revision['id'], 'prev_sha1s': context}) if 'author' in revision: author = revision['author'] revision['author_url'] = reverse('person', kwargs={'person_id': author['id']}) if 'committer' in revision: committer = revision['committer'] revision['committer_url'] = \ reverse('person', kwargs={'person_id': committer['id']}) if 'directory' in revision: revision['directory_url'] = \ reverse('directory', kwargs={'sha1_git': revision['directory']}) if 'parents' in revision: parents = [] for parent in revision['parents']: parents.append({ 'id': parent, 'url': reverse('revision', kwargs={'sha1_git': parent}) }) revision['parents'] = parents if 'children' in revision: children = _make_child_url(revision['children'], context) if url_direct_child: children.append(url_direct_child) revision['children_urls'] = children else: if url_direct_child: revision['children_urls'] = [url_direct_child] if 'message_decoding_failed' in revision: revision['message_url'] = reverse('revision-raw-message', kwargs={'sha1_git': revision['id']}) return revision def shorten_path(path): """Shorten the given path: for each hash present, only return the first 8 characters followed by an ellipsis""" sha256_re = r'([0-9a-f]{8})[0-9a-z]{56}' sha1_re = r'([0-9a-f]{8})[0-9a-f]{32}' ret = re.sub(sha256_re, r'\1...', path) return re.sub(sha1_re, r'\1...', ret) def get_query_params(request): """Utility functions for retrieving query parameters from a DRF request object. 
Its purpose is to handle multiple versions of DRF.""" if hasattr(request, 'query_params'): # DRF >= 3.0 uses query_params attribute return request.query_params else: # while DRF < 3.0 uses QUERY_PARAMS attribute return request.QUERY_PARAMS diff --git a/swh/web/api/views/content.py b/swh/web/api/views/content.py index 6f760c8c..dcaecab7 100644 --- a/swh/web/api/views/content.py +++ b/swh/web/api/views/content.py @@ -1,340 +1,339 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import functools -from django.http import QueryDict from django.http import HttpResponse from swh.web.api.utils import reverse from swh.web.api import service, utils from swh.web.api import apidoc as api_doc from swh.web.api.exc import NotFoundExc, ForbiddenExc from swh.web.api.apiurls import api_route from swh.web.api.views import ( _api_lookup, _doc_exc_id_not_found, _doc_header_link, _doc_arg_last_elt, _doc_arg_per_page, _doc_exc_bad_id, _doc_arg_content_id ) @api_route(r'/content/(?P.+)/provenance/', 'content-provenance') @api_doc.route('/content/provenance/', tags=['hidden']) @api_doc.arg('q', default='sha1_git:88b9b366facda0b5ff8d8640ee9279bed346f242', argtype=api_doc.argtypes.algo_and_hash, argdoc=_doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=_doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""List of provenance information (dict) for the matched content.""") def api_content_provenance(request, q): """Return content's provenance information if any. """ def _enrich_revision(provenance): p = provenance.copy() p['revision_url'] = \ reverse('revision', kwargs={'sha1_git': provenance['revision']}) p['content_url'] = \ reverse('content', kwargs={'q': 'sha1_git:%s' % provenance['content']}) p['origin_url'] = \ reverse('origin', kwargs={'origin_id': provenance['origin']}) p['origin_visits_url'] = \ reverse('origin-visits', kwargs={'origin_id': provenance['origin']}) p['origin_visit_url'] = \ reverse('origin-visit', kwargs={'origin_id': provenance['origin'], 'visit_id': provenance['visit']}) return p return _api_lookup( service.lookup_content_provenance, q, notfound_msg='Content with {} not found.'.format(q), enrich_fn=_enrich_revision) @api_route(r'/content/(?P.+)/filetype/', 'content-filetype') @api_doc.route('/content/filetype/', tags=['upcoming']) @api_doc.arg('q', default='sha1:1fc6129a692e7a87b5450e2ba56e7669d0c5775d', argtype=api_doc.argtypes.algo_and_hash, argdoc=_doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=_doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""Filetype information (dict) for the matched content.""") def api_content_filetype(request, q): """Get information about the detected MIME type of a content object. 
""" return _api_lookup( service.lookup_content_filetype, q, notfound_msg='No filetype information found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) @api_route(r'/content/(?P.+)/language/', 'content-language') @api_doc.route('/content/language/', tags=['upcoming']) @api_doc.arg('q', default='sha1:1fc6129a692e7a87b5450e2ba56e7669d0c5775d', argtype=api_doc.argtypes.algo_and_hash, argdoc=_doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=_doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""Language information (dict) for the matched content.""") def api_content_language(request, q): """Get information about the detected (programming) language of a content object. """ return _api_lookup( service.lookup_content_language, q, notfound_msg='No language information found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) @api_route(r'/content/(?P.+)/license/', 'content-license') @api_doc.route('/content/license/', tags=['upcoming']) @api_doc.arg('q', default='sha1:1fc6129a692e7a87b5450e2ba56e7669d0c5775d', argtype=api_doc.argtypes.algo_and_hash, argdoc=_doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=_doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""License information (dict) for the matched content.""") def api_content_license(request, q): """Get information about the detected license of a content object. """ return _api_lookup( service.lookup_content_license, q, notfound_msg='No license information found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) @api_route(r'/content/(?P.+)/ctags/', 'content-ctags') @api_doc.route('/content/ctags/', tags=['upcoming']) @api_doc.arg('q', default='sha1:1fc6129a692e7a87b5450e2ba56e7669d0c5775d', argtype=api_doc.argtypes.algo_and_hash, argdoc=_doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=_doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""Ctags symbol (dict) for the matched content.""") def api_content_ctags(request, q): """Get information about all `Ctags `_-style symbols defined in a content object. """ return _api_lookup( service.lookup_content_ctags, q, notfound_msg='No ctags symbol found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint) @api_route(r'/content/(?P.+)/raw/', 'content-raw') @api_doc.route('/content/raw/', handle_response=True) @api_doc.arg('q', default='adc83b19e793491b1c6ea0fd8b46cd9f32e592fc', argtype=api_doc.argtypes.algo_and_hash, argdoc=_doc_arg_content_id) @api_doc.param('filename', default=None, argtype=api_doc.argtypes.str, doc='User\'s desired filename. If provided, the downloaded' ' content will get that filename.') @api_doc.raises(exc=api_doc.excs.badinput, doc=_doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.octet_stream, retdoc='The raw content data as an octet stream') def api_content_raw(request, q): """Get the raw content of a content object (AKA "blob"), as a byte sequence. """ def generate(content): yield content['data'] content_raw = service.lookup_content_raw(q) if not content_raw: raise NotFoundExc('Content %s is not found.' 
% q) content_filetype = service.lookup_content_filetype(q) if not content_filetype: raise NotFoundExc('Content %s is not available for download.' % q) mimetype = content_filetype['mimetype'] if 'text/' not in mimetype: raise ForbiddenExc('Only textual content is available for download. ' 'Actual content mimetype is %s.' % mimetype) filename = utils.get_query_params(request).get('filename') if not filename: filename = 'content_%s_raw' % q.replace(':', '_') response = HttpResponse(generate(content_raw), content_type='application/octet-stream') response['Content-disposition'] = 'attachment; filename=%s' % filename return response @api_route(r'/content/symbol/(?P.+)/', 'content-symbol') @api_doc.route('/content/symbol/', tags=['upcoming']) @api_doc.arg('q', default='hello', argtype=api_doc.argtypes.str, argdoc="""An expression string to lookup in swh's raw content""") @api_doc.header('Link', doc=_doc_header_link) @api_doc.param('last_sha1', default=None, argtype=api_doc.argtypes.str, doc=_doc_arg_last_elt) @api_doc.param('per_page', default=10, argtype=api_doc.argtypes.int, doc=_doc_arg_per_page) @api_doc.returns(rettype=api_doc.rettypes.list, retdoc="""A list of dict whose content matches the expression. Each dict has the following keys: - id (bytes): identifier of the content - name (text): symbol whose content match the expression - kind (text): kind of the symbol that matched - lang (text): Language for that entry - line (int): Number line for the symbol """) def api_content_symbol(request, q=None): """Search content objects by `Ctags `_-style symbol (e.g., function name, data type, method, ...). """ result = {} last_sha1 = utils.get_query_params(request).get('last_sha1', None) per_page = int(utils.get_query_params(request).get('per_page', '10')) def lookup_exp(exp, last_sha1=last_sha1, per_page=per_page): return service.lookup_expression(exp, last_sha1, per_page) symbols = _api_lookup( lookup_exp, q, notfound_msg="No indexed raw content match expression '{}'.".format(q), enrich_fn=functools.partial(utils.enrich_content, top_url=True)) if symbols: l = len(symbols) if l == per_page: - query_params = QueryDict('', mutable=True) + query_params = {} new_last_sha1 = symbols[-1]['sha1'] query_params['last_sha1'] = new_last_sha1 if utils.get_query_params(request).get('per_page'): query_params['per_page'] = per_page result['headers'] = { - 'link-next': reverse('content-symbol', kwargs={'q': q}) + '?' + - query_params.urlencode() + 'link-next': reverse('content-symbol', kwargs={'q': q}, + query_params=query_params) } result.update({ 'results': symbols }) return result @api_route(r'/content/known/search/', 'content-known', methods=['POST']) @api_route(r'/content/known/(?P(?!search).*)/', 'content-known') @api_doc.route('/content/known/', tags=['hidden']) @api_doc.arg('q', default='adc83b19e793491b1c6ea0fd8b46cd9f32e592fc', argtype=api_doc.argtypes.sha1, argdoc='content identifier as a sha1 checksum') @api_doc.param('q', default=None, argtype=api_doc.argtypes.str, doc="""(POST request) An algo_hash:hash string, where algo_hash is one of sha1, sha1_git or sha256 and hash is the hash to search for in SWH""") @api_doc.raises(exc=api_doc.excs.badinput, doc=_doc_exc_bad_id) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""a dictionary with results (found/not found for each given identifier) and statistics about how many identifiers were found""") def api_check_content_known(request, q=None): """Check whether some content (AKA "blob") is present in the archive. 
Lookup can be performed by various means: - a GET request with one or several hashes, separated by ',' - a POST request with one or several hashes, passed as (multiple) values for parameter 'q' """ response = {'search_res': None, 'search_stats': None} search_stats = {'nbfiles': 0, 'pct': 0} search_res = None queries = [] # GET: Many hash separated values request if q: hashes = q.split(',') for v in hashes: queries.append({'filename': None, 'sha1': v}) # POST: Many hash requests in post form submission elif request.method == 'POST': data = request.data if hasattr(request, 'data') else request.DATA # Remove potential inputs with no associated value for k, v in data.items(): if v is not None: if k == 'q' and len(v) > 0: queries.append({'filename': None, 'sha1': v}) elif v != '': queries.append({'filename': k, 'sha1': v}) if queries: lookup = service.lookup_multiple_hashes(queries) result = [] l = len(queries) for el in lookup: res_d = {'sha1': el['sha1'], 'found': el['found']} if 'filename' in el and el['filename']: res_d['filename'] = el['filename'] result.append(res_d) search_res = result nbfound = len([x for x in lookup if x['found']]) search_stats['nbfiles'] = l search_stats['pct'] = (nbfound / l) * 100 response['search_res'] = search_res response['search_stats'] = search_stats return response @api_route(r'/content/(?P.+)/', 'content') @api_doc.route('/content/') @api_doc.arg('q', default='dc2830a9e72f23c1dfebef4413003221baa5fb62', argtype=api_doc.argtypes.algo_and_hash, argdoc=_doc_arg_content_id) @api_doc.raises(exc=api_doc.excs.badinput, doc=_doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""known metadata for content identified by q""") def api_content_metadata(request, q): """Get information about a content (AKA "blob") object. 
""" return _api_lookup( service.lookup_content, q, notfound_msg='Content with {} not found.'.format(q), enrich_fn=utils.enrich_content) diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py index b63cbdc5..f9ff4d72 100644 --- a/swh/web/api/views/origin.py +++ b/swh/web/api/views/origin.py @@ -1,179 +1,178 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -from django.http import QueryDict from swh.web.api.utils import reverse from swh.web.api import service, utils from swh.web.api import apidoc as api_doc from swh.web.api.apiurls import api_route from swh.web.api.views import ( _api_lookup, _doc_exc_id_not_found, _doc_header_link, _doc_arg_last_elt, _doc_arg_per_page ) @api_route(r'/origin/(?P[0-9]+)/', 'origin') @api_route(r'/origin/(?P[a-z]+)/url/(?P.+)', 'origin') @api_doc.route('/origin/') @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc='origin identifier (when looking up by ID)') @api_doc.arg('origin_type', default='git', argtype=api_doc.argtypes.str, argdoc='origin type (when looking up by type+URL)') @api_doc.arg('origin_url', default='https://github.com/hylang/hy', argtype=api_doc.argtypes.path, argdoc='origin URL (when looking up by type+URL)') @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""The metadata of the origin corresponding to the given criteria""") def api_origin(request, origin_id=None, origin_type=None, origin_url=None): """Get information about a software origin. Software origins might be looked up by origin type and canonical URL (e.g., "git" + a "git clone" URL), or by their unique (but otherwise meaningless) identifier. """ ori_dict = { 'id': origin_id, 'type': origin_type, 'url': origin_url } ori_dict = {k: v for k, v in ori_dict.items() if ori_dict[k]} if 'id' in ori_dict: error_msg = 'Origin with id %s not found.' % ori_dict['id'] else: error_msg = 'Origin with type %s and URL %s not found' % ( ori_dict['type'], ori_dict['url']) def _enrich_origin(origin): if 'id' in origin: o = origin.copy() o['origin_visits_url'] = \ reverse('origin-visits', kwargs={'origin_id': origin['id']}) return o return origin return _api_lookup( service.lookup_origin, ori_dict, notfound_msg=error_msg, enrich_fn=_enrich_origin) @api_route(r'/origin/(?P[0-9]+)/visits/', 'origin-visits') @api_doc.route('/origin/visits/') @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc='software origin identifier') @api_doc.header('Link', doc=_doc_header_link) @api_doc.param('last_visit', default=None, argtype=api_doc.argtypes.int, doc=_doc_arg_last_elt) @api_doc.param('per_page', default=10, argtype=api_doc.argtypes.int, doc=_doc_arg_per_page) @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.list, retdoc="""a list of dictionaries describing individual visits. For each visit, its identifier, timestamp (as UNIX time), outcome, and visit-specific URL for more information are given.""") def api_origin_visits(request, origin_id): """Get information about all visits of a given software origin. 
""" result = {} per_page = int(utils.get_query_params(request).get('per_page', '10')) last_visit = utils.get_query_params(request).get('last_visit') if last_visit: last_visit = int(last_visit) def _lookup_origin_visits( origin_id, last_visit=last_visit, per_page=per_page): return service.lookup_origin_visits( origin_id, last_visit=last_visit, per_page=per_page) def _enrich_origin_visit(origin_visit): ov = origin_visit.copy() ov['origin_visit_url'] = reverse('origin-visit', kwargs={'origin_id': origin_id, 'visit_id': ov['visit']}) return ov r = _api_lookup( _lookup_origin_visits, origin_id, notfound_msg='No origin {} found'.format(origin_id), enrich_fn=_enrich_origin_visit) if r: l = len(r) if l == per_page: new_last_visit = r[-1]['visit'] - query_params = QueryDict('', mutable=True) + query_params = {} query_params['last_visit'] = new_last_visit if utils.get_query_params(request).get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('origin-visits', - kwargs={'origin_id': origin_id}) + - '?' + query_params.urlencode() + kwargs={'origin_id': origin_id}, + query_params=query_params) } result.update({ 'results': r }) return result @api_route(r'/origin/(?P[0-9]+)/visit/(?P[0-9]+)/', 'origin-visit') @api_doc.route('/origin/visit/') @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc='software origin identifier') @api_doc.arg('visit_id', default=1, argtype=api_doc.argtypes.int, argdoc="""visit identifier, relative to the origin identified by origin_id""") @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""dictionary containing both metadata for the entire visit (e.g., timestamp as UNIX time, visit outcome, etc.) and what was at the software origin during the visit (i.e., a mapping from branches to other archive objects)""") def api_origin_visit(request, origin_id, visit_id): """Get information about a specific visit of a software origin. """ def _enrich_origin_visit(origin_visit): ov = origin_visit.copy() ov['origin_url'] = reverse('origin', kwargs={'origin_id': ov['origin']}) if 'occurrences' in ov: ov['occurrences'] = { k: utils.enrich_object(v) for k, v in ov['occurrences'].items() } return ov return _api_lookup( service.lookup_origin_visit, origin_id, visit_id, notfound_msg=('No visit {} for origin {} found' .format(visit_id, origin_id)), enrich_fn=_enrich_origin_visit) diff --git a/swh/web/api/views/revision.py b/swh/web/api/views/revision.py index 8129dbdd..d1df7f86 100644 --- a/swh/web/api/views/revision.py +++ b/swh/web/api/views/revision.py @@ -1,420 +1,419 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -from django.http import QueryDict from django.http import HttpResponse from swh.web.api.utils import reverse from swh.web.api import service, utils from swh.web.api import apidoc as api_doc from swh.web.api.apiurls import api_route from swh.web.api.views import ( _api_lookup, _doc_exc_id_not_found, _doc_header_link, _doc_arg_per_page, _doc_exc_bad_id, _doc_ret_revision_log, _doc_ret_revision_meta ) def _revision_directory_by(revision, path, request_path, limit=100, with_data=False): """Compute the revision matching criterion's directory or content data. 
Args: revision: dictionary of criterions representing a revision to lookup path: directory's path to lookup request_path: request path which holds the original context to limit: optional query parameter to limit the revisions log (default to 100). For now, note that this limit could impede the transitivity conclusion about sha1_git not being an ancestor of with_data: indicate to retrieve the content's raw data if path resolves to a content. """ def enrich_directory_local(dir, context_url=request_path): return utils.enrich_directory(dir, context_url) rev_id, result = service.lookup_directory_through_revision( revision, path, limit=limit, with_data=with_data) content = result['content'] if result['type'] == 'dir': # dir_entries result['content'] = list(map(enrich_directory_local, content)) else: # content result['content'] = utils.enrich_content(content) return result @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/log/', 'revision-origin-log') @api_route(r'/revision/origin/(?P[0-9]+)/log/', 'revision-origin-log') @api_route(r'/revision/origin/(?P[0-9]+)' r'/ts/(?P.+)/log/', 'revision-origin-log') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)' r'/ts/(?P.+)/log/', 'revision-origin-log') @api_doc.route('/revision/origin/log/') @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc="The revision's SWH origin identifier") @api_doc.arg('branch_name', default='refs/heads/master', argtype=api_doc.argtypes.path, argdoc="""(Optional) The revision's branch name within the origin specified. Defaults to 'refs/heads/master'.""") @api_doc.arg('ts', default='2000-01-17T11:23:54+00:00', argtype=api_doc.argtypes.ts, argdoc="""(Optional) A time or timestamp string to parse""") @api_doc.header('Link', doc=_doc_header_link) @api_doc.param('per_page', default=10, argtype=api_doc.argtypes.int, doc=_doc_arg_per_page) @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc=_doc_ret_revision_log) def api_revision_log_by(request, origin_id, branch_name='refs/heads/master', ts=None): """Show the commit log for a revision, searching for it based on software origin, branch name, and/or visit timestamp. This endpoint behaves like ``/log``, but operates on the revision that has been found at a given software origin, close to a given point in time, pointed by a given branch. """ result = {} per_page = int(utils.get_query_params(request).get('per_page', '10')) if ts: ts = utils.parse_timestamp(ts) def lookup_revision_log_by_with_limit(o_id, br, ts, limit=per_page+1): return service.lookup_revision_log_by(o_id, br, ts, limit) error_msg = 'No revision matching origin %s ' % origin_id error_msg += ', branch name %s' % branch_name error_msg += (' and time stamp %s.' % ts) if ts else '.' rev_get = _api_lookup( lookup_revision_log_by_with_limit, origin_id, branch_name, ts, notfound_msg=error_msg, enrich_fn=utils.enrich_revision) l = len(rev_get) if l == per_page+1: revisions = rev_get[:-1] last_sha1_git = rev_get[-1]['id'] params = {k: v for k, v in {'origin_id': origin_id, 'branch_name': branch_name, 'ts': ts, }.items() if v is not None} - query_params = QueryDict('', mutable=True) + query_params = {} query_params['sha1_git'] = last_sha1_git if utils.get_query_params(request).get('per_page'): query_params['per_page'] = per_page result['headers'] = { - 'link-next': reverse('revision-origin-log', kwargs=params) + - (('?' 
+ query_params.urlencode()) if len(query_params) > 0 else '') + 'link-next': reverse('revision-origin-log', kwargs=params, + query_params=query_params) } else: revisions = rev_get result.update({'results': revisions}) return result @api_route(r'/revision/origin/(?P[0-9]+)/directory/', 'revision-directory') @api_route(r'/revision/origin/(?P[0-9]+)/directory/(?P.+)/', 'revision-directory') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/directory/', 'revision-directory') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/ts/(?P.+)/directory/', 'revision-directory') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/directory/(?P.+)/', 'revision-directory') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/ts/(?P.+)' r'/directory/(?P.+)/', 'revision-directory') @api_doc.route('/revision/origin/directory/', tags=['hidden']) @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc="The revision's origin's SWH identifier") @api_doc.arg('branch_name', default='refs/heads/master', argtype=api_doc.argtypes.path, argdoc="""The optional branch for the given origin (default to master""") @api_doc.arg('ts', default='2000-01-17T11:23:54+00:00', argtype=api_doc.argtypes.ts, argdoc="""Optional timestamp (default to the nearest time crawl of timestamp)""") @api_doc.arg('path', default='Dockerfile', argtype=api_doc.argtypes.path, argdoc='The path to the directory or file to display') @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""The metadata of the revision corresponding to the given criteria""") def api_directory_through_revision_origin(request, origin_id, branch_name="refs/heads/master", ts=None, path=None, with_data=False): """Display directory or content information through a revision identified by origin/branch/timestamp. """ if ts: ts = utils.parse_timestamp(ts) return _revision_directory_by({'origin_id': origin_id, 'branch_name': branch_name, 'ts': ts }, path, request.path, with_data=with_data) @api_route(r'/revision/origin/(?P[0-9]+)/', 'revision-origin') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/', 'revision-origin') @api_route(r'/revision/origin/(?P[0-9]+)' r'/branch/(?P.+)/ts/(?P.+)/', 'revision-origin') @api_route(r'/revision/origin/(?P[0-9]+)/ts/(?P.+)/', 'revision-origin') @api_doc.route('/revision/origin/') @api_doc.arg('origin_id', default=1, argtype=api_doc.argtypes.int, argdoc='software origin identifier') @api_doc.arg('branch_name', default='refs/heads/master', argtype=api_doc.argtypes.path, argdoc="""(optional) fully-qualified branch name, e.g., "refs/heads/master". Defaults to the master branch.""") @api_doc.arg('ts', default=None, argtype=api_doc.argtypes.ts, argdoc="""(optional) timestamp close to which the revision pointed by the given branch should be looked up. Defaults to now.""") @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc=_doc_ret_revision_meta) def api_revision_with_origin(request, origin_id, branch_name="refs/heads/master", ts=None): """Get information about a revision, searching for it based on software origin, branch name, and/or visit timestamp. This endpoint behaves like ``/revision``, but operates on the revision that has been found at a given software origin, close to a given point in time, pointed by a given branch. 
""" ts = utils.parse_timestamp(ts) return _api_lookup( service.lookup_revision_by, origin_id, branch_name, ts, notfound_msg=('Revision with (origin_id: {}, branch_name: {}' ', ts: {}) not found.'.format(origin_id, branch_name, ts)), enrich_fn=utils.enrich_revision) @api_route(r'/revision/(?P[0-9a-f]+)/prev/(?P[0-9a-f/]+)/', 'revision-context') @api_doc.route('/revision/prev/', tags=['hidden']) @api_doc.arg('sha1_git', default='ec72c666fb345ea5f21359b7bc063710ce558e39', argtype=api_doc.argtypes.sha1_git, argdoc="The revision's sha1_git identifier") @api_doc.arg('context', default='6adc4a22f20bbf3bbc754f1ec8c82be5dfb5c71a', argtype=api_doc.argtypes.path, argdoc='The navigation breadcrumbs -- use at your own risk') @api_doc.raises(exc=api_doc.excs.badinput, doc=_doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc='The metadata of the revision identified by sha1_git') def api_revision_with_context(request, sha1_git, context): """Return information about revision with id sha1_git. """ def _enrich_revision(revision, context=context): return utils.enrich_revision(revision, context) return _api_lookup( service.lookup_revision, sha1_git, notfound_msg='Revision with sha1_git %s not found.' % sha1_git, enrich_fn=_enrich_revision) @api_route(r'/revision/(?P[0-9a-f]+)/', 'revision') @api_doc.route('/revision/') @api_doc.arg('sha1_git', default='aafb16d69fd30ff58afdd69036a26047f3aebdc6', argtype=api_doc.argtypes.sha1_git, argdoc="revision identifier") @api_doc.raises(exc=api_doc.excs.badinput, doc=_doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc=_doc_ret_revision_meta) def api_revision(request, sha1_git): """Get information about a revision. Revisions are identified by SHA1 checksums, compatible with Git commit identifiers. See ``revision_identifier`` in our `data model module `_ for details about how they are computed. 
""" return _api_lookup( service.lookup_revision, sha1_git, notfound_msg='Revision with sha1_git {} not found.'.format(sha1_git), enrich_fn=utils.enrich_revision) @api_route(r'/revision/(?P[0-9a-f]+)/raw/', 'revision-raw-message') @api_doc.route('/revision/raw/', tags=['hidden'], handle_response=True) @api_doc.arg('sha1_git', default='ec72c666fb345ea5f21359b7bc063710ce558e39', argtype=api_doc.argtypes.sha1_git, argdoc="The queried revision's sha1_git identifier") @api_doc.raises(exc=api_doc.excs.badinput, doc=_doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.octet_stream, retdoc="""The message of the revision identified by sha1_git as a downloadable octet stream""") def api_revision_raw_message(request, sha1_git): """Return the raw data of the message of revision identified by sha1_git """ raw = service.lookup_revision_message(sha1_git) response = HttpResponse(raw['message'], content_type='application/octet-stream') response['Content-disposition'] = \ 'attachment;filename=rev_%s_raw' % sha1_git return response @api_route(r'/revision/(?P[0-9a-f]+)/directory/', 'revision-directory') @api_route(r'/revision/(?P[0-9a-f]+)/directory/(?P.+)/', 'revision-directory') @api_doc.route('/revision/directory/') @api_doc.arg('sha1_git', default='ec72c666fb345ea5f21359b7bc063710ce558e39', argtype=api_doc.argtypes.sha1_git, argdoc='revision identifier') @api_doc.arg('dir_path', default='Documentation/BUG-HUNTING', argtype=api_doc.argtypes.path, argdoc="""path relative to the root directory of revision identifier by sha1_git""") @api_doc.raises(exc=api_doc.excs.badinput, doc=_doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc="""either a list of directory entries with their metadata, or the metadata of a single directory entry""") def api_revision_directory(request, sha1_git, dir_path=None, with_data=False): """Get information about directory (entry) objects associated to revisions. Each revision is associated to a single "root" directory. This endpoint behaves like ``/directory/``, but operates on the root directory associated to a given revision. """ return _revision_directory_by({'sha1_git': sha1_git}, dir_path, request.path, with_data=with_data) @api_route(r'/revision/(?P[0-9a-f]+)/log/', 'revision-log') @api_route(r'/revision/(?P[0-9a-f]+)' r'/prev/(?P[0-9a-f/]+)/log/', 'revision-log') @api_doc.route('/revision/log/') @api_doc.arg('sha1_git', default='37fc9e08d0c4b71807a4f1ecb06112e78d91c283', argtype=api_doc.argtypes.sha1_git, argdoc='revision identifier') @api_doc.arg('prev_sha1s', default='6adc4a22f20bbf3bbc754f1ec8c82be5dfb5c71a', argtype=api_doc.argtypes.path, argdoc="""(Optional) Navigation breadcrumbs (descendant revisions previously visited). If multiple values, use / as delimiter. """) @api_doc.header('Link', doc=_doc_header_link) @api_doc.param('per_page', default=10, argtype=api_doc.argtypes.int, doc=_doc_arg_per_page) @api_doc.raises(exc=api_doc.excs.badinput, doc=_doc_exc_bad_id) @api_doc.raises(exc=api_doc.excs.notfound, doc=_doc_exc_id_not_found) @api_doc.returns(rettype=api_doc.rettypes.dict, retdoc=_doc_ret_revision_log) def api_revision_log(request, sha1_git, prev_sha1s=None): """Get a list of all revisions heading to a given one, i.e., show the commit log. 
""" result = {} per_page = int(utils.get_query_params(request).get('per_page', '10')) def lookup_revision_log_with_limit(s, limit=per_page+1): return service.lookup_revision_log(s, limit) error_msg = 'Revision with sha1_git %s not found.' % sha1_git rev_get = _api_lookup(lookup_revision_log_with_limit, sha1_git, notfound_msg=error_msg, enrich_fn=utils.enrich_revision) l = len(rev_get) if l == per_page+1: rev_backward = rev_get[:-1] new_last_sha1 = rev_get[-1]['id'] - query_params = QueryDict('', mutable=True) + query_params = {} if utils.get_query_params(request).get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('revision-log', - kwargs={'sha1_git': new_last_sha1}) + - (('?' + query_params.urlencode()) if len(query_params) > 0 else '') + kwargs={'sha1_git': new_last_sha1}, + query_params=query_params) } else: rev_backward = rev_get if not prev_sha1s: # no nav breadcrumbs, so we're done revisions = rev_backward else: rev_forward_ids = prev_sha1s.split('/') rev_forward = _api_lookup( service.lookup_revision_multiple, rev_forward_ids, notfound_msg=error_msg, enrich_fn=utils.enrich_revision) revisions = rev_forward + rev_backward result.update({ 'results': revisions }) return result