diff --git a/swh/web/api/apidoc.py b/swh/web/api/apidoc.py index 2323a0ee..1d9b7913 100644 --- a/swh/web/api/apidoc.py +++ b/swh/web/api/apidoc.py @@ -1,453 +1,437 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information - +from collections import defaultdict import functools from functools import wraps import os import re import textwrap from typing import List import docutils.nodes import docutils.parsers.rst import docutils.utils from rest_framework.decorators import api_view import sentry_sdk from swh.web.common.utils import parse_rst from swh.web.api.apiurls import APIUrls from swh.web.api.apiresponse import make_api_response, error_response class _HTTPDomainDocVisitor(docutils.nodes.NodeVisitor): """ docutils visitor for walking on a parsed rst document containing sphinx httpdomain roles. Its purpose is to extract relevant info regarding swh api endpoints (for instance url arguments) from their docstring written using sphinx httpdomain. 
""" # httpdomain roles we want to parse (based on sphinxcontrib.httpdomain 1.6) parameter_roles = ('param', 'parameter', 'arg', 'argument') request_json_object_roles = ('reqjsonobj', 'reqjson', 'jsonobj', '>json') response_json_array_roles = ('resjsonarr', '>jsonarr') query_parameter_roles = ('queryparameter', 'queryparam', 'qparam', 'query') request_header_roles = ('header', 'resheader', 'responseheader') status_code_roles = ('statuscode', 'status', 'code') - def __init__(self, document, urls, data): + def __init__(self, document, data): super().__init__(document) - self.urls = urls - self.url_idx = 0 self.data = data self.args_set = set() self.params_set = set() self.inputs_set = set() self.returns_set = set() self.status_codes_set = set() self.reqheaders_set = set() self.resheaders_set = set() self.field_list_visited = False self.current_json_obj = None def process_paragraph(self, par): """ Process extracted paragraph text before display. Cleanup document model markups and transform the paragraph into a valid raw rst string (as the apidoc documentation transform rst to html when rendering). 
""" par = par.replace('\n', ' ') # keep emphasized, strong and literal text par = par.replace('', '*') par = par.replace('', '*') par = par.replace('', '**') par = par.replace('', '**') par = par.replace('', '``') par = par.replace('', '``') # remove parsed document markups par = re.sub('<[^<]+?>', '', par) # api urls cleanup to generate valid links afterwards subs_made = 1 while subs_made: (par, subs_made) = re.subn(r'(:http:.*)(\(\w+\))', r'\1', par) subs_made = 1 while subs_made: (par, subs_made) = re.subn(r'(:http:.*)(\[.*\])', r'\1', par) par = par.replace('//', '/') # transform references to api endpoints into valid rst links par = re.sub(':http:get:`([^,]*)`', r'`<\1>`_', par) # transform references to some elements into bold text par = re.sub(':http:header:`(.*)`', r'**\1**', par) par = re.sub(':func:`(.*)`', r'**\1**', par) return par def visit_field_list(self, node): """ Visit parsed rst field lists to extract relevant info regarding api endpoint. """ self.field_list_visited = True for child in node.traverse(): # get the parsed field name if isinstance(child, docutils.nodes.field_name): field_name = child.astext() # parse field text elif isinstance(child, docutils.nodes.paragraph): text = self.process_paragraph(str(child)) field_data = field_name.split(' ') # Parameters if field_data[0] in self.parameter_roles: if field_data[2] not in self.args_set: self.data['args'].append({'name': field_data[2], 'type': field_data[1], 'doc': text}) self.args_set.add(field_data[2]) # Query Parameters if field_data[0] in self.query_parameter_roles: if field_data[2] not in self.params_set: self.data['params'].append({'name': field_data[2], 'type': field_data[1], 'doc': text}) self.params_set.add(field_data[2]) # Request data type if (field_data[0] in self.request_json_array_roles or field_data[0] in self.request_json_object_roles): # array if field_data[0] in self.request_json_array_roles: self.data['input_type'] = 'array' # object else: self.data['input_type'] = 'object' 
# input object field if field_data[2] not in self.inputs_set: self.data['inputs'].append({'name': field_data[2], 'type': field_data[1], 'doc': text}) self.inputs_set.add(field_data[2]) self.current_json_obj = self.data['inputs'][-1] # Response type if (field_data[0] in self.response_json_array_roles or field_data[0] in self.response_json_object_roles): # array if field_data[0] in self.response_json_array_roles: self.data['return_type'] = 'array' # object else: self.data['return_type'] = 'object' # returned object field if field_data[2] not in self.returns_set: self.data['returns'].append({'name': field_data[2], 'type': field_data[1], 'doc': text}) self.returns_set.add(field_data[2]) self.current_json_obj = self.data['returns'][-1] # Status Codes if field_data[0] in self.status_code_roles: if field_data[1] not in self.status_codes_set: self.data['status_codes'].append({'code': field_data[1], # noqa 'doc': text}) self.status_codes_set.add(field_data[1]) # Request Headers if field_data[0] in self.request_header_roles: if field_data[1] not in self.reqheaders_set: self.data['reqheaders'].append({'name': field_data[1], 'doc': text}) self.reqheaders_set.add(field_data[1]) # Response Headers if field_data[0] in self.response_header_roles: if field_data[1] not in self.resheaders_set: resheader = {'name': field_data[1], 'doc': text} self.data['resheaders'].append(resheader) self.resheaders_set.add(field_data[1]) if resheader['name'] == 'Content-Type' and \ resheader['doc'] == 'application/octet-stream': self.data['return_type'] = 'octet stream' def visit_paragraph(self, node): """ Visit relevant paragraphs to parse """ # only parsed top level paragraphs if isinstance(node.parent, docutils.nodes.block_quote): text = self.process_paragraph(str(node)) # endpoint description if (not text.startswith('**') and text not in self.data['description']): self.data['description'] += '\n\n' if self.data['description'] else '' # noqa self.data['description'] += text - # http methods - elif 
text.startswith('**Allowed HTTP Methods:**'): - text = text.replace('**Allowed HTTP Methods:**', '') - http_methods = text.strip().split(',') - http_methods = [m[m.find('`')+1:-1].upper() - for m in http_methods] - self.data['urls'].append({'rule': self.urls[self.url_idx], - 'methods': http_methods}) - self.url_idx += 1 def visit_literal_block(self, node): """ Visit literal blocks """ text = node.astext() # literal block in endpoint description if not self.field_list_visited: self.data['description'] += \ ':\n\n%s\n' % textwrap.indent(text, '\t') # extract example url if ':swh_web_api:' in text: self.data['examples'].append( '/api/1/' + re.sub('.*`(.*)`.*', r'\1', text)) def visit_bullet_list(self, node): # bullet list in endpoint description if not self.field_list_visited: self.data['description'] += '\n\n' for child in node.traverse(): # process list item if isinstance(child, docutils.nodes.paragraph): line_text = self.process_paragraph(str(child)) self.data['description'] += '\t* %s\n' % line_text elif self.current_json_obj: self.current_json_obj['doc'] += '\n\n' for child in node.traverse(): # process list item if isinstance(child, docutils.nodes.paragraph): line_text = self.process_paragraph(str(child)) self.current_json_obj['doc'] += '\t\t* %s\n' % line_text self.current_json_obj = None def visit_warning(self, node): text = self.process_paragraph(str(node)) rst_warning = '\n\n.. 
warning::\n%s\n' % textwrap.indent(text, '\t') if rst_warning not in self.data['description']: self.data['description'] += rst_warning def unknown_visit(self, node): pass - def depart_document(self, node): - """ - End of parsing extra processing - """ - default_methods = ['GET', 'HEAD', 'OPTIONS'] - # ensure urls info is present and set default http methods - if not self.data['urls']: - for url in self.urls: - self.data['urls'].append({'rule': url, - 'methods': default_methods}) - def unknown_departure(self, node): pass def _parse_httpdomain_doc(doc, data): doc_lines = doc.split('\n') doc_lines_filtered = [] - urls = [] + urls = defaultdict(list) + default_http_methods = ['HEAD', 'OPTIONS'] # httpdomain is a sphinx extension that is unknown to docutils but # fortunately we can still parse its directives' content, # so remove lines with httpdomain directives before executing the # rst parser from docutils for doc_line in doc_lines: if '.. http' not in doc_line: doc_lines_filtered.append(doc_line) else: url = doc_line[doc_line.find('/'):] # emphasize url arguments for html rendering url = re.sub(r'\((\w+)\)', r' **\(\1\)** ', url) - urls.append(url) + method = re.search(r'http:(\w+)::', doc_line).group(1) + urls[url].append(method.upper()) + + for url, methods in urls.items(): + data['urls'].append({'rule': url, + 'methods': methods + default_http_methods}) # parse the rst docstring and do not print system messages about # unknown httpdomain roles document = parse_rst('\n'.join(doc_lines_filtered), report_level=5) # remove the system_message nodes from the parsed document for node in document.traverse(docutils.nodes.system_message): node.parent.remove(node) # visit the document nodes to extract relevant endpoint info - visitor = _HTTPDomainDocVisitor(document, urls, data) + visitor = _HTTPDomainDocVisitor(document, data) document.walkabout(visitor) class APIDocException(Exception): """ Custom exception to signal errors in the use of the APIDoc decorators """ def 
api_doc(route: str, noargs: bool = False, need_params: bool = False, tags: List[str] = [], handle_response: bool = False, api_version: str = '1'): """ Decorator for an API endpoint implementation used to generate a dedicated view displaying its HTML documentation. The documentation will be generated from the endpoint docstring based on sphinxcontrib-httpdomain format. Args: route: documentation page's route noargs: set to True if the route has no arguments, and its result should be displayed anytime its documentation is requested. Default to False need_params: specify the route requires query parameters otherwise errors will occur. It enables to avoid displaying the invalid response in its HTML documentation. Default to False. tags: Further information on api endpoints. Two values are possibly expected: * hidden: remove the entry points from the listing * upcoming: display the entry point but it is not followable handle_response: indicate if the decorated function takes care of creating the HTTP response or delegates that task to the apiresponse module api_version: api version string """ tags_set = set(tags) # @api_doc() Decorator call def decorator(f): # if the route is not hidden, add it to the index if 'hidden' not in tags_set: doc_data = get_doc_data(f, route, noargs) doc_desc = doc_data['description'] first_dot_pos = doc_desc.find('.') APIUrls.add_doc_route(route, doc_desc[:first_dot_pos+1], noargs=noargs, api_version=api_version, tags=tags_set) # create a dedicated view to display endpoint HTML doc @api_view(['GET', 'HEAD']) @wraps(f) def doc_view(request): doc_data = get_doc_data(f, route, noargs) return make_api_response(request, None, doc_data) route_name = '%s-doc' % route[1:-1].replace('/', '-') urlpattern = f'^{api_version}{route}doc/$' view_name = 'api-%s-%s' % (api_version, route_name) APIUrls.add_url_pattern(urlpattern, doc_view, view_name) @wraps(f) def documented_view(request, **kwargs): doc_data = get_doc_data(f, route, noargs) try: response = 
f(request, **kwargs) except Exception as exc: sentry_sdk.capture_exception(exc) if request.accepted_media_type == 'text/html' and \ need_params and not request.query_params: response = None else: return error_response(request, exc, doc_data) if handle_response: return response else: return make_api_response(request, response, doc_data) return documented_view return decorator @functools.lru_cache(maxsize=32) def get_doc_data(f, route, noargs): """ Build documentation data for the decorated api endpoint function """ data = { 'description': '', 'response_data': None, 'urls': [], 'args': [], 'params': [], 'input_type': '', 'inputs': [], 'resheaders': [], 'reqheaders': [], 'return_type': '', 'returns': [], 'status_codes': [], 'examples': [], 'route': route, 'noargs': noargs } if not f.__doc__: raise APIDocException('apidoc: expected a docstring' ' for function %s' % (f.__name__,)) # use raw docstring as endpoint documentation if sphinx # httpdomain is not used if '.. http' not in f.__doc__: data['description'] = f.__doc__ # else parse the sphinx httpdomain docstring with docutils # (except when building the swh-web documentation through autodoc # sphinx extension, not needed and raise errors with sphinx >= 1.7) elif 'SWH_WEB_DOC_BUILD' not in os.environ: _parse_httpdomain_doc(f.__doc__, data) # process input/returned object info for nicer html display inputs_list = '' returns_list = '' for inp in data['inputs']: # special case for array of non object type, for instance # :jsonarr string -: an array of string if ret['name'] != '-': returns_list += ('\t* **%s (%s)**: %s\n' % (ret['name'], ret['type'], ret['doc'])) data['inputs_list'] = inputs_list data['returns_list'] = returns_list return data DOC_COMMON_HEADERS = ''' :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request''' DOC_RESHEADER_LINK = ''' :resheader Link: indicates 
that a subsequent result page is available and contains the url pointing to it ''' DEFAULT_SUBSTITUTIONS = { 'common_headers': DOC_COMMON_HEADERS, 'resheader_link': DOC_RESHEADER_LINK, } def format_docstring(**substitutions): def decorator(f): f.__doc__ = f.__doc__.format(**{ **DEFAULT_SUBSTITUTIONS, **substitutions}) return f return decorator diff --git a/swh/web/api/views/content.py b/swh/web/api/views/content.py index 5d956091..b98ba4ac 100644 --- a/swh/web/api/views/content.py +++ b/swh/web/api/views/content.py @@ -1,384 +1,372 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import functools from django.http import HttpResponse from swh.web.common import service from swh.web.common.utils import reverse from swh.web.common.exc import NotFoundExc from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api import utils from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup @api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/filetype/', 'api-1-content-filetype', checksum_args=['q']) @api_doc('/content/filetype/') @format_docstring() def api_content_filetype(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/filetype/ Get information about the detected MIME type of a content object. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is `sha1`. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. 
:>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for getting information about the content :>json string encoding: the detected content encoding :>json string id: the **sha1** identifier of the content :>json string mimetype: the detected MIME type of the content :>json object tool: information about the tool used to detect the content filetype {common_headers} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/filetype/` """ # noqa return api_lookup( service.lookup_content_filetype, q, notfound_msg='No filetype information found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint, request=request) @api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/language/', 'api-1-content-language', checksum_args=['q']) @api_doc('/content/language/') @format_docstring() def api_content_language(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/language/ Get information about the programming language used in a content object. Note: this endpoint currently returns no data. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is ``sha1``. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. 
:>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for getting information about the content :>json string id: the **sha1** identifier of the content :>json string lang: the detected programming language if any :>json object tool: information about the tool used to detect the programming language {common_headers} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/language/` """ # noqa return api_lookup( service.lookup_content_language, q, notfound_msg='No language information found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint, request=request) @api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/license/', 'api-1-content-license', checksum_args=['q']) @api_doc('/content/license/') @format_docstring() def api_content_license(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/license/ Get information about the license of a content object. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is ``sha1``. :param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. 
:>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for getting information about the content :>json string id: the **sha1** identifier of the content :>json array licenses: array of strings containing the detected license names if any :>json object tool: information about the tool used to detect the license {common_headers} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/license/` """ # noqa return api_lookup( service.lookup_content_license, q, notfound_msg='No license information found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint, request=request) @api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/ctags/', 'api-1-content-ctags') @api_doc('/content/ctags/', tags=['hidden']) def api_content_ctags(request, q): """ Get information about all `Ctags `_-style symbols defined in a content object. """ return api_lookup( service.lookup_content_ctags, q, notfound_msg='No ctags symbol found for content {}.'.format(q), enrich_fn=utils.enrich_metadata_endpoint, request=request) @api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/raw/', 'api-1-content-raw', checksum_args=['q']) @api_doc('/content/raw/', handle_response=True) def api_content_raw(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/raw/ Get the raw content of a content object (aka a "blob"), as a byte sequence. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is ``sha1``. 
:param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. :query string filename: if provided, the downloaded content will get that filename :resheader Content-Type: application/octet-stream - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/raw/` """ # noqa def generate(content): yield content['data'] content_raw = service.lookup_content_raw(q) if not content_raw: raise NotFoundExc('Content %s is not found.' % q) filename = request.query_params.get('filename') if not filename: filename = 'content_%s_raw' % q.replace(':', '_') response = HttpResponse(generate(content_raw), content_type='application/octet-stream') response['Content-disposition'] = 'attachment; filename=%s' % filename return response @api_route(r'/content/symbol/(?P.+)/', 'api-1-content-symbol') @api_doc('/content/symbol/', tags=['hidden']) def api_content_symbol(request, q=None): """Search content objects by `Ctags `_-style symbol (e.g., function name, data type, method, ...). 
""" result = {} last_sha1 = request.query_params.get('last_sha1', None) per_page = int(request.query_params.get('per_page', '10')) def lookup_exp(exp, last_sha1=last_sha1, per_page=per_page): exp = list(service.lookup_expression(exp, last_sha1, per_page)) return exp if exp else None symbols = api_lookup( lookup_exp, q, notfound_msg="No indexed raw content match expression '{}'.".format(q), enrich_fn=functools.partial(utils.enrich_content, top_url=True), request=request) if symbols: nb_symbols = len(symbols) if nb_symbols == per_page: query_params = {} new_last_sha1 = symbols[-1]['sha1'] query_params['last_sha1'] = new_last_sha1 if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('api-1-content-symbol', url_args={'q': q}, query_params=query_params, request=request) } result.update({ 'results': symbols }) return result @api_route(r'/content/known/search/', 'api-1-content-known', methods=['POST']) @api_route(r'/content/known/(?P(?!search).*)/', 'api-1-content-known') @api_doc('/content/known/', tags=['hidden']) @format_docstring() def api_check_content_known(request, q=None): """ .. http:get:: /api/1/content/known/(sha1)[,(sha1), ...,(sha1)]/ Check whether some content(s) (aka "blob(s)") is present in the archive based on its **sha1** checksum. :param string sha1: hexadecimal representation of the **sha1** checksum value for the content to check existence. Multiple values can be provided separated by ','. {common_headers} :>json array search_res: array holding the search result for each provided **sha1** :>json object search_stats: some statistics regarding the number of **sha1** provided and the percentage of those found in the archive - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid **sha1** has been provided **Example:** .. 
parsed-literal:: :swh_web_api:`content/known/dc2830a9e72f23c1dfebef4413003221baa5fb62,0c3f19cb47ebfbe643fb19fa94c874d18fa62d12/` """ # noqa response = {'search_res': None, 'search_stats': None} search_stats = {'nbfiles': 0, 'pct': 0} search_res = None queries = [] # GET: Many hash separated values request if q: hashes = q.split(',') for v in hashes: queries.append({'filename': None, 'sha1': v}) # POST: Many hash requests in post form submission elif request.method == 'POST': data = request.data # Remove potential inputs with no associated value for k, v in data.items(): if v is not None: if k == 'q' and len(v) > 0: queries.append({'filename': None, 'sha1': v}) elif v != '': queries.append({'filename': k, 'sha1': v}) if queries: lookup = service.lookup_multiple_hashes(queries) result = [] nb_queries = len(queries) for el in lookup: res_d = {'sha1': el['sha1'], 'found': el['found']} if 'filename' in el and el['filename']: res_d['filename'] = el['filename'] result.append(res_d) search_res = result nbfound = len([x for x in lookup if x['found']]) search_stats['nbfiles'] = nb_queries search_stats['pct'] = (nbfound / nb_queries) * 100 response['search_res'] = search_res response['search_stats'] = search_stats return response @api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/', 'api-1-content', checksum_args=['q']) @api_doc('/content/') @format_docstring() def api_content_metadata(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/ Get information about a content (aka a "blob") object. In the archive, a content object is identified based on checksum values computed using various hashing algorithms. :param string hash_type: optional parameter specifying which hashing algorithm has been used to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not provided, it is assumed that the hashing algorithm used is ``sha1``. 
:param string hash: hexadecimal representation of the checksum value computed with the specified hashing algorithm. {common_headers} :>json object checksums: object holding the computed checksum values for the requested content :>json string data_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/raw/` for downloading the content raw bytes :>json string filetype_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/filetype/` for getting information about the content MIME type :>json string language_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/language/` for getting information about the programming language used in the content :>json number length: length of the content in bytes :>json string license_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/license/` for getting information about the license of the content - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested content can not be found in the archive **Example:** .. 
parsed-literal:: curl -i :swh_web_api:`content/sha1_git:fe95a46679d128ff167b7c55df5d02356c5a1ae1/` """ # noqa return api_lookup( service.lookup_content, q, notfound_msg='Content with {} not found.'.format(q), enrich_fn=functools.partial(utils.enrich_content, query_string=q), request=request) diff --git a/swh/web/api/views/directory.py b/swh/web/api/views/directory.py index 14e87981..01fec6b2 100644 --- a/swh/web/api/views/directory.py +++ b/swh/web/api/views/directory.py @@ -1,78 +1,76 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service from swh.web.api import utils from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup @api_route(r'/directory/(?P[0-9a-f]+)/', 'api-1-directory', checksum_args=['sha1_git']) @api_route(r'/directory/(?P[0-9a-f]+)/(?P.+)/', 'api-1-directory', checksum_args=['sha1_git']) @api_doc('/directory/') @format_docstring() def api_directory(request, sha1_git, path=None): """ .. http:get:: /api/1/directory/(sha1_git)/[(path)/] Get information about directory objects. Directories are identified by **sha1** checksums, compatible with Git directory identifiers. See :func:`swh.model.identifiers.directory_identifier` in our data model module for details about how they are computed. When given only a directory identifier, this endpoint returns information about the directory itself, returning its content (usually a list of directory entries). When given a directory identifier and a path, this endpoint returns information about the directory entry pointed by the relative path, starting path resolution from the given directory. 
:param string sha1_git: hexadecimal representation of the directory **sha1_git** identifier :param string path: optional parameter to get information about the directory entry pointed by that relative path {common_headers} :>jsonarr object checksums: object holding the computed checksum values for a directory entry (only for file entries) :>jsonarr string dir_id: **sha1_git** identifier of the requested directory :>jsonarr number length: length of a directory entry in bytes (only for file entries) for getting information about the content MIME type :>jsonarr string name: the directory entry name :>jsonarr number perms: permissions for the directory entry :>jsonarr string target: **sha1_git** identifier of the directory entry :>jsonarr string target_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` or :http:get:`/api/1/directory/(sha1_git)/[(path)/]` depending on the directory entry type :>jsonarr string type: the type of the directory entry, can be either ``dir``, ``file`` or ``rev`` - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided :statuscode 404: requested directory can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`directory/977fc4b98c0e85816348cebd3b12026407c368b6/` """ # noqa if path: error_msg_path = ('Entry with path %s relative to directory ' 'with sha1_git %s not found.') % (path, sha1_git) return api_lookup( service.lookup_directory_with_path, sha1_git, path, notfound_msg=error_msg_path, enrich_fn=utils.enrich_directory, request=request) else: error_msg_nopath = 'Directory with sha1_git %s not found.' 
% sha1_git return api_lookup( service.lookup_directory, sha1_git, notfound_msg=error_msg_nopath, enrich_fn=utils.enrich_directory, request=request) diff --git a/swh/web/api/views/identifiers.py b/swh/web/api/views/identifiers.py index 1de79301..a29c12c5 100644 --- a/swh/web/api/views/identifiers.py +++ b/swh/web/api/views/identifiers.py @@ -1,107 +1,105 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service, utils from swh.web.common.utils import ( resolve_swh_persistent_id, get_persistent_identifier ) from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.common.exc import LargePayloadExc @api_route(r'/resolve/(?P.*)/', 'api-1-resolve-swh-pid') @api_doc('/resolve/') @format_docstring() def api_resolve_swh_pid(request, swh_id): """ .. http:get:: /api/1/resolve/(swh_id)/ Resolve a Software Heritage persistent identifier. Try to resolve a provided `persistent identifier `_ into an url for browsing the pointed archive object. If the provided identifier is valid, the existence of the object in the archive will also be checked. 
:param string swh_id: a Software Heritage persistent identifier :>json string browse_url: the url for browsing the pointed object :>json object metadata: object holding optional parts of the persistent identifier :>json string namespace: the persistent identifier namespace :>json string object_id: the hash identifier of the pointed object :>json string object_type: the type of the pointed object :>json number scheme_version: the scheme version of the persistent identifier {common_headers} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid persistent identifier has been provided :statuscode 404: the pointed object does not exist in the archive **Example:** .. parsed-literal:: :swh_web_api:`resolve/swh:1:rev:96db9023b881d7cd9f379b0c154650d6c108e9a3;origin=https://github.com/openssl/openssl/` """ # noqa # try to resolve the provided pid swh_id_resolved = resolve_swh_persistent_id(swh_id) # id is well-formed, now check that the pointed # object is present in the archive, NotFoundExc # will be raised otherwise swh_id_parsed = swh_id_resolved['swh_id_parsed'] object_type = swh_id_parsed.object_type object_id = swh_id_parsed.object_id service.lookup_object(object_type, object_id) # id is well-formed and the pointed object exists swh_id_data = swh_id_parsed._asdict() swh_id_data['browse_url'] = request.build_absolute_uri( swh_id_resolved['browse_url']) return swh_id_data @api_route(r'/known/', 'api-1-swh-pid-known', methods=['POST']) @api_doc('/known/', tags=['hidden']) @format_docstring() def api_swh_pid_known(request): """ .. http:post:: /api/1/known/ Check if a list of Software Heritage persistent identifier is present in the archive depending on their id (sha1_git). Returns: A dictionary with: keys(str): Persistent identifier values(dict): A dictionary containing the key 'known'. 
(true if the pid is present, False otherwise) """ limit = 1000 if len(request.data) > limit: raise LargePayloadExc('The maximum number of PIDs this endpoint can ' 'receive is %s' % limit) persistent_ids = [get_persistent_identifier(pid) for pid in request.data] response = {str(pid): {'known': False} for pid in persistent_ids} # group pids by their type pids_by_type = utils.group_swh_persistent_identifiers(persistent_ids) # search for hashes not present in the storage missing_hashes = service.lookup_missing_hashes(pids_by_type) for pid in persistent_ids: if pid.object_id not in missing_hashes: response[str(pid)]['known'] = True return response diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py index 5bec10a4..c6729eb4 100644 --- a/swh/web/api/views/origin.py +++ b/swh/web/api/views/origin.py @@ -1,483 +1,459 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from distutils.util import strtobool from functools import partial from swh.web.common import service from swh.web.common.exc import BadInputExc from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import reverse from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.utils import enrich_origin, enrich_origin_visit from swh.web.api.views.utils import api_lookup DOC_RETURN_ORIGIN = ''' :>json string origin_visits_url: link to in order to get information about the visits for that origin :>json string url: the origin canonical url ''' DOC_RETURN_ORIGIN_ARRAY = \ DOC_RETURN_ORIGIN.replace(':>json', ':>jsonarr') DOC_RETURN_ORIGIN_VISIT = ''' :>json string date: ISO representation of the visit date (in UTC) :>json str origin: the origin canonical url :>json string origin_url: link to get information 
about the origin :>jsonarr string snapshot: the snapshot identifier of the visit (may be null if status is not **full**). :>jsonarr string snapshot_url: link to :http:get:`/api/1/snapshot/(snapshot_id)/` in order to get information about the snapshot of the visit (may be null if status is not **full**). :>json string status: status of the visit (either **full**, **partial** or **ongoing**) :>json number visit: the unique identifier of the visit ''' DOC_RETURN_ORIGIN_VISIT_ARRAY = \ DOC_RETURN_ORIGIN_VISIT.replace(':>json', ':>jsonarr') DOC_RETURN_ORIGIN_VISIT_ARRAY += ''' :>jsonarr number id: the unique identifier of the origin :>jsonarr string origin_visit_url: link to :http:get:`/api/1/origin/(origin_url)/visit/(visit_id)/` in order to get information about the visit ''' @api_route(r'/origins/', 'api-1-origins') @api_doc('/origins/', noargs=True) @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY) def api_origins(request): """ .. http:get:: /api/1/origins/ Get list of archived software origins. .. warning:: This endpoint used to provide an `origin_from` query parameter, and guarantee an order on results. This is no longer true, and only the Link header should be used for paginating through results. :query int origin_count: The maximum number of origins to return (default to 100, can not exceed 10000) {return_origin_array} {common_headers} {resheader_link} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error **Example:** .. 
parsed-literal:: :swh_web_api:`origins?origin_count=500` """ origin_from = int(request.query_params.get('origin_from', '1')) origin_count = int(request.query_params.get('origin_count', '100')) origin_count = min(origin_count, 10000) results = api_lookup( service.lookup_origins, origin_from, origin_count+1, enrich_fn=enrich_origin, request=request) response = {'results': results, 'headers': {}} if len(results) > origin_count: origin_from = results.pop()['id'] response['headers']['link-next'] = reverse( 'api-1-origins', query_params={'origin_from': origin_from, 'origin_count': origin_count}, request=request) for result in results: if 'id' in result: del result['id'] return response @api_route(r'/origin/(?P.+)/get/', 'api-1-origin') @api_doc('/origin/') @format_docstring(return_origin=DOC_RETURN_ORIGIN) def api_origin(request, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/get/ Get information about a software origin. :param string origin_url: the origin url {return_origin} {common_headers} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error :statuscode 404: requested origin can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`origin/https://github.com/python/cpython/get/` """ ori_dict = { 'url': origin_url } error_msg = 'Origin with url %s not found.' % ori_dict['url'] return api_lookup( service.lookup_origin, ori_dict, notfound_msg=error_msg, enrich_fn=enrich_origin, request=request) @api_route(r'/origin/search/(?P.+)/', 'api-1-origin-search', throttle_scope='swh_api_origin_search') @api_doc('/origin/search/') @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY) def api_origin_search(request, url_pattern): """ .. http:get:: /api/1/origin/search/(url_pattern)/ Search for software origins whose urls contain a provided string pattern or match a provided regular expression. The search is performed in a case insensitive way. .. 
warning:: This endpoint used to provide an `offset` query parameter, and guarantee an order on results. This is no longer true, and only the Link header should be used for paginating through results. :param string url_pattern: a string pattern :query int limit: the maximum number of found origins to return (bounded to 1000) :query boolean with_visit: if true, only return origins with at least one visit by Software heritage {return_origin_array} {common_headers} {resheader_link} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error **Example:** .. parsed-literal:: :swh_web_api:`origin/search/python/?limit=2` """ result = {} limit = min(int(request.query_params.get('limit', '70')), 1000) page_token = request.query_params.get('page_token') with_visit = request.query_params.get('with_visit', 'false') (results, page_token) = api_lookup( service.search_origin, url_pattern, limit, bool(strtobool(with_visit)), page_token, enrich_fn=enrich_origin, request=request) if page_token is not None: query_params = {} query_params['limit'] = limit query_params['page_token'] = page_token result['headers'] = { 'link-next': reverse('api-1-origin-search', url_args={'url_pattern': url_pattern}, query_params=query_params, request=request) } result.update({ 'results': results }) return result @api_route(r'/origin/metadata-search/', 'api-1-origin-metadata-search') @api_doc('/origin/metadata-search/', noargs=True, need_params=True) @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY) def api_origin_metadata_search(request): """ .. http:get:: /api/1/origin/metadata-search/ Search for software origins whose metadata (expressed as a JSON-LD/CodeMeta dictionary) match the provided criteria. For now, only full-text search on this dictionary is supported. :query str fulltext: a string that will be matched against origin metadata; results are ranked and ordered starting with the best ones. 
:query int limit: the maximum number of found origins to return (bounded to 100) {return_origin_array} {common_headers} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error **Example:** .. parsed-literal:: :swh_web_api:`origin/metadata-search/?limit=2&fulltext=Jane%20Doe` """ fulltext = request.query_params.get('fulltext', None) limit = min(int(request.query_params.get('limit', '70')), 100) if not fulltext: content = '"fulltext" must be provided and non-empty.' raise BadInputExc(content) results = api_lookup(service.search_origin_metadata, fulltext, limit, request=request) return { 'results': results, } @api_route(r'/origin/(?P.*)/visits/', 'api-1-origin-visits') @api_doc('/origin/visits/') @format_docstring( return_origin_visit_array=DOC_RETURN_ORIGIN_VISIT_ARRAY) def api_origin_visits(request, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/visits/ Get information about all visits of a software origin. Visits are returned sorted in descending order according to their date. :param str origin_url: a software origin URL :query int per_page: specify the number of visits to list, for pagination purposes :query int last_visit: visit to start listing from, for pagination purposes {common_headers} {resheader_link} {return_origin_visit_array} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error :statuscode 404: requested origin can not be found in the archive **Example:** .. 
parsed-literal:: :swh_web_api:`origin/https://github.com/hylang/hy/visits/` """ result = {} origin_query = {'url': origin_url} notfound_msg = 'No origin {} found'.format(origin_url) url_args_next = {'origin_url': origin_url} per_page = int(request.query_params.get('per_page', '10')) last_visit = request.query_params.get('last_visit') if last_visit: last_visit = int(last_visit) def _lookup_origin_visits( origin_query, last_visit=last_visit, per_page=per_page): all_visits = get_origin_visits(origin_query) all_visits.reverse() visits = [] if not last_visit: visits = all_visits[:per_page] else: for i, v in enumerate(all_visits): if v['visit'] == last_visit: visits = all_visits[i+1:i+1+per_page] break for v in visits: yield v results = api_lookup(_lookup_origin_visits, origin_query, notfound_msg=notfound_msg, enrich_fn=partial(enrich_origin_visit, with_origin_link=False, with_origin_visit_link=True), request=request) if results: nb_results = len(results) if nb_results == per_page: new_last_visit = results[-1]['visit'] query_params = {} query_params['last_visit'] = new_last_visit if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('api-1-origin-visits', url_args=url_args_next, query_params=query_params, request=request) } result.update({ 'results': results }) return result @api_route(r'/origin/(?P.*)/visit/latest/', 'api-1-origin-visit-latest', throttle_scope='swh_api_origin_visit_latest') @api_doc('/origin/visit/latest/') @format_docstring(return_origin_visit=DOC_RETURN_ORIGIN_VISIT) def api_origin_visit_latest(request, origin_url=None): """ .. http:get:: /api/1/origin/(origin_url)/visit/latest/ Get information about the latest visit of a software origin. 
:param str origin_url: a software origin URL :query boolean require_snapshot: if true, only return a visit with a snapshot {common_headers} {return_origin_visit} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error :statuscode 404: requested origin or visit can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`origin/https://github.com/hylang/hy/visit/latest/` """ require_snapshot = request.query_params.get('require_snapshot', 'false') return api_lookup( service.lookup_origin_visit_latest, origin_url, bool(strtobool(require_snapshot)), notfound_msg=('No visit for origin {} found' .format(origin_url)), enrich_fn=partial(enrich_origin_visit, with_origin_link=True, with_origin_visit_link=False), request=request) @api_route(r'/origin/(?P.*)/visit/(?P[0-9]+)/', 'api-1-origin-visit') @api_doc('/origin/visit/') @format_docstring(return_origin_visit=DOC_RETURN_ORIGIN_VISIT) def api_origin_visit(request, visit_id, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/visit/(visit_id)/ Get information about a specific visit of a software origin. :param str origin_url: a software origin URL :param int visit_id: a visit identifier {common_headers} {return_origin_visit} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error :statuscode 404: requested origin or visit can not be found in the archive **Example:** .. 
parsed-literal:: :swh_web_api:`origin/https://github.com/hylang/hy/visit/1/` """ return api_lookup( service.lookup_origin_visit, origin_url, int(visit_id), notfound_msg=('No visit {} for origin {} found' .format(visit_id, origin_url)), enrich_fn=partial(enrich_origin_visit, with_origin_link=True, with_origin_visit_link=False), request=request) @api_route(r'/origin/(?P.+)' '/intrinsic-metadata', 'api-origin-intrinsic-metadata') @api_doc('/origin/intrinsic-metadata/') @format_docstring() def api_origin_intrinsic_metadata(request, origin_url): """ .. http:get:: /api/1/origin/(origin_url)/intrinsic-metadata Get intrinsic metadata of a software origin (as a JSON-LD/CodeMeta dictionary). :param string origin_url: the origin url :>json string ???: intrinsic metadata field of the origin {common_headers} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error :statuscode 404: requested origin can not be found in the archive **Example:** .. 
parsed-literal:: :swh_web_api:`origin/https://github.com/python/cpython/intrinsic-metadata` """ # noqa ori_dict = { 'url': origin_url } error_msg = 'Origin with url %s not found' % ori_dict['url'] return api_lookup( service.lookup_origin_intrinsic_metadata, ori_dict, notfound_msg=error_msg, enrich_fn=enrich_origin, request=request) diff --git a/swh/web/api/views/origin_save.py b/swh/web/api/views/origin_save.py index be6240ed..0696169a 100644 --- a/swh/web/api/views/origin_save.py +++ b/swh/web/api/views/origin_save.py @@ -1,88 +1,85 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.views.decorators.cache import never_cache from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.common.origin_save import ( create_save_origin_request, get_save_origin_requests ) @api_route(r'/origin/save/(?P.+)/url/(?P.+)/', 'api-1-save-origin', methods=['GET', 'POST'], throttle_scope='swh_save_origin') @never_cache @api_doc('/origin/save/') @format_docstring() def api_save_origin(request, visit_type, origin_url): """ .. http:get:: /api/1/origin/save/(visit_type)/url/(origin_url)/ .. http:post:: /api/1/origin/save/(visit_type)/url/(origin_url)/ Request the saving of a software origin into the archive or check the status of previously created save requests. That endpoint enables to create a saving task for a software origin through a POST request. 
Depending on the provided origin url, the save request can either be: * immediately **accepted**, for well-known code hosting providers such as GitHub or GitLab * **rejected**, in case the url is blacklisted by Software Heritage * **put in pending state** until a manual check is done in order to determine if it can be loaded or not Once a saving request has been accepted, its associated saving task status can then be checked through a GET request on the same url. Returned status can either be: * **not created**: no saving task has been created * **not yet scheduled**: saving task has been created but its execution has not yet been scheduled * **scheduled**: the task execution has been scheduled * **succeed**: the saving task has been successfully executed * **failed**: the saving task has been executed but it failed When issuing a POST request an object will be returned while a GET request will return an array of objects (as multiple save requests might have been submitted for the same origin).
:param string visit_type: the type of visit to perform (currently the supported types are ``git``, ``hg`` and ``svn``) :param string origin_url: the url of the origin to save {common_headers} :>json string origin_url: the url of the origin to save :>json string visit_type: the type of visit to perform :>json string save_request_date: the date (in iso format) the save request was issued :>json string save_request_status: the status of the save request, either **accepted**, **rejected** or **pending** :>json string save_task_status: the status of the origin saving task, either **not created**, **not yet scheduled**, **scheduled**, **succeed** or **failed** - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`post`, - :http:method:`head`, :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid visit type or origin url has been provided :statuscode 403: the provided origin url is blacklisted :statuscode 404: no save requests have been found for a given origin """ if request.method == 'POST': sor = create_save_origin_request(visit_type, origin_url) del sor['id'] else: sor = get_save_origin_requests(visit_type, origin_url) for s in sor: del s['id'] return sor diff --git a/swh/web/api/views/release.py b/swh/web/api/views/release.py index 80b0a1f1..f4e827c7 100644 --- a/swh/web/api/views/release.py +++ b/swh/web/api/views/release.py @@ -1,60 +1,57 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service from swh.web.api import utils from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup @api_route(r'/release/(?P[0-9a-f]+)/', 'api-1-release', checksum_args=['sha1_git']) @api_doc('/release/') @format_docstring() def 
api_release(request, sha1_git): """ .. http:get:: /api/1/release/(sha1_git)/ Get information about a release in the archive. Releases are identified by **sha1** checksums, compatible with Git tag identifiers. See :func:`swh.model.identifiers.release_identifier` in our data model module for details about how they are computed. :param string sha1_git: hexadecimal representation of the release **sha1_git** identifier {common_headers} :>json object author: information about the author of the release :>json string date: ISO representation of the release date (in UTC) :>json string id: the release unique identifier :>json string message: the message associated to the release :>json string name: the name of the release :>json string target: the target identifier of the release :>json string target_type: the type of the target, can be either **release**, **revision**, **content**, **directory** :>json string target_url: a link to the adequate api url based on the target type - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested release can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`release/208f61cc7a5dbc9879ae6e5c2f95891e270f09ef/` """ error_msg = 'Release with sha1_git %s not found.' 
% sha1_git return api_lookup( service.lookup_release, sha1_git, notfound_msg=error_msg, enrich_fn=utils.enrich_release, request=request) diff --git a/swh/web/api/views/revision.py b/swh/web/api/views/revision.py index 363a3e66..b1047e9a 100644 --- a/swh/web/api/views/revision.py +++ b/swh/web/api/views/revision.py @@ -1,260 +1,251 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.http import HttpResponse from swh.web.common import service from swh.web.common.utils import reverse from swh.web.api import utils from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup DOC_RETURN_REVISION = ''' :>json object author: information about the author of the revision :>json object committer: information about the committer of the revision :>json string committer_date: ISO representation of the commit date (in UTC) :>json string date: ISO representation of the revision date (in UTC) :>json string directory: the unique identifier that revision points to :>json string directory_url: link to :http:get:`/api/1/directory/(sha1_git)/[(path)/]` to get information about the directory associated to the revision :>json string id: the revision unique identifier :>json boolean merge: whether or not the revision corresponds to a merge commit :>json string message: the message associated to the revision :>json array parents: the parents of the revision, i.e. 
the previous revisions that head directly to it, each entry of that array contains an unique parent revision identifier but also a link to :http:get:`/api/1/revision/(sha1_git)/` to get more information about it :>json string type: the type of the revision ''' # noqa DOC_RETURN_REVISION_ARRAY = \ DOC_RETURN_REVISION.replace(':>json', ':>jsonarr') def _revision_directory_by(revision, path, request_path, limit=100, with_data=False): """ Compute the revision matching criterion's directory or content data. Args: revision: dictionary of criterions representing a revision to lookup path: directory's path to lookup request_path: request path which holds the original context to limit: optional query parameter to limit the revisions log (default to 100). For now, note that this limit could impede the transitivity conclusion about sha1_git not being an ancestor of with_data: indicate to retrieve the content's raw data if path resolves to a content. """ def enrich_directory_local(dir, context_url=request_path): return utils.enrich_directory(dir, context_url) rev_id, result = service.lookup_directory_through_revision( revision, path, limit=limit, with_data=with_data) content = result['content'] if result['type'] == 'dir': # dir_entries result['content'] = list(map(enrich_directory_local, content)) elif result['type'] == 'file': # content result['content'] = utils.enrich_content(content) elif result['type'] == 'rev': # revision result['content'] = utils.enrich_revision(content) return result @api_route(r'/revision/(?P[0-9a-f]+)/', 'api-1-revision', checksum_args=['sha1_git']) @api_doc('/revision/') @format_docstring(return_revision=DOC_RETURN_REVISION) def api_revision(request, sha1_git): """ .. http:get:: /api/1/revision/(sha1_git)/ Get information about a revision in the archive. Revisions are identified by **sha1** checksums, compatible with Git commit identifiers. 
See :func:`swh.model.identifiers.revision_identifier` in our data model module for details about how they are computed. :param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier {common_headers} {return_revision} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/aafb16d69fd30ff58afdd69036a26047f3aebdc6/` """ # noqa return api_lookup( service.lookup_revision, sha1_git, notfound_msg='Revision with sha1_git {} not found.'.format(sha1_git), enrich_fn=utils.enrich_revision, request=request) @api_route(r'/revision/(?P[0-9a-f]+)/raw/', 'api-1-revision-raw-message', checksum_args=['sha1_git']) @api_doc('/revision/raw/', tags=['hidden'], handle_response=True) def api_revision_raw_message(request, sha1_git): """Return the raw data of the message of revision identified by sha1_git """ raw = service.lookup_revision_message(sha1_git) response = HttpResponse(raw['message'], content_type='application/octet-stream') response['Content-disposition'] = \ 'attachment;filename=rev_%s_raw' % sha1_git return response @api_route(r'/revision/(?P[0-9a-f]+)/directory/', 'api-1-revision-directory', checksum_args=['sha1_git']) @api_route(r'/revision/(?P[0-9a-f]+)/directory/(?P.+)/', 'api-1-revision-directory', checksum_args=['sha1_git']) @api_doc('/revision/directory/') @format_docstring() def api_revision_directory(request, sha1_git, dir_path=None, with_data=False): """ .. http:get:: /api/1/revision/(sha1_git)/directory/[(path)/] Get information about directory (entry) objects associated to revisions. Each revision is associated to a single "root" directory. This endpoint behaves like :http:get:`/api/1/directory/(sha1_git)/[(path)/]`, but operates on the root directory associated to a given revision. 
:param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier :param string path: optional parameter to get information about the directory entry pointed by that relative path {common_headers} :>json array content: directory entries as returned by :http:get:`/api/1/directory/(sha1_git)/[(path)/]` :>json string path: path of directory from the revision root one :>json string revision: the unique revision identifier :>json string type: the type of the directory - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/f1b94134a4b879bc55c3dacdb496690c8ebdc03f/directory/` """ # noqa return _revision_directory_by({'sha1_git': sha1_git}, dir_path, request.path, with_data=with_data) @api_route(r'/revision/(?P[0-9a-f]+)/log/', 'api-1-revision-log', checksum_args=['sha1_git']) @api_route(r'/revision/(?P[0-9a-f]+)' r'/prev/(?P[0-9a-f]*/*)/log/', 'api-1-revision-log', checksum_args=['sha1_git', 'prev_sha1s']) @api_doc('/revision/log/') @format_docstring(return_revision_array=DOC_RETURN_REVISION_ARRAY) def api_revision_log(request, sha1_git, prev_sha1s=None): """ .. http:get:: /api/1/revision/(sha1_git)[/prev/(prev_sha1s)]/log/ Get a list of all revisions heading to a given one, in other words show the commit log. :param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier :param string prev_sha1s: optional parameter representing the navigation breadcrumbs (descendant revisions previously visited). If multiple values, use / as delimiter. If provided, revisions information will be added at the beginning of the returned list. 
:query int per_page: number of elements in the returned list, for pagination purpose {common_headers} {resheader_link} {return_revision_array} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive **Example:** .. parsed-literal:: :swh_web_api:`revision/e1a315fa3fa734e2a6154ed7b5b9ae0eb8987aad/log/` """ # noqa result = {} per_page = int(request.query_params.get('per_page', '10')) def lookup_revision_log_with_limit(s, limit=per_page+1): return service.lookup_revision_log(s, limit) error_msg = 'Revision with sha1_git %s not found.' % sha1_git rev_get = api_lookup(lookup_revision_log_with_limit, sha1_git, notfound_msg=error_msg, enrich_fn=utils.enrich_revision, request=request) nb_rev = len(rev_get) if nb_rev == per_page+1: rev_backward = rev_get[:-1] new_last_sha1 = rev_get[-1]['id'] query_params = {} if request.query_params.get('per_page'): query_params['per_page'] = per_page result['headers'] = { 'link-next': reverse('api-1-revision-log', url_args={'sha1_git': new_last_sha1}, query_params=query_params, request=request) } else: rev_backward = rev_get if not prev_sha1s: # no nav breadcrumbs, so we're done revisions = rev_backward else: rev_forward_ids = prev_sha1s.split('/') rev_forward = api_lookup( service.lookup_revision_multiple, rev_forward_ids, notfound_msg=error_msg, enrich_fn=utils.enrich_revision, request=request) revisions = rev_forward + rev_backward result.update({ 'results': revisions }) return result diff --git a/swh/web/api/views/snapshot.py b/swh/web/api/views/snapshot.py index 6027846b..bd6ff0ad 100644 --- a/swh/web/api/views/snapshot.py +++ b/swh/web/api/views/snapshot.py @@ -1,97 +1,94 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public 
License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service from swh.web.common.utils import reverse from swh.web.config import get_config from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.utils import enrich_snapshot from swh.web.api.views.utils import api_lookup @api_route(r'/snapshot/(?P[0-9a-f]+)/', 'api-1-snapshot', checksum_args=['snapshot_id']) @api_doc('/snapshot/') @format_docstring() def api_snapshot(request, snapshot_id): """ .. http:get:: /api/1/snapshot/(snapshot_id)/ Get information about a snapshot in the archive. A snapshot is a set of named branches, which are pointers to objects at any level of the Software Heritage DAG. It represents a full picture of an origin at a given time. As well as pointing to other objects in the Software Heritage DAG, branches can also be aliases, in which case their target is the name of another branch in the same snapshot, or dangling, in which case the target is unknown. A snapshot identifier is a salted sha1. See :func:`swh.model.identifiers.snapshot_identifier` in our data model module for details about how they are computed. 
:param sha1 snapshot_id: a snapshot identifier :query str branches_from: optional parameter used to skip branches whose name is lesser than it before returning them :query int branches_count: optional parameter used to limit the number of returned branches (default to 1000) :query str target_types: optional comma-separated list parameter used to filter the target types of branch to return (possible values that can be contained in that list are ``content``, ``directory``, ``revision``, ``release``, ``snapshot`` or ``alias``) {common_headers} {resheader_link} :>json object branches: object containing all branches associated to the snapshot; for each of them the associated target type and id are given, along with a link to get information about that target :>json string id: the unique identifier of the snapshot - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid snapshot identifier has been provided :statuscode 404: requested snapshot can not be found in the archive **Example:** ..
parsed-literal:: :swh_web_api:`snapshot/6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a/` """ snapshot_content_max_size = get_config()['snapshot_content_max_size'] branches_from = request.GET.get('branches_from', '') branches_count = int(request.GET.get('branches_count', snapshot_content_max_size)) target_types = request.GET.get('target_types', None) target_types = target_types.split(',') if target_types else None results = api_lookup( service.lookup_snapshot, snapshot_id, branches_from, branches_count, target_types, notfound_msg='Snapshot with id {} not found.'.format(snapshot_id), enrich_fn=enrich_snapshot, request=request) response = {'results': results, 'headers': {}} if results['next_branch'] is not None: response['headers']['link-next'] = reverse( 'api-1-snapshot', url_args={'snapshot_id': snapshot_id}, query_params={'branches_from': results['next_branch'], 'branches_count': branches_count, 'target_types': target_types}, request=request) return response diff --git a/swh/web/api/views/stat.py b/swh/web/api/views/stat.py index edddcc96..12477717 100644 --- a/swh/web/api/views/stat.py +++ b/swh/web/api/views/stat.py @@ -1,53 +1,50 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route @api_route(r'/stat/counters/', 'api-1-stat-counters') @api_doc('/stat/counters/', noargs=True) @format_docstring() def api_stats(request): """ .. http:get:: /api/1/stat/counters/ Get statistics about the content of the archive. 
:>json number content: current number of content objects (aka files) in the archive :>json number directory: current number of directory objects in the archive :>json number origin: current number of software origins (an origin is a "place" where code source can be found, e.g. a git repository, a tarball, ...) in the archive :>json number origin_visit: current number of visits on software origins to fill the archive :>json number person: current number of persons (code source authors or committers) in the archive :>json number release: current number of releases objects in the archive :>json number revision: current number of revision objects (aka commits) in the archive :>json number skipped_content: current number of content objects (aka files) which where not inserted in the archive :>json number snapshot: current number of snapshot objects (aka set of named branches) in the archive {common_headers} - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error **Example:** .. parsed-literal:: :swh_web_api:`stat/counters/` """ return service.stat_counters() diff --git a/swh/web/api/views/vault.py b/swh/web/api/views/vault.py index 6e33ac92..7e5e544e 100644 --- a/swh/web/api/views/vault.py +++ b/swh/web/api/views/vault.py @@ -1,252 +1,240 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.http import HttpResponse from django.shortcuts import redirect from django.views.decorators.cache import never_cache from swh.model import hashutil from swh.web.common import service, query from swh.web.common.utils import reverse from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.api.views.utils import api_lookup # XXX: a bit spaghetti. 
Would be better with class-based views. def _dispatch_cook_progress(request, obj_type, obj_id): hex_id = hashutil.hash_to_hex(obj_id) object_name = obj_type.split('_')[0].title() if request.method == 'GET': return api_lookup( service.vault_progress, obj_type, obj_id, notfound_msg=("{} '{}' was never requested." .format(object_name, hex_id)), request=request) elif request.method == 'POST': email = request.POST.get('email', request.GET.get('email', None)) return api_lookup( service.vault_cook, obj_type, obj_id, email, notfound_msg=("{} '{}' not found." .format(object_name, hex_id)), request=request) @api_route(r'/vault/directory/(?P[0-9a-f]+)/', 'api-1-vault-cook-directory', methods=['GET', 'POST'], checksum_args=['dir_id'], throttle_scope='swh_vault_cooking') @never_cache @api_doc('/vault/directory/') @format_docstring() def api_vault_cook_directory(request, dir_id): """ .. http:get:: /api/1/vault/directory/(dir_id)/ .. http:post:: /api/1/vault/directory/(dir_id)/ Request the cooking of an archive for a directory or check its cooking status. That endpoint enables to create a vault cooking task for a directory through a POST request or check the status of a previously created one through a GET request. Once the cooking task has been executed, the resulting archive can be downloaded using the dedicated endpoint :http:get:`/api/1/vault/directory/(dir_id)/raw/`. 
Then to extract the cooked directory in the current one, use:: $ tar xvf path/to/directory.tar.gz :param string dir_id: the directory's sha1 identifier :query string email: e-mail to notify when the archive is ready {common_headers} :>json string fetch_url: the url from which to download the archive once it has been cooked (see :http:get:`/api/1/vault/directory/(dir_id)/raw/`) :>json string obj_type: the type of object to cook (directory or revision) :>json string progress_message: message describing the cooking task progress :>json number id: the cooking task id :>json string status: the cooking task status (either **new**, **pending**, **done** or **failed**) :>json string obj_id: the identifier of the object to cook - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`post`, - :http:method:`head`, :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid directory identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( dir_id, ['sha1'], 'Only sha1_git is supported.') res = _dispatch_cook_progress(request, 'directory', obj_id) res['fetch_url'] = reverse('api-1-vault-fetch-directory', url_args={'dir_id': dir_id}) return res @api_route(r'/vault/directory/(?P[0-9a-f]+)/raw/', 'api-1-vault-fetch-directory', checksum_args=['dir_id']) @api_doc('/vault/directory/raw/', handle_response=True) def api_vault_fetch_directory(request, dir_id): """ .. http:get:: /api/1/vault/directory/(dir_id)/raw/ Fetch the cooked archive for a directory. See :http:get:`/api/1/vault/directory/(dir_id)/` to get more details on directory cooking. 
:param string dir_id: the directory's sha1 identifier :resheader Content-Type: application/octet-stream - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid directory identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( dir_id, ['sha1'], 'Only sha1_git is supported.') res = api_lookup( service.vault_fetch, 'directory', obj_id, notfound_msg="Directory with ID '{}' not found.".format(dir_id), request=request) fname = '{}.tar.gz'.format(dir_id) response = HttpResponse(res, content_type='application/gzip') response['Content-disposition'] = 'attachment; filename={}'.format(fname) return response @api_route(r'/vault/revision/(?P[0-9a-f]+)/gitfast/', 'api-1-vault-cook-revision_gitfast', methods=['GET', 'POST'], checksum_args=['rev_id'], throttle_scope='swh_vault_cooking') @never_cache @api_doc('/vault/revision/gitfast/') @format_docstring() def api_vault_cook_revision_gitfast(request, rev_id): """ .. http:get:: /api/1/vault/revision/(rev_id)/gitfast/ .. http:post:: /api/1/vault/revision/(rev_id)/gitfast/ Request the cooking of a gitfast archive for a revision or check its cooking status. That endpoint enables to create a vault cooking task for a revision through a POST request or check the status of a previously created one through a GET request. Once the cooking task has been executed, the resulting gitfast archive can be downloaded using the dedicated endpoint :http:get:`/api/1/vault/revision/(rev_id)/gitfast/raw/`. 
Then to import the revision in the current directory, use:: $ git init $ zcat path/to/revision.gitfast.gz | git fast-import $ git checkout HEAD :param string rev_id: the revision's sha1 identifier :query string email: e-mail to notify when the gitfast archive is ready {common_headers} :>json string fetch_url: the url from which to download the archive once it has been cooked (see :http:get:`/api/1/vault/revision/(rev_id)/gitfast/raw/`) :>json string obj_type: the type of object to cook (directory or revision) :>json string progress_message: message describing the cooking task progress :>json number id: the cooking task id :>json string status: the cooking task status (new/pending/done/failed) :>json string obj_id: the identifier of the object to cook - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`post`, - :http:method:`head`, :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid revision identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( rev_id, ['sha1'], 'Only sha1_git is supported.') res = _dispatch_cook_progress(request, 'revision_gitfast', obj_id) res['fetch_url'] = reverse('api-1-vault-fetch-revision_gitfast', url_args={'rev_id': rev_id}) return res @api_route(r'/vault/revision/(?P[0-9a-f]+)/gitfast/raw/', 'api-1-vault-fetch-revision_gitfast', checksum_args=['rev_id']) @api_doc('/vault/revision/gitfast/raw/', handle_response=True) def api_vault_fetch_revision_gitfast(request, rev_id): """ .. http:get:: /api/1/vault/revision/(rev_id)/gitfast/raw/ Fetch the cooked gitfast archive for a revision. See :http:get:`/api/1/vault/revision/(rev_id)/gitfast/` to get more details on directory cooking. 
:param string rev_id: the revision's sha1 identifier :resheader Content-Type: application/octet-stream - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, - :http:method:`options` - :statuscode 200: no error :statuscode 400: an invalid revision identifier has been provided :statuscode 404: requested directory did not receive any cooking request yet (in case of GET) or can not be found in the archive (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( rev_id, ['sha1'], 'Only sha1_git is supported.') res = api_lookup( service.vault_fetch, 'revision_gitfast', obj_id, notfound_msg="Revision with ID '{}' not found.".format(rev_id), request=request) fname = '{}.gitfast.gz'.format(rev_id) response = HttpResponse(res, content_type='application/gzip') response['Content-disposition'] = 'attachment; filename={}'.format(fname) return response @api_route(r'/vault/revision_gitfast/(?P[0-9a-f]+)/raw/', 'api-1-vault-revision_gitfast-raw', checksum_args=['rev_id']) @api_doc('/vault/revision_gitfast/raw/', tags=['hidden'], handle_response=True) def _api_vault_revision_gitfast_raw(request, rev_id): """ The vault backend sends an email containing an invalid url to fetch a gitfast archive. So setup a redirection to the correct one as a temporary workaround. 
""" rev_gitfast_raw_url = reverse('api-1-vault-fetch-revision_gitfast', url_args={'rev_id': rev_id}) return redirect(rev_gitfast_raw_url) diff --git a/swh/web/tests/api/test_apidoc.py b/swh/web/tests/api/test_apidoc.py index 5f907e6f..67e57427 100644 --- a/swh/web/tests/api/test_apidoc.py +++ b/swh/web/tests/api/test_apidoc.py @@ -1,454 +1,452 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import textwrap import pytest from rest_framework.response import Response from swh.storage.exc import StorageDBError, StorageAPIError from swh.web.api.apidoc import api_doc, _parse_httpdomain_doc from swh.web.api.apiurls import api_route from swh.web.common.exc import BadInputExc, ForbiddenExc, NotFoundExc from swh.web.common.utils import reverse, prettify_html from swh.web.tests.django_asserts import assert_template_used _httpdomain_doc = """ .. http:get:: /api/1/revision/(sha1_git)/ Get information about a revision in the archive. Revisions are identified by **sha1** checksums, compatible with Git commit identifiers. See :func:`swh.model.identifiers.revision_identifier` in our data model module for details about how they are computed. 
:param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request :json object author: information about the author of the revision :>json object committer: information about the committer of the revision :>json string committer_date: ISO representation of the commit date (in UTC) :>json string date: ISO representation of the revision date (in UTC) :>json string directory: the unique identifier that revision points to :>json string directory_url: link to :http:get:`/api/1/directory/(sha1_git)/[(path)/]` to get information about the directory associated to the revision :>json string id: the revision unique identifier :>json boolean merge: whether or not the revision corresponds to a merge commit :>json string message: the message associated to the revision :>json array parents: the parents of the revision, i.e. the previous revisions that head directly to it, each entry of that array contains an unique parent revision identifier but also a link to :http:get:`/api/1/revision/(sha1_git)/` to get more information about it :>json string type: the type of the revision - **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head` - :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive **Request:** .. 
parsed-literal:: :swh_web_api:`revision/aafb16d69fd30ff58afdd69036a26047f3aebdc6/` """ _exception_http_code = { BadInputExc: 400, ForbiddenExc: 403, NotFoundExc: 404, Exception: 500, StorageAPIError: 503, StorageDBError: 503, } def test_apidoc_nodoc_failure(): with pytest.raises(Exception): @api_doc('/my/nodoc/url/') def apidoc_nodoc_tester(request, arga=0, argb=0): return Response(arga + argb) @api_route(r'/some/(?P[0-9]+)/(?P[0-9]+)/', 'api-1-some-doc-route') @api_doc('/some/doc/route/') def apidoc_route(request, myarg, myotherarg, akw=0): """ Sample doc """ return {'result': int(myarg) + int(myotherarg) + akw} def test_apidoc_route_doc(client): url = reverse('api-1-some-doc-route-doc') rv = client.get(url, HTTP_ACCEPT='text/html') assert rv.status_code == 200, rv.content assert_template_used(rv, 'api/apidoc.html') def test_apidoc_route_fn(api_client): url = reverse('api-1-some-doc-route', url_args={'myarg': 1, 'myotherarg': 1}) rv = api_client.get(url) assert rv.status_code == 200, rv.data @api_route(r'/test/error/(?P.+)/', 'api-1-test-error') @api_doc('/test/error/') def apidoc_test_error_route(request, exc_name): """ Sample doc """ for e in _exception_http_code.keys(): if e.__name__ == exc_name: raise e('Error') def test_apidoc_error(api_client): for exc, code in _exception_http_code.items(): url = reverse('api-1-test-error', url_args={'exc_name': exc.__name__}) rv = api_client.get(url) assert rv.status_code == code, rv.data @api_route(r'/some/full/(?P[0-9]+)/(?P[0-9]+)/', 'api-1-some-complete-doc-route') @api_doc('/some/complete/doc/route/') def apidoc_full_stack(request, myarg, myotherarg, akw=0): """ Sample doc """ return {'result': int(myarg) + int(myotherarg) + akw} def test_apidoc_full_stack_doc(client): url = reverse('api-1-some-complete-doc-route-doc') rv = client.get(url, HTTP_ACCEPT='text/html') assert rv.status_code == 200, rv.content assert_template_used(rv, 'api/apidoc.html') def test_apidoc_full_stack_fn(api_client): url = 
reverse('api-1-some-complete-doc-route', url_args={'myarg': 1, 'myotherarg': 1}) rv = api_client.get(url) assert rv.status_code == 200, rv.data @api_route(r'/test/post/only/', 'api-1-test-post-only', methods=['POST']) @api_doc('/test/post/only/') def apidoc_test_post_only(request, exc_name): """ Sample doc """ return {'result': 'some data'} def test_apidoc_post_only(client): # a dedicated view accepting GET requests should have # been created to display the HTML documentation url = reverse('api-1-test-post-only-doc') rv = client.get(url, HTTP_ACCEPT='text/html') assert rv.status_code == 200, rv.content assert_template_used(rv, 'api/apidoc.html') def test_api_doc_parse_httpdomain(): doc_data = { 'description': '', 'urls': [], 'args': [], 'params': [], 'resheaders': [], 'reqheaders': [], 'input_type': '', 'inputs': [], 'return_type': '', 'returns': [], 'status_codes': [], 'examples': [] } _parse_httpdomain_doc(_httpdomain_doc, doc_data) expected_urls = [{ 'rule': '/api/1/revision/ **\\(sha1_git\\)** /', - 'methods': ['GET', 'HEAD'] + 'methods': ['GET', 'HEAD', 'OPTIONS'] }] assert 'urls' in doc_data assert doc_data['urls'] == expected_urls expected_description = ('Get information about a revision in the archive. ' 'Revisions are identified by **sha1** checksums, ' 'compatible with Git commit identifiers. 
See ' '**swh.model.identifiers.revision_identifier** in ' 'our data model module for details about how they ' 'are computed.') assert 'description' in doc_data assert doc_data['description'] == expected_description expected_args = [{ 'name': 'sha1_git', 'type': 'string', 'doc': ('hexadecimal representation of the revision ' '**sha1_git** identifier') }] assert 'args' in doc_data assert doc_data['args'] == expected_args expected_params = [] assert 'params' in doc_data assert doc_data['params'] == expected_params expected_reqheaders = [{ 'doc': ('the requested response content type, either ' '``application/json`` (default) or ``application/yaml``'), 'name': 'Accept' }] assert 'reqheaders' in doc_data assert doc_data['reqheaders'] == expected_reqheaders expected_resheaders = [{ 'doc': 'this depends on **Accept** header of request', 'name': 'Content-Type' }] assert 'resheaders' in doc_data assert doc_data['resheaders'] == expected_resheaders expected_statuscodes = [ { 'code': '200', 'doc': 'no error' }, { 'code': '400', 'doc': 'an invalid **sha1_git** value has been provided' }, { 'code': '404', 'doc': 'requested revision can not be found in the archive' } ] assert 'status_codes' in doc_data assert doc_data['status_codes'] == expected_statuscodes expected_input_type = 'object' assert 'input_type' in doc_data assert doc_data['input_type'] == expected_input_type expected_inputs = [ { 'name': 'n', 'type': 'int', 'doc': 'sample input integer' }, { 'name': 's', 'type': 'string', 'doc': 'sample input string' }, { 'name': 'a', 'type': 'array', 'doc': 'sample input array' }, ] assert 'inputs' in doc_data assert doc_data['inputs'] == expected_inputs expected_return_type = 'object' assert 'return_type' in doc_data assert doc_data['return_type'] == expected_return_type expected_returns = [ { 'name': 'author', 'type': 'object', 'doc': 'information about the author of the revision' }, { 'name': 'committer', 'type': 'object', 'doc': 'information about the committer of the revision' 
}, { 'name': 'committer_date', 'type': 'string', 'doc': 'ISO representation of the commit date (in UTC)' }, { 'name': 'date', 'type': 'string', 'doc': 'ISO representation of the revision date (in UTC)' }, { 'name': 'directory', 'type': 'string', 'doc': 'the unique identifier that revision points to' }, { 'name': 'directory_url', 'type': 'string', 'doc': ('link to ``_ to get information about ' 'the directory associated to the revision') }, { 'name': 'id', 'type': 'string', 'doc': 'the revision unique identifier' }, { 'name': 'merge', 'type': 'boolean', 'doc': 'whether or not the revision corresponds to a merge commit' }, { 'name': 'message', 'type': 'string', 'doc': 'the message associated to the revision' }, { 'name': 'parents', 'type': 'array', 'doc': ('the parents of the revision, i.e. the previous revisions ' 'that head directly to it, each entry of that array ' 'contains an unique parent revision identifier but also a ' 'link to ``_ to get more information ' 'about it') }, { 'name': 'type', 'type': 'string', 'doc': 'the type of the revision' } ] assert 'returns' in doc_data assert doc_data['returns'] == expected_returns expected_examples = [ '/api/1/revision/aafb16d69fd30ff58afdd69036a26047f3aebdc6/' ] assert 'examples' in doc_data assert doc_data['examples'] == expected_examples @api_route(r'/post/endpoint/', 'api-1-post-endpoint', methods=['POST']) @api_doc('/post/endpoint/') def apidoc_test_post_endpoint(request): """ .. http:post:: /api/1/post/endpoint/ Endpoint documentation :json object : an object whose keys are input persistent identifiers and values objects with the following keys: * **known (bool)**: whether the object was found """ pass def test_apidoc_input_output_doc(client): url = reverse('api-1-post-endpoint-doc') rv = client.get(url, HTTP_ACCEPT='text/html') assert rv.status_code == 200, rv.content assert_template_used(rv, 'api/apidoc.html') input_html_doc = textwrap.indent(( '
\n' '
\n' ' array\n' '
\n' '
\n' '

\n' ' Input array of pids\n' '

\n' '
\n' '
\n' ), ' '*7) output_html_doc = textwrap.indent(( '
\n' '
\n' ' object\n' '
\n' '
\n' '

\n' ' an object containing the following keys:\n' '

\n' '
\n' '
\n' '
    \n' '
  • \n' '

    \n' ' \n' ' <swh_pid> (object)\n' ' \n' ' : an object whose keys are input persistent identifiers' ' and values objects with the following keys:\n' '

    \n' '
    \n' '
      \n' '
    • \n' '

      \n' ' \n' ' known (bool)\n' ' \n' ' : whether the object was found\n' '

      \n' '
    • \n' '
    \n' '
    \n' '
  • \n' '
\n' '
\n' '
\n' '
\n' '
\n' ), ' '*7) html = prettify_html(rv.content) assert input_html_doc in html assert output_html_doc in html