diff --git a/cypress/integration/origin-search.spec.js b/cypress/integration/origin-search.spec.js index ec6982e6..ddc3e0f1 100644 --- a/cypress/integration/origin-search.spec.js +++ b/cypress/integration/origin-search.spec.js @@ -1,429 +1,429 @@ /** * Copyright (C) 2019-2020 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ const nonExistentText = 'NoMatchExists'; let origin; let url; function doSearch(searchText) { cy.get('#origins-url-patterns') .type(searchText) .get('.swh-search-icon') .click(); } function searchShouldRedirect(searchText, redirectUrl) { doSearch(searchText); cy.location('pathname') .should('equal', redirectUrl); } function searchShouldShowNotFound(searchText, msg) { doSearch(searchText); cy.get('#swh-no-result') .should('be.visible') .and('contain', msg); } function stubOriginVisitLatestRequests() { cy.server(); cy.route({ method: 'GET', url: '**/visit/latest/**', response: { type: 'tar' } }).as('originVisitLatest'); } describe('Test origin-search', function() { before(function() { origin = this.origin[0]; url = this.Urls.browse_search(); }); beforeEach(function() { cy.visit(url); }); it('should show in result when url is searched', function() { cy.get('#origins-url-patterns') .type(origin.url); cy.get('.swh-search-icon') .click(); cy.get('#origin-search-results') .should('be.visible'); cy.contains('tr', origin.url) .should('be.visible') .find('.swh-visit-status') .find('i') .should('have.class', 'fa-check') .and('have.attr', 'title', 'Origin has at least one full visit by Software Heritage'); }); it('should show not found message when no repo matches', function() { searchShouldShowNotFound(nonExistentText, 'No origins matching the search criteria were found.'); }); it('should add appropriate URL parameters', function() { // Check all three checkboxes and check if // correct url params are added cy.get('#swh-search-origins-with-visit') .check() .get('#swh-filter-empty-visits') .check() .get('#swh-search-origin-metadata') .check() .then(() => { const searchText = origin.url; doSearch(searchText); cy.location('search').then(locationSearch => { const urlParams = new URLSearchParams(locationSearch); const query = urlParams.get('q'); const withVisit = urlParams.has('with_visit'); const withContent = urlParams.has('with_content'); const searchMetadata = urlParams.has('search_metadata'); assert.strictEqual(query, searchText); assert.strictEqual(withVisit, true); assert.strictEqual(withContent, true); assert.strictEqual(searchMetadata, true); }); }); }); it('should not send request to the resolve endpoint', function() { cy.server(); cy.route({ method: 'GET', - url: `${this.Urls.api_1_resolve()}**` + url: `${this.Urls.api_1_resolve_swh_pid('').slice(0, -1)}**` }).as('resolvePid'); cy.route({ method: 'GET', - url: `${this.Urls.api_1_origin_search()}**` + url: `${this.Urls.api_1_origin_search(origin.url)}**` }).as('searchOrigin'); cy.get('#origins-url-patterns') .type(origin.url); cy.get('.swh-search-icon') .click(); cy.wait('@searchOrigin'); cy.xhrShouldBeCalled('resolvePid', 0); cy.xhrShouldBeCalled('searchOrigin', 1); }); context('Test pagination', function() { it('should not paginate if there are not many results', function() { // Setup search cy.get('#swh-search-origins-with-visit') .uncheck() .get('#swh-filter-empty-visits') .uncheck() .then(() => { const searchText = 'libtess'; // Get first page of results doSearch(searchText); cy.get('.swh-search-result-entry') .should('have.length', 1); cy.get('.swh-search-result-entry#origin-0 td a') .should('have.text', 'https://github.com/memononen/libtess2'); cy.get('#origins-prev-results-button') .should('have.class', 'disabled'); cy.get('#origins-next-results-button') .should('have.class', 'disabled'); }); }); it('should paginate forward when there are many results', function() { stubOriginVisitLatestRequests(); // Setup search cy.get('#swh-search-origins-with-visit') .uncheck() .get('#swh-filter-empty-visits') .uncheck() .then(() => { const searchText = 'many.origins'; // Get first page of results doSearch(searchText); cy.wait('@originVisitLatest'); cy.get('.swh-search-result-entry') .should('have.length', 100); cy.get('.swh-search-result-entry#origin-0 td a') .should('have.text', 'https://many.origins/1'); cy.get('.swh-search-result-entry#origin-99 td a') .should('have.text', 'https://many.origins/100'); cy.get('#origins-prev-results-button') .should('have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); // Get second page of results cy.get('#origins-next-results-button a') .click(); cy.wait('@originVisitLatest'); cy.get('.swh-search-result-entry') .should('have.length', 100); cy.get('.swh-search-result-entry#origin-0 td a') .should('have.text', 'https://many.origins/101'); cy.get('.swh-search-result-entry#origin-99 td a') .should('have.text', 'https://many.origins/200'); cy.get('#origins-prev-results-button') .should('not.have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); // Get third (and last) page of results cy.get('#origins-next-results-button a') .click(); cy.wait('@originVisitLatest'); cy.get('.swh-search-result-entry') .should('have.length', 50); cy.get('.swh-search-result-entry#origin-0 td a') .should('have.text', 'https://many.origins/201'); cy.get('.swh-search-result-entry#origin-49 td a') .should('have.text', 'https://many.origins/250'); cy.get('#origins-prev-results-button') .should('not.have.class', 'disabled'); cy.get('#origins-next-results-button') .should('have.class', 'disabled'); }); }); it('should paginate backward from a middle page', function() { stubOriginVisitLatestRequests(); // Setup search cy.get('#swh-search-origins-with-visit') .uncheck() .get('#swh-filter-empty-visits') .uncheck() .then(() => { const searchText = 'many.origins'; // Get first page of results doSearch(searchText); cy.wait('@originVisitLatest'); cy.get('#origins-prev-results-button') .should('have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); // Get second page of results cy.get('#origins-next-results-button a') .click(); cy.wait('@originVisitLatest'); cy.get('#origins-prev-results-button') .should('not.have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); // Get first page of results again cy.get('#origins-prev-results-button a') .click(); cy.wait('@originVisitLatest'); cy.get('.swh-search-result-entry') .should('have.length', 100); cy.get('.swh-search-result-entry#origin-0 td a') .should('have.text', 'https://many.origins/1'); cy.get('.swh-search-result-entry#origin-99 td a') .should('have.text', 'https://many.origins/100'); cy.get('#origins-prev-results-button') .should('have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); }); }); it('should paginate backward from the last page', function() { stubOriginVisitLatestRequests(); // Setup search cy.get('#swh-search-origins-with-visit') .uncheck() .get('#swh-filter-empty-visits') .uncheck() .then(() => { const searchText = 'many.origins'; // Get first page of results doSearch(searchText); cy.wait('@originVisitLatest'); cy.get('#origins-prev-results-button') .should('have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); // Get second page of results cy.get('#origins-next-results-button a') .click(); cy.wait('@originVisitLatest'); cy.get('#origins-prev-results-button') .should('not.have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); // Get third (and last) page of results cy.get('#origins-next-results-button a') .click(); cy.get('#origins-prev-results-button') .should('not.have.class', 'disabled'); cy.get('#origins-next-results-button') .should('have.class', 'disabled'); // Get second page of results again cy.get('#origins-prev-results-button a') .click(); cy.wait('@originVisitLatest'); cy.get('.swh-search-result-entry') .should('have.length', 100); cy.get('.swh-search-result-entry#origin-0 td a') .should('have.text', 'https://many.origins/101'); cy.get('.swh-search-result-entry#origin-99 td a') .should('have.text', 'https://many.origins/200'); cy.get('#origins-prev-results-button') .should('not.have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); // Get first page of results again cy.get('#origins-prev-results-button a') .click(); cy.wait('@originVisitLatest'); cy.get('.swh-search-result-entry') .should('have.length', 100); cy.get('.swh-search-result-entry#origin-0 td a') .should('have.text', 'https://many.origins/1'); cy.get('.swh-search-result-entry#origin-99 td a') .should('have.text', 'https://many.origins/100'); cy.get('#origins-prev-results-button') .should('have.class', 'disabled'); cy.get('#origins-next-results-button') .should('not.have.class', 'disabled'); }); }); }); context('Test valid persistent ids', function() { it('should resolve directory', function() { const redirectUrl = this.Urls.browse_directory(origin.content[0].directory); const persistentId = `swh:1:dir:${origin.content[0].directory}`; searchShouldRedirect(persistentId, redirectUrl); }); it('should resolve revision', function() { const redirectUrl = this.Urls.browse_revision(origin.revisions[0]); const persistentId = `swh:1:rev:${origin.revisions[0]}`; searchShouldRedirect(persistentId, redirectUrl); }); it('should resolve snapshot', function() { const redirectUrl = this.Urls.browse_snapshot_directory(origin.snapshot); const persistentId = `swh:1:snp:${origin.snapshot}`; searchShouldRedirect(persistentId, redirectUrl); }); it('should resolve content', function() { const redirectUrl = this.Urls.browse_content(`sha1_git:${origin.content[0].sha1git}`); const persistentId = `swh:1:cnt:${origin.content[0].sha1git}`; searchShouldRedirect(persistentId, redirectUrl); }); it('should not send request to the search endpoint', function() { cy.server(); const persistentId = `swh:1:rev:${origin.revisions[0]}`; cy.route({ method: 'GET', - url: `${this.Urls.api_1_resolve()}**` + url: this.Urls.api_1_resolve_swh_pid(persistentId) }).as('resolvePid'); cy.route({ method: 'GET', - url: `${this.Urls.api_1_origin_search()}**` + url: `${this.Urls.api_1_origin_search('').slice(0, -1)}**` }).as('searchOrigin'); cy.get('#origins-url-patterns') .type(persistentId); cy.get('.swh-search-icon') .click(); cy.wait('@resolvePid'); cy.xhrShouldBeCalled('resolvePid', 1); cy.xhrShouldBeCalled('searchOrigin', 0); }); }); context('Test invalid persistent ids', function() { it('should show not found for directory', function() { const persistentId = `swh:1:dir:${this.unarchivedRepo.rootDirectory}`; const msg = `Directory with sha1_git ${this.unarchivedRepo.rootDirectory} not found`; searchShouldShowNotFound(persistentId, msg); }); it('should show not found for snapshot', function() { const persistentId = `swh:1:snp:${this.unarchivedRepo.snapshot}`; const msg = `Snapshot with id ${this.unarchivedRepo.snapshot} not found!`; searchShouldShowNotFound(persistentId, msg); }); it('should show not found for revision', function() { const persistentId = `swh:1:rev:${this.unarchivedRepo.revision}`; const msg = `Revision with sha1_git ${this.unarchivedRepo.revision} not found.`; searchShouldShowNotFound(persistentId, msg); }); it('should show not found for content', function() { const persistentId = `swh:1:cnt:${this.unarchivedRepo.content[0].sha1git}`; const msg = `Content with sha1_git checksum equals to ${this.unarchivedRepo.content[0].sha1git} not found!`; searchShouldShowNotFound(persistentId, msg); }); }); }); diff --git a/swh/web/api/apidoc.py b/swh/web/api/apidoc.py index 970a843a..f1875dc8 100644 --- a/swh/web/api/apidoc.py +++ b/swh/web/api/apidoc.py @@ -1,400 +1,405 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -import docutils.nodes -import docutils.parsers.rst -import docutils.utils + import functools from functools import wraps import os import re import textwrap +from typing import List + +import docutils.nodes +import docutils.parsers.rst +import docutils.utils from rest_framework.decorators import api_view import sentry_sdk from swh.web.common.utils import parse_rst from swh.web.api.apiurls import APIUrls from swh.web.api.apiresponse import make_api_response, error_response class _HTTPDomainDocVisitor(docutils.nodes.NodeVisitor): """ docutils visitor for walking on a parsed rst document containing sphinx httpdomain roles. Its purpose is to extract relevant info regarding swh api endpoints (for instance url arguments) from their docstring written using sphinx httpdomain. """ # httpdomain roles we want to parse (based on sphinxcontrib.httpdomain 1.6) parameter_roles = ('param', 'parameter', 'arg', 'argument') response_json_object_roles = ('resjsonobj', 'resjson', '>jsonobj', '>json') response_json_array_roles = ('resjsonarr', '>jsonarr') query_parameter_roles = ('queryparameter', 'queryparam', 'qparam', 'query') request_header_roles = ('header', 'resheader', 'responseheader') status_code_roles = ('statuscode', 'status', 'code') def __init__(self, document, urls, data): super().__init__(document) self.urls = urls self.url_idx = 0 self.data = data self.args_set = set() self.params_set = set() self.returns_set = set() self.status_codes_set = set() self.reqheaders_set = set() self.resheaders_set = set() self.field_list_visited = False def process_paragraph(self, par): """ Process extracted paragraph text before display. Cleanup document model markups and transform the paragraph into a valid raw rst string (as the apidoc documentation transform rst to html when rendering). """ par = par.replace('\n', ' ') # keep emphasized, strong and literal text par = par.replace('', '*') par = par.replace('', '*') par = par.replace('', '**') par = par.replace('', '**') par = par.replace('', '``') par = par.replace('', '``') # remove parsed document markups par = re.sub('<[^<]+?>', '', par) # api urls cleanup to generate valid links afterwards par = re.sub(r'\(\w+\)', '', par) par = re.sub(r'\[.*\]', '', par) par = par.replace('//', '/') # transform references to api endpoints into valid rst links par = re.sub(':http:get:`([^,]*)`', r'`<\1>`_', par) # transform references to some elements into bold text par = re.sub(':http:header:`(.*)`', r'**\1**', par) par = re.sub(':func:`(.*)`', r'**\1**', par) return par def visit_field_list(self, node): """ Visit parsed rst field lists to extract relevant info regarding api endpoint. """ self.field_list_visited = True for child in node.traverse(): # get the parsed field name if isinstance(child, docutils.nodes.field_name): field_name = child.astext() # parse field text elif isinstance(child, docutils.nodes.paragraph): text = self.process_paragraph(str(child)) field_data = field_name.split(' ') # Parameters if field_data[0] in self.parameter_roles: if field_data[2] not in self.args_set: self.data['args'].append({'name': field_data[2], 'type': field_data[1], 'doc': text}) self.args_set.add(field_data[2]) # Query Parameters if field_data[0] in self.query_parameter_roles: if field_data[2] not in self.params_set: self.data['params'].append({'name': field_data[2], 'type': field_data[1], 'doc': text}) self.params_set.add(field_data[2]) # Response type if field_data[0] in self.response_json_array_roles or \ field_data[0] in self.response_json_object_roles: # array if field_data[0] in self.response_json_array_roles: self.data['return_type'] = 'array' # object else: self.data['return_type'] = 'object' # returned object field if field_data[2] not in self.returns_set: self.data['returns'].append({'name': field_data[2], 'type': field_data[1], 'doc': text}) self.returns_set.add(field_data[2]) # Status Codes if field_data[0] in self.status_code_roles: if field_data[1] not in self.status_codes_set: self.data['status_codes'].append({'code': field_data[1], # noqa 'doc': text}) self.status_codes_set.add(field_data[1]) # Request Headers if field_data[0] in self.request_header_roles: if field_data[1] not in self.reqheaders_set: self.data['reqheaders'].append({'name': field_data[1], 'doc': text}) self.reqheaders_set.add(field_data[1]) # Response Headers if field_data[0] in self.response_header_roles: if field_data[1] not in self.resheaders_set: resheader = {'name': field_data[1], 'doc': text} self.data['resheaders'].append(resheader) self.resheaders_set.add(field_data[1]) if resheader['name'] == 'Content-Type' and \ resheader['doc'] == 'application/octet-stream': self.data['return_type'] = 'octet stream' def visit_paragraph(self, node): """ Visit relevant paragraphs to parse """ # only parsed top level paragraphs if isinstance(node.parent, docutils.nodes.block_quote): text = self.process_paragraph(str(node)) # endpoint description if (not text.startswith('**') and text not in self.data['description']): self.data['description'] += '\n\n' if self.data['description'] else '' # noqa self.data['description'] += text # http methods elif text.startswith('**Allowed HTTP Methods:**'): text = text.replace('**Allowed HTTP Methods:**', '') http_methods = text.strip().split(',') http_methods = [m[m.find('`')+1:-1].upper() for m in http_methods] self.data['urls'].append({'rule': self.urls[self.url_idx], 'methods': http_methods}) self.url_idx += 1 def visit_literal_block(self, node): """ Visit literal blocks """ text = node.astext() # literal block in endpoint description if not self.field_list_visited: self.data['description'] += \ ':\n\n%s\n' % textwrap.indent(text, '\t') # extract example url if ':swh_web_api:' in text: self.data['examples'].append( '/api/1/' + re.sub('.*`(.*)`.*', r'\1', text)) def visit_bullet_list(self, node): # bullet list in endpoint description if not self.field_list_visited: self.data['description'] += '\n\n' for child in node.traverse(): # process list item if isinstance(child, docutils.nodes.paragraph): line_text = self.process_paragraph(str(child)) self.data['description'] += '\t* %s\n' % line_text def visit_warning(self, node): text = self.process_paragraph(str(node)) rst_warning = '\n\n.. warning::\n%s\n' % textwrap.indent(text, '\t') if rst_warning not in self.data['description']: self.data['description'] += rst_warning def unknown_visit(self, node): pass def depart_document(self, node): """ End of parsing extra processing """ default_methods = ['GET', 'HEAD', 'OPTIONS'] # ensure urls info is present and set default http methods if not self.data['urls']: for url in self.urls: self.data['urls'].append({'rule': url, 'methods': default_methods}) def unknown_departure(self, node): pass def _parse_httpdomain_doc(doc, data): doc_lines = doc.split('\n') doc_lines_filtered = [] urls = [] # httpdomain is a sphinx extension that is unknown to docutils but # fortunately we can still parse its directives' content, # so remove lines with httpdomain directives before executing the # rst parser from docutils for doc_line in doc_lines: if '.. http' not in doc_line: doc_lines_filtered.append(doc_line) else: url = doc_line[doc_line.find('/'):] # emphasize url arguments for html rendering url = re.sub(r'\((\w+)\)', r' **\(\1\)** ', url) urls.append(url) # parse the rst docstring and do not print system messages about # unknown httpdomain roles document = parse_rst('\n'.join(doc_lines_filtered), report_level=5) # remove the system_message nodes from the parsed document for node in document.traverse(docutils.nodes.system_message): node.parent.remove(node) # visit the document nodes to extract relevant endpoint info visitor = _HTTPDomainDocVisitor(document, urls, data) document.walkabout(visitor) class APIDocException(Exception): """ Custom exception to signal errors in the use of the APIDoc decorators """ -def api_doc(route, noargs=False, need_params=False, tags=[], - handle_response=False, api_version='1'): +def api_doc(route: str, noargs: bool = False, need_params: bool = False, + tags: List[str] = [], handle_response: bool = False, + api_version: str = '1'): """ - Decorate an API function to register it in the API doc route index - and create the corresponding DRF route. + Decorator for an API endpoint implementation used to generate a dedicated + view displaying its HTML documentation. + + The documentation will be generated from the endpoint docstring based on + sphinxcontrib-httpdomain format. Args: - route (str): documentation page's route - noargs (boolean): set to True if the route has no arguments, and its + route: documentation page's route + noargs: set to True if the route has no arguments, and its result should be displayed anytime its documentation is requested. Default to False - need_params (boolean): specify the route requires query parameters + need_params: specify the route requires query parameters otherwise errors will occur. It enables to avoid displaying the invalid response in its HTML documentation. Default to False. - tags (list): Further information on api endpoints. Two values are + tags: Further information on api endpoints. Two values are possibly expected: * hidden: remove the entry points from the listing * upcoming: display the entry point but it is not followable - handle_response (boolean): indicate if the decorated function takes + handle_response: indicate if the decorated function takes care of creating the HTTP response or delegates that task to the apiresponse module - api_version (str): api version string - + api_version: api version string """ - urlpattern = '^' + api_version + route + '$' - tags = set(tags) + + tags_set = set(tags) # @api_doc() Decorator call def decorator(f): - - # If the route is not hidden, add it to the index - if 'hidden' not in tags: + # if the route is not hidden, add it to the index + if 'hidden' not in tags_set: doc_data = get_doc_data(f, route, noargs) doc_desc = doc_data['description'] first_dot_pos = doc_desc.find('.') - APIUrls.add_route(route, doc_desc[:first_dot_pos+1], - tags=tags) - - # If the decorated route has arguments, we create a specific - # documentation view - if not noargs: - - @api_view(['GET', 'HEAD']) - @wraps(f) - def doc_view(request): - doc_data = get_doc_data(f, route, noargs) - return make_api_response(request, None, doc_data) - - view_name = 'api-%s-%s' % \ - (api_version, route[1:-1].replace('/', '-')) - APIUrls.add_url_pattern(urlpattern, doc_view, view_name) + APIUrls.add_doc_route(route, doc_desc[:first_dot_pos+1], + noargs=noargs, api_version=api_version, + tags=tags_set) + + # create a dedicated view to display endpoint HTML doc + @api_view(['GET', 'HEAD']) + @wraps(f) + def doc_view(request): + doc_data = get_doc_data(f, route, noargs) + return make_api_response(request, None, doc_data) + + route_name = '%s-doc' % route[1:-1].replace('/', '-') + urlpattern = f'^{api_version}{route}doc/$' + + view_name = 'api-%s-%s' % (api_version, route_name) + APIUrls.add_url_pattern(urlpattern, doc_view, view_name) @wraps(f) def documented_view(request, **kwargs): doc_data = get_doc_data(f, route, noargs) try: response = f(request, **kwargs) except Exception as exc: sentry_sdk.capture_exception(exc) if request.accepted_media_type == 'text/html' and \ need_params and not request.query_params: response = None else: return error_response(request, exc, doc_data) if handle_response: return response else: return make_api_response(request, response, doc_data) return documented_view return decorator @functools.lru_cache(maxsize=32) def get_doc_data(f, route, noargs): """ Build documentation data for the decorated api endpoint function """ data = { 'description': '', 'response_data': None, 'urls': [], 'args': [], 'params': [], 'resheaders': [], 'reqheaders': [], 'return_type': '', 'returns': [], 'status_codes': [], 'examples': [], 'route': route, 'noargs': noargs } if not f.__doc__: raise APIDocException('apidoc: expected a docstring' ' for function %s' % (f.__name__,)) # use raw docstring as endpoint documentation if sphinx # httpdomain is not used if '.. http' not in f.__doc__: data['description'] = f.__doc__ # else parse the sphinx httpdomain docstring with docutils # (except when building the swh-web documentation through autodoc # sphinx extension, not needed and raise errors with sphinx >= 1.7) elif 'SWH_WEB_DOC_BUILD' not in os.environ: _parse_httpdomain_doc(f.__doc__, data) # process returned object info for nicer html display returns_list = '' for ret in data['returns']: returns_list += '\t* **%s (%s)**: %s\n' %\ (ret['name'], ret['type'], ret['doc']) data['returns_list'] = returns_list return data DOC_COMMON_HEADERS = ''' :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request''' DOC_RESHEADER_LINK = ''' :resheader Link: indicates that a subsequent result page is available and contains the url pointing to it ''' DEFAULT_SUBSTITUTIONS = { 'common_headers': DOC_COMMON_HEADERS, 'resheader_link': DOC_RESHEADER_LINK, } def format_docstring(**substitutions): def decorator(f): f.__doc__ = f.__doc__.format(**{ **DEFAULT_SUBSTITUTIONS, **substitutions}) return f return decorator diff --git a/swh/web/api/apiresponse.py b/swh/web/api/apiresponse.py index 5b69e9e9..773cda44 100644 --- a/swh/web/api/apiresponse.py +++ b/swh/web/api/apiresponse.py @@ -1,191 +1,196 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json import traceback from django.utils.html import escape from rest_framework.response import Response from swh.storage.exc import StorageDBError, StorageAPIError from swh.web.api import utils from swh.web.common.exc import ( NotFoundExc, ForbiddenExc, BadInputExc, LargePayloadExc ) from swh.web.common.utils import shorten_path, gen_path_info from swh.web.config import get_config def compute_link_header(rv, options): """Add Link header in returned value results. Args: request: a DRF Request object rv (dict): dictionary with keys: - headers: potential headers with 'link-next' and 'link-prev' keys - results: containing the result to return options (dict): the initial dict to update with result if any Returns: dict: dictionary with optional keys 'link-next' and 'link-prev' """ link_headers = [] if 'headers' not in rv: return {} rv_headers = rv['headers'] if 'link-next' in rv_headers: link_headers.append('<%s>; rel="next"' % rv_headers['link-next']) if 'link-prev' in rv_headers: link_headers.append('<%s>; rel="previous"' % rv_headers['link-prev']) if link_headers: link_header_str = ','.join(link_headers) headers = options.get('headers', {}) headers.update({ 'Link': link_header_str }) return headers return {} def filter_by_fields(request, data): """Extract a request parameter 'fields' if it exists to permit the filtering on the data dict's keys. If such field is not provided, returns the data as is. """ fields = request.query_params.get('fields') if fields: fields = set(fields.split(',')) data = utils.filter_field_keys(data, fields) return data def transform(rv): """Transform an eventual returned value with multiple layer of information with only what's necessary. If the returned value rv contains the 'results' key, this is the associated value which is returned. Otherwise, return the initial dict without the potential 'headers' key. """ if 'results' in rv: return rv['results'] if 'headers' in rv: rv.pop('headers') return rv def make_api_response(request, data, doc_data={}, options={}): """Generates an API response based on the requested mimetype. Args: request: a DRF Request object data: raw data to return in the API response doc_data: documentation data for HTML response options: optional data that can be used to generate the response Returns: a DRF Response a object """ if data: options['headers'] = compute_link_header(data, options) data = transform(data) data = filter_by_fields(request, data) doc_env = doc_data headers = {} if 'headers' in options: doc_env['headers_data'] = options['headers'] headers = options['headers'] # get request status code doc_env['status_code'] = options.get('status', 200) response_args = {'status': doc_env['status_code'], 'headers': headers, 'content_type': request.accepted_media_type} # when requesting HTML, typically when browsing the API through its # documented views, we need to enrich the input data with documentation # related ones and inform DRF that we request HTML template rendering if request.accepted_media_type == 'text/html': if data: data = json.dumps(data, sort_keys=True, indent=4, separators=(',', ': ')) doc_env['response_data'] = data doc_env['heading'] = shorten_path(str(request.path)) + # generate breadcrumbs data if 'route' in doc_env: doc_env['endpoint_path'] = gen_path_info(doc_env['route']) + for i in range(len(doc_env['endpoint_path']) - 1): + doc_env['endpoint_path'][i]['path'] += '/doc/' + if not doc_env['noargs']: + doc_env['endpoint_path'][-1]['path'] += '/doc/' response_args['data'] = doc_env response_args['template_name'] = 'api/apidoc.html' # otherwise simply return the raw data and let DRF picks # the correct renderer (JSON or YAML) else: response_args['data'] = data return Response(**response_args) def error_response(request, error, doc_data): """Private function to create a custom error response. Args: request: a DRF Request object error: the exception that caused the error doc_data: documentation data for HTML response """ error_code = 500 if isinstance(error, BadInputExc): error_code = 400 elif isinstance(error, NotFoundExc): error_code = 404 elif isinstance(error, ForbiddenExc): error_code = 403 elif isinstance(error, LargePayloadExc): error_code = 413 elif isinstance(error, StorageDBError): error_code = 503 elif isinstance(error, StorageAPIError): error_code = 503 error_opts = {'status': error_code} error_data = { 'exception': error.__class__.__name__, 'reason': str(error), } if request.accepted_media_type == 'text/html': error_data['reason'] = escape(error_data['reason']) if get_config()['debug']: error_data['traceback'] = traceback.format_exc() return make_api_response(request, error_data, doc_data, options=error_opts) diff --git a/swh/web/api/apiurls.py b/swh/web/api/apiurls.py index 8694115f..6f8031f0 100644 --- a/swh/web/api/apiurls.py +++ b/swh/web/api/apiurls.py @@ -1,86 +1,91 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import functools from typing import Dict from rest_framework.decorators import api_view from swh.web.common.urlsindex import UrlsIndex from swh.web.common import throttling class APIUrls(UrlsIndex): """ Class to manage API documentation URLs. - Indexes all routes documented using apidoc's decorators. - Tracks endpoint/request processing method relationships for use in generating related urls in API documentation """ _apidoc_routes = {} # type: Dict[str, Dict[str, str]] scope = 'api' @classmethod def get_app_endpoints(cls): return cls._apidoc_routes @classmethod - def add_route(cls, route, docstring, **kwargs): + def add_doc_route(cls, route, docstring, noargs=False, + api_version='1', **kwargs): """ Add a route to the self-documenting API reference """ - route_view_name = 'api-1-%s' % route[1:-1].replace('/', '-') + route_name = route[1:-1].replace('/', '-') + if not noargs: + route_name = '%s-doc' % route_name + route_view_name = 'api-%s-%s' % (api_version, route_name) if route not in cls._apidoc_routes: d = {'docstring': docstring, + 'route': '/api/%s%s' % (api_version, route), 'route_view_name': route_view_name} for k, v in kwargs.items(): d[k] = v cls._apidoc_routes[route] = d def api_route(url_pattern=None, view_name=None, methods=['GET', 'HEAD', 'OPTIONS'], throttle_scope='swh_api', api_version='1', checksum_args=None): """ Decorator to ease the registration of an API endpoint using the Django REST Framework. Args: url_pattern: the url pattern used by DRF to identify the API route view_name: the name of the API view associated to the route used to reverse the url methods: array of HTTP methods supported by the API route """ url_pattern = '^' + api_version + url_pattern + '$' def decorator(f): # create a DRF view from the wrapped function @api_view(methods) @throttling.throttle_scope(throttle_scope) @functools.wraps(f) def api_view_f(*args, **kwargs): return f(*args, **kwargs) # small hacks for correctly generating API endpoints index doc api_view_f.__name__ = f.__name__ api_view_f.http_method_names = methods # register the route and its view in the endpoints index APIUrls.add_url_pattern(url_pattern, api_view_f, view_name) if checksum_args: APIUrls.add_redirect_for_checksum_args(view_name, [url_pattern], checksum_args) return f return decorator diff --git a/swh/web/api/views/identifiers.py b/swh/web/api/views/identifiers.py index bc9d8cf2..1de79301 100644 --- a/swh/web/api/views/identifiers.py +++ b/swh/web/api/views/identifiers.py @@ -1,107 +1,107 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.common import service, utils from swh.web.common.utils import ( resolve_swh_persistent_id, get_persistent_identifier ) from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.common.exc import LargePayloadExc @api_route(r'/resolve/(?P.*)/', 'api-1-resolve-swh-pid') @api_doc('/resolve/') @format_docstring() def api_resolve_swh_pid(request, swh_id): """ .. http:get:: /api/1/resolve/(swh_id)/ Resolve a Software Heritage persistent identifier. Try to resolve a provided `persistent identifier `_ into an url for browsing the pointed archive object. If the provided identifier is valid, the existence of the object in the archive will also be checked. :param string swh_id: a Software Heritage persistent identifier :>json string browse_url: the url for browsing the pointed object :>json object metadata: object holding optional parts of the persistent identifier :>json string namespace: the persistent identifier namespace :>json string object_id: the hash identifier of the pointed object :>json string object_type: the type of the pointed object :>json number scheme_version: the scheme version of the persistent identifier {common_headers} **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid persistent identifier has been provided :statuscode 404: the pointed object does not exist in the archive **Example:** .. parsed-literal:: :swh_web_api:`resolve/swh:1:rev:96db9023b881d7cd9f379b0c154650d6c108e9a3;origin=https://github.com/openssl/openssl/` """ # noqa # try to resolve the provided pid swh_id_resolved = resolve_swh_persistent_id(swh_id) # id is well-formed, now check that the pointed # object is present in the archive, NotFoundExc # will be raised otherwise swh_id_parsed = swh_id_resolved['swh_id_parsed'] object_type = swh_id_parsed.object_type object_id = swh_id_parsed.object_id service.lookup_object(object_type, object_id) # id is well-formed and the pointed object exists swh_id_data = swh_id_parsed._asdict() swh_id_data['browse_url'] = request.build_absolute_uri( swh_id_resolved['browse_url']) return swh_id_data @api_route(r'/known/', 'api-1-swh-pid-known', methods=['POST']) -@api_doc('/known/', noargs=True, tags=['hidden']) +@api_doc('/known/', tags=['hidden']) @format_docstring() def api_swh_pid_known(request): """ .. http:post:: /api/1/known/ Check if a list of Software Heritage persistent identifier is present in the archive depending on their id (sha1_git). Returns: A dictionary with: keys(str): Persistent identifier values(dict): A dictionary containing the key 'known'. (true if the pid is present, False otherwise) """ limit = 1000 if len(request.data) > limit: raise LargePayloadExc('The maximum number of PIDs this endpoint can ' 'receive is %s' % limit) persistent_ids = [get_persistent_identifier(pid) for pid in request.data] response = {str(pid): {'known': False} for pid in persistent_ids} # group pids by their type pids_by_type = utils.group_swh_persistent_identifiers(persistent_ids) # search for hashes not present in the storage missing_hashes = service.lookup_missing_hashes(pids_by_type) for pid in persistent_ids: if pid.object_id not in missing_hashes: response[str(pid)]['known'] = True return response diff --git a/swh/web/templates/api/apidoc.html b/swh/web/templates/api/apidoc.html index b5a7ef35..7b2d25d0 100644 --- a/swh/web/templates/api/apidoc.html +++ b/swh/web/templates/api/apidoc.html @@ -1,183 +1,183 @@ {% extends "layout.html" %} {% comment %} Copyright (C) 2015-2019 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load swh_templatetags %} {% block title %}{{ heading }} – Software Heritage API {% endblock %} {% block navbar-content %} {% endblock %} {% block content %}
{% if description %}

Description

{{ description | safe_docstring_display | safe }}
{% endif %} {% if response_data is not None %}

Request

{{ request.method }} {{ request.build_absolute_uri }}

Response

{% if status_code != 200 %}
Status Code
{{ status_code }}
{% endif %} {% if headers_data %}
Headers
{% for header_name, header_value in headers_data.items %}
{{ header_name }} {{ header_value | urlize_header_links | safe }}
{% endfor %} {% endif %}
Body
{{ response_data | urlize_links_and_mails | safe }}
{% endif %}
{% if urls and urls|length > 0 %}
{% for url in urls %} {% endfor %}
URL Allowed Methods
{{ url.rule | safe_docstring_display | safe }} {{ url.methods | dictsort:0 | join:', ' }}

{% endif %} {% if args and args|length > 0 %}

Arguments

{% for arg in args %}
{{ arg.name }} ({{ arg.type }})
{{ arg.doc | safe_docstring_display | safe }}
{% endfor %}

{% endif %} {% if params and params|length > 0 %}

Query parameters

{% for param in params %}
{{ param.name }} ({{ param.type }})
{{ param.doc | safe_docstring_display | safe }}
{% endfor %}

{% endif %} {% if reqheaders and reqheaders|length > 0 %}

Request headers

{% for header in reqheaders %}
{{ header.name }}
{{ header.doc | safe_docstring_display | safe }}
{% endfor %}

{% endif %} {% if resheaders and resheaders|length > 0 %}

Response headers

{% for header in resheaders %}
{{ header.name }}
{{ header.doc | safe_docstring_display | safe }}
{% endfor %}

{% endif %} {% if return_type %}

Returns

{{ return_type }}

{% if return_type == 'array' %} an array of objects containing the following keys: {% elif return_type == 'octet stream' %} the raw data as an octet stream {% else %} an object containing the following keys: {% endif %} {{ returns_list | safe_docstring_display | safe }}


{% endif %} {% if status_codes and status_codes|length > 0 %}

HTTP status codes

{% for status in status_codes %}
{{ status.code }}
{{ status.doc | safe_docstring_display | safe }}
{% endfor %}

{% endif %} {% if examples and examples|length > 0 %}

Examples

{% for example in examples %}
{{ example }}
{% endfor %}
{% endif %}
{% endblock %} diff --git a/swh/web/templates/api/endpoints.html b/swh/web/templates/api/endpoints.html index 1e4a49f2..71da9c62 100644 --- a/swh/web/templates/api/endpoints.html +++ b/swh/web/templates/api/endpoints.html @@ -1,82 +1,82 @@ {% extends "layout.html" %} {% comment %} Copyright (C) 2015-2019 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load swh_templatetags %} {% block title %} Endpoints – Software Heritage API {% endblock %} {% block navbar-content %} {% endblock %} {% block content %}

Below you can find a list of the available endpoints for version 1 of the Software Heritage API. For a more general introduction please refer to the API overview.

Endpoints marked "available" are considered stable for the current version of the API; endpoints marked "upcoming" are work in progress that will be stabilized in the near future.

{% for route, doc in doc_routes %} {% if doc.tags|length > 0 %} {% else %} {% endif %} {% endfor %}
Endpoint Description
{% url doc.route_view_name %} - {% url doc.route_view_name %} + {{ doc.route }} {{ doc.doc_intro | safe_docstring_display | safe }}
{% endblock %} diff --git a/swh/web/tests/api/test_apidoc.py b/swh/web/tests/api/test_apidoc.py index 7dfc9b9d..5ead7dab 100644 --- a/swh/web/tests/api/test_apidoc.py +++ b/swh/web/tests/api/test_apidoc.py @@ -1,313 +1,335 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from rest_framework.response import Response from swh.storage.exc import StorageDBError, StorageAPIError from swh.web.api.apidoc import api_doc, _parse_httpdomain_doc from swh.web.api.apiurls import api_route from swh.web.common.exc import BadInputExc, ForbiddenExc, NotFoundExc +from swh.web.common.utils import reverse from swh.web.tests.django_asserts import assert_template_used httpdomain_doc = """ .. http:get:: /api/1/revision/(sha1_git)/ Get information about a revision in the archive. Revisions are identified by **sha1** checksums, compatible with Git commit identifiers. See :func:`swh.model.identifiers.revision_identifier` in our data model module for details about how they are computed. :param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` header of request :>json object author: information about the author of the revision :>json object committer: information about the committer of the revision :>json string committer_date: ISO representation of the commit date (in UTC) :>json string date: ISO representation of the revision date (in UTC) :>json string directory: the unique identifier that revision points to :>json string directory_url: link to :http:get:`/api/1/directory/(sha1_git)/[(path)/]` to get information about the directory associated to the revision :>json string id: the revision unique identifier :>json boolean merge: whether or not the revision corresponds to a merge commit :>json string message: the message associated to the revision :>json array parents: the parents of the revision, i.e. the previous revisions that head directly to it, each entry of that array contains an unique parent revision identifier but also a link to :http:get:`/api/1/revision/(sha1_git)/` to get more information about it :>json string type: the type of the revision **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head` :statuscode 200: no error :statuscode 400: an invalid **sha1_git** value has been provided :statuscode 404: requested revision can not be found in the archive **Request:** .. parsed-literal:: :swh_web_api:`revision/aafb16d69fd30ff58afdd69036a26047f3aebdc6/` """ exception_http_code = { BadInputExc: 400, ForbiddenExc: 403, NotFoundExc: 404, Exception: 500, StorageAPIError: 503, StorageDBError: 503, } def test_apidoc_nodoc_failure(): with pytest.raises(Exception): @api_doc('/my/nodoc/url/') def apidoc_nodoc_tester(request, arga=0, argb=0): return Response(arga + argb) @api_route(r'/some/(?P[0-9]+)/(?P[0-9]+)/', - 'some-doc-route') + 'api-1-some-doc-route') @api_doc('/some/doc/route/') def apidoc_route(request, myarg, myotherarg, akw=0): """ Sample doc """ return {'result': int(myarg) + int(myotherarg) + akw} -# remove deprecation warnings related to docutils -@pytest.mark.filterwarnings( - 'ignore:.*U.*mode is deprecated:DeprecationWarning') + def test_apidoc_route_doc(client): - rv = client.get('/api/1/some/doc/route/', HTTP_ACCEPT='text/html') + url = reverse('api-1-some-doc-route-doc') + rv = client.get(url, HTTP_ACCEPT='text/html') assert rv.status_code == 200, rv.content assert_template_used(rv, 'api/apidoc.html') def test_apidoc_route_fn(api_client): - rv = api_client.get('/api/1/some/1/1/') - + url = reverse('api-1-some-doc-route', + url_args={'myarg': 1, 'myotherarg': 1}) + rv = api_client.get(url) assert rv.status_code == 200, rv.data -@api_route(r'/test/error/(?P.+)/', 'test-error') +@api_route(r'/test/error/(?P.+)/', 'api-1-test-error') @api_doc('/test/error/') def apidoc_test_error_route(request, exc_name): """ Sample doc """ for e in exception_http_code.keys(): if e.__name__ == exc_name: raise e('Error') def test_apidoc_error(api_client): for exc, code in exception_http_code.items(): - rv = api_client.get('/api/1/test/error/%s/' % exc.__name__) + url = reverse('api-1-test-error', + url_args={'exc_name': exc.__name__}) + rv = api_client.get(url) assert rv.status_code == code, rv.data @api_route(r'/some/full/(?P[0-9]+)/(?P[0-9]+)/', - 'some-complete-doc-route') + 'api-1-some-complete-doc-route') @api_doc('/some/complete/doc/route/') def apidoc_full_stack(request, myarg, myotherarg, akw=0): """ Sample doc """ return {'result': int(myarg) + int(myotherarg) + akw} -# remove deprecation warnings related to docutils -@pytest.mark.filterwarnings( - 'ignore:.*U.*mode is deprecated:DeprecationWarning') def test_apidoc_full_stack_doc(client): - rv = client.get('/api/1/some/complete/doc/route/', HTTP_ACCEPT='text/html') + url = reverse('api-1-some-complete-doc-route-doc') + rv = client.get(url, HTTP_ACCEPT='text/html') assert rv.status_code == 200, rv.content assert_template_used(rv, 'api/apidoc.html') def test_apidoc_full_stack_fn(api_client): - rv = api_client.get('/api/1/some/full/1/1/') + url = reverse('api-1-some-complete-doc-route', + url_args={'myarg': 1, 'myotherarg': 1}) + rv = api_client.get(url) assert rv.status_code == 200, rv.data +@api_route(r'/test/post/only/', 'api-1-test-post-only', + methods=['POST']) +@api_doc('/test/post/only/') +def apidoc_test_post_only(request, exc_name): + """ + Sample doc + """ + return {'result': 'some data'} + + +def test_apidoc_post_only(client): + # a dedicated view accepting GET requests should have + # been created to display the HTML documentation + url = reverse('api-1-test-post-only-doc') + rv = client.get(url, HTTP_ACCEPT='text/html') + assert rv.status_code == 200, rv.content + assert_template_used(rv, 'api/apidoc.html') + + def test_api_doc_parse_httpdomain(): doc_data = { 'description': '', 'urls': [], 'args': [], 'params': [], 'resheaders': [], 'reqheaders': [], 'return_type': '', 'returns': [], 'status_codes': [], 'examples': [] } _parse_httpdomain_doc(httpdomain_doc, doc_data) expected_urls = [{ 'rule': '/api/1/revision/ **\\(sha1_git\\)** /', 'methods': ['GET', 'HEAD'] }] assert 'urls' in doc_data assert doc_data['urls'] == expected_urls expected_description = ('Get information about a revision in the archive. ' 'Revisions are identified by **sha1** checksums, ' 'compatible with Git commit identifiers. See ' '**swh.model.identifiers.revision_identifier** in ' 'our data model module for details about how they ' 'are computed.') assert 'description' in doc_data assert doc_data['description'] == expected_description expected_args = [{ 'name': 'sha1_git', 'type': 'string', 'doc': ('hexadecimal representation of the revision ' '**sha1_git** identifier') }] assert 'args' in doc_data assert doc_data['args'] == expected_args expected_params = [] assert 'params' in doc_data assert doc_data['params'] == expected_params expected_reqheaders = [{ 'doc': ('the requested response content type, either ' '``application/json`` or ``application/yaml``'), 'name': 'Accept' }] assert 'reqheaders' in doc_data assert doc_data['reqheaders'] == expected_reqheaders expected_resheaders = [{ 'doc': 'this depends on **Accept** header of request', 'name': 'Content-Type' }] assert 'resheaders' in doc_data assert doc_data['resheaders'] == expected_resheaders expected_statuscodes = [ { 'code': '200', 'doc': 'no error' }, { 'code': '400', 'doc': 'an invalid **sha1_git** value has been provided' }, { 'code': '404', 'doc': 'requested revision can not be found in the archive' } ] assert 'status_codes' in doc_data assert doc_data['status_codes'] == expected_statuscodes expected_return_type = 'object' assert 'return_type' in doc_data assert doc_data['return_type'] in expected_return_type expected_returns = [ { 'name': 'author', 'type': 'object', 'doc': 'information about the author of the revision' }, { 'name': 'committer', 'type': 'object', 'doc': 'information about the committer of the revision' }, { 'name': 'committer_date', 'type': 'string', 'doc': 'ISO representation of the commit date (in UTC)' }, { 'name': 'date', 'type': 'string', 'doc': 'ISO representation of the revision date (in UTC)' }, { 'name': 'directory', 'type': 'string', 'doc': 'the unique identifier that revision points to' }, { 'name': 'directory_url', 'type': 'string', 'doc': ('link to ``_ to get information about ' 'the directory associated to the revision') }, { 'name': 'id', 'type': 'string', 'doc': 'the revision unique identifier' }, { 'name': 'merge', 'type': 'boolean', 'doc': 'whether or not the revision corresponds to a merge commit' }, { 'name': 'message', 'type': 'string', 'doc': 'the message associated to the revision' }, { 'name': 'parents', 'type': 'array', 'doc': ('the parents of the revision, i.e. the previous revisions ' 'that head directly to it, each entry of that array ' 'contains an unique parent revision identifier but also a ' 'link to ``_ to get more information ' 'about it') }, { 'name': 'type', 'type': 'string', 'doc': 'the type of the revision' } ] assert 'returns' in doc_data assert doc_data['returns'] == expected_returns expected_examples = [ '/api/1/revision/aafb16d69fd30ff58afdd69036a26047f3aebdc6/' ] assert 'examples' in doc_data assert doc_data['examples'] == expected_examples