diff --git a/swh/web/api/apiresponse.py b/swh/web/api/apiresponse.py index 9c7b471b8..b3ed9e282 100644 --- a/swh/web/api/apiresponse.py +++ b/swh/web/api/apiresponse.py @@ -1,173 +1,172 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from rest_framework.response import Response from swh.storage.exc import StorageDBError, StorageAPIError from swh.web.api import utils from swh.web.api.exc import NotFoundExc, ForbiddenExc def compute_link_header(rv, options): """Add Link header in returned value results. - Expects rv to be a dict with 'results' and 'headers' key: - 'results': the returned value expected to be shown - 'headers': dictionary with link-next and link-prev - Args: - rv (dict): with keys: - - 'headers': potential headers with 'link-next' - and 'link-prev' keys - - 'results': containing the result to return + rv (dict): dictionary with keys: + + - headers: potential headers with 'link-next' and 'link-prev' + keys + - results: containing the result to return + options (dict): the initial dict to update with result if any Returns: - Dict with optional keys 'link-next' and 'link-prev'. + dict: dictionary with optional keys 'link-next' and 'link-prev' """ link_headers = [] if 'headers' not in rv: return {} rv_headers = rv['headers'] if 'link-next' in rv_headers: link_headers.append('<%s>; rel="next"' % ( rv_headers['link-next'])) if 'link-prev' in rv_headers: link_headers.append('<%s>; rel="previous"' % ( rv_headers['link-prev'])) if link_headers: link_header_str = ','.join(link_headers) headers = options.get('headers', {}) headers.update({ 'Link': link_header_str }) return headers return {} def filter_by_fields(request, data): - """Extract a request parameter 'fields' if it exists to permit the - filtering on the data dict's keys. 
+ """Extract a request parameter 'fields' if it exists to permit the filtering on + the data dict's keys. - If such field is not provided, returns the data as is. + If such field is not provided, returns the data as is. """ fields = utils.get_query_params(request).get('fields') if fields: fields = set(fields.split(',')) data = utils.filter_field_keys(data, fields) return data def transform(rv): """Transform an eventual returned value with multiple layer of information with only what's necessary. If the returned value rv contains the 'results' key, this is the associated value which is returned. Otherwise, return the initial dict without the potential 'headers' key. """ if 'results' in rv: return rv['results'] if 'headers' in rv: rv.pop('headers') return rv def make_api_response(request, data, doc_data={}, options={}): """Generates an API response based on the requested mimetype. - Args: - request: a DRF Request object - data: raw data to return in the API response - doc_data: documentation data for HTML response - options: optionnal data that can be used to generate the response + Args: + request: a DRF Request object + data: raw data to return in the API response + doc_data: documentation data for HTML response + options: optional data that can be used to generate the response - Returns: - a DRF Response a object + Returns: + a DRF Response object """ if data: options['headers'] = compute_link_header(data, options) data = transform(data) data = filter_by_fields(request, data) doc_env = doc_data headers = {} if 'headers' in options: doc_env['headers_data'] = options['headers'] headers = options['headers'] # get request status code doc_env['status_code'] = options.get('status', 200) response_args = {'status': doc_env['status_code'], 'headers': headers, 'content_type': request.accepted_media_type} # when requesting HTML, typically when browsing the API through its # documented views, we need to enrich the input data with documentation # related ones and inform DRF
that we request HTML template rendering if request.accepted_media_type == 'text/html': if data: data = json.dumps(data, sort_keys=True, indent=4, separators=(',', ': ')) doc_env['response_data'] = data doc_env['request'] = request doc_env['heading'] = utils.shorten_path(str(request.path)) response_args['data'] = doc_env response_args['template_name'] = 'apidoc.html' # otherwise simply return the raw data and let DRF picks # the correct renderer (JSON or YAML) else: response_args['data'] = data return Response(**response_args) def error_response(request, error, doc_data): """Private function to create a custom error response. - Args: - request: a DRF Request object - error: the exception that caused the error - doc_data: documentation data for HTML response + Args: + request: a DRF Request object + error: the exception that caused the error + doc_data: documentation data for HTML response + """ error_code = 400 if isinstance(error, NotFoundExc): error_code = 404 elif isinstance(error, ForbiddenExc): error_code = 403 elif isinstance(error, StorageDBError): error_code = 503 elif isinstance(error, StorageAPIError): error_code = 503 error_opts = {'status': error_code} error_data = { 'exception': error.__class__.__name__, 'reason': str(error), } return make_api_response(request, error_data, doc_data, options=error_opts) diff --git a/swh/web/api/apiurls.py b/swh/web/api/apiurls.py index f93cc9279..da7bbe98d 100644 --- a/swh/web/api/apiurls.py +++ b/swh/web/api/apiurls.py @@ -1,125 +1,129 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import re from django.conf.urls import url from rest_framework.decorators import api_view class APIUrls(object): """ Class to manage API documentation URLs. - * Indexes all routes documented using apidoc's decorators. 
- * Tracks endpoint/request processing method relationships for use - in generating related urls in API documentation + + - Indexes all routes documented using apidoc's decorators. + - Tracks endpoint/request processing method relationships for use in + generating related urls in API documentation + """ apidoc_routes = {} method_endpoints = {} urlpatterns = [] @classmethod def get_app_endpoints(cls): return cls.apidoc_routes @classmethod def get_method_endpoints(cls, f): if f.__name__ not in cls.method_endpoints: cls.method_endpoints[f.__name__] = cls.group_routes_by_method(f) return cls.method_endpoints[f.__name__] @classmethod def group_routes_by_method(cls, f): """ Group URL endpoints according to their processing method. + Returns: - A dict where keys are the processing method names, and values - are the routes that are bound to the key method. + A dict where keys are the processing method names, and values are + the routes that are bound to the key method. + """ rules = [] for urlp in cls.urlpatterns: endpoint = urlp.callback.__name__ if endpoint != f.__name__: continue method_names = urlp.callback.http_method_names url_rule = urlp.regex.pattern.replace('^', '/').replace('$', '') url_rule_params = re.findall('\([^)]+\)', url_rule) for param in url_rule_params: param_name = re.findall('<(.*)>', param) param_name = param_name[0] if len(param_name) > 0 else None if param_name and hasattr(f, 'doc_data'): param_index = \ next(i for (i, d) in enumerate(f.doc_data['args']) if d['name'] == param_name) if param_index is not None: url_rule = url_rule.replace( param, '<' + f.doc_data['args'][param_index]['name'] + ': ' + f.doc_data['args'][param_index]['type'] + '>') rule_dict = {'rule': '/api' + url_rule, 'name': urlp.name, 'methods': {method.upper() for method in method_names} } rules.append(rule_dict) return rules @classmethod def index_add_route(cls, route, docstring, **kwargs): """ Add a route to the self-documenting API reference """ route_view_name = 
route[1:-1].replace('/', '-') if route not in cls.apidoc_routes: d = {'docstring': docstring, 'route_view_name': route_view_name} for k, v in kwargs.items(): d[k] = v cls.apidoc_routes[route] = d @classmethod def index_add_url_pattern(cls, url_pattern, view, view_name): cls.urlpatterns.append(url(url_pattern, view, name=view_name)) @classmethod def get_url_patterns(cls): return cls.urlpatterns class api_route(object): # noqa: N801 """ Decorator to ease the registration of an API endpoint using the Django REST Framework. Args: url_pattern: the url pattern used by DRF to identify the API route - view_name: the name of the API view associated to the route - used to reverse the url + view_name: the name of the API view associated to the route used to + reverse the url methods: array of HTTP methods supported by the API route """ def __init__(self, url_pattern=None, view_name=None, methods=['GET', 'HEAD'], api_version='1'): super().__init__() self.url_pattern = '^' + api_version + url_pattern + '$' self.view_name = view_name self.methods = methods def __call__(self, f): # create a DRF view from the wrapped function @api_view(self.methods) def api_view_f(*args, **kwargs): return f(*args, **kwargs) # small hacks for correctly generating API endpoints index doc api_view_f.__name__ = f.__name__ api_view_f.http_method_names = self.methods # register the route and its view in the endpoints index APIUrls.index_add_url_pattern(self.url_pattern, api_view_f, self.view_name) return f diff --git a/swh/web/api/converters.py b/swh/web/api/converters.py index 5bf181818..7b494b395 100644 --- a/swh/web/api/converters.py +++ b/swh/web/api/converters.py @@ -1,312 +1,325 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import json from swh.model import hashutil from 
swh.core.utils import decode_with_escape from swh.web.api import utils def from_swh(dict_swh, hashess={}, bytess={}, dates={}, blacklist={}, removables_if_empty={}, empty_dict={}, empty_list={}, convert={}, convert_fn=lambda x: x): """Convert from an swh dictionary to something reasonably json serializable. Args: - - dict_swh: the origin dictionary needed to be transformed - - hashess: list/set of keys representing hashes values (sha1, sha256, - sha1_git, etc...) as bytes. Those need to be transformed in hexadecimal - string - - bytess: list/set of keys representing bytes values which needs to - be decoded - - blacklist: set of keys to filter out from the conversion - - convert: set of keys whose associated values need to be converted - using convert_fn - - convert_fn: the conversion function to apply on the value of key - in 'convert' - - The remaining keys are copied as is in the output. + dict_swh: the origin dictionary needed to be transformed + hashess: list/set of keys representing hashes values (sha1, sha256, + sha1_git, etc...) as bytes. Those need to be transformed into + hexadecimal strings + bytess: list/set of keys representing bytes values which need to be + decoded + blacklist: set of keys to filter out from the conversion + convert: set of keys whose associated values need to be converted using + convert_fn + convert_fn: the conversion function to apply on the value of key in + 'convert' + + The remaining keys are copied as is in the output. Returns: - dictionary equivalent as dict_swh only with its keys `converted`. + dictionary equivalent as dict_swh only with its keys converted. """ def convert_hashes_bytes(v): """v is supposedly a hash as bytes, returns it converted in hex. """ if isinstance(v, bytes): return hashutil.hash_to_hex(v) return v def convert_bytes(v): """v is supposedly a bytes string, decode as utf-8. FIXME: Improve decoding policy. If not utf-8, break!
""" if isinstance(v, bytes): return v.decode('utf-8') return v def convert_date(v): - """v is either: - - a dict with three keys: - - timestamp (dict or integer timestamp) - - offset - - negative_utc - - a datetime + """ + Args: + v (dict or datetime): either: + + - a dict with three keys: + + - timestamp (dict or integer timestamp) + - offset + - negative_utc + + - or, a datetime We convert it to a human-readable string """ if isinstance(v, datetime.datetime): return v.isoformat() tz = datetime.timezone(datetime.timedelta(minutes=v['offset'])) swh_timestamp = v['timestamp'] if isinstance(swh_timestamp, dict): date = datetime.datetime.fromtimestamp( swh_timestamp['seconds'], tz=tz) else: date = datetime.datetime.fromtimestamp( swh_timestamp, tz=tz) datestr = date.isoformat() if v['offset'] == 0 and v['negative_utc']: # remove the rightmost + and replace it with a - return '-'.join(datestr.rsplit('+', 1)) return datestr if not dict_swh: return dict_swh new_dict = {} for key, value in dict_swh.items(): if key in blacklist or (key in removables_if_empty and not value): continue if key in dates: new_dict[key] = convert_date(value) elif key in convert: new_dict[key] = convert_fn(value) elif isinstance(value, dict): new_dict[key] = from_swh(value, hashess=hashess, bytess=bytess, dates=dates, blacklist=blacklist, removables_if_empty=removables_if_empty, empty_dict=empty_dict, empty_list=empty_list, convert=convert, convert_fn=convert_fn) elif key in hashess: new_dict[key] = utils.fmap(convert_hashes_bytes, value) elif key in bytess: try: new_dict[key] = utils.fmap(convert_bytes, value) except UnicodeDecodeError: if 'decoding_failures' not in new_dict: new_dict['decoding_failures'] = [key] else: new_dict['decoding_failures'].append(key) new_dict[key] = utils.fmap(decode_with_escape, value) elif key in empty_dict and not value: new_dict[key] = {} elif key in empty_list and not value: new_dict[key] = [] else: new_dict[key] = value return new_dict def
from_provenance(provenance): """Convert from a provenance information to a provenance dictionary. Args: - provenance: Dictionary with the following keys: - content (sha1_git) : the content's identifier - revision (sha1_git) : the revision the content was seen - origin (int) : the origin the content was seen - visit (int) : the visit it occurred - path (bytes) : the path the content was seen at + provenance (dict): Dictionary with the following keys: + - content (sha1_git): the content's identifier + - revision (sha1_git): the revision the content was seen + - origin (int): the origin the content was seen + - visit (int): the visit it occurred + - path (bytes): the path the content was seen at + """ return from_swh(provenance, hashess={'content', 'revision'}, bytess={'path'}) def from_origin(origin): """Convert from an SWH origin to an origin dictionary. """ return from_swh(origin, removables_if_empty={'lister', 'project'}) def from_release(release): """Convert from an SWH release to a json serializable release dictionary. 
Args: - release: Dict with the following keys - - id: identifier of the revision (sha1 in bytes) - - revision: identifier of the revision the release points to (sha1 in - bytes) - - comment: release's comment message (bytes) - - name: release's name (string) - - author: release's author identifier (swh's id) - - synthetic: the synthetic property (boolean) + release (dict): dictionary with keys: + + - id: identifier of the revision (sha1 in bytes) + - revision: identifier of the revision the release points to (sha1 + in bytes) + + - comment: release's comment message (bytes) + - name: release's name (string) + - author: release's author identifier (swh's id) + - synthetic: the synthetic property (boolean) Returns: - Release dictionary with the following keys: + dict: Release dictionary with the following keys: + - id: hexadecimal sha1 (string) - revision: hexadecimal sha1 (string) - comment: release's comment message (string) - name: release's name (string) - author: release's author identifier (swh's id) - synthetic: the synthetic property (boolean) """ return from_swh( release, hashess={'id', 'target'}, bytess={'message', 'name', 'fullname', 'email'}, dates={'date'}, ) class SWHMetadataEncoder(json.JSONEncoder): """Special json encoder for metadata field which can contain bytes encoded value. """ def default(self, obj): if isinstance(obj, bytes): return obj.decode('utf-8') # Let the base class default method raise the TypeError return json.JSONEncoder.default(self, obj) def convert_revision_metadata(metadata): """Convert json specific dict to a json serializable one. """ if not metadata: return {} return json.loads(json.dumps(metadata, cls=SWHMetadataEncoder)) def from_revision(revision): """Convert from an SWH revision to a json serializable revision dictionary.
Args: - revision: Dict with the following keys - - id: identifier of the revision (sha1 in bytes) - - directory: identifier of the directory the revision points to (sha1 - in bytes) - - author_name, author_email: author's revision name and email - - committer_name, committer_email: committer's revision name and email - - message: revision's message - - date, date_offset: revision's author date - - committer_date, committer_date_offset: revision's commit date - - parents: list of parents for such revision - - synthetic: revision's property nature - - type: revision's type (git, tar or dsc at the moment) - - metadata: if the revision is synthetic, this can reference dynamic - properties. + revision (dict): dict with keys: + + - id: identifier of the revision (sha1 in bytes) + - directory: identifier of the directory the revision points to + (sha1 in bytes) + - author_name, author_email: author's revision name and email + - committer_name, committer_email: committer's revision name and + email + - message: revision's message + - date, date_offset: revision's author date + - committer_date, committer_date_offset: revision's commit date + - parents: list of parents for such revision + - synthetic: revision's property nature + - type: revision's type (git, tar or dsc at the moment) + - metadata: if the revision is synthetic, this can reference + dynamic properties. 
Returns: - Revision dictionary with the same keys as inputs, only: + dict: Revision dictionary with the same keys as inputs, except: + - sha1s are in hexadecimal strings (id, directory) - bytes are decoded in string (author_name, committer_name, - author_email, committer_email) - - remaining keys are left as is + author_email, committer_email) + + Remaining keys are left as is """ revision = from_swh(revision, hashess={'id', 'directory', 'parents', 'children'}, bytess={'name', 'fullname', 'email'}, convert={'metadata'}, convert_fn=convert_revision_metadata, dates={'date', 'committer_date'}) if revision: if 'parents' in revision: revision['merge'] = len(revision['parents']) > 1 if 'message' in revision: try: revision['message'] = revision['message'].decode('utf-8') except UnicodeDecodeError: revision['message_decoding_failed'] = True revision['message'] = None return revision def from_content(content): """Convert swh content to serializable content dictionary. """ return from_swh(content, hashess={'sha1', 'sha1_git', 'sha256', 'blake2s256'}, blacklist={'ctime'}, convert={'status'}, convert_fn=lambda v: 'absent' if v == 'hidden' else v) def from_person(person): """Convert swh person to serializable person dictionary. """ return from_swh(person, bytess={'name', 'fullname', 'email'}) def from_origin_visit(visit): """Convert swh origin_visit to serializable origin_visit dictionary. """ ov = from_swh(visit, hashess={'target'}, bytess={'branch'}, dates={'date'}, empty_dict={'metadata'}) if ov and 'occurrences' in ov: ov['occurrences'] = { decode_with_escape(k): v for k, v in ov['occurrences'].items() } return ov def from_directory_entry(dir_entry): """Convert swh person to serializable person dictionary. 
""" return from_swh(dir_entry, hashess={'dir_id', 'sha1_git', 'sha1', 'sha256', 'target'}, bytess={'name'}, removables_if_empty={ 'sha1', 'sha1_git', 'sha256', 'status'}, convert={'status'}, convert_fn=lambda v: 'absent' if v == 'hidden' else v) def from_filetype(content_entry): """Convert swh person to serializable person dictionary. """ return from_swh(content_entry, hashess={'id'}, bytess={'mimetype', 'encoding'}) diff --git a/swh/web/urls.py b/swh/web/urls.py index 7e34b4286..499e9c628 100644 --- a/swh/web/urls.py +++ b/swh/web/urls.py @@ -1,28 +1,36 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """swhweb URL Configuration -The `urlpatterns` list routes URLs to views. For more information please see: - https://docs.djangoproject.com/en/1.11/topics/http/urls/ +The :data:`urlpatterns` list routes URLs to views. For more information please + see: https://docs.djangoproject.com/en/1.11/topics/http/urls/ + Examples: -Function views - 1. Add an import: from my_app import views - 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') -Class-based views - 1. Add an import: from other_app.views import Home - 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') -Including another URLconf - 1. Import the include() function: from django.conf.urls import url, include - 2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) + +- Function views: + + 1. Add an import: ``from my_app import views`` + 2. Add a URL to urlpatterns: ``url(r'^$', views.home, name='home')`` + +- Class-based views: + + 1. Add an import: ``from other_app.views import Home`` + 2. Add a URL to urlpatterns: ``url(r'^$', Home.as_view(), name='home')`` + +- Including another URLconf: + + 1. Import the include function: ``from django.conf.urls import url, include`` + 2. 
Add a URL to urlpatterns: ``url(r'^blog/', include('blog.urls'))`` + """ from django.conf.urls import url, include from django.contrib.staticfiles.urls import staticfiles_urlpatterns urlpatterns = [ url(r'^api/', include('swh.web.api.urls')), ] urlpatterns += staticfiles_urlpatterns()