# ---- swh/web/api/apiurls.py ----
# (from: diff --git a/swh/web/api/apiurls.py b/swh/web/api/apiurls.py,
#  index 97c02bc7..9676a2b4 100644, hunk @@ -1,124 +1,126 @@)

# Copyright (C) 2017-2018  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import re

from rest_framework.decorators import api_view

from swh.web.common.urlsindex import UrlsIndex
from swh.web.common.throttling import throttle_scope


class APIUrls(UrlsIndex):
    """
    Class to manage API documentation URLs.

    - Indexes all routes documented using apidoc's decorators.
    - Tracks endpoint/request processing method relationships for use in
      generating related urls in API documentation

    """
    # route -> documentation data, filled by add_route()
    apidoc_routes = {}
    # view function name -> list of rule dicts (memoized by
    # get_method_endpoints())
    method_endpoints = {}
    scope = 'api'

    @classmethod
    def get_app_endpoints(cls):
        """Return the mapping of documented routes to their doc data."""
        return cls.apidoc_routes

    @classmethod
    def get_method_endpoints(cls, f):
        """Return the routes bound to view function ``f``.

        The result of :meth:`group_routes_by_method` is cached per function
        name, so it is computed only once for a given ``f.__name__``.
        """
        if f.__name__ not in cls.method_endpoints:
            cls.method_endpoints[f.__name__] = cls.group_routes_by_method(f)
        return cls.method_endpoints[f.__name__]

    @classmethod
    def group_routes_by_method(cls, f):
        """
        Collect the URL endpoints that are processed by the function ``f``.

        Args:
            f: the view function; registered url patterns are matched
                against it by comparing ``__name__``

        Returns:
            A list of dicts, one per matching url pattern, with the keys
            ``rule`` (the url rule prefixed with ``/api``), ``name`` (the
            url pattern name) and ``methods`` (a set of upper-cased HTTP
            method names).

        """
        rules = []
        for urlp in cls.get_url_patterns():
            if urlp.callback.__name__ != f.__name__:
                continue
            method_names = urlp.callback.http_method_names
            url_rule = urlp.regex.pattern.replace('^', '/').replace('$', '')
            # every parenthesized group of the pattern is a url parameter
            for param in re.findall(r'\([^)]+\)', url_rule):
                names = re.findall('<(.*)>', param)
                param_name = names[0] if names else None
                if not param_name or not hasattr(f, 'doc_data'):
                    continue
                doc_args = f.doc_data['args']
                if not doc_args:
                    continue
                # A default is passed to next() so that a url parameter
                # without a documented argument is left untouched instead
                # of raising StopIteration.
                doc_arg = next(
                    (d for d in doc_args if d['name'] == param_name), None)
                if doc_arg is None:
                    continue
                url_rule = url_rule.replace(
                    param,
                    '<' + doc_arg['name'] + ': ' + doc_arg['type'] + '>'
                ).replace('.*', '')
            rules.append({
                'rule': '/api' + url_rule,
                'name': urlp.name,
                'methods': {method.upper() for method in method_names},
            })

        return rules

    @classmethod
    def add_route(cls, route, docstring, **kwargs):
        """
        Add a route to the self-documenting API reference

        Args:
            route: the documented route; also used to derive the
                ``route_view_name`` (slashes replaced by dashes)
            docstring: the documentation text stored for the route
            kwargs: extra entries stored alongside the docstring

        A route that is already registered is left unchanged.
        """
        if route in cls.apidoc_routes:
            return
        route_doc = {
            'docstring': docstring,
            'route_view_name': route[1:-1].replace('/', '-'),
        }
        route_doc.update(kwargs)
        cls.apidoc_routes[route] = route_doc


class api_route(object):  # noqa: N801
    """
    Decorator to ease the registration of an API endpoint
    using the Django REST Framework.

    Args:
        url_pattern: the url pattern used by DRF to identify the API route
        view_name: the name of the API view associated to the route used to
            reverse the url
        methods: array of HTTP methods supported by the API route
        throttle_scope: name of the throttling scope applied to the
            generated view (passed to the ``throttle_scope`` decorator)
        api_version: version string prepended to ``url_pattern``

    """

    def __init__(self, url_pattern=None, view_name=None,
                 methods=['GET', 'HEAD', 'OPTIONS'],
                 throttle_scope='swh_api',
                 api_version='1'):
        super().__init__()
        self.url_pattern = '^' + api_version + url_pattern + '$'
        self.view_name = view_name
        # copy so that the shared default list (or the caller's list) can
        # never be modified through this instance
        self.methods = list(methods)
        self.throttle_scope = throttle_scope

    def __call__(self, f):
        # create a DRF view from the wrapped function
        @api_view(self.methods)
        @throttle_scope(self.throttle_scope)
        def api_view_f(*args, **kwargs):
            return f(*args, **kwargs)
        # small hacks for correctly generating API endpoints index doc
        api_view_f.__name__ = f.__name__
        api_view_f.http_method_names = self.methods

        # register the route and its view in the endpoints index
        APIUrls.add_url_pattern(self.url_pattern, api_view_f,
                                self.view_name)
        # the undecorated function is returned; only the DRF view is
        # registered in the urls index
        return f


# ---- swh/web/api/views/vault.py ----
# (from: diff --git a/swh/web/api/views/vault.py
#  b/swh/web/api/views/vault.py, index 6a102ea8..43927e1b 100644,
#  hunk @@ -1,148 +1,150 @@)

# Copyright (C) 2015-2018  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

from django.http import HttpResponse
from django.views.decorators.cache import never_cache

from swh.model import hashutil
from swh.web.common import service, query
from swh.web.common.utils import reverse
from swh.web.api import apidoc as api_doc
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import (
    api_lookup, doc_exc_id_not_found, doc_exc_bad_id,
)


# XXX: a bit spaghetti. Would be better with class-based views.
def _dispatch_cook_progress(request, obj_type, obj_id):
    """Query or request the cooking of a vault bundle.

    Args:
        request: the incoming request; its ``method`` selects the action
        obj_type: the bundle type (e.g. ``'directory'`` or
            ``'revision_gitfast'``); the part before the first underscore
            is used in error messages
        obj_id: the object identifier, converted to hexadecimal for error
            messages

    Returns:
        For ``GET``, the result of looking up ``service.vault_progress``;
        for ``POST``, the result of looking up ``service.vault_cook`` with
        the optional ``email`` taken from the POST data, then from the
        query string. ``None`` for any other method.
    """
    hex_id = hashutil.hash_to_hex(obj_id)
    object_name = obj_type.split('_')[0].title()
    if request.method == 'GET':
        return api_lookup(
            service.vault_progress, obj_type, obj_id,
            notfound_msg=("{} '{}' was never requested."
                          .format(object_name, hex_id)))
    elif request.method == 'POST':
        email = request.POST.get('email', request.GET.get('email', None))
        return api_lookup(
            service.vault_cook, obj_type, obj_id, email,
            notfound_msg=("{} '{}' not found."
                          .format(object_name, hex_id)))


@api_route(r'/vault/directory/(?P<dir_id>[a-fA-F0-9]+)/',
           'vault-cook-directory', methods=['GET', 'POST'],
           throttle_scope='swh_vault_cooking')
@never_cache
@api_doc.route('/vault/directory/', tags=['hidden'])
@api_doc.arg('dir_id',
             default='d4a96ba891017d0d26c15e509b4e6515e40d75ee',
             argtype=api_doc.argtypes.sha1_git,
             argdoc="The directory's sha1 identifier")
# NOTE(review): 'email' is documented as an int; this looks like it should
# be a string type -- confirm against the available api_doc.argtypes.
@api_doc.param('email', default=None,
               argtype=api_doc.argtypes.int,
               doc="e-mail to notify when the bundle is ready")
@api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id)
@api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found)
@api_doc.returns(rettype=api_doc.rettypes.dict,
                 retdoc=('dictionary mapping containing the status of '
                         'the cooking'))
def api_vault_cook_directory(request, dir_id):
    """Requests an archive of the directory identified by dir_id.

    To import the directory in the current directory, use::

        $ tar xvf path/to/directory.tar.gz
    """
    _, obj_id = query.parse_hash_with_algorithms_or_throws(
        dir_id, ['sha1'], 'Only sha1_git is supported.')

    res = _dispatch_cook_progress(request, 'directory', obj_id)
    # tell the client where the cooked bundle can be downloaded
    res['fetch_url'] = reverse('vault-fetch-directory',
                               kwargs={'dir_id': dir_id})
    return res


@api_route(r'/vault/directory/(?P<dir_id>[a-fA-F0-9]+)/raw/',
           'vault-fetch-directory')
@api_doc.route('/vault/directory/raw/', tags=['hidden'],
               handle_response=True)
@api_doc.arg('dir_id',
             default='d4a96ba891017d0d26c15e509b4e6515e40d75ee',
             argtype=api_doc.argtypes.sha1_git,
             argdoc="The directory's sha1 identifier")
@api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id)
@api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found)
@api_doc.returns(rettype=api_doc.rettypes.octet_stream,
                 retdoc='the cooked directory tarball')
def api_vault_fetch_directory(request, dir_id):
    """Fetch the archive of the directory identified by dir_id."""
    _, obj_id = query.parse_hash_with_algorithms_or_throws(
        dir_id, ['sha1'], 'Only sha1_git is supported.')
    res = api_lookup(
        service.vault_fetch, 'directory', obj_id,
        notfound_msg="Directory with ID '{}' not found.".format(dir_id))
    fname = '{}.tar.gz'.format(dir_id)
    # serve the bundle as a downloadable attachment
    response = HttpResponse(res, content_type='application/gzip')
    response['Content-disposition'] = 'attachment; filename={}'.format(fname)
    return response


@api_route(r'/vault/revision/(?P<rev_id>[a-fA-F0-9]+)/gitfast/',
           'vault-cook-revision_gitfast', methods=['GET', 'POST'],
           throttle_scope='swh_vault_cooking')
@never_cache
@api_doc.route('/vault/revision/gitfast/', tags=['hidden'])
@api_doc.arg('rev_id',
             default='9174026cfe69d73ef80b27890615f8b2ef5c265a',
             argtype=api_doc.argtypes.sha1_git,
             argdoc="The revision's sha1_git identifier")
# NOTE(review): 'email' is documented as an int; this looks like it should
# be a string type -- confirm against the available api_doc.argtypes.
@api_doc.param('email', default=None,
               argtype=api_doc.argtypes.int,
               doc="e-mail to notify when the bundle is ready")
@api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id)
@api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found)
@api_doc.returns(rettype=api_doc.rettypes.dict,
                 retdoc='dictionary mapping containing the status of '
                        'the cooking')
def api_vault_cook_revision_gitfast(request, rev_id):
    """Requests an archive of the revision identified by rev_id.

    To import the revision in the current directory, use::

        $ git init
        $ zcat path/to/revision.gitfast.gz | git fast-import
        $ git checkout HEAD
    """
    _, obj_id = query.parse_hash_with_algorithms_or_throws(
        rev_id, ['sha1'], 'Only sha1_git is supported.')

    res = _dispatch_cook_progress(request, 'revision_gitfast', obj_id)
    # tell the client where the cooked bundle can be downloaded
    res['fetch_url'] = reverse('vault-fetch-revision_gitfast',
                               kwargs={'rev_id': rev_id})
    return res


@api_route(r'/vault/revision/(?P<rev_id>[a-fA-F0-9]+)/gitfast/raw/',
           'vault-fetch-revision_gitfast')
@api_doc.route('/vault/revision/gitfast/raw/', tags=['hidden'],
               handle_response=True)
@api_doc.arg('rev_id',
             default='9174026cfe69d73ef80b27890615f8b2ef5c265a',
             argtype=api_doc.argtypes.sha1_git,
             argdoc="The revision's sha1_git identifier")
@api_doc.raises(exc=api_doc.excs.badinput, doc=doc_exc_bad_id)
@api_doc.raises(exc=api_doc.excs.notfound, doc=doc_exc_id_not_found)
@api_doc.returns(rettype=api_doc.rettypes.octet_stream,
                 retdoc='the cooked revision git fast-export')
def api_vault_fetch_revision_gitfast(request, rev_id):
    """Fetch the archive of the revision identified by rev_id."""
    _, obj_id = query.parse_hash_with_algorithms_or_throws(
        rev_id, ['sha1'], 'Only sha1_git is supported.')
    res = api_lookup(
        service.vault_fetch, 'revision_gitfast', obj_id,
        notfound_msg="Revision with ID '{}' not found.".format(rev_id))
    fname = '{}.gitfast.gz'.format(rev_id)
    # serve the bundle as a downloadable attachment
    response = HttpResponse(res, content_type='application/gzip')
    response['Content-disposition'] = 'attachment; filename={}'.format(fname)
    return response


# (diff header fragment: diff --git a/swh/web/config.py b/swh/web/config.py,
#  index 518beaa5..39fe52c7 100644)
# ---- swh/web/config.py (diff hunk @@ -1,82 +1,91 @@) ----

# Copyright (C) 2017-2018  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from swh.core import config
from swh.storage import get_storage
from swh.indexer.storage import get_indexer_storage
from swh.vault.api.client import RemoteVaultClient

DEFAULT_CONFIG = {
    'allowed_hosts': ('list', []),
    'storage': ('dict', {
        'cls': 'remote',
        'args': {
            'url': 'http://127.0.0.1:5002/',
        },
    }),
    'indexer_storage': ('dict', {
        'cls': 'remote',
        'args': {
            'url': 'http://127.0.0.1:5007/',
        }
    }),
    'vault': ('string', 'http://127.0.0.1:5005/'),
    'log_dir': ('string', '/tmp/swh/log'),
    'debug': ('bool', False),
    'host': ('string', '127.0.0.1'),
    'port': ('int', 5004),
    'secret_key': ('string', 'development key'),
    'throttling': ('dict', {
        'cache_uri': None,  # production: memcached as cache (127.0.0.1:11211)
                            # development: in-memory cache so None
        'scopes': {
            'swh_api': {
                'limiter_rate': {
                    'default': '120/h'
                },
                'exempted_networks': ['127.0.0.0/8']
            },
            'swh_vault_cooking': {
                # per-HTTP-method rates; 'default' applies to the methods
                # that have no dedicated entry
                'limiter_rate': {
                    'default': '120/h',
                    'GET': '60/m'
                },
                'exempted_networks': ['127.0.0.0/8']
            }
        }
    })
}

# module-level cache filled on the first call to get_config()
swhweb_config = {}


def get_config(config_file='webapp/webapp'):
    """Read the configuration file `config_file` and return the parsed
    configuration as a dict, using ``DEFAULT_CONFIG`` for defaults.

    On the first call, the ``storage``, ``vault`` and ``indexer_storage``
    entries are replaced by client objects built from their configuration,
    and the result is cached in the module-level ``swhweb_config`` dict.
    Later calls return that same dict and ignore ``config_file``.
    """

    if not swhweb_config:
        # Build everything in a local dict first: if one of the client
        # constructors raises, the module-level cache stays empty and the
        # next call retries instead of returning a half-initialised config.
        cfg = dict(config.load_named_config(config_file, DEFAULT_CONFIG))
        config.prepare_folders(cfg, 'log_dir')
        cfg['storage'] = get_storage(**cfg['storage'])
        cfg['vault'] = RemoteVaultClient(cfg['vault'])
        cfg['indexer_storage'] = get_indexer_storage(
            **cfg['indexer_storage'])
        swhweb_config.update(cfg)
    return swhweb_config


def storage():
    """Return the current application's SWH storage.

    """
    return get_config()['storage']


def vault():
    """Return the current application's SWH vault.

    """
    return get_config()['vault']


def indexer_storage():
    """Return the current application's SWH indexer storage.

    """
    return get_config()['indexer_storage']


# ---- swh/web/settings/tests.py ----
# (from: diff --git a/swh/web/settings/tests.py b/swh/web/settings/tests.py,
#  index 18dc8c38..8bab5262 100644, hunk @@ -1,56 +1,63 @@)

# Copyright (C) 2017-2018  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

# flake8: noqa

from swh.web.config import get_config

# request counts per minute used to build the test throttling scopes below
scope1_limiter_rate = 3
scope1_limiter_rate_post = 1
scope2_limiter_rate = 5
scope2_limiter_rate_post = 2
scope3_limiter_rate = 1
scope3_limiter_rate_post = 1

swh_web_config = get_config()

# get_config() returns the shared module-level dict, so this update
# overrides the settings application-wide for the tests
swh_web_config.update({
    'debug': True,
    'secret_key': 'test',
    'throttling': {
        'cache_uri': None,
        'scopes': {
            'swh_api': {
                'limiter_rate': {
                    'default': '60/min'
                },
                'exempted_networks': ['127.0.0.0/8']
            },
            'swh_vault_cooking': {
                'limiter_rate': {
                    'default': '120/h',
                    'GET': '60/m'
                },
                'exempted_networks': ['127.0.0.0/8']
            },
            'scope1': {
                'limiter_rate': {
                    'default': '%s/min' % scope1_limiter_rate,
                    'POST': '%s/min' % scope1_limiter_rate_post,
                }
            },
            'scope2': {
                'limiter_rate': {
                    'default': '%s/min' % scope2_limiter_rate,
                    'POST': '%s/min' % scope2_limiter_rate_post
                }
            },
            'scope3': {
                'limiter_rate': {
                    'default': '%s/min' % scope3_limiter_rate,
                    'POST': '%s/min' % scope3_limiter_rate_post
                },
                'exempted_networks': ['127.0.0.0/8']
            }
        }
    }
})

from .common import *

ALLOWED_HOSTS += ['testserver']  # noqa