diff --git a/swh/web/api/apiresponse.py b/swh/web/api/apiresponse.py --- a/swh/web/api/apiresponse.py +++ b/swh/web/api/apiresponse.py @@ -13,7 +13,10 @@ from swh.storage.exc import StorageDBError, StorageAPIError from swh.web.api import utils -from swh.web.common.exc import NotFoundExc, ForbiddenExc, BadInputExc +from swh.web.common.exc import ( + NotFoundExc, ForbiddenExc, + BadInputExc, LargePayloadExc +) from swh.web.common.utils import shorten_path, gen_path_info from swh.web.config import get_config @@ -165,6 +168,8 @@ error_code = 404 elif isinstance(error, ForbiddenExc): error_code = 403 + elif isinstance(error, LargePayloadExc): + error_code = 413 elif isinstance(error, StorageDBError): error_code = 503 elif isinstance(error, StorageAPIError): diff --git a/swh/web/api/views/identifiers.py b/swh/web/api/views/identifiers.py --- a/swh/web/api/views/identifiers.py +++ b/swh/web/api/views/identifiers.py @@ -10,6 +10,8 @@ ) from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route +from swh.web.common.exc import LargePayloadExc +from swh.web.config import get_config @api_route(r'/resolve/(?P.*)/', @@ -84,6 +86,11 @@ the pid is present, False otherwise) """ + limit = get_config()['pids_list_max_size'] + if len(request.data) > limit: + raise LargePayloadExc('The maximum number of PIDs this endpoint can ' + 'receive is %s' % limit) + persistent_ids = [get_persistent_identifier(pid) for pid in request.data] diff --git a/swh/web/common/exc.py b/swh/web/common/exc.py --- a/swh/web/common/exc.py +++ b/swh/web/common/exc.py @@ -44,11 +44,21 @@ pass +class LargePayloadExc(Exception): + """The input size is too large. + + Example: Asking to resolve 10000 persistent identifiers when the limit + is 1000. 
+ """ + pass + + http_status_code_message = { 400: 'Bad Request', 401: 'Unauthorized', 403: 'Access Denied', 404: 'Resource not found', + 413: 'Payload Too Large', 500: 'Internal Server Error', 501: 'Not Implemented', 502: 'Bad Gateway', diff --git a/swh/web/config.py b/swh/web/config.py --- a/swh/web/config.py +++ b/swh/web/config.py @@ -47,6 +47,7 @@ # do not display code highlighting for content > 1MB 'content_display_max_size': ('int', 5 * 1024 * 1024), 'snapshot_content_max_size': ('int', 1000), + 'pids_list_max_size': ('int', 1000), 'throttling': ('dict', { 'cache_uri': None, # production: memcached as cache (127.0.0.1:11211) # development: in-memory cache so None diff --git a/swh/web/tests/api/views/test_identifiers.py b/swh/web/tests/api/views/test_identifiers.py --- a/swh/web/tests/api/views/test_identifiers.py +++ b/swh/web/tests/api/views/test_identifiers.py @@ -16,6 +16,7 @@ unknown_content, unknown_directory, unknown_release, unknown_revision, unknown_snapshot ) +from swh.web.config import get_config @given(origin(), content(), directory(), release(), revision(), snapshot()) @@ -140,3 +141,23 @@ HTTP_ACCEPT='application/json') assert resp2.status_code == 400, resp.data + + +def test_api_known_raises_large_payload_error(api_client): + random_pid = 'swh:1:cnt:8068d0075010b590762c6cb5682ed53cb3c13de;' + limit = get_config()['pids_list_max_size'] + err_msg = 'The maximum number of PIDs this endpoint can receive is %s' \ + % limit + + pids = [random_pid for i in range(limit+1)] + + url = reverse('api-1-swh-pid-known') + resp = api_client.post(url, data=pids, format='json', + HTTP_ACCEPT='application/json') + + assert resp.status_code == 413, resp.data + assert resp['Content-Type'] == 'application/json' + assert resp.data == { + 'exception': 'LargePayloadExc', + 'reason': err_msg + }