diff --git a/swh/web/api/apiresponse.py b/swh/web/api/apiresponse.py --- a/swh/web/api/apiresponse.py +++ b/swh/web/api/apiresponse.py @@ -13,7 +13,10 @@ from swh.storage.exc import StorageDBError, StorageAPIError from swh.web.api import utils -from swh.web.common.exc import NotFoundExc, ForbiddenExc, BadInputExc +from swh.web.common.exc import ( + NotFoundExc, ForbiddenExc, + BadInputExc, LargePayloadExc +) from swh.web.common.utils import shorten_path, gen_path_info from swh.web.config import get_config @@ -165,6 +168,8 @@ error_code = 404 elif isinstance(error, ForbiddenExc): error_code = 403 + elif isinstance(error, LargePayloadExc): + error_code = 413 elif isinstance(error, StorageDBError): error_code = 503 elif isinstance(error, StorageAPIError): diff --git a/swh/web/api/views/identifiers.py b/swh/web/api/views/identifiers.py --- a/swh/web/api/views/identifiers.py +++ b/swh/web/api/views/identifiers.py @@ -10,6 +10,7 @@ ) from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route +from swh.web.common.exc import LargePayloadExc @api_route(r'/resolve/(?P.*)/', @@ -84,6 +85,11 @@ the pid is present, False otherwise) """ + limit = 1000 + if len(request.data) > limit: + raise LargePayloadExc('The maximum number of PIDs this endpoint can ' + 'receive is %s' % limit) + persistent_ids = [get_persistent_identifier(pid) for pid in request.data] diff --git a/swh/web/common/exc.py b/swh/web/common/exc.py --- a/swh/web/common/exc.py +++ b/swh/web/common/exc.py @@ -44,11 +44,21 @@ pass +class LargePayloadExc(Exception): + """The input size is too large. + + Example: Asking to resolve 10000 persistent identifiers when the limit + is 1000. 
+ """ + pass + + http_status_code_message = { 400: 'Bad Request', 401: 'Unauthorized', 403: 'Access Denied', 404: 'Resource not found', + 413: 'Payload Too Large', 500: 'Internal Server Error', 501: 'Not Implemented', 502: 'Bad Gateway', diff --git a/swh/web/tests/api/views/test_identifiers.py b/swh/web/tests/api/views/test_identifiers.py --- a/swh/web/tests/api/views/test_identifiers.py +++ b/swh/web/tests/api/views/test_identifiers.py @@ -140,3 +140,22 @@ HTTP_ACCEPT='application/json') assert resp2.status_code == 400, resp.data + + +def test_api_known_raises_large_payload_error(api_client): + random_pid = 'swh:1:cnt:8068d0075010b590762c6cb5682ed53cb3c13deb' + limit = 10000 + err_msg = 'The maximum number of PIDs this endpoint can receive is 1000' + + pids = [random_pid for i in range(limit)] + + url = reverse('api-1-swh-pid-known') + resp = api_client.post(url, data=pids, format='json', + HTTP_ACCEPT='application/json') + + assert resp.status_code == 413, resp.data + assert resp['Content-Type'] == 'application/json' + assert resp.data == { + 'exception': 'LargePayloadExc', + 'reason': err_msg + }