diff --git a/swh/lister/tests/test_throttling_retry.py b/swh/lister/tests/test_throttling_retry.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/tests/test_throttling_retry.py
@@ -0,0 +1,100 @@
+# Copyright (C) 2021 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+import requests
+from tenacity.wait import wait_fixed
+
+from swh.lister.throttling_retry import (
+    MAX_NUMBER_ATTEMPTS,
+    WAIT_EXP_BASE,
+    is_throttling_exception,
+    throttling_retry,
+)
+
+TEST_URL = "https://example.org/api/repositories"
+
+
+@throttling_retry()
+def make_request():
+    """Fetch TEST_URL, retrying with the default throttling policy."""
+    response = requests.get(TEST_URL)
+    response.raise_for_status()
+    return response
+
+
+def _assert_sleep_calls(mocker, mock_sleep, sleep_params):
+    """Check that the mocked sleep was called with the expected wait times."""
+    try:
+        mock_sleep.assert_has_calls([mocker.call(param) for param in sleep_params])
+    except AssertionError:
+        # tenacity < 5.1 has a different behavior for wait_exponential
+        # https://github.com/jd/tenacity/commit/aac4307a0aa30d7befd0ebe4212ee4fc69083a95
+        mock_sleep.assert_has_calls(
+            [mocker.call(param * WAIT_EXP_BASE) for param in sleep_params]
+        )
+
+
+def test_throttling_retry(requests_mock, mocker):
+    data = {"result": {}}
+    requests_mock.get(
+        TEST_URL,
+        [
+            {"status_code": 429},
+            {"status_code": 429},
+            {"status_code": 200, "json": data},
+        ],
+    )
+
+    mock_sleep = mocker.patch.object(make_request.retry, "sleep")
+
+    response = make_request()
+
+    _assert_sleep_calls(mocker, mock_sleep, [1, WAIT_EXP_BASE])
+
+    assert response.json() == data
+
+
+def test_throttling_retry_max_attemps(requests_mock, mocker):
+    requests_mock.get(
+        TEST_URL, [{"status_code": 429}] * (MAX_NUMBER_ATTEMPTS),
+    )
+
+    mock_sleep = mocker.patch.object(make_request.retry, "sleep")
+
+    with pytest.raises(requests.exceptions.HTTPError) as e:
+        make_request()
+
+    assert e.value.response.status_code == 429
+
+    _assert_sleep_calls(
+        mocker,
+        mock_sleep,
+        [float(WAIT_EXP_BASE ** i) for i in range(MAX_NUMBER_ATTEMPTS - 1)],
+    )
+
+
+@throttling_retry(wait=wait_fixed(WAIT_EXP_BASE))
+def make_request_wait_fixed():
+    """Fetch TEST_URL, retrying with a fixed wait between attempts."""
+    response = requests.get(TEST_URL)
+    response.raise_for_status()
+    return response
+
+
+def test_throttling_retry_wait_fixed(requests_mock, mocker):
+    requests_mock.get(
+        TEST_URL, [{"status_code": 429}, {"status_code": 429}, {"status_code": 200}]
+    )
+
+    mock_sleep = mocker.patch.object(make_request_wait_fixed.retry, "sleep")
+
+    make_request_wait_fixed()
+
+    _assert_sleep_calls(mocker, mock_sleep, [WAIT_EXP_BASE] * 2)
+
+
+def test_is_throttling_exception_without_response():
+    # an HTTPError built by hand carries no response object
+    assert not is_throttling_exception(requests.exceptions.HTTPError("error"))
diff --git a/swh/lister/throttling_retry.py b/swh/lister/throttling_retry.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/throttling_retry.py
@@ -0,0 +1,74 @@
+# Copyright (C) 2021 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from requests.exceptions import HTTPError
+from requests.status_codes import codes
+from tenacity import retry as tenacity_retry
+from tenacity.stop import stop_after_attempt
+from tenacity.wait import wait_exponential
+
+
+def is_throttling_exception(e: Exception) -> bool:
+    """
+    Checks if an exception is a requests.exceptions.HTTPError for
+    a response with status code 429 (too many requests).
+
+    An HTTPError that carries no response is not considered a throttling one.
+    """
+    if not isinstance(e, HTTPError):
+        return False
+    # e.response may be None (HTTPError raised without a response object)
+    response = e.response
+    return response is not None and response.status_code == codes.too_many_requests
+
+
+def retry_attempt(retry_state):
+    """
+    Utility function to get last retry attempt info based on the
+    tenacity version (as debian buster packages version 4.12).
+    """
+    try:
+        attempt = retry_state.outcome
+    except AttributeError:
+        # tenacity < 5.0
+        attempt = retry_state
+    return attempt
+
+
+def retry_if_throttling(retry_state) -> bool:
+    """
+    Custom tenacity retry predicate for handling HTTP responses with
+    status code 429 (too many requests).
+    """
+    attempt = retry_attempt(retry_state)
+    if attempt.failed:
+        exception = attempt.exception()
+        return is_throttling_exception(exception)
+    return False
+
+
+WAIT_EXP_BASE = 10
+MAX_NUMBER_ATTEMPTS = 5
+
+
+def throttling_retry(
+    retry=retry_if_throttling,
+    wait=wait_exponential(exp_base=WAIT_EXP_BASE),
+    stop=stop_after_attempt(max_attempt_number=MAX_NUMBER_ATTEMPTS),
+    **retry_args,
+):
+    """
+    Decorator for retrying a function possibly raising requests.exceptions.HTTPError
+    for status code 429 (too many requests).
+
+    The default wait strategy is based on exponential backoff.
+
+    The default max number of attempts is set to 5, HTTPError exception
+    will then be reraised.
+
+    All tenacity.retry parameters can also be overridden in client code.
+    """
+    # reraise defaults to True but stays overridable like any other parameter
+    retry_args.setdefault("reraise", True)
+    return tenacity_retry(retry=retry, wait=wait, stop=stop, **retry_args)