diff --git a/swh/loader/package/tests/test_utils.py b/swh/loader/package/tests/test_utils.py --- a/swh/loader/package/tests/test_utils.py +++ b/swh/loader/package/tests/test_utils.py @@ -13,7 +13,6 @@ import pytest from requests.exceptions import HTTPError -from swh.loader.exception import NotFound import swh.loader.package from swh.loader.package.utils import api_info, download, release_name @@ -213,11 +212,11 @@ status_code = 400 requests_mock.get(url, status_code=status_code) - with pytest.raises(NotFound) as e0: + with pytest.raises( + HTTPError, match=f"{status_code} Client Error: None for url: {url}" + ): api_info(url) - assert e0.value.args[0] == "Fail to query '%s'. Reason: %s" % (url, status_code) - def test_api_info(requests_mock): """Fetching json info from pypi project should be ok""" @@ -271,3 +270,39 @@ with pytest.raises(HTTPError): _check_download_ok(url, dest=str(tmp_path)) + + +@pytest.fixture(autouse=True) +def mock_api_info_retry_sleep(mocker): + mocker.patch.object(api_info.retry, "sleep") + + +def test_api_info_retry(mocker, requests_mock, tmp_path): + url = "https://example.org/api/endpoint" + json_data = {"foo": "bar"} + + requests_mock.get( + url, + [ + {"status_code": 429}, + {"status_code": 429}, + { + "json": json_data, + "status_code": 200, + }, + ], + ) + + assert json.loads(api_info(url)) == json_data + + +def test_api_info_retry_reraise(mocker, requests_mock, tmp_path): + url = "https://example.org/api/endpoint" + + requests_mock.get( + url, + [{"status_code": 429}] * 5, + ) + + with pytest.raises(HTTPError, match=f"429 Client Error: None for url: {url}"): + api_info(url) diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py --- a/swh/loader/package/utils.py +++ b/swh/loader/package/utils.py @@ -34,26 +34,6 @@ EMPTY_AUTHOR = Person.from_fullname(b"") -def api_info(url: str, **extra_params) -> bytes: - """Basic api client to retrieve information on project. This deals with - fetching json metadata about pypi projects. - - Args: - url (str): The api url (e.g PyPI, npm, etc...) - - Raises: - NotFound in case of query failures (for some reasons: 404, ...) - - Returns: - The associated response's information - - """ - response = requests.get(url, **{**DEFAULT_PARAMS, **extra_params}) - if response.status_code != 200: - raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}") - return response.content - - def _content_disposition_filename(header: str) -> Optional[str]: fname = None fnames = re.findall(r"filename[\*]?=([^;]+)", header) @@ -81,13 +61,16 @@ return False -@retry( +throttling_retry = retry( retry=_retry_if_throttling, wait=wait_exponential(exp_base=10), stop=stop_after_attempt(max_attempt_number=5), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True, ) + + +@throttling_retry def download( url: str, dest: str, @@ -181,6 +164,29 @@ return filepath, extrinsic_metadata +@throttling_retry +def api_info(url: str, **extra_params) -> bytes: + """Basic api client to retrieve information, typically JSON metadata, + on software package. + + Args: + url (str): The api url (e.g PyPI, npm, etc...) + + Raises: + NotFound in case of query failures (for some reasons: 404, ...) + + Returns: + The associated response's information + + """ + logger.debug("Fetching %s", url) + response = requests.get(url, **{**DEFAULT_PARAMS, **extra_params}) + if response.status_code == 404: + raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}") + response.raise_for_status() + return response.content + + def release_name(version: str, filename: Optional[str] = None) -> str: if filename: return "releases/%s/%s" % (version, filename)