Page MenuHomeSoftware Heritage

D8495.id.diff
No OneTemporary

D8495.id.diff

diff --git a/swh/loader/package/tests/test_utils.py b/swh/loader/package/tests/test_utils.py
--- a/swh/loader/package/tests/test_utils.py
+++ b/swh/loader/package/tests/test_utils.py
@@ -13,7 +13,6 @@
import pytest
from requests.exceptions import HTTPError
-from swh.loader.exception import NotFound
import swh.loader.package
from swh.loader.package.utils import api_info, download, release_name
@@ -213,11 +212,11 @@
status_code = 400
requests_mock.get(url, status_code=status_code)
- with pytest.raises(NotFound) as e0:
+ with pytest.raises(
+ HTTPError, match=f"{status_code} Client Error: None for url: {url}"
+ ):
api_info(url)
- assert e0.value.args[0] == "Fail to query '%s'. Reason: %s" % (url, status_code)
-
def test_api_info(requests_mock):
"""Fetching json info from pypi project should be ok"""
@@ -271,3 +270,39 @@
with pytest.raises(HTTPError):
_check_download_ok(url, dest=str(tmp_path))
+
+
+@pytest.fixture(autouse=True)
+def mock_api_info_retry_sleep(mocker):
+ mocker.patch.object(api_info.retry, "sleep")
+
+
+def test_api_info_retry(mocker, requests_mock, tmp_path):
+ url = "https://example.org/api/endpoint"
+ json_data = {"foo": "bar"}
+
+ requests_mock.get(
+ url,
+ [
+ {"status_code": 429},
+ {"status_code": 429},
+ {
+ "json": json_data,
+ "status_code": 200,
+ },
+ ],
+ )
+
+ assert json.loads(api_info(url)) == json_data
+
+
+def test_api_info_retry_reraise(mocker, requests_mock, tmp_path):
+ url = "https://example.org/api/endpoint"
+
+ requests_mock.get(
+ url,
+ [{"status_code": 429}] * 5,
+ )
+
+ with pytest.raises(HTTPError, match=f"429 Client Error: None for url: {url}"):
+ api_info(url)
diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py
--- a/swh/loader/package/utils.py
+++ b/swh/loader/package/utils.py
@@ -34,26 +34,6 @@
EMPTY_AUTHOR = Person.from_fullname(b"")
-def api_info(url: str, **extra_params) -> bytes:
- """Basic api client to retrieve information on project. This deals with
- fetching json metadata about pypi projects.
-
- Args:
- url (str): The api url (e.g PyPI, npm, etc...)
-
- Raises:
- NotFound in case of query failures (for some reasons: 404, ...)
-
- Returns:
- The associated response's information
-
- """
- response = requests.get(url, **{**DEFAULT_PARAMS, **extra_params})
- if response.status_code != 200:
- raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}")
- return response.content
-
-
def _content_disposition_filename(header: str) -> Optional[str]:
fname = None
fnames = re.findall(r"filename[\*]?=([^;]+)", header)
@@ -81,13 +61,16 @@
return False
-@retry(
+throttling_retry = retry(
retry=_retry_if_throttling,
wait=wait_exponential(exp_base=10),
stop=stop_after_attempt(max_attempt_number=5),
before_sleep=before_sleep_log(logger, logging.WARNING),
reraise=True,
)
+
+
+@throttling_retry
def download(
url: str,
dest: str,
@@ -181,6 +164,29 @@
return filepath, extrinsic_metadata
+@throttling_retry
+def api_info(url: str, **extra_params) -> bytes:
+ """Basic api client to retrieve information, typically JSON metadata,
+ on software package.
+
+ Args:
+ url (str): The api url (e.g PyPI, npm, etc...)
+
+ Raises:
+ NotFound in case of query failures (for some reasons: 404, ...)
+
+ Returns:
+ The associated response's information
+
+ """
+ logger.debug("Fetching %s", url)
+ response = requests.get(url, **{**DEFAULT_PARAMS, **extra_params})
+ if response.status_code == 404:
+ raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}")
+ response.raise_for_status()
+ return response.content
+
+
def release_name(version: str, filename: Optional[str] = None) -> str:
if filename:
return "releases/%s/%s" % (version, filename)

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 11:29 AM (11 w, 15 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217013

Event Timeline