Page MenuHomeSoftware Heritage

D8495.id30646.diff
No OneTemporary

D8495.id30646.diff

diff --git a/swh/loader/package/crates/loader.py b/swh/loader/package/crates/loader.py
--- a/swh/loader/package/crates/loader.py
+++ b/swh/loader/package/crates/loader.py
@@ -14,7 +14,7 @@
from typing_extensions import TypedDict
from swh.loader.package.loader import BasePackageInfo, PackageLoader
-from swh.loader.package.utils import api_info, cached_method, release_name
+from swh.loader.package.utils import cached_method, get_url_body, release_name
from swh.model.model import ObjectType, Person, Release, Sha1Git, TimestampWithTimezone
from swh.storage.interface import StorageInterface
@@ -248,7 +248,7 @@
Returns:
Content response as bytes. Content response is a json document.
"""
- return api_info(self.url)
+ return get_url_body(self.url)
@cached_method
def info(self) -> Dict:
diff --git a/swh/loader/package/golang/loader.py b/swh/loader/package/golang/loader.py
--- a/swh/loader/package/golang/loader.py
+++ b/swh/loader/package/golang/loader.py
@@ -11,7 +11,12 @@
import attr
from swh.loader.package.loader import BasePackageInfo, PackageLoader
-from swh.loader.package.utils import EMPTY_AUTHOR, api_info, cached_method, release_name
+from swh.loader.package.utils import (
+ EMPTY_AUTHOR,
+ get_url_body,
+ release_name,
+ cached_method,
+)
from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
from swh.storage.interface import StorageInterface
@@ -54,7 +59,7 @@
self.url = _uppercase_encode(self.url)
def get_versions(self) -> Sequence[str]:
- versions = api_info(f"{self.url}/@v/list").decode().splitlines()
+ versions = get_url_body(f"{self.url}/@v/list").decode().splitlines()
# some go packages only have a development version not listed by the endpoint above,
# so ensure to return it or it will be missed by the golang loader
default_version = self.get_default_version()
@@ -64,12 +69,12 @@
@cached_method
def get_default_version(self) -> str:
- latest = api_info(f"{self.url}/@latest")
+ latest = get_url_body(f"{self.url}/@latest")
return json.loads(latest)["Version"]
def _raw_info(self, version: str) -> dict:
url = f"{self.url}/@v/{_uppercase_encode(version)}.info"
- return json.loads(api_info(url))
+ return json.loads(get_url_body(url))
def get_package_info(self, version: str) -> Iterator[Tuple[str, GolangPackageInfo]]:
# Encode the name because creating nested folders can become problematic
diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py
--- a/swh/loader/package/nixguix/loader.py
+++ b/swh/loader/package/nixguix/loader.py
@@ -17,7 +17,7 @@
PartialExtID,
RawExtrinsicMetadataCore,
)
-from swh.loader.package.utils import EMPTY_AUTHOR, api_info, cached_method
+from swh.loader.package.utils import EMPTY_AUTHOR, cached_method, get_url_body
from swh.model import hashutil
from swh.model.model import (
MetadataAuthority,
@@ -195,7 +195,7 @@
def retrieve_sources(url: str) -> bytes:
"""Retrieve sources. Potentially raise NotFound error."""
- return api_info(url, allow_redirects=True)
+ return get_url_body(url, allow_redirects=True)
def parse_sources(raw_sources: bytes) -> Dict[str, Any]:
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -19,7 +19,7 @@
PackageLoader,
RawExtrinsicMetadataCore,
)
-from swh.loader.package.utils import api_info, cached_method, release_name
+from swh.loader.package.utils import cached_method, get_url_body, release_name
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
@@ -116,7 +116,7 @@
@cached_method
def _raw_info(self) -> bytes:
- return api_info(self.provider_url)
+ return get_url_body(self.provider_url)
@cached_method
def info(self) -> Dict:
diff --git a/swh/loader/package/pubdev/loader.py b/swh/loader/package/pubdev/loader.py
--- a/swh/loader/package/pubdev/loader.py
+++ b/swh/loader/package/pubdev/loader.py
@@ -14,8 +14,8 @@
from swh.loader.package.utils import (
EMPTY_AUTHOR,
Person,
- api_info,
cached_method,
+ get_url_body,
release_name,
)
from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
@@ -78,7 +78,7 @@
)
def _raw_info(self) -> bytes:
- return api_info(self.package_info_url)
+ return get_url_body(self.package_info_url)
@cached_method
def info(self) -> Dict:
diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py
--- a/swh/loader/package/pypi/loader.py
+++ b/swh/loader/package/pypi/loader.py
@@ -18,7 +18,12 @@
PartialExtID,
RawExtrinsicMetadataCore,
)
-from swh.loader.package.utils import EMPTY_AUTHOR, api_info, cached_method, release_name
+from swh.loader.package.utils import (
+ EMPTY_AUTHOR,
+ cached_method,
+ get_url_body,
+ release_name,
+)
from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
MetadataAuthority,
@@ -83,7 +88,7 @@
@cached_method
def _raw_info(self) -> bytes:
- return api_info(self.provider_url)
+ return get_url_body(self.provider_url)
@cached_method
def info(self) -> Dict:
diff --git a/swh/loader/package/tests/test_utils.py b/swh/loader/package/tests/test_utils.py
--- a/swh/loader/package/tests/test_utils.py
+++ b/swh/loader/package/tests/test_utils.py
@@ -13,9 +13,8 @@
import pytest
from requests.exceptions import HTTPError
-from swh.loader.exception import NotFound
import swh.loader.package
-from swh.loader.package.utils import api_info, download, release_name
+from swh.loader.package.utils import download, get_url_body, release_name
def test_version_generation():
@@ -213,17 +212,17 @@
status_code = 400
requests_mock.get(url, status_code=status_code)
- with pytest.raises(NotFound) as e0:
- api_info(url)
-
- assert e0.value.args[0] == "Fail to query '%s'. Reason: %s" % (url, status_code)
+ with pytest.raises(
+ HTTPError, match=f"{status_code} Client Error: None for url: {url}"
+ ):
+ get_url_body(url)
def test_api_info(requests_mock):
"""Fetching json info from pypi project should be ok"""
url = "https://pypi.org/pypi/requests/json"
requests_mock.get(url, text='{"version": "0.0.1"}')
- actual_info = json.loads(api_info(url))
+ actual_info = json.loads(get_url_body(url))
assert actual_info == {
"version": "0.0.1",
}
@@ -271,3 +270,39 @@
with pytest.raises(HTTPError):
_check_download_ok(url, dest=str(tmp_path))
+
+
+@pytest.fixture(autouse=True)
+def mock_api_info_retry_sleep(mocker):
+ mocker.patch.object(get_url_body.retry, "sleep")
+
+
+def test_api_info_retry(mocker, requests_mock, tmp_path):
+ url = "https://example.org/api/endpoint"
+ json_data = {"foo": "bar"}
+
+ requests_mock.get(
+ url,
+ [
+ {"status_code": 429},
+ {"status_code": 429},
+ {
+ "json": json_data,
+ "status_code": 200,
+ },
+ ],
+ )
+
+ assert json.loads(get_url_body(url)) == json_data
+
+
+def test_api_info_retry_reraise(mocker, requests_mock, tmp_path):
+ url = "https://example.org/api/endpoint"
+
+ requests_mock.get(
+ url,
+ [{"status_code": 429}] * 5,
+ )
+
+ with pytest.raises(HTTPError, match=f"429 Client Error: None for url: {url}"):
+ get_url_body(url)
diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py
--- a/swh/loader/package/utils.py
+++ b/swh/loader/package/utils.py
@@ -34,26 +34,6 @@
EMPTY_AUTHOR = Person.from_fullname(b"")
-def api_info(url: str, **extra_params) -> bytes:
- """Basic api client to retrieve information on project. This deals with
- fetching json metadata about pypi projects.
-
- Args:
- url (str): The api url (e.g PyPI, npm, etc...)
-
- Raises:
- NotFound in case of query failures (for some reasons: 404, ...)
-
- Returns:
- The associated response's information
-
- """
- response = requests.get(url, **{**DEFAULT_PARAMS, **extra_params})
- if response.status_code != 200:
- raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}")
- return response.content
-
-
def _content_disposition_filename(header: str) -> Optional[str]:
fname = None
fnames = re.findall(r"filename[\*]?=([^;]+)", header)
@@ -81,13 +61,16 @@
return False
-@retry(
+throttling_retry = retry(
retry=_retry_if_throttling,
wait=wait_exponential(exp_base=10),
stop=stop_after_attempt(max_attempt_number=5),
before_sleep=before_sleep_log(logger, logging.WARNING),
reraise=True,
)
+
+
+@throttling_retry
def download(
url: str,
dest: str,
@@ -181,6 +164,29 @@
return filepath, extrinsic_metadata
+@throttling_retry
+def get_url_body(url: str, **extra_params) -> bytes:
+ """Basic API client to retrieve information, typically JSON metadata,
+ about a software package.
+
+ Args:
+ url (str): The API url (e.g. PyPI, npm, etc.)
+
+ Raises:
+ NotFound if the server answers 404, requests.HTTPError for any other error status
+
+ Returns:
+ The raw body of the response, as bytes
+
+ """
+ logger.debug("Fetching %s", url)
+ response = requests.get(url, **{**DEFAULT_PARAMS, **extra_params})
+ if response.status_code == 404:
+ raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}")
+ response.raise_for_status()
+ return response.content
+
+
def release_name(version: str, filename: Optional[str] = None) -> str:
if filename:
return "releases/%s/%s" % (version, filename)

File Metadata

Mime Type
text/plain
Expires
Sun, Aug 17, 11:21 PM (1 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3234307

Event Timeline