Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9697303
D8495.id30646.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
9 KB
Subscribers
None
D8495.id30646.diff
View Options
diff --git a/swh/loader/package/crates/loader.py b/swh/loader/package/crates/loader.py
--- a/swh/loader/package/crates/loader.py
+++ b/swh/loader/package/crates/loader.py
@@ -14,7 +14,7 @@
from typing_extensions import TypedDict
from swh.loader.package.loader import BasePackageInfo, PackageLoader
-from swh.loader.package.utils import api_info, cached_method, release_name
+from swh.loader.package.utils import cached_method, get_url_body, release_name
from swh.model.model import ObjectType, Person, Release, Sha1Git, TimestampWithTimezone
from swh.storage.interface import StorageInterface
@@ -248,7 +248,7 @@
Returns:
Content response as bytes. Content response is a json document.
"""
- return api_info(self.url)
+ return get_url_body(self.url)
@cached_method
def info(self) -> Dict:
diff --git a/swh/loader/package/golang/loader.py b/swh/loader/package/golang/loader.py
--- a/swh/loader/package/golang/loader.py
+++ b/swh/loader/package/golang/loader.py
@@ -11,7 +11,12 @@
import attr
from swh.loader.package.loader import BasePackageInfo, PackageLoader
-from swh.loader.package.utils import EMPTY_AUTHOR, api_info, cached_method, release_name
+from swh.loader.package.utils import (
+ EMPTY_AUTHOR,
+ get_url_body,
+ release_name,
+ cached_method,
+)
from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
from swh.storage.interface import StorageInterface
@@ -54,7 +59,7 @@
self.url = _uppercase_encode(self.url)
def get_versions(self) -> Sequence[str]:
- versions = api_info(f"{self.url}/@v/list").decode().splitlines()
+ versions = get_url_body(f"{self.url}/@v/list").decode().splitlines()
# some go packages only have a development version not listed by the endpoint above,
# so ensure to return it or it will be missed by the golang loader
default_version = self.get_default_version()
@@ -64,12 +69,12 @@
@cached_method
def get_default_version(self) -> str:
- latest = api_info(f"{self.url}/@latest")
+ latest = get_url_body(f"{self.url}/@latest")
return json.loads(latest)["Version"]
def _raw_info(self, version: str) -> dict:
url = f"{self.url}/@v/{_uppercase_encode(version)}.info"
- return json.loads(api_info(url))
+ return json.loads(get_url_body(url))
def get_package_info(self, version: str) -> Iterator[Tuple[str, GolangPackageInfo]]:
# Encode the name because creating nested folders can become problematic
diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py
--- a/swh/loader/package/nixguix/loader.py
+++ b/swh/loader/package/nixguix/loader.py
@@ -17,7 +17,7 @@
PartialExtID,
RawExtrinsicMetadataCore,
)
-from swh.loader.package.utils import EMPTY_AUTHOR, api_info, cached_method
+from swh.loader.package.utils import EMPTY_AUTHOR, cached_method, get_url_body
from swh.model import hashutil
from swh.model.model import (
MetadataAuthority,
@@ -195,7 +195,7 @@
def retrieve_sources(url: str) -> bytes:
"""Retrieve sources. Potentially raise NotFound error."""
- return api_info(url, allow_redirects=True)
+ return get_url_body(url, allow_redirects=True)
def parse_sources(raw_sources: bytes) -> Dict[str, Any]:
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -19,7 +19,7 @@
PackageLoader,
RawExtrinsicMetadataCore,
)
-from swh.loader.package.utils import api_info, cached_method, release_name
+from swh.loader.package.utils import cached_method, get_url_body, release_name
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
@@ -116,7 +116,7 @@
@cached_method
def _raw_info(self) -> bytes:
- return api_info(self.provider_url)
+ return get_url_body(self.provider_url)
@cached_method
def info(self) -> Dict:
diff --git a/swh/loader/package/pubdev/loader.py b/swh/loader/package/pubdev/loader.py
--- a/swh/loader/package/pubdev/loader.py
+++ b/swh/loader/package/pubdev/loader.py
@@ -14,8 +14,8 @@
from swh.loader.package.utils import (
EMPTY_AUTHOR,
Person,
- api_info,
cached_method,
+ get_url_body,
release_name,
)
from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
@@ -78,7 +78,7 @@
)
def _raw_info(self) -> bytes:
- return api_info(self.package_info_url)
+ return get_url_body(self.package_info_url)
@cached_method
def info(self) -> Dict:
diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py
--- a/swh/loader/package/pypi/loader.py
+++ b/swh/loader/package/pypi/loader.py
@@ -18,7 +18,12 @@
PartialExtID,
RawExtrinsicMetadataCore,
)
-from swh.loader.package.utils import EMPTY_AUTHOR, api_info, cached_method, release_name
+from swh.loader.package.utils import (
+ EMPTY_AUTHOR,
+ cached_method,
+ get_url_body,
+ release_name,
+)
from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
MetadataAuthority,
@@ -83,7 +88,7 @@
@cached_method
def _raw_info(self) -> bytes:
- return api_info(self.provider_url)
+ return get_url_body(self.provider_url)
@cached_method
def info(self) -> Dict:
diff --git a/swh/loader/package/tests/test_utils.py b/swh/loader/package/tests/test_utils.py
--- a/swh/loader/package/tests/test_utils.py
+++ b/swh/loader/package/tests/test_utils.py
@@ -13,9 +13,8 @@
import pytest
from requests.exceptions import HTTPError
-from swh.loader.exception import NotFound
import swh.loader.package
-from swh.loader.package.utils import api_info, download, release_name
+from swh.loader.package.utils import download, get_url_body, release_name
def test_version_generation():
@@ -213,17 +212,17 @@
status_code = 400
requests_mock.get(url, status_code=status_code)
- with pytest.raises(NotFound) as e0:
- api_info(url)
-
- assert e0.value.args[0] == "Fail to query '%s'. Reason: %s" % (url, status_code)
+ with pytest.raises(
+ HTTPError, match=f"{status_code} Client Error: None for url: {url}"
+ ):
+ get_url_body(url)
def test_api_info(requests_mock):
"""Fetching json info from pypi project should be ok"""
url = "https://pypi.org/pypi/requests/json"
requests_mock.get(url, text='{"version": "0.0.1"}')
- actual_info = json.loads(api_info(url))
+ actual_info = json.loads(get_url_body(url))
assert actual_info == {
"version": "0.0.1",
}
@@ -271,3 +270,39 @@
with pytest.raises(HTTPError):
_check_download_ok(url, dest=str(tmp_path))
+
+
+@pytest.fixture(autouse=True)
+def mock_api_info_retry_sleep(mocker):
+ mocker.patch.object(get_url_body.retry, "sleep")
+
+
+def test_api_info_retry(mocker, requests_mock, tmp_path):
+ url = "https://example.org/api/endpoint"
+ json_data = {"foo": "bar"}
+
+ requests_mock.get(
+ url,
+ [
+ {"status_code": 429},
+ {"status_code": 429},
+ {
+ "json": json_data,
+ "status_code": 200,
+ },
+ ],
+ )
+
+ assert json.loads(get_url_body(url)) == json_data
+
+
+def test_api_info_retry_reraise(mocker, requests_mock, tmp_path):
+ url = "https://example.org/api/endpoint"
+
+ requests_mock.get(
+ url,
+ [{"status_code": 429}] * 5,
+ )
+
+ with pytest.raises(HTTPError, match=f"429 Client Error: None for url: {url}"):
+ get_url_body(url)
diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py
--- a/swh/loader/package/utils.py
+++ b/swh/loader/package/utils.py
@@ -34,26 +34,6 @@
EMPTY_AUTHOR = Person.from_fullname(b"")
-def api_info(url: str, **extra_params) -> bytes:
- """Basic api client to retrieve information on project. This deals with
- fetching json metadata about pypi projects.
-
- Args:
- url (str): The api url (e.g PyPI, npm, etc...)
-
- Raises:
- NotFound in case of query failures (for some reasons: 404, ...)
-
- Returns:
- The associated response's information
-
- """
- response = requests.get(url, **{**DEFAULT_PARAMS, **extra_params})
- if response.status_code != 200:
- raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}")
- return response.content
-
-
def _content_disposition_filename(header: str) -> Optional[str]:
fname = None
fnames = re.findall(r"filename[\*]?=([^;]+)", header)
@@ -81,13 +61,16 @@
return False
-@retry(
+throttling_retry = retry(
retry=_retry_if_throttling,
wait=wait_exponential(exp_base=10),
stop=stop_after_attempt(max_attempt_number=5),
before_sleep=before_sleep_log(logger, logging.WARNING),
reraise=True,
)
+
+
+@throttling_retry
def download(
url: str,
dest: str,
@@ -181,6 +164,29 @@
return filepath, extrinsic_metadata
+@throttling_retry
+def get_url_body(url: str, **extra_params) -> bytes:
+ """Basic api client to retrieve information, typically JSON metadata,
+ on a software package.
+
+ Args:
+ url (str): The API URL to query (e.g. on PyPI, npm, ...)
+
+ Raises:
+ NotFound on a 404 response, HTTPError on any other error status
+
+ Returns:
+ The associated response's information
+
+ """
+ logger.debug("Fetching %s", url)
+ response = requests.get(url, **{**DEFAULT_PARAMS, **extra_params})
+ if response.status_code == 404:
+ raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}")
+ response.raise_for_status()
+ return response.content
+
+
def release_name(version: str, filename: Optional[str] = None) -> str:
if filename:
return "releases/%s/%s" % (version, filename)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Aug 17, 11:21 PM (1 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3234307
Attached To
D8495: package/utils: Add debug log and throttling retry to api_info
Event Timeline
Log In to Comment