Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124209
D8519.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
18 KB
Subscribers
None
D8519.diff
View Options
diff --git a/docs/new_lister_template.py b/docs/new_lister_template.py
--- a/docs/new_lister_template.py
+++ b/docs/new_lister_template.py
@@ -11,7 +11,7 @@
import requests
from tenacity.before_sleep import before_sleep_log
-from swh.lister.utils import throttling_retry
+from swh.lister.utils import http_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -77,11 +77,11 @@
def state_to_dict(self, state: NewForgeListerState) -> Dict[str, Any]:
return asdict(state)
- @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
+ @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
def page_request(self, url, params) -> requests.Response:
# Do the network resource request under a retrying decorator
# to handle rate limiting and transient errors up to a limit.
- # `throttling_retry` by default use the `requests` library to check
+ # `http_retry` by default use the `requests` library to check
# only for rate-limit and a base-10 exponential waiting strategy.
# This can be customized by passed waiting, retrying and logging strategies
# as functions. See the `tenacity` library documentation.
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -317,7 +317,7 @@
then immediately stop the listing by doing an equivalent of
:py:meth:`Response.raise_for_status` from the ``requests`` library. As for rate-limiting
errors, we have a strategy of using a flexible decorator to handle the retrying for us.
-It is based on the ``tenacity`` library and accessible as :py:func:`throttling_retry` from
+It is based on the ``tenacity`` library and accessible as :py:func:`http_retry` from
:py:mod:`swh.lister.utils`.
Pagination
diff --git a/swh/lister/arch/lister.py b/swh/lister/arch/lister.py
--- a/swh/lister/arch/lister.py
+++ b/swh/lister/arch/lister.py
@@ -16,7 +16,7 @@
import requests
from tenacity.before_sleep import before_sleep_log
-from swh.lister.utils import throttling_retry
+from swh.lister.utils import http_retry
from swh.model.hashutil import hash_to_hex
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -132,7 +132,7 @@
}
)
- @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
+ @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
def request_get(self, url: str, params: Dict[str, Any]) -> requests.Response:
logger.debug("Fetching URL %s with params %s", url, params)
diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py
--- a/swh/lister/bitbucket/lister.py
+++ b/swh/lister/bitbucket/lister.py
@@ -14,7 +14,7 @@
import requests
from tenacity.before_sleep import before_sleep_log
-from swh.lister.utils import throttling_retry
+from swh.lister.utils import http_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -107,7 +107,7 @@
if username is not None and password is not None:
self.session.auth = (username, password)
- @throttling_retry(before_sleep=before_sleep_log(logger, logging.DEBUG))
+ @http_retry(before_sleep=before_sleep_log(logger, logging.DEBUG))
def page_request(self, last_repo_cdate: str) -> requests.Response:
self.url_params["after"] = last_repo_cdate
diff --git a/swh/lister/bower/lister.py b/swh/lister/bower/lister.py
--- a/swh/lister/bower/lister.py
+++ b/swh/lister/bower/lister.py
@@ -8,7 +8,7 @@
import requests
from tenacity.before_sleep import before_sleep_log
-from swh.lister.utils import throttling_retry
+from swh.lister.utils import http_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -49,7 +49,7 @@
}
)
- @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
+ @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response:
logger.info("Fetching URL %s with params %s", url, params)
diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py
--- a/swh/lister/cgit/lister.py
+++ b/swh/lister/cgit/lister.py
@@ -15,7 +15,7 @@
from swh.lister import USER_AGENT
from swh.lister.pattern import CredentialsType, StatelessLister
-from swh.lister.utils import throttling_retry
+from swh.lister.utils import http_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -79,7 +79,7 @@
)
self.base_git_url = base_git_url
- @throttling_retry(before_sleep=before_sleep_log(logger, logging.DEBUG))
+ @http_retry(before_sleep=before_sleep_log(logger, logging.DEBUG))
def _get_and_parse(self, url: str) -> BeautifulSoup:
"""Get the given url and parse the retrieved HTML using BeautifulSoup"""
response = self.session.get(url)
diff --git a/swh/lister/gitea/tests/test_lister.py b/swh/lister/gitea/tests/test_lister.py
--- a/swh/lister/gitea/tests/test_lister.py
+++ b/swh/lister/gitea/tests/test_lister.py
@@ -138,7 +138,7 @@
assert stats.pages == 1
-@pytest.mark.parametrize("http_code", [400, 500, 502])
+@pytest.mark.parametrize("http_code", [400, 500])
def test_gitea_list_http_error(
swh_scheduler, requests_mock, http_code, trygitea_p1, trygitea_p2
):
diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py
--- a/swh/lister/gitlab/lister.py
+++ b/swh/lister/gitlab/lister.py
@@ -17,7 +17,7 @@
from swh.lister import USER_AGENT
from swh.lister.pattern import CredentialsType, Lister
-from swh.lister.utils import is_retryable_exception, throttling_retry
+from swh.lister.utils import http_retry, is_retryable_exception
from swh.scheduler.model import ListedOrigin
logger = logging.getLogger(__name__)
@@ -138,7 +138,7 @@
def state_to_dict(self, state: GitLabListerState) -> Dict[str, Any]:
return asdict(state)
- @throttling_retry(
+ @http_retry(
retry=_if_rate_limited, before_sleep=before_sleep_log(logger, logging.WARNING)
)
def get_page_result(self, url: str) -> PageResult:
diff --git a/swh/lister/gogs/lister.py b/swh/lister/gogs/lister.py
--- a/swh/lister/gogs/lister.py
+++ b/swh/lister/gogs/lister.py
@@ -12,7 +12,7 @@
import requests
from tenacity.before_sleep import before_sleep_log
-from swh.lister.utils import throttling_retry
+from swh.lister.utils import http_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -116,7 +116,7 @@
def state_to_dict(self, state: GogsListerState) -> Dict[str, Any]:
return asdict(state)
- @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
+ @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
def page_request(
self, url: str, params: Dict[str, Any]
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
diff --git a/swh/lister/gogs/tests/test_lister.py b/swh/lister/gogs/tests/test_lister.py
--- a/swh/lister/gogs/tests/test_lister.py
+++ b/swh/lister/gogs/tests/test_lister.py
@@ -177,7 +177,7 @@
assert stats.origins == 6
-@pytest.mark.parametrize("http_code", [400, 500, 502])
+@pytest.mark.parametrize("http_code", [400, 500])
def test_gogs_list_http_error(
swh_scheduler, requests_mock, http_code, trygogs_p1, trygogs_p3_last
):
diff --git a/swh/lister/golang/lister.py b/swh/lister/golang/lister.py
--- a/swh/lister/golang/lister.py
+++ b/swh/lister/golang/lister.py
@@ -13,7 +13,7 @@
import requests
from tenacity import before_sleep_log
-from swh.lister.utils import retry_policy_generic, throttling_retry
+from swh.lister.utils import http_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -87,8 +87,7 @@
):
self.updated = True
- @throttling_retry(
- retry=retry_policy_generic,
+ @http_retry(
before_sleep=before_sleep_log(logger, logging.WARNING),
)
def api_request(self, url: str) -> List[str]:
diff --git a/swh/lister/launchpad/lister.py b/swh/lister/launchpad/lister.py
--- a/swh/lister/launchpad/lister.py
+++ b/swh/lister/launchpad/lister.py
@@ -14,7 +14,7 @@
from lazr.restfulclient.resource import Collection
from tenacity.before_sleep import before_sleep_log
-from swh.lister.utils import retry_if_exception, throttling_retry
+from swh.lister.utils import http_retry, retry_if_exception
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -100,7 +100,7 @@
d[attribute_name] = date_last_modified.isoformat()
return d
- @throttling_retry(
+ @http_retry(
retry=retry_if_restful_error,
before_sleep=before_sleep_log(logger, logging.WARNING),
)
diff --git a/swh/lister/maven/lister.py b/swh/lister/maven/lister.py
--- a/swh/lister/maven/lister.py
+++ b/swh/lister/maven/lister.py
@@ -16,7 +16,7 @@
from tenacity.before_sleep import before_sleep_log
from swh.core.github.utils import GitHubSession
-from swh.lister.utils import throttling_retry
+from swh.lister.utils import http_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -112,7 +112,7 @@
def state_to_dict(self, state: MavenListerState) -> Dict[str, Any]:
return asdict(state)
- @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
+ @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response:
logger.info("Fetching URL %s with params %s", url, params)
diff --git a/swh/lister/maven/tests/test_lister.py b/swh/lister/maven/tests/test_lister.py
--- a/swh/lister/maven/tests/test_lister.py
+++ b/swh/lister/maven/tests/test_lister.py
@@ -125,6 +125,11 @@
requests_mock.get(URL_POM_3, content=maven_pom_3)
+@pytest.fixture(autouse=True)
+def retry_sleep_mock(mocker):
+ mocker.patch.object(MavenLister.page_request.retry, "sleep")
+
+
def test_maven_full_listing(swh_scheduler):
"""Covers full listing of multiple pages, checking page results and listed
origins, statelessness."""
diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py
--- a/swh/lister/npm/lister.py
+++ b/swh/lister/npm/lister.py
@@ -12,7 +12,7 @@
from swh.lister import USER_AGENT
from swh.lister.pattern import CredentialsType, Lister
-from swh.lister.utils import throttling_retry
+from swh.lister.utils import http_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -95,7 +95,7 @@
params["startkey"] = last_package_id
return params
- @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
+ @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
def page_request(self, last_package_id: str) -> requests.Response:
params = self.request_params(last_package_id)
logger.debug("Fetching URL %s with params %s", self.url, params)
diff --git a/swh/lister/npm/tests/test_lister.py b/swh/lister/npm/tests/test_lister.py
--- a/swh/lister/npm/tests/test_lister.py
+++ b/swh/lister/npm/tests/test_lister.py
@@ -35,6 +35,11 @@
return json.loads(Path(datadir, "npm_incremental_page2.json").read_text())
+@pytest.fixture(autouse=True)
+def retry_sleep_mock(mocker):
+ mocker.patch.object(NpmLister.page_request.retry, "sleep")
+
+
def _check_listed_npm_packages(lister, packages, scheduler_origins):
for package in packages:
package_name = package["doc"]["name"]
diff --git a/swh/lister/pubdev/lister.py b/swh/lister/pubdev/lister.py
--- a/swh/lister/pubdev/lister.py
+++ b/swh/lister/pubdev/lister.py
@@ -10,7 +10,7 @@
from requests.exceptions import HTTPError
from tenacity.before_sleep import before_sleep_log
-from swh.lister.utils import throttling_retry
+from swh.lister.utils import http_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -60,7 +60,7 @@
}
)
- @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
+ @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response:
logger.debug("Fetching URL %s with params %s", url, params)
diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py
--- a/swh/lister/pypi/lister.py
+++ b/swh/lister/pypi/lister.py
@@ -13,7 +13,7 @@
from tenacity.before_sleep import before_sleep_log
-from swh.lister.utils import throttling_retry
+from swh.lister.utils import http_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -88,7 +88,7 @@
def state_to_dict(self, state: PyPIListerState) -> Dict[str, Any]:
return asdict(state)
- @throttling_retry(
+ @http_retry(
retry=_if_rate_limited, before_sleep=before_sleep_log(logger, logging.WARNING)
)
def _changelog_last_serial(self, client: ServerProxy) -> int:
@@ -97,7 +97,7 @@
assert isinstance(serial, int)
return serial
- @throttling_retry(
+ @http_retry(
retry=_if_rate_limited, before_sleep=before_sleep_log(logger, logging.WARNING)
)
def _changelog_since_serial(
diff --git a/swh/lister/sourceforge/lister.py b/swh/lister/sourceforge/lister.py
--- a/swh/lister/sourceforge/lister.py
+++ b/swh/lister/sourceforge/lister.py
@@ -18,7 +18,7 @@
from tenacity.before_sleep import before_sleep_log
from swh.core.api.classes import stream_results
-from swh.lister.utils import retry_policy_generic, throttling_retry
+from swh.lister.utils import http_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -208,8 +208,7 @@
self._project_last_modified = listed_origins
return listed_origins
- @throttling_retry(
- retry=retry_policy_generic,
+ @http_retry(
before_sleep=before_sleep_log(logger, logging.WARNING),
)
def page_request(self, url, params) -> requests.Response:
diff --git a/swh/lister/tests/test_utils.py b/swh/lister/tests/test_utils.py
--- a/swh/lister/tests/test_utils.py
+++ b/swh/lister/tests/test_utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2021 the Software Heritage developers
+# Copyright (C) 2018-2022 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -7,12 +7,7 @@
from requests.status_codes import codes
from tenacity.wait import wait_fixed
-from swh.lister.utils import (
- MAX_NUMBER_ATTEMPTS,
- WAIT_EXP_BASE,
- split_range,
- throttling_retry,
-)
+from swh.lister.utils import MAX_NUMBER_ATTEMPTS, WAIT_EXP_BASE, http_retry, split_range
@pytest.mark.parametrize(
@@ -51,7 +46,7 @@
TEST_URL = "https://example.og/api/repositories"
-@throttling_retry()
+@http_retry()
def make_request():
response = requests.get(TEST_URL)
response.raise_for_status()
@@ -62,13 +57,22 @@
mock_sleep.assert_has_calls([mocker.call(param) for param in sleep_params])
-def test_throttling_retry(requests_mock, mocker):
+@pytest.mark.parametrize(
+ "status_code",
+ [
+ codes.too_many_requests,
+ codes.internal_server_error,
+ codes.bad_gateway,
+ codes.service_unavailable,
+ ],
+)
+def test_http_retry(requests_mock, mocker, status_code):
data = {"result": {}}
requests_mock.get(
TEST_URL,
[
- {"status_code": codes.too_many_requests},
- {"status_code": codes.too_many_requests},
+ {"status_code": status_code},
+ {"status_code": status_code},
{"status_code": codes.ok, "json": data},
],
)
@@ -82,7 +86,7 @@
assert response.json() == data
-def test_throttling_retry_max_attemps(requests_mock, mocker):
+def test_http_retry_max_attemps(requests_mock, mocker):
requests_mock.get(
TEST_URL,
[{"status_code": codes.too_many_requests}] * (MAX_NUMBER_ATTEMPTS),
@@ -102,14 +106,14 @@
)
-@throttling_retry(wait=wait_fixed(WAIT_EXP_BASE))
+@http_retry(wait=wait_fixed(WAIT_EXP_BASE))
def make_request_wait_fixed():
response = requests.get(TEST_URL)
response.raise_for_status()
return response
-def test_throttling_retry_wait_fixed(requests_mock, mocker):
+def test_http_retry_wait_fixed(requests_mock, mocker):
requests_mock.get(
TEST_URL,
[
diff --git a/swh/lister/tuleap/lister.py b/swh/lister/tuleap/lister.py
--- a/swh/lister/tuleap/lister.py
+++ b/swh/lister/tuleap/lister.py
@@ -11,7 +11,7 @@
import requests
from tenacity.before_sleep import before_sleep_log
-from swh.lister.utils import throttling_retry
+from swh.lister.utils import http_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -65,7 +65,7 @@
}
)
- @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
+ @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response:
logger.info("Fetching URL %s with params %s", url, params)
diff --git a/swh/lister/tuleap/tests/test_lister.py b/swh/lister/tuleap/tests/test_lister.py
--- a/swh/lister/tuleap/tests/test_lister.py
+++ b/swh/lister/tuleap/tests/test_lister.py
@@ -92,6 +92,11 @@
return text, headers, page_results, origin_urls
+@pytest.fixture(autouse=True)
+def retry_sleep_mock(mocker):
+ mocker.patch.object(TuleapLister.page_request.retry, "sleep")
+
+
def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]):
"""Asserts that the two collections have the same origin URLs.
diff --git a/swh/lister/utils.py b/swh/lister/utils.py
--- a/swh/lister/utils.py
+++ b/swh/lister/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2021 the Software Heritage developers
+# Copyright (C) 2018-2022 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -66,14 +66,6 @@
return False
-def retry_if_throttling(retry_state) -> bool:
- """
- Custom tenacity retry predicate for handling HTTP responses with
- status code 429 (too many requests).
- """
- return retry_if_exception(retry_state, is_throttling_exception)
-
-
def retry_policy_generic(retry_state) -> bool:
"""
Custom tenacity retry predicate for handling failed requests:
@@ -90,8 +82,8 @@
MAX_NUMBER_ATTEMPTS = 5
-def throttling_retry(
- retry=retry_if_throttling,
+def http_retry(
+ retry=retry_policy_generic,
wait=wait_exponential(exp_base=WAIT_EXP_BASE),
stop=stop_after_attempt(max_attempt_number=MAX_NUMBER_ATTEMPTS),
**retry_args,
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 12:27 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221952
Attached To
D8519: Use generic HTTP retry policy by default and rename dedicated decorator
Event Timeline
Log In to Comment