Page MenuHomeSoftware Heritage

D7194.diff
No OneTemporary

D7194.diff

diff --git a/swh/lister/launchpad/lister.py b/swh/lister/launchpad/lister.py
--- a/swh/lister/launchpad/lister.py
+++ b/swh/lister/launchpad/lister.py
@@ -10,8 +10,10 @@
import iso8601
from launchpadlib.launchpad import Launchpad
+from lazr.restfulclient.errors import RestfulError
from lazr.restfulclient.resource import Collection
+from swh.lister.utils import retry_if_exception, throttling_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -38,6 +40,10 @@
return repo.git_https_url if vcs_type == "git" else repo.web_link
+def retry_if_restful_error(retry_state):
+ return retry_if_exception(retry_state, lambda e: isinstance(e, RestfulError))
+
+
class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
"""
List repositories from Launchpad (git or bzr).
@@ -90,6 +96,23 @@
d[attribute_name] = date_last_modified.isoformat()
return d
+ @throttling_retry(retry=retry_if_restful_error)
+ def _page_request(
+ self, launchpad, vcs_type: str, date_last_modified: Optional[datetime]
+ ) -> Optional[Collection]:
+ """Querying the page of results for a given vcs_type since the date_last_modified. If
+ some issues occurs, this will deal with the retrying policy.
+
+ """
+ get_vcs_fns = {
+ "git": launchpad.git_repositories.getRepositories,
+ "bzr": launchpad.branches.getBranches,
+ }
+
+ return get_vcs_fns[vcs_type](
+ order_by="most neglected first", modified_since_date=date_last_modified,
+ )
+
def get_pages(self) -> Iterator[LaunchpadPageType]:
"""
Yields an iterator on all git/bzr repositories hosted on Launchpad sorted
@@ -103,27 +126,31 @@
"git": self.state.git_date_last_modified,
"bzr": self.state.bzr_date_last_modified,
}
- for vcs_type, get_vcs_fn in [
- ("git", launchpad.git_repositories.getRepositories),
- ("bzr", launchpad.branches.getBranches),
- ]:
- yield vcs_type, get_vcs_fn(
- order_by="most neglected first",
- modified_since_date=self.date_last_modified[vcs_type],
- )
+ for vcs_type in ["git", "bzr"]:
+ try:
+ result = self._page_request(
+ launchpad, vcs_type, self.date_last_modified[vcs_type]
+ )
+ except RestfulError as e:
+ logger.warning("Listing %s origins raised %s", vcs_type, e)
+ result = None
+ if not result:
+ continue
+ yield vcs_type, result
+ @throttling_retry(retry=retry_if_restful_error)
def get_origins_from_page(self, page: LaunchpadPageType) -> Iterator[ListedOrigin]:
"""
Iterate on all git repositories and yield ListedOrigin instances.
"""
assert self.lister_obj.id is not None
- prev_origin_url: Dict[str, Optional[str]] = {"git": None, "bzr": None}
-
vcs_type, repos = page
assert vcs_type in {"git", "bzr"}
+ prev_origin_url: Dict[str, Optional[str]] = {"git": None, "bzr": None}
+
for repo in repos:
origin_url = origin(vcs_type, repo)
diff --git a/swh/lister/launchpad/tests/test_lister.py b/swh/lister/launchpad/tests/test_lister.py
--- a/swh/lister/launchpad/tests/test_lister.py
+++ b/swh/lister/launchpad/tests/test_lister.py
@@ -8,6 +8,7 @@
from pathlib import Path
from typing import List
+from lazr.restfulclient.errors import RestfulError
import pytest
from ..lister import LaunchpadLister, origin
@@ -57,11 +58,18 @@
def _mock_launchpad(mocker, launchpad_response, launchpad_bzr_response=None):
mock_launchpad = mocker.patch("swh.lister.launchpad.lister.Launchpad")
mock_getRepositories = mock_launchpad.git_repositories.getRepositories
- mock_getRepositories.return_value = launchpad_response
+ if isinstance(launchpad_response, Exception):
+ mock_getRepositories.side_effect = launchpad_response
+ else:
+ mock_getRepositories.return_value = launchpad_response
mock_getBranches = mock_launchpad.branches.getBranches
- mock_getBranches.return_value = (
- [] if launchpad_bzr_response is None else launchpad_bzr_response
- )
+ if launchpad_bzr_response is not None:
+ if isinstance(launchpad_bzr_response, Exception):
+ mock_getBranches.side_effect = launchpad_bzr_response
+ else:
+ mock_getBranches.return_value = launchpad_bzr_response
+ else:
+ mock_getBranches.return_value = [] # empty page
mock_launchpad.login_anonymously.return_value = mock_launchpad
return mock_getRepositories, mock_getBranches
@@ -166,7 +174,7 @@
assert lister.incremental
assert lister.updated
- assert stats.pages == 2, "Empty bzr response still accounts for 1 page"
+ assert stats.pages == 1, "Empty bzr page response is ignored"
assert stats.origins == len(launchpad_response2)
mock_getRepositories.assert_called_once_with(
@@ -192,7 +200,7 @@
stats = lister.run()
assert not lister.updated
- assert stats.pages == 1 + 1, "Empty pages are still accounted for (1 git, 1 bzr)"
+ assert stats.pages == 1, "Empty pages are ignored(only 1 git page of results)"
assert stats.origins == 0
@@ -211,5 +219,38 @@
stats = lister.run()
assert lister.updated
- assert stats.pages == 1 + 1, "Empty bzr page is still accounted for (1 git, 1 bzr)"
+ assert stats.pages == 1, "Empty bzr page are ignored (only 1 git page of results)"
assert stats.origins == 1
+
+
+def test_launchpad_lister_raise_during_listing(
+ swh_scheduler, mocker, launchpad_response1, launchpad_bzr_response
+):
+ lister = LaunchpadLister(scheduler=swh_scheduler)
+ # Exponential retries take a long time, so stub time.sleep
+ mocker.patch.object(lister._page_request.retry, "sleep")
+
+ mock_getRepositories, mock_getBranches = _mock_launchpad(
+ mocker,
+ RestfulError("Refuse to list git page"), # breaks git page listing
+ launchpad_bzr_response,
+ )
+
+ stats = lister.run()
+
+ assert lister.updated
+ assert stats.pages == 1
+ assert stats.origins == len(launchpad_bzr_response)
+
+ mock_getRepositories, mock_getBranches = _mock_launchpad(
+ mocker,
+ launchpad_response1,
+ RestfulError("Refuse to list bzr"), # breaks bzr page listing
+ )
+
+ lister = LaunchpadLister(scheduler=swh_scheduler)
+ stats = lister.run()
+
+ assert lister.updated
+ assert stats.pages == 1
+ assert stats.origins == len(launchpad_response1)

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 10:09 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226157

Event Timeline