diff --git a/swh/lister/launchpad/lister.py b/swh/lister/launchpad/lister.py
--- a/swh/lister/launchpad/lister.py
+++ b/swh/lister/launchpad/lister.py
@@ -10,8 +10,10 @@
 
 import iso8601
 from launchpadlib.launchpad import Launchpad
+from lazr.restfulclient.errors import RestfulError
 from lazr.restfulclient.resource import Collection
 
+from swh.lister.utils import retry_if_exception, throttling_retry
 from swh.scheduler.interface import SchedulerInterface
 from swh.scheduler.model import ListedOrigin
 
@@ -38,6 +40,10 @@
     return repo.git_https_url if vcs_type == "git" else repo.web_link
 
 
+def retry_if_restful_error(retry_state):
+    return retry_if_exception(retry_state, lambda e: isinstance(e, RestfulError))
+
+
 class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
     """
     List repositories from Launchpad (git or bzr).
@@ -90,6 +96,25 @@
             d[attribute_name] = date_last_modified.isoformat()
         return d
 
+    @throttling_retry(retry=retry_if_restful_error)
+    def _page_request(
+        self, launchpad, vcs_type: str, date_last_modified: Optional[datetime]
+    ) -> Optional[Collection]:
+        """Query the page of results for a given vcs_type since date_last_modified.
+
+        A RestfulError raised by the query is deliberately not caught here, so
+        that the throttling_retry decorator can apply its retrying policy.
+
+        """
+        get_vcs_fns = {
+            "git": launchpad.git_repositories.getRepositories,
+            "bzr": launchpad.branches.getBranches,
+        }
+
+        return get_vcs_fns[vcs_type](
+            order_by="most neglected first", modified_since_date=date_last_modified,
+        )
+
     def get_pages(self) -> Iterator[LaunchpadPageType]:
         """
         Yields an iterator on all git/bzr repositories hosted on Launchpad sorted
@@ -103,27 +128,31 @@
             "git": self.state.git_date_last_modified,
             "bzr": self.state.bzr_date_last_modified,
         }
-        for vcs_type, get_vcs_fn in [
-            ("git", launchpad.git_repositories.getRepositories),
-            ("bzr", launchpad.branches.getBranches),
-        ]:
-            yield vcs_type, get_vcs_fn(
-                order_by="most neglected first",
-                modified_since_date=self.date_last_modified[vcs_type],
-            )
+        for vcs_type in ["git", "bzr"]:
+            try:
+                page_result = self._page_request(
+                    launchpad, vcs_type, self.date_last_modified[vcs_type]
+                )
+            except RestfulError as e:
+                # Retry policy gave up: skip this vcs type, keep listing the other
+                logger.warning("Listing %s origins raised %s", vcs_type, e)
+                continue
+            if not page_result:
+                continue
+            yield vcs_type, page_result
 
     def get_origins_from_page(self, page: LaunchpadPageType) -> Iterator[ListedOrigin]:
         """
         Iterate on all git repositories and yield ListedOrigin instances.
         """
         assert self.lister_obj.id is not None
 
-        prev_origin_url: Dict[str, Optional[str]] = {"git": None, "bzr": None}
-
         vcs_type, repos = page
 
         assert vcs_type in {"git", "bzr"}
 
+        prev_origin_url: Dict[str, Optional[str]] = {"git": None, "bzr": None}
+
         for repo in repos:
             origin_url = origin(vcs_type, repo)
 
diff --git a/swh/lister/launchpad/tests/test_lister.py b/swh/lister/launchpad/tests/test_lister.py
--- a/swh/lister/launchpad/tests/test_lister.py
+++ b/swh/lister/launchpad/tests/test_lister.py
@@ -8,6 +8,7 @@
 from pathlib import Path
 from typing import List
 
+from lazr.restfulclient.errors import RestfulError
 import pytest
 
 from ..lister import LaunchpadLister, origin
@@ -57,11 +58,18 @@
 def _mock_launchpad(mocker, launchpad_response, launchpad_bzr_response=None):
     mock_launchpad = mocker.patch("swh.lister.launchpad.lister.Launchpad")
     mock_getRepositories = mock_launchpad.git_repositories.getRepositories
-    mock_getRepositories.return_value = launchpad_response
+    if isinstance(launchpad_response, Exception):
+        mock_getRepositories.side_effect = launchpad_response
+    else:
+        mock_getRepositories.return_value = launchpad_response
     mock_getBranches = mock_launchpad.branches.getBranches
-    mock_getBranches.return_value = (
-        [] if launchpad_bzr_response is None else launchpad_bzr_response
-    )
+    if launchpad_bzr_response is not None:
+        if isinstance(launchpad_bzr_response, Exception):
+            mock_getBranches.side_effect = launchpad_bzr_response
+        else:
+            mock_getBranches.return_value = launchpad_bzr_response
+    else:
+        mock_getBranches.return_value = []  # empty page
     mock_launchpad.login_anonymously.return_value = mock_launchpad
 
     return mock_getRepositories, mock_getBranches
@@ -166,7 +174,7 @@
     assert lister.incremental
     assert lister.updated
 
-    assert stats.pages == 2, "Empty bzr response still accounts for 1 page"
+    assert stats.pages == 1, "Empty bzr page response is ignored"
     assert stats.origins == len(launchpad_response2)
 
     mock_getRepositories.assert_called_once_with(
@@ -192,7 +200,7 @@
     stats = lister.run()
 
     assert not lister.updated
-    assert stats.pages == 1 + 1, "Empty pages are still accounted for (1 git, 1 bzr)"
+    assert stats.pages == 1, "Empty pages are ignored(only 1 git page of results)"
     assert stats.origins == 0
 
 
@@ -211,5 +219,38 @@
     stats = lister.run()
 
     assert lister.updated
-    assert stats.pages == 1 + 1, "Empty bzr page is still accounted for (1 git, 1 bzr)"
+    assert stats.pages == 1, "Empty bzr page are ignored (only 1 git page of results)"
     assert stats.origins == 1
+
+
+def test_launchpad_lister_raise_during_listing(
+    swh_scheduler, mocker, launchpad_response1, launchpad_bzr_response
+):
+    lister = LaunchpadLister(scheduler=swh_scheduler)
+    # Exponential retries take a long time, so stub time.sleep
+    mocker.patch.object(lister._page_request.retry, "sleep")
+
+    mock_getRepositories, mock_getBranches = _mock_launchpad(
+        mocker,
+        RestfulError("Refuse to list git page"),  # breaks git page listing
+        launchpad_bzr_response,
+    )
+
+    stats = lister.run()
+
+    assert lister.updated
+    assert stats.pages == 1
+    assert stats.origins == len(launchpad_bzr_response)
+
+    mock_getRepositories, mock_getBranches = _mock_launchpad(
+        mocker,
+        launchpad_response1,
+        RestfulError("Refuse to list bzr"),  # breaks bzr page listing
+    )
+
+    lister = LaunchpadLister(scheduler=swh_scheduler)
+    stats = lister.run()
+
+    assert lister.updated
+    assert stats.pages == 1
+    assert stats.origins == len(launchpad_response1)