diff --git a/swh/lister/golang/lister.py b/swh/lister/golang/lister.py --- a/swh/lister/golang/lister.py +++ b/swh/lister/golang/lister.py @@ -3,6 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from dataclasses import dataclass from datetime import datetime import json import logging @@ -17,14 +18,22 @@ from swh.scheduler.model import ListedOrigin from .. import USER_AGENT -from ..pattern import CredentialsType, StatelessLister +from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) + +@dataclass +class GolangStateType: + last_seen: Optional[datetime] = None + """Last timestamp of a package version we have saved. + Used as a starting point for an incremental listing.""" + + GolangPageType = List[Dict[str, Any]] -class GolangLister(StatelessLister[GolangPageType]): +class GolangLister(Lister[GolangStateType, GolangPageType]): """ List all Golang modules and send associated origins to scheduler. @@ -52,6 +61,18 @@ {"Accept": "application/json", "User-Agent": USER_AGENT} ) + def state_from_dict(self, d: Dict[str, Any]) -> GolangStateType: + as_string = d.get("last_seen") + last_seen = iso8601.parse_date(as_string) if as_string is not None else None + return GolangStateType(last_seen=last_seen) + + def state_to_dict(self, state: GolangStateType) -> Dict[str, Any]: + return { + "last_seen": state.last_seen.isoformat() + if state.last_seen is not None + else None + } + @throttling_retry( retry=retry_policy_generic, before_sleep=before_sleep_log(logger, logging.WARNING), @@ -108,11 +129,12 @@ return page, since def get_pages(self) -> Iterator[GolangPageType]: - page, since = self.get_single_page() - + page, since = self.get_single_page(since=self.state.last_seen) + self.state.last_seen = since while page: yield page page, since = self.get_single_page(since=since) + self.state.last_seen = since def get_origins_from_page(self, page: GolangPageType) -> Iterator[ListedOrigin]: """ diff --git a/swh/lister/golang/tests/test_lister.py b/swh/lister/golang/tests/test_lister.py --- a/swh/lister/golang/tests/test_lister.py +++ b/swh/lister/golang/tests/test_lister.py @@ -107,3 +107,71 @@ assert_sleep_calls( mocker, mocked_sleep, [1, WAIT_EXP_BASE, 1, WAIT_EXP_BASE, 1, WAIT_EXP_BASE] ) + + # Incremental should list nothing + stats = lister.run() + assert stats.pages == 0 + assert stats.origins == 0 + + # Paranoid + stats = lister.run() + assert stats.pages == 0 + assert stats.origins == 0 + + +def test_golang_lister_incremental(swh_scheduler, requests_mock, datadir): + # first listing, should return one origin per package + lister = GolangLister(scheduler=swh_scheduler) + + responses = [ + {"text": Path(datadir, "page-1.txt").read_text(), "status_code": 200}, + # Returns empty text when the list is exhausted + {"text": "", "status_code": 200}, + ] + requests_mock.get(GolangLister.GOLANG_MODULES_INDEX_URL, responses) + + stats = lister.run() + + assert stats.pages == 1 + assert stats.origins == 5 + + # Incremental should list nothing + stats = lister.run() + assert stats.pages == 0 + assert stats.origins == 0 + + # Add more responses + responses = [ + {"text": Path(datadir, "page-2.txt").read_text(), "status_code": 200}, + {"text": "", "status_code": 200}, + ] + + requests_mock.get(GolangLister.GOLANG_MODULES_INDEX_URL, responses) + + # Incremental should list new page + stats = lister.run() + assert stats.pages == 1 + assert stats.origins == 4 + + # Incremental should list nothing again + stats = lister.run() + assert stats.pages == 0 + assert stats.origins == 0 + + # Add yet more responses + responses = [ + {"text": Path(datadir, "page-3.txt").read_text(), "status_code": 200}, + {"text": "", "status_code": 200}, + ] + + requests_mock.get(GolangLister.GOLANG_MODULES_INDEX_URL, responses) + + # Incremental should list new page again + stats = lister.run() + assert stats.pages == 1 + assert stats.origins == 10 + + # Incremental should list nothing one last time + stats = lister.run() + assert stats.pages == 0 + assert stats.origins == 0