Page Menu | Home | Software Heritage

D8298.id29998.diff
No One | Temporary

D8298.id29998.diff

diff --git a/swh/lister/golang/lister.py b/swh/lister/golang/lister.py
--- a/swh/lister/golang/lister.py
+++ b/swh/lister/golang/lister.py
@@ -3,6 +3,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from dataclasses import dataclass
from datetime import datetime
import json
import logging
@@ -17,14 +18,22 @@
from swh.scheduler.model import ListedOrigin
from .. import USER_AGENT
-from ..pattern import CredentialsType, StatelessLister
+from ..pattern import CredentialsType, Lister
logger = logging.getLogger(__name__)
+
+@dataclass
+class GolangStateType:
+ last_seen: Optional[datetime] = None
+ """Last timestamp of a package version we have saved.
+ Used as a starting point for an incremental listing."""
+
+
GolangPageType = List[Dict[str, Any]]
-class GolangLister(StatelessLister[GolangPageType]):
+class GolangLister(Lister[GolangStateType, GolangPageType]):
"""
List all Golang modules and send associated origins to scheduler.
@@ -52,6 +61,18 @@
{"Accept": "application/json", "User-Agent": USER_AGENT}
)
+ def state_from_dict(self, d: Dict[str, Any]) -> GolangStateType:
+ as_string = d.get("last_seen")
+ last_seen = iso8601.parse_date(as_string) if as_string is not None else None
+ return GolangStateType(last_seen=last_seen)
+
+ def state_to_dict(self, state: GolangStateType) -> Dict[str, Any]:
+ return {
+ "last_seen": state.last_seen.isoformat()
+ if state.last_seen is not None
+ else None
+ }
+
@throttling_retry(
retry=retry_policy_generic,
before_sleep=before_sleep_log(logger, logging.WARNING),
@@ -108,11 +129,12 @@
return page, since
def get_pages(self) -> Iterator[GolangPageType]:
- page, since = self.get_single_page()
-
+ page, since = self.get_single_page(since=self.state.last_seen)
+ self.state.last_seen = since
while page:
yield page
page, since = self.get_single_page(since=since)
+ self.state.last_seen = since
def get_origins_from_page(self, page: GolangPageType) -> Iterator[ListedOrigin]:
"""
diff --git a/swh/lister/golang/tests/test_lister.py b/swh/lister/golang/tests/test_lister.py
--- a/swh/lister/golang/tests/test_lister.py
+++ b/swh/lister/golang/tests/test_lister.py
@@ -76,3 +76,71 @@
assert_sleep_calls(
mocker, mocked_sleep, [1, WAIT_EXP_BASE, 1, WAIT_EXP_BASE, 1, WAIT_EXP_BASE]
)
+
+ # Incremental should list nothing
+ stats = lister.run()
+ assert stats.pages == 0
+ assert stats.origins == 0
+
+ # Paranoid: a repeated incremental run must still list nothing
+ stats = lister.run()
+ assert stats.pages == 0
+ assert stats.origins == 0
+
+
+def test_golang_lister_incremental(swh_scheduler, requests_mock, datadir):
+ # First listing: should return one origin per package
+ lister = GolangLister(scheduler=swh_scheduler)
+
+ responses = [
+ {"text": Path(datadir, "page-1.txt").read_text(), "status_code": 200},
+ # Returns empty text when the list is exhausted
+ {"text": "", "status_code": 200},
+ ]
+ requests_mock.get(GolangLister.GOLANG_MODULES_INDEX_URL, responses)
+
+ stats = lister.run()
+
+ assert stats.pages == 1
+ assert stats.origins == 5
+
+ # Incremental should list nothing
+ stats = lister.run()
+ assert stats.pages == 0
+ assert stats.origins == 0
+
+ # Add more responses
+ responses = [
+ {"text": Path(datadir, "page-2.txt").read_text(), "status_code": 200},
+ {"text": "", "status_code": 200},
+ ]
+
+ requests_mock.get(GolangLister.GOLANG_MODULES_INDEX_URL, responses)
+
+ # Incremental should list new page
+ stats = lister.run()
+ assert stats.pages == 1
+ assert stats.origins == 4
+
+ # Incremental should list nothing again
+ stats = lister.run()
+ assert stats.pages == 0
+ assert stats.origins == 0
+
+ # Add yet more responses
+ responses = [
+ {"text": Path(datadir, "page-3.txt").read_text(), "status_code": 200},
+ {"text": "", "status_code": 200},
+ ]
+
+ requests_mock.get(GolangLister.GOLANG_MODULES_INDEX_URL, responses)
+
+ # Incremental should list new page again
+ stats = lister.run()
+ assert stats.pages == 1
+ assert stats.origins == 9
+
+ # Incremental should list nothing one last time
+ stats = lister.run()
+ assert stats.pages == 0
+ assert stats.origins == 0

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 6:18 AM (11 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218160

Event Timeline