Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124047
D8298.id29970.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D8298.id29970.diff
View Options
diff --git a/swh/lister/golang/lister.py b/swh/lister/golang/lister.py
--- a/swh/lister/golang/lister.py
+++ b/swh/lister/golang/lister.py
@@ -3,6 +3,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from dataclasses import dataclass
from datetime import datetime
import json
import logging
@@ -17,14 +18,22 @@
from swh.scheduler.model import ListedOrigin
from .. import USER_AGENT
-from ..pattern import CredentialsType, StatelessLister
+from ..pattern import CredentialsType, Lister
logger = logging.getLogger(__name__)
+
+@dataclass
+class GolangStateType:
+ last_seen: Optional[datetime] = None
+ """Last timestamp of a package version we have saved.
+ Used as a starting point for an incremental listing."""
+
+
GolangPageType = List[Dict[str, Any]]
-class GolangLister(StatelessLister[GolangPageType]):
+class GolangLister(Lister[GolangStateType, GolangPageType]):
"""
List all Golang modules and send associated origins to scheduler.
@@ -52,6 +61,18 @@
{"Accept": "application/json", "User-Agent": USER_AGENT}
)
+ def state_from_dict(self, d: Dict[str, Any]) -> GolangStateType:
+ as_string = d.get("last_seen")
+ last_seen = iso8601.parse_date(as_string) if as_string is not None else None
+ return GolangStateType(last_seen=last_seen)
+
+ def state_to_dict(self, state: GolangStateType) -> Dict[str, Any]:
+ return {
+ "last_seen": state.last_seen.isoformat()
+ if state.last_seen is not None
+ else None
+ }
+
@throttling_retry(
retry=retry_policy_generic,
before_sleep=before_sleep_log(logger, logging.WARNING),
@@ -108,11 +129,12 @@
return page, since
def get_pages(self) -> Iterator[GolangPageType]:
- page, since = self.get_single_page()
-
+ page, since = self.get_single_page(since=self.state.last_seen)
+ self.state.last_seen = since
while page:
yield page
page, since = self.get_single_page(since=since)
+ self.state.last_seen = since
def get_origins_from_page(self, page: GolangPageType) -> Iterator[ListedOrigin]:
"""
diff --git a/swh/lister/golang/tests/test_lister.py b/swh/lister/golang/tests/test_lister.py
--- a/swh/lister/golang/tests/test_lister.py
+++ b/swh/lister/golang/tests/test_lister.py
@@ -107,3 +107,71 @@
assert_sleep_calls(
mocker, mocked_sleep, [1, WAIT_EXP_BASE, 1, WAIT_EXP_BASE, 1, WAIT_EXP_BASE]
)
+
+ # Incremental should list nothing
+ stats = lister.run()
+ assert stats.pages == 0
+ assert stats.origins == 0
+
+ # Paranoid
+ stats = lister.run()
+ assert stats.pages == 0
+ assert stats.origins == 0
+
+
+def test_golang_lister_incremental(swh_scheduler, requests_mock, datadir):
+ # first listing, should return one origin per package
+ lister = GolangLister(scheduler=swh_scheduler)
+
+ responses = [
+ {"text": Path(datadir, "page-1.txt").read_text(), "status_code": 200},
+ # Returns empty text when the list is exhausted
+ {"text": "", "status_code": 200},
+ ]
+ requests_mock.get(GolangLister.GOLANG_MODULES_INDEX_URL, responses)
+
+ stats = lister.run()
+
+ assert stats.pages == 1
+ assert stats.origins == 5
+
+ # Incremental should list nothing
+ stats = lister.run()
+ assert stats.pages == 0
+ assert stats.origins == 0
+
+ # Add more responses
+ responses = [
+ {"text": Path(datadir, "page-2.txt").read_text(), "status_code": 200},
+ {"text": "", "status_code": 200},
+ ]
+
+ requests_mock.get(GolangLister.GOLANG_MODULES_INDEX_URL, responses)
+
+ # Incremental should list new page
+ stats = lister.run()
+ assert stats.pages == 1
+ assert stats.origins == 4
+
+ # Incremental should list nothing again
+ stats = lister.run()
+ assert stats.pages == 0
+ assert stats.origins == 0
+
+ # Add yet more responses
+ responses = [
+ {"text": Path(datadir, "page-3.txt").read_text(), "status_code": 200},
+ {"text": "", "status_code": 200},
+ ]
+
+ requests_mock.get(GolangLister.GOLANG_MODULES_INDEX_URL, responses)
+
+ # Incremental should list new page again
+ stats = lister.run()
+ assert stats.pages == 1
+ assert stats.origins == 10
+
+ # Incremental should list nothing one last time
+ stats = lister.run()
+ assert stats.pages == 0
+ assert stats.origins == 0
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 20 2024, 11:28 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226314
Attached To
D8298: Add incremental function to Golang Lister
Event Timeline
Log In to Comment