Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/gitlab/lister.py
Show First 20 Lines • Show All 94 Lines • ▼ Show 20 Lines | ): | ||||
instance = parse_url(url).host | instance = parse_url(url).host | ||||
super().__init__( | super().__init__( | ||||
scheduler=scheduler, | scheduler=scheduler, | ||||
credentials=None, # anonymous for now | credentials=None, # anonymous for now | ||||
url=url, | url=url, | ||||
instance=instance, | instance=instance, | ||||
) | ) | ||||
self.incremental = incremental | self.incremental = incremental | ||||
self.last_page: Optional[str] = None | |||||
self.session = requests.Session() | self.session = requests.Session() | ||||
self.session.headers.update( | self.session.headers.update( | ||||
{"Accept": "application/json", "User-Agent": USER_AGENT} | {"Accept": "application/json", "User-Agent": USER_AGENT} | ||||
) | ) | ||||
def state_from_dict(self, d: Dict[str, Any]) -> GitLabListerState: | def state_from_dict(self, d: Dict[str, Any]) -> GitLabListerState: | ||||
return GitLabListerState(**d) | return GitLabListerState(**d) | ||||
Show All 20 Lines | def get_page_result(self, url: str) -> PageResult: | ||||
next_page = response.links["next"]["url"] | next_page = response.links["next"]["url"] | ||||
else: | else: | ||||
next_page = None | next_page = None | ||||
return PageResult(repositories, next_page) | return PageResult(repositories, next_page) | ||||
def get_pages(self) -> Iterator[PageResult]: | def get_pages(self) -> Iterator[PageResult]: | ||||
next_page: Optional[str] | next_page: Optional[str] | ||||
if self.incremental and self.state is not None: | if self.incremental and self.state and self.state.last_seen_next_link: | ||||
next_page = self.state.last_seen_next_link | next_page = self.state.last_seen_next_link | ||||
else: | else: | ||||
next_page = f"{self.url}projects?page=1&order_by=id&sort=asc&per_page=20" | next_page = f"{self.url}projects?page=1&order_by=id&sort=asc&per_page=20" | ||||
while next_page: | while next_page: | ||||
self.last_page = next_page | |||||
page_result = self.get_page_result(next_page) | page_result = self.get_page_result(next_page) | ||||
yield page_result | yield page_result | ||||
next_page = page_result.next_page | next_page = page_result.next_page | ||||
def get_origins_from_page(self, page_result: PageResult) -> Iterator[ListedOrigin]: | def get_origins_from_page(self, page_result: PageResult) -> Iterator[ListedOrigin]: | ||||
assert self.lister_obj.id is not None | assert self.lister_obj.id is not None | ||||
repositories = page_result.repositories if page_result.repositories else [] | repositories = page_result.repositories if page_result.repositories else [] | ||||
Show All 13 Lines | def commit_page(self, page_result: PageResult) -> None: | ||||
superior to the currently stored one. | superior to the currently stored one. | ||||
Note: this is a noop for full listing mode | Note: this is a noop for full listing mode | ||||
""" | """ | ||||
if self.incremental: | if self.incremental: | ||||
# link: https://${project-api}/?...&page=2x... | # link: https://${project-api}/?...&page=2x... | ||||
next_page = page_result.next_page | next_page = page_result.next_page | ||||
if not next_page and self.last_page: | |||||
next_page = self.last_page | |||||
if next_page: | if next_page: | ||||
page_id = _parse_page_id(next_page) | page_id = _parse_page_id(next_page) | ||||
previous_next_page = self.state.last_seen_next_link | previous_next_page = self.state.last_seen_next_link | ||||
previous_page_id = _parse_page_id(previous_next_page) | previous_page_id = _parse_page_id(previous_next_page) | ||||
if previous_next_page is None or ( | if previous_next_page is None or ( | ||||
previous_page_id and page_id and previous_page_id < page_id | previous_page_id and page_id and previous_page_id < page_id | ||||
): | ): | ||||
self.state.last_seen_next_link = next_page | self.state.last_seen_next_link = next_page | ||||
def finalize(self) -> None: | def finalize(self) -> None: | ||||
"""finalize the lister state when relevant (see `fn:commit_page` for details) | """finalize the lister state when relevant (see `fn:commit_page` for details) | ||||
Note: this is a noop for full listing mode | Note: this is a noop for full listing mode | ||||
""" | """ | ||||
next_page = self.state.last_seen_next_link | next_page = self.state.last_seen_next_link | ||||
if self.incremental and next_page: | if self.incremental and next_page: | ||||
# link: https://${project-api}/?...&page=2x... | # link: https://${project-api}/?...&page=2x... | ||||
next_page_id = _parse_page_id(next_page) | next_page_id = _parse_page_id(next_page) | ||||
scheduler_state = self.get_state_from_scheduler() | scheduler_state = self.get_state_from_scheduler() | ||||
previous_next_page_id = _parse_page_id(scheduler_state.last_seen_next_link) | previous_next_page_id = _parse_page_id(scheduler_state.last_seen_next_link) | ||||
if ( | if (not previous_next_page_id and next_page_id) or ( | ||||
previous_next_page_id | previous_next_page_id | ||||
and next_page_id | and next_page_id | ||||
and previous_next_page_id < next_page_id | and previous_next_page_id < next_page_id | ||||
): | ): | ||||
self.updated = True | self.updated = True |