Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/core/page_by_page_lister.py
Show First 20 Lines • Show All 104 Lines • ▼ Show 20 Lines | def run(self, min_bound=None, max_bound=None, check_existence=False): | ||||
check_existence (bool): optional existence check (for | check_existence (bool): optional existence check (for | ||||
incremental lister whose sort | incremental lister whose sort | ||||
order is inverted) | order is inverted) | ||||
Returns: | Returns: | ||||
nothing | nothing | ||||
""" | """ | ||||
status = 'uneventful' | |||||
page = min_bound or 0 | page = min_bound or 0 | ||||
loop_count = 0 | loop_count = 0 | ||||
self.min_page = min_bound | self.min_page = min_bound | ||||
self.max_page = max_bound | self.max_page = max_bound | ||||
while self.is_within_bounds(page, self.min_page, self.max_page): | while self.is_within_bounds(page, self.min_page, self.max_page): | ||||
logging.info('listing repos starting at %s' % page) | logging.info('listing repos starting at %s' % page) | ||||
response, injected_repos = self.ingest_data(page, | response, injected_repos = self.ingest_data(page, | ||||
checks=check_existence) | checks=check_existence) | ||||
if not response and not injected_repos: | if not response and not injected_repos: | ||||
logging.info('No response from api server, stopping') | logging.info('No response from api server, stopping') | ||||
break | break | ||||
elif not injected_repos: | elif not injected_repos: | ||||
logging.info('Repositories already seen, stopping') | logging.info('Repositories already seen, stopping') | ||||
break | break | ||||
status = 'eventful' | |||||
next_page = self.get_next_target_from_response(response) | next_page = self.get_next_target_from_response(response) | ||||
# termination condition | # termination condition | ||||
if (next_page is None) or (next_page == page): | if (next_page is None) or (next_page == page): | ||||
logging.info('stopping after page %s, no next link found' % | logging.info('stopping after page %s, no next link found' % | ||||
page) | page) | ||||
break | break | ||||
else: | else: | ||||
page = next_page | page = next_page | ||||
loop_count += 1 | loop_count += 1 | ||||
if loop_count == 20: | if loop_count == 20: | ||||
logging.info('flushing updates') | logging.info('flushing updates') | ||||
loop_count = 0 | loop_count = 0 | ||||
self.db_session.commit() | self.db_session.commit() | ||||
self.db_session = self.mk_session() | self.db_session = self.mk_session() | ||||
self.db_session.commit() | self.db_session.commit() | ||||
self.db_session = self.mk_session() | self.db_session = self.mk_session() | ||||
return {'status': status} | |||||
class PageByPageHttpLister(ListerHttpTransport, PageByPageLister): | class PageByPageHttpLister(ListerHttpTransport, PageByPageLister): | ||||
"""Convenience class for ensuring right lookup and init order when | """Convenience class for ensuring right lookup and init order when | ||||
combining PageByPageLister and ListerHttpTransport. | combining PageByPageLister and ListerHttpTransport. | ||||
""" | """ | ||||
def __init__(self, url=None, override_config=None): | def __init__(self, url=None, override_config=None): | ||||
PageByPageLister.__init__(self, override_config=override_config) | PageByPageLister.__init__(self, override_config=override_config) | ||||
ListerHttpTransport.__init__(self, url=url) | ListerHttpTransport.__init__(self, url=url) |