Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/core/paging_lister.py
Show First 20 Lines • Show All 73 Lines • ▼ Show 20 Lines | def get_pages_information(self): | ||||
tuple (total number of repositories, total number of | tuple (total number of repositories, total number of | ||||
pages, per_page) | pages, per_page) | ||||
""" | """ | ||||
pass | pass | ||||
# You probably don't need to override anything below this line. | # You probably don't need to override anything below this line. | ||||
def check_existence(self, injected_repos):
    """Given a list of injected repos, check if we already have them.

    Placeholder implementation: the actual check is not written yet, so
    this always reports that the repositories have not been seen.
    ``run`` only consults this method when it is called with a truthy
    ``check_existence`` argument, and stops listing once this returns a
    truthy value.

    Args:
        injected_repos: the repositories injected for the current page,
            i.e. the second element of the tuple returned by
            ``self.ingest_data`` (exact type not visible here -- TODO
            confirm against ``ingest_data``).

    Returns:
        bool: always False for now.

    """
    # FIXME: Implement the check
    return False
def run(self, min_index=None, max_index=None, check_existence=False): | |||||
"""Main entry function. Sequentially fetches repository data from the | """Main entry function. Sequentially fetches repository data from the | ||||
service according to the basic outline in the class | service according to the basic outline in the class | ||||
docstring. Continually fetching sublists until either there | docstring. Continually fetching sublists until either there | ||||
is no next index reference given or the given next index is | is no next index reference given or the given next index is | ||||
greater than the desired max_index. | greater than the desired max_index. | ||||
Args: | Args: | ||||
min_index (indexable type): optional index to start from | min_index (indexable type): optional index to start from | ||||
max_index (indexable type): optional index to stop at | max_index (indexable type): optional index to stop at | ||||
check_existence (bool): optional existence check (for | |||||
incremental lister whose sort | |||||
order is inverted) | |||||
Returns: | Returns: | ||||
nothing | nothing | ||||
""" | """ | ||||
index = min_index or '' | index = min_index or '' | ||||
loop_count = 0 | loop_count = 0 | ||||
self.min_index = min_index | self.min_index = min_index | ||||
self.max_index = max_index | self.max_index = max_index | ||||
already_seen = False | |||||
while self.is_within_bounds(index, self.min_index, self.max_index): | while self.is_within_bounds(index, self.min_index, self.max_index): | ||||
logging.info('listing repos starting at %s' % index) | logging.info('listing repos starting at %s' % index) | ||||
response, injected_repos = self.ingest_data(index) | response, injected_repos = self.ingest_data(index) | ||||
next_index = self.get_next_target_from_response(response) | next_index = self.get_next_target_from_response(response) | ||||
if check_existence: | |||||
already_seen = self.check_existence(injected_repos) | |||||
# termination condition | # termination condition | ||||
if (next_index is None) or (next_index == index): | if (next_index is None) or (next_index == index): | ||||
logging.info('stopping after index %s, no next link found' % | logging.info('stopping after index %s, no next link found' % | ||||
index) | index) | ||||
break | break | ||||
elif already_seen: | |||||
logging.info('Repositories already seen, stopping') | |||||
break | |||||
else: | else: | ||||
index = next_index | index = next_index | ||||
loop_count += 1 | loop_count += 1 | ||||
if loop_count == 20: | if loop_count == 20: | ||||
logging.info('flushing updates') | logging.info('flushing updates') | ||||
loop_count = 0 | loop_count = 0 | ||||
self.db_session.commit() | self.db_session.commit() | ||||
Show All 16 Lines |