diff --git a/swh/lister/core/indexing_lister.py b/swh/lister/core/indexing_lister.py --- a/swh/lister/core/indexing_lister.py +++ b/swh/lister/core/indexing_lister.py @@ -152,23 +152,23 @@ for repo in deleted_repos: repo.task_id = None - def run(self, min_index=None, max_index=None): + def run(self, min_bound=None, max_bound=None): """Main entry function. Sequentially fetches repository data from the service according to the basic outline in the class docstring, continually fetching sublists until either there is no next index reference given or the given next index is greater - than the desired max_index. + than the desired max_bound. Args: - min_index (indexable type): optional index to start from - max_index (indexable type): optional index to stop at + min_bound (indexable type): optional index to start from + max_bound (indexable type): optional index to stop at Returns: nothing """ - index = min_index or '' + index = min_bound or '' loop_count = 0 - self.min_index = min_index - self.max_index = max_index + self.min_index = min_bound + self.max_index = max_bound while self.is_within_bounds(index, self.min_index, self.max_index): logging.info('listing repos starting at %s' % index) diff --git a/swh/lister/core/paging_lister.py b/swh/lister/core/paging_lister.py --- a/swh/lister/core/paging_lister.py +++ b/swh/lister/core/paging_lister.py @@ -9,7 +9,7 @@ from .lister_base import SWHListerBase -class SWHPagingLister(SWHListerBase): +class PageByPageLister(SWHListerBase): """Lister* intermediate class for any service that follows the simple pagination page pattern. @@ -79,41 +79,41 @@ # You probably don't need to override anything below this line. - def run(self, min_index=None, max_index=None): + def run(self, min_bound=None, max_bound=None): """Main entry function. Sequentially fetches repository data from the service according to the basic outline in the class docstring. Continually fetching sublists until either there - is no next index reference given or the given next index is - greater than the desired max_index. + is no next page reference given or the given next page is + greater than the desired max_page. Args: - min_index (indexable type): optional index to start from - max_index (indexable type): optional index to stop at + min_bound: optional page to start from + max_bound: optional page to stop at Returns: nothing """ - index = min_index or '' + page = min_bound or '' loop_count = 0 - self.min_index = min_index - self.max_index = max_index + self.min_page = min_bound + self.max_page = max_bound - while self.is_within_bounds(index, self.min_index, self.max_index): - logging.info('listing repos starting at %s' % index) + while self.is_within_bounds(page, self.min_page, self.max_page): + logging.info('listing repos starting at %s' % page) - response, injected_repos = self.ingest_data(index) - next_index = self.get_next_target_from_response(response) + response, injected_repos = self.ingest_data(page) + next_page = self.get_next_target_from_response(response) # termination condition - if (next_index is None) or (next_index == index): - logging.info('stopping after index %s, no next link found' % - index) + if (next_page is None) or (next_page == page): + logging.info('stopping after page %s, no next link found' % + page) break else: - index = next_index + page = next_page loop_count += 1 if loop_count == 20: @@ -126,11 +126,11 @@ self.db_session = self.mk_session() -class SWHPagingHttpLister(SWHListerHttpTransport, SWHPagingLister): +class PageByPageHttpLister(SWHListerHttpTransport, PageByPageLister): """Convenience class for ensuring right lookup and init order when - combining SWHPagingLister and SWHListerHttpTransport. + combining PageByPageLister and SWHListerHttpTransport. """ def __init__(self, api_baseurl=None, override_config=None): SWHListerHttpTransport.__init__(self, api_baseurl=api_baseurl) - SWHPagingLister.__init__(self, override_config=override_config) + PageByPageLister.__init__(self, override_config=override_config) diff --git a/swh/lister/core/tests/test_lister.py b/swh/lister/core/tests/test_lister.py --- a/swh/lister/core/tests/test_lister.py +++ b/swh/lister/core/tests/test_lister.py @@ -24,7 +24,7 @@ """Base testing class for subclasses of swh.lister.core.indexing_lister.SWHIndexingHttpLister. - swh.lister.core.paging_lister.SWHPagingHttpLister + swh.lister.core.paging_lister.PageByPageHttpLister See swh.lister.github.tests.test_gh_lister for an example of how to customize for a specific listing service. @@ -189,7 +189,7 @@ self.disable_storage_and_scheduler(fl) self.disable_db(fl) - fl.run(min_index=1, max_index=1) # stores no results + fl.run(min_bound=1, max_bound=1) # stores no results @istest def test_fetch_one_nodb(self, http_mocker): @@ -199,7 +199,7 @@ self.disable_storage_and_scheduler(fl) self.disable_db(fl) - fl.run(min_index=self.first_index, max_index=self.first_index) + fl.run(min_bound=self.first_index, max_bound=self.first_index) @istest def test_fetch_multiple_pages_nodb(self, http_mocker): @@ -209,7 +209,7 @@ self.disable_storage_and_scheduler(fl) self.disable_db(fl) - fl.run(min_index=self.first_index) + fl.run(min_bound=self.first_index) def init_db(self, db, model): engine = create_engine(db.url()) @@ -231,7 +231,7 @@ # did not succeed yet if not hasattr(fl, 'db_last_index'): # gitlab lister cannot pass here return - fl.run(min_index=self.first_index) + fl.run(min_bound=self.first_index) self.assertEqual(fl.db_last_index(), self.last_index) partitions = fl.db_partition_indices(5) diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -6,11 +6,11 @@ import re import time -from ..core.paging_lister import SWHPagingHttpLister +from ..core.paging_lister import PageByPageHttpLister from .models import GitLabModel -class GitLabLister(SWHPagingHttpLister): +class GitLabLister(PageByPageHttpLister): # Template path expecting an integer that represents the page id PATH_TEMPLATE = '/projects?page=%d&order_by=id&sort=asc&simple=true' API_URL_INDEX_RE = re.compile(r'^.*/projects.*page=(\d+).*')