def split_range(total_pages, nb_pages):
    """Split ``[0, total_pages)`` into consecutive ``(start, end)`` ranges.

    Each yielded range spans ``nb_pages`` indexes, except the last one,
    which stops at ``total_pages`` and may therefore be shorter.

    Args:
        total_pages (int): exclusive upper bound of the interval to split.
        nb_pages (int): width of each range (the step between boundaries).

    Yields:
        tuple: ``(start, end)`` pairs covering ``[0, total_pages)`` in order.
        Nothing is yielded when the interval is empty (for instance when
        ``total_pages`` is 0).

    Raises:
        TypeError: if ``total_pages`` or ``nb_pages`` is not an integer
            (propagated from ``range``, on first iteration).
        ValueError: if ``nb_pages`` is 0 (propagated from ``range``).

    Examples:
        >>> list(split_range(14, 5))
        [(0, 5), (5, 10), (10, 14)]
        >>> list(split_range(19, 10))
        [(0, 10), (10, 19)]
        >>> list(split_range(0, 10))
        []

    """
    start = None
    for boundary in range(0, total_pages, nb_pages):
        if start is not None:
            yield start, boundary
        start = boundary

    # ``range`` never produces ``total_pages`` itself, so the last boundary
    # always needs a closing range. When the range was empty, ``start`` is
    # still None and there is nothing to close (previously this path raised
    # UnboundLocalError on the unbound loop variable).
    if start is not None:
        yield start, total_pages