class FullGitLabRelister(GitLabListerTask):
    """Full GitLab lister (list all available origins from the api).

    The page count reported by the lister is cut into chunks by
    ``utils.split_range`` and one ``RangeGitLabLister`` subtask is
    scheduled per chunk, in random order, through a celery ``group``.

    """
    task_queue = 'swh_lister_gitlab_refresh'

    # chunk size handed to utils.split_range (pages per range subtask)
    nb_pages = 10

    def run_task(self, *args, **kwargs):
        """Schedule one range-listing subtask per chunk of pages.

        Positional and keyword arguments are forwarded both to
        ``new_lister`` and to every scheduled subtask.

        """
        lister = self.new_lister(*args, **kwargs)
        # only the second element (the page count) is used here
        _, page_count, _ = lister.get_pages_information()

        # NOTE(review): chunks start at 0 while IncrementalGitLabLister
        # lists from min_bound=1 -- confirm the expected page indexing.
        chunks = list(utils.split_range(page_count, self.nb_pages))
        random.shuffle(chunks)

        subtask = RangeGitLabLister()
        signatures = (
            subtask.s(lower, upper, *args, **kwargs)
            for lower, upper in chunks
        )
        group(signatures)()
def run_task(self, *args, **kwargs):
    """Run the lister from page 1 up to the reported number of pages.

    Positional and keyword arguments are forwarded to ``new_lister``.

    Returns:
        whatever the lister's ``run`` call returns

    """
    lister = self.new_lister(*args, **kwargs)
    # only the second element (the page count) is needed
    _, last_page, _ = lister.get_pages_information()
    # stopping as soon as existing origins for that instance are detected
    return lister.run(
        min_bound=1,
        max_bound=last_page,
        check_existence=True,
    )
def split_range(total_pages, nb_pages):
    """Split ``0..total_pages`` into consecutive ``(start, end)`` pairs.

    The starts are the values of ``range(0, total_pages, nb_pages)``;
    each pair ends where the next one starts and the last pair ends at
    ``total_pages``. For instance ``split_range(14, 5)`` yields
    ``(0, 5)``, ``(5, 10)``, ``(10, 14)``.

    Args:
        total_pages (int): end of the interval to split
        nb_pages (int): step between two consecutive starts

    Yields:
        tuple: ``(start, end)`` pairs, in increasing order of start.
        Nothing is yielded when the underlying range is empty
        (e.g. ``total_pages`` is 0).

    Raises:
        TypeError: if an argument is not an integer (e.g. None)
        ValueError: if ``nb_pages`` is 0

    Both exceptions come from ``range`` and, as this is a generator,
    are only raised once iteration starts.

    """
    previous = None
    for start in range(0, total_pages, nb_pages):
        if previous is not None:
            yield previous, start
        previous = start

    # range() never produces total_pages itself, so whenever at least
    # one start was seen a final chunk up to total_pages remains. When
    # the range was empty there is nothing to close (and no unbound
    # loop variable to trip over).
    if previous is not None:
        yield previous, total_pages