diff --git a/README.md b/README.md --- a/README.md +++ b/README.md @@ -114,11 +114,9 @@ [GCC 8.1.0] on linux Type "help", "copyright", "credits" or "license" for more information. >>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2, - instance='salsa.debian.org', api_baseurl='https://salsa.debian.org/api/v4') - >>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2, - instance='gitlab.freedesktop.org', api_baseurl='https://gitlab.freedesktop.org/api/v4') - >>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2, - instance='gitlab.gnome.org', api_baseurl='https://gitlab.gnome.org/api/v4') - >>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2, - instance='gitlab.inria.fr', api_baseurl='https://gitlab.inria.fr/api/v4') - >>> + {'instance': 'debian', 'api_baseurl': 'https://salsa.debian.org/api/v4', 'sort': 'asc'}) + >>> from swh.lister.gitlab.tasks import FullGitLabRelister; FullGitLabRelister().run_task( + {'instance':'0xacab', 'api_baseurl':'https://0xacab.org/api/v4', 'sort': 'asc'}) + >>> from swh.lister.gitlab.tasks import IncrementalGitLabLister; IncrementalGitLabLister().run_task( + {'instance': 'freedesktop.org', 'api_baseurl': 'https://gitlab.freedesktop.org/api/v4', + 'sort': 'asc'}) diff --git a/swh/lister/bitbucket/tasks.py b/swh/lister/bitbucket/tasks.py --- a/swh/lister/bitbucket/tasks.py +++ b/swh/lister/bitbucket/tasks.py @@ -10,8 +10,8 @@ class BitBucketListerTask(ListerTaskBase): - def new_lister(self): - return BitBucketLister(api_baseurl='https://api.bitbucket.org/2.0') + def new_lister(self, *, api_baseurl='https://api.bitbucket.org/2.0'): + return BitBucketLister(api_baseurl=api_baseurl) class IncrementalBitBucketLister(BitBucketListerTask, diff --git a/swh/lister/core/tasks.py b/swh/lister/core/tasks.py --- a/swh/lister/core/tasks.py +++ b/swh/lister/core/tasks.py @@ -39,13 +39,13 @@ task_queue = AbstractAttribute('Celery Task queue name') @abc.abstractmethod - def new_lister(self, *args, **kwargs): + def new_lister(self, **lister_args): """Return a new lister of the appropriate type. """ pass @abc.abstractmethod - def run_task(self, *args, **kwargs): + def run_task(self, *, lister_args=None): pass @@ -57,8 +57,10 @@ """Range lister task. """ - def run_task(self, start, end, *args, **kwargs): - lister = self.new_lister(*args, **kwargs) + def run_task(self, start, end, lister_args=None): + if lister_args is None: + lister_args = {} + lister = self.new_lister(**lister_args) return lister.run(min_bound=start, max_bound=end) @@ -69,8 +71,10 @@ """Incremental indexing lister task. """ - def run_task(self, *args, **kwargs): - lister = self.new_lister(*args, **kwargs) + def run_task(self, *, lister_args=None): + if lister_args is None: + lister_args = {} + lister = self.new_lister(**lister_args) return lister.run(min_bound=lister.db_last_index(), max_bound=None) @@ -80,10 +84,12 @@ """ GROUP_SPLIT = 10000 - def run_task(self, *args, **kwargs): - lister = self.new_lister(*args, **kwargs) + def run_task(self, *, lister_args=None): + if lister_args is None: + lister_args = {} + lister = self.new_lister(**lister_args) ranges = lister.db_partition_indices(self.GROUP_SPLIT) random.shuffle(ranges) range_task = RangeListerTask() - group(range_task.s(minv, maxv, *args, **kwargs) + group(range_task.s(minv, maxv, lister_args) for minv, maxv in ranges)() diff --git a/swh/lister/debian/tasks.py b/swh/lister/debian/tasks.py --- a/swh/lister/debian/tasks.py +++ b/swh/lister/debian/tasks.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017 the Software Heritage developers +# Copyright (C) 2017-2018 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information diff --git a/swh/lister/github/tasks.py b/swh/lister/github/tasks.py --- a/swh/lister/github/tasks.py +++ b/swh/lister/github/tasks.py @@ -10,8 +10,8 @@ class GitHubListerTask(ListerTaskBase): - def new_lister(self): - return GitHubLister(api_baseurl='https://api.github.com') + def new_lister(self, *, api_baseurl='https://api.github.com'): + return GitHubLister(api_baseurl=api_baseurl) class IncrementalGitHubLister(GitHubListerTask, IndexingDiscoveryListerTask): diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py --- a/swh/lister/gitlab/tasks.py +++ b/swh/lister/gitlab/tasks.py @@ -12,9 +12,10 @@ class GitLabListerTask(ListerTaskBase): - def new_lister(self, api_baseurl='https://gitlab.com/api/v4', - instance='gitlab.com'): - return GitLabLister(api_baseurl=api_baseurl, instance=instance) + def new_lister(self, *, api_baseurl='https://gitlab.com/api/v4', + instance='gitlab.com', sort='asc'): + return GitLabLister( + api_baseurl=api_baseurl, instance=instance, sort=sort) class RangeGitLabLister(GitLabListerTask, RangeListerTask): @@ -33,30 +34,29 @@ # nb pages nb_pages = 10 - def run_task(self, *args, **kwargs): - lister = self.new_lister(*args, **kwargs) + def run_task(self, lister_args=None): + if lister_args is None: + lister_args = {} + lister = self.new_lister(**lister_args) _, total_pages, _ = lister.get_pages_information() ranges = list(utils.split_range(total_pages, self.nb_pages)) random.shuffle(ranges) range_task = RangeGitLabLister() - group(range_task.s(minv, maxv, *args, **kwargs) + group(range_task.s(minv, maxv, lister_args=lister_args) for minv, maxv in ranges)() -class IncrementalGitLabLister(ListerTaskBase): +class IncrementalGitLabLister(GitLabListerTask): """Incremental GitLab lister (list only new available origins). """ task_queue = 'swh_lister_gitlab_discover' - def new_lister(self, api_baseurl='https://gitlab.com/api/v4', - instance='gitlab.com'): - # assuming going forward in desc order, page 1 through - return GitLabLister(instance=instance, api_baseurl=api_baseurl, - sort='desc') - - def run_task(self, *args, **kwargs): - lister = self.new_lister(*args, **kwargs) + def run_task(self, lister_args=None): + if lister_args is None: + lister_args = {} + lister_args['sort'] = 'desc' + lister = self.new_lister(**lister_args) _, total_pages, _ = lister.get_pages_information() # stopping as soon as existing origins for that instance are detected return lister.run(min_bound=1, max_bound=total_pages,