Lister tasks: tolerate result backends without save_group support and
range bounds that are not integers.

The full relister tasks wrap promise.save() so that a NotImplementedError
raised by the result backend is logged instead of failing the task. The
range lister tasks format both bounds with %s, because %d raises a
TypeError for any bound that is not a number.

diff --git a/swh/lister/bitbucket/tasks.py b/swh/lister/bitbucket/tasks.py
index 972ff13..5299de1 100644
--- a/swh/lister/bitbucket/tasks.py
+++ b/swh/lister/bitbucket/tasks.py
@@ -1,58 +1,61 @@
 # Copyright (C) 2017-2018 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import random
 from celery import group
 
 from swh.scheduler.celery_backend.config import app
 
 from .lister import BitBucketLister
 
 GROUP_SPLIT = 10000
 
 
 def new_lister(api_baseurl='https://api.bitbucket.org/2.0'):
     return BitBucketLister(api_baseurl=api_baseurl)
 
 
 @app.task(name='swh.lister.bitbucket.tasks.IncrementalBitBucketLister',
           bind=True)
 def incremental_bitbucket_lister(self, **lister_args):
     self.log.debug('%s, lister_args=%s' % (
         self.name, lister_args))
     lister = new_lister(**lister_args)
     lister.run(min_bound=lister.db_last_index(), max_bound=None)
     self.log.debug('%s OK' % (self.name))
 
 
 @app.task(name='swh.lister.bitbucket.tasks.RangeBitBucketLister',
           bind=True)
 def range_bitbucket_lister(self, start, end, **lister_args):
-    self.log.debug('%s(start=%s, end=%d), lister_args=%s' % (
+    self.log.debug('%s(start=%s, end=%s), lister_args=%s' % (
         self.name, start, end, lister_args))
     lister = new_lister(**lister_args)
     lister.run(min_bound=start, max_bound=end)
     self.log.debug('%s OK' % (self.name))
 
 
 @app.task(name='swh.lister.bitbucket.tasks.FullBitBucketRelister',
           bind=True)
 def full_bitbucket_relister(self, split=None, **lister_args):
     self.log.debug('%s, lister_args=%s' % (
         self.name, lister_args))
     lister = new_lister(**lister_args)
     ranges = lister.db_partition_indices(split or GROUP_SPLIT)
     random.shuffle(ranges)
     promise = group(range_bitbucket_lister.s(minv, maxv, **lister_args)
                     for minv, maxv in ranges)()
     self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges)))
-    promise.save()  # so that we can restore the GroupResult in tests
+    try:
+        promise.save()  # so that we can restore the GroupResult in tests
+    except NotImplementedError:
+        self.log.info('Unable to call save_group with current result backend.')
     return promise.id
 
 
 @app.task(name='swh.lister.bitbucket.tasks.ping',
           bind=True)
 def ping(self):
     self.log.debug(self.name)
     return 'OK'
diff --git a/swh/lister/github/tasks.py b/swh/lister/github/tasks.py
index 372299f..625f5f5 100644
--- a/swh/lister/github/tasks.py
+++ b/swh/lister/github/tasks.py
@@ -1,59 +1,62 @@
 # Copyright (C) 2017-2018 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import random
 
 from celery import group
 
 from swh.scheduler.celery_backend.config import app
 
 from swh.lister.github.lister import GitHubLister
 
 GROUP_SPLIT = 10000
 
 
 def new_lister(api_baseurl='https://api.github.com', **kw):
     return GitHubLister(api_baseurl=api_baseurl, **kw)
 
 
 @app.task(name='swh.lister.github.tasks.IncrementalGitHubLister',
           bind=True)
 def incremental_github_lister(self, **lister_args):
     self.log.debug('%s, lister_args=%s' % (
         self.name, lister_args))
     lister = new_lister(**lister_args)
     lister.run(min_bound=lister.db_last_index(), max_bound=None)
     self.log.debug('%s OK' % (self.name))
 
 
 @app.task(name='swh.lister.github.tasks.RangeGitHubLister',
           bind=True)
 def range_github_lister(self, start, end, **lister_args):
-    self.log.debug('%s(start=%s, end=%d), lister_args=%s' % (
+    self.log.debug('%s(start=%s, end=%s), lister_args=%s' % (
         self.name, start, end, lister_args))
     lister = new_lister(**lister_args)
     lister.run(min_bound=start, max_bound=end)
     self.log.debug('%s OK' % (self.name))
 
 
 @app.task(name='swh.lister.github.tasks.FullGitHubRelister',
           bind=True)
 def full_github_relister(self, split=None, **lister_args):
     self.log.debug('%s, lister_args=%s' % (
         self.name, lister_args))
     lister = new_lister(**lister_args)
     ranges = lister.db_partition_indices(split or GROUP_SPLIT)
     random.shuffle(ranges)
     promise = group(range_github_lister.s(minv, maxv, **lister_args)
                     for minv, maxv in ranges)()
     self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges)))
-    promise.save()  # so that we can restore the GroupResult in tests
+    try:
+        promise.save()  # so that we can restore the GroupResult in tests
+    except NotImplementedError:
+        self.log.info('Unable to call save_group with current result backend.')
     return promise.id
 
 
 @app.task(name='swh.lister.github.tasks.ping',
           bind=True)
 def ping(self):
     self.log.debug(self.name)
     return 'OK'
diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py
index 65b8b4a..739d95a 100644
--- a/swh/lister/gitlab/tasks.py
+++ b/swh/lister/gitlab/tasks.py
@@ -1,68 +1,71 @@
 # Copyright (C) 2018 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import random
 
 from celery import group
 
 from swh.scheduler.celery_backend.config import app
 
 from .. import utils
 from .lister import GitLabLister
 
 
 NBPAGES = 10
 
 
 def new_lister(api_baseurl='https://gitlab.com/api/v4',
                instance=None, sort='asc', per_page=20):
     return GitLabLister(
         api_baseurl=api_baseurl, instance=instance, sort=sort,
         per_page=per_page)
 
 
 @app.task(name='swh.lister.gitlab.tasks.IncrementalGitLabLister',
           bind=True)
 def incremental_gitlab_lister(self, **lister_args):
     self.log.debug('%s, lister_args=%s' % (
         self.name, lister_args))
     lister_args['sort'] = 'desc'
     lister = new_lister(**lister_args)
     total_pages = lister.get_pages_information()[1]
     # stopping as soon as existing origins for that instance are detected
     lister.run(min_bound=1, max_bound=total_pages, check_existence=True)
     self.log.debug('%s OK' % (self.name))
 
 
 @app.task(name='swh.lister.gitlab.tasks.RangeGitLabLister',
           bind=True)
 def range_gitlab_lister(self, start, end, **lister_args):
-    self.log.debug('%s(start=%s, end=%d), lister_args=%s' % (
+    self.log.debug('%s(start=%s, end=%s), lister_args=%s' % (
         self.name, start, end, lister_args))
     lister = new_lister(**lister_args)
     lister.run(min_bound=start, max_bound=end)
     self.log.debug('%s OK' % (self.name))
 
 
 @app.task(name='swh.lister.gitlab.tasks.FullGitLabRelister',
           bind=True)
 def full_gitlab_relister(self, **lister_args):
     self.log.debug('%s, lister_args=%s' % (
         self.name, lister_args))
     lister = new_lister(**lister_args)
     _, total_pages, _ = lister.get_pages_information()
     ranges = list(utils.split_range(total_pages, NBPAGES))
     random.shuffle(ranges)
     promise = group(range_gitlab_lister.s(minv, maxv, **lister_args)
                     for minv, maxv in ranges)()
     self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges)))
-    promise.save()
+    try:
+        promise.save()  # so that we can restore the GroupResult in tests
+    except NotImplementedError:
+        self.log.info('Unable to call save_group with current result backend.')
     return promise.id
 
 
 @app.task(name='swh.lister.gitlab.tasks.ping',
           bind=True)
 def ping(self):
     self.log.debug(self.name)
     return 'OK'