Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/github/tasks.py
# Copyright (C) 2017-2019 the Software Heritage developers | # Copyright (C) 2017-2019 the Software Heritage developers | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import random | import random | ||||
from celery import group | from celery import group | ||||
from swh.scheduler.celery_backend.config import app | from swh.scheduler.celery_backend.config import app | ||||
from swh.lister.github.lister import GitHubLister | from swh.lister.github.lister import GitHubLister | ||||
# Fallback value handed to ``db_partition_indices()`` by ``list_github_full``
# when the caller does not provide ``split``.
GROUP_SPLIT = 10000
def new_lister(api_baseurl='https://api.github.com', **kw):
    """Build a ``GitHubLister``.

    Args:
        api_baseurl: value forwarded as the lister's ``api_baseurl``
            keyword; defaults to ``https://api.github.com``.
        **kw: any further keyword arguments, forwarded unchanged.

    Returns:
        The ``GitHubLister`` instance created from these arguments.
    """
    lister_kwargs = {'api_baseurl': api_baseurl, **kw}
    return GitHubLister(**lister_kwargs)
@app.task(name=__name__ + '.IncrementalGitHubLister')
def list_github_incremental(**lister_args):
    """Incremental update of GitHub.

    Instantiates a ``GitHubLister`` from ``lister_args`` and runs it with
    ``min_bound`` set to the value of ``db_last_index()`` and no upper bound.
    """
    github = GitHubLister(**lister_args)
    resume_from = github.db_last_index()
    github.run(min_bound=resume_from, max_bound=None)
@app.task(name=__name__ + '.RangeGitHubLister')
def _range_github_lister(start, end, **lister_args):
    """Run a ``GitHubLister`` over a single index range.

    Args:
        start: value passed to the lister as ``min_bound``.
        end: value passed to the lister as ``max_bound``.
        **lister_args: keyword arguments used to build the ``GitHubLister``.
    """
    bounds = {'min_bound': start, 'max_bound': end}
    GitHubLister(**lister_args).run(**bounds)
@app.task(name=__name__ + '.FullGitHubRelister', bind=True) | @app.task(name=__name__ + '.FullGitHubRelister', bind=True) | ||||
def list_github_full(self, split=None, **lister_args): | def list_github_full(self, split=None, **lister_args): | ||||
"""Full update of GitHub | """Full update of GitHub | ||||
It's not to be called for an initial listing. | It's not to be called for an initial listing. | ||||
""" | """ | ||||
lister = new_lister(**lister_args) | lister = GitHubLister(**lister_args) | ||||
ranges = lister.db_partition_indices(split or GROUP_SPLIT) | ranges = lister.db_partition_indices(split or GROUP_SPLIT) | ||||
if not ranges: | if not ranges: | ||||
self.log.info('Nothing to list') | self.log.info('Nothing to list') | ||||
return | return | ||||
random.shuffle(ranges) | random.shuffle(ranges) | ||||
promise = group(_range_github_lister.s(minv, maxv, **lister_args) | promise = group(_range_github_lister.s(minv, maxv, **lister_args) | ||||
for minv, maxv in ranges)() | for minv, maxv in ranges)() | ||||
self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges))) | self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges))) | ||||
Show All 10 Lines |