Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/gitlab/tasks.py
# Copyright (C) 2018 the Software Heritage developers | # Copyright (C) 2018 the Software Heritage developers | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import random | import random | ||||
from celery import group | from celery import group | ||||
from swh.scheduler.celery_backend.config import app | |||||
from swh.scheduler.task import SWHTask | |||||
from .. import utils | from .. import utils | ||||
from ..core.tasks import ListerTaskBase, RangeListerTask | |||||
from .lister import GitLabLister | from .lister import GitLabLister | ||||
class GitLabListerTask(ListerTaskBase): | NBPAGES = 10 | ||||
def new_lister(self, *, api_baseurl='https://gitlab.com/api/v4', | |||||
def new_lister(api_baseurl='https://gitlab.com/api/v4', | |||||
instance='gitlab', sort='asc', per_page=20): | instance='gitlab', sort='asc', per_page=20): | ||||
return GitLabLister( | return GitLabLister( | ||||
api_baseurl=api_baseurl, instance=instance, sort=sort) | api_baseurl=api_baseurl, instance=instance, sort=sort) | ||||
class RangeGitLabLister(GitLabListerTask, RangeListerTask): | @app.task(name='swh.lister.gitlab.tasks.IncrementalGitLabLister', | ||||
"""Range GitLab lister (list available origins on specified range) | base=SWHTask, bind=True) | ||||
def incremental_gitlab_lister(self, **lister_args): | |||||
""" | self.log.debug('%s, lister_args=%s' % ( | ||||
task_queue = 'swh_lister_gitlab_refresh' | self.name, lister_args)) | ||||
lister_args['sort'] = 'desc' | |||||
lister = new_lister(**lister_args) | |||||
class FullGitLabRelister(GitLabListerTask): | total_pages = lister.get_pages_information()[1] | ||||
"""Full GitLab lister (list all available origins from the api). | # stopping as soon as existing origins for that instance are detected | ||||
lister.run(min_bound=1, max_bound=total_pages, check_existence=True) | |||||
""" | self.log.debug('%s OK' % (self.name)) | ||||
task_queue = 'swh_lister_gitlab_refresh' | |||||
# nb pages | |||||
nb_pages = 10 | |||||
def run_task(self, lister_args=None): | @app.task(name='swh.lister.gitlab.tasks.RangeGitLabLister', | ||||
if lister_args is None: | base=SWHTask, bind=True) | ||||
lister_args = {} | def range_gitlab_lister(self, start, end, **lister_args): | ||||
lister = self.new_lister(**lister_args) | self.log.debug('%s(start=%s, end=%d), lister_args=%s' % ( | ||||
self.name, start, end, lister_args)) | |||||
lister = new_lister(**lister_args) | |||||
lister.run(min_bound=start, max_bound=end) | |||||
self.log.debug('%s OK' % (self.name)) | |||||
@app.task(name='swh.lister.gitlab.tasks.FullGitLabRelister', | |||||
base=SWHTask, bind=True) | |||||
def full_gitlab_relister(self, **lister_args): | |||||
self.log.debug('%s, lister_args=%s' % ( | |||||
self.name, lister_args)) | |||||
lister = new_lister(**lister_args) | |||||
_, total_pages, _ = lister.get_pages_information() | _, total_pages, _ = lister.get_pages_information() | ||||
ranges = list(utils.split_range(total_pages, self.nb_pages)) | ranges = list(utils.split_range(total_pages, NBPAGES)) | ||||
random.shuffle(ranges) | random.shuffle(ranges) | ||||
range_task = RangeGitLabLister() | group(range_gitlab_lister.s(minv, maxv, **lister_args) | ||||
group(range_task.s(minv, maxv, lister_args=lister_args) | |||||
for minv, maxv in ranges)() | for minv, maxv in ranges)() | ||||
self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges))) | |||||
class IncrementalGitLabLister(GitLabListerTask): | |||||
"""Incremental GitLab lister (list only new available origins). | |||||
""" | |||||
task_queue = 'swh_lister_gitlab_discover' | |||||
def run_task(self, lister_args=None): | |||||
if lister_args is None: | |||||
lister_args = {} | |||||
lister_args['sort'] = 'desc' | |||||
lister = self.new_lister(**lister_args) | |||||
_, total_pages, _ = lister.get_pages_information() | |||||
# stopping as soon as existing origins for that instance are detected | |||||
return lister.run(min_bound=1, max_bound=total_pages, | |||||
check_existence=True) |