diff --git a/swh/lister/bitbucket/tasks.py b/swh/lister/bitbucket/tasks.py index a084846..f3f415e 100644 --- a/swh/lister/bitbucket/tasks.py +++ b/swh/lister/bitbucket/tasks.py @@ -1,57 +1,58 @@ # Copyright (C) 2017-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import random from celery import group from swh.scheduler.celery_backend.config import app from .lister import BitBucketLister GROUP_SPLIT = 10000 def new_lister(api_baseurl='https://api.bitbucket.org/2.0', per_page=100): return BitBucketLister(api_baseurl=api_baseurl, per_page=per_page) @app.task(name=__name__ + '.IncrementalBitBucketLister') -def incremental_bitbucket_lister(**lister_args): +def list_bitbucket_incremental(**lister_args): + '''Incremental update of the BitBucket forge''' lister = new_lister(**lister_args) lister.run(min_bound=lister.db_last_index(), max_bound=None) @app.task(name=__name__ + '.RangeBitBucketLister') -def range_bitbucket_lister(start, end, **lister_args): +def _range_bitbucket_lister(start, end, **lister_args): lister = new_lister(**lister_args) lister.run(min_bound=start, max_bound=end) @app.task(name=__name__ + '.FullBitBucketRelister', bind=True) -def full_bitbucket_relister(self, split=None, **lister_args): - """Relist from the beginning of what's already been listed. +def list_bitbucket_full(self, split=None, **lister_args): + """Full update of the BitBucket forge It's not to be called for an initial listing. """ lister = new_lister(**lister_args) ranges = lister.db_partition_indices(split or GROUP_SPLIT) if not ranges: self.log.info('Nothing to list') return random.shuffle(ranges) - promise = group(range_bitbucket_lister.s(minv, maxv, **lister_args) + promise = group(_range_bitbucket_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges)() self.log.debug('%s OK (spawned %s subtasks)', (self.name, len(ranges))) try: promise.save() # so that we can restore the GroupResult in tests except (NotImplementedError, AttributeError): self.log.info('Unable to call save_group with current result backend.') return promise.id @app.task(name=__name__ + '.ping') -def ping(): +def _ping(): return 'OK' diff --git a/swh/lister/cgit/tasks.py b/swh/lister/cgit/tasks.py index 723d44d..0066cf7 100644 --- a/swh/lister/cgit/tasks.py +++ b/swh/lister/cgit/tasks.py @@ -1,17 +1,18 @@ # Copyright (C) 2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.scheduler.celery_backend.config import app from .lister import CGitLister @app.task(name=__name__ + '.CGitListerTask') -def cgit_lister(**lister_args): +def list_cgit(**lister_args): + '''Lister task for CGit instances''' CGitLister(**lister_args).run() @app.task(name=__name__ + '.ping') -def ping(): +def _ping(): return 'OK' diff --git a/swh/lister/cli.py b/swh/lister/cli.py index 59dc941..50bde74 100644 --- a/swh/lister/cli.py +++ b/swh/lister/cli.py @@ -1,133 +1,235 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import os import logging import pkg_resources from copy import deepcopy +from importlib import import_module import click from sqlalchemy import create_engine from swh.core.cli import CONTEXT_SETTINGS +from swh.scheduler import get_scheduler +from swh.scheduler.task import SWHTask from swh.lister.core.models import initialize logger = logging.getLogger(__name__) LISTERS = {entry_point.name.split('.', 1)[1]: entry_point for entry_point in pkg_resources.iter_entry_points('swh.workers') if entry_point.name.split('.', 1)[0] == 'lister'} SUPPORTED_LISTERS = list(LISTERS) +# the key in this dict is the suffix used to match new task-type to be added. +# For example for a task which function name is "list_gitlab_full', the default +# value used when inserting a new task-type in the scheduler db will be the one +# under the 'full' key below (because it matches xxx_full). +DEFAULT_TASK_TYPE = { + 'full': { # for tasks like 'list_xxx_full()' + 'default_interval': '90 days', + 'min_interval': '90 days', + 'max_interval': '90 days', + 'backoff_factor': 1 + }, + '*': { # value if not suffix matches + 'default_interval': '1 day', + 'min_interval': '1 day', + 'max_interval': '1 day', + 'backoff_factor': 1 + }, + } + def get_lister(lister_name, db_url=None, **conf): """Instantiate a lister given its name. Args: lister_name (str): Lister's name conf (dict): Configuration dict (lister db cnx, policy, priority...) Returns: Tuple (instantiated lister, drop_tables function, init schema function, insert minimum data function) """ if lister_name not in LISTERS: raise ValueError( 'Invalid lister %s: only supported listers are %s' % (lister_name, SUPPORTED_LISTERS)) if db_url: conf['lister'] = {'cls': 'local', 'args': {'db': db_url}} # To allow api_baseurl override per lister registry_entry = LISTERS[lister_name].load()() lister_cls = registry_entry['lister'] lister = lister_cls(override_config=conf) return lister @click.group(name='lister', context_settings=CONTEXT_SETTINGS) @click.option('--config-file', '-C', default=None, type=click.Path(exists=True, dir_okay=False,), help="Configuration file.") @click.option('--db-url', '-d', default=None, help='SQLAlchemy DB URL; see ' '') # noqa @click.pass_context def lister(ctx, config_file, db_url): '''Software Heritage Lister tools.''' from swh.core import config ctx.ensure_object(dict) override_conf = {} if db_url: override_conf['lister'] = { 'cls': 'local', 'args': {'db': db_url} } + if not config_file: + config_file = os.environ.get('SWH_CONFIG_FILENAME') conf = config.read(config_file, override_conf) ctx.obj['config'] = conf ctx.obj['override_conf'] = override_conf @lister.command(name='db-init', context_settings=CONTEXT_SETTINGS) @click.option('--drop-tables', '-D', is_flag=True, default=False, help='Drop tables before creating the database schema') @click.pass_context def db_init(ctx, drop_tables): """Initialize the database model for given listers. """ cfg = ctx.obj['config'] lister_cfg = cfg['lister'] if lister_cfg['cls'] != 'local': click.echo('A local lister configuration is required') ctx.exit(1) db_url = lister_cfg['args']['db'] db_engine = create_engine(db_url) + registry = {} for lister, entrypoint in LISTERS.items(): logger.info('Loading lister %s', lister) - registry_entry = entrypoint.load()() + registry[lister] = entrypoint.load()() logger.info('Initializing database') initialize(db_engine, drop_tables) for lister, entrypoint in LISTERS.items(): + registry_entry = registry[lister] init_hook = registry_entry.get('init') if callable(init_hook): logger.info('Calling init hook for %s', lister) init_hook(db_engine) +@lister.command(name='register-task-types', context_settings=CONTEXT_SETTINGS) +@click.option('--lister', '-l', 'listers', multiple=True, + default=('all', ), show_default=True, + help='Only registers task-types for these listers', + type=click.Choice(['all'] + SUPPORTED_LISTERS)) +@click.pass_context +def register_task_types(ctx, listers): + """Insert missing task-type entries in the scheduler + + According to declared tasks in each loaded lister plugin. + """ + + cfg = ctx.obj['config'] + scheduler = get_scheduler(**cfg['scheduler']) + + for lister, entrypoint in LISTERS.items(): + if 'all' not in listers and lister not in listers: + continue + logger.info('Loading lister %s', lister) + + registry_entry = entrypoint.load()() + for task_module in registry_entry['task_modules']: + mod = import_module(task_module) + for task_name in (x for x in dir(mod) if not x.startswith('_')): + taskobj = getattr(mod, task_name) + if isinstance(taskobj, SWHTask): + task_type = task_name.replace('_', '-') + task_cfg = registry_entry.get('task_types', {}).get( + task_type, {}) + ensure_task_type(task_type, taskobj, task_cfg, scheduler) + + +def ensure_task_type(task_type, swhtask, task_config, scheduler): + """Ensure a task-type is known by the scheduler + + Args: + task_type (str): the type of the task to check/insert (correspond to + the 'type' field in the db) + swhtask (SWHTask): the SWHTask instance the task-type correspond to + task_config (dict): a dict with specific/overloaded values for the + task-type to be created + scheduler: the scheduler object used to access the scheduler db + """ + for suffix, defaults in DEFAULT_TASK_TYPE.items(): + if task_type.endswith('-' + suffix): + task_type_dict = defaults.copy() + break + else: + task_type_dict = DEFAULT_TASK_TYPE['*'].copy() + + task_type_dict['type'] = task_type + task_type_dict['backend_name'] = swhtask.name + if swhtask.__doc__: + task_type_dict['description'] = swhtask.__doc__.splitlines()[0] + + task_type_dict.update(task_config) + + current_task_type = scheduler.get_task_type(task_type) + if current_task_type: + # check some stuff + if current_task_type['backend_name'] != task_type_dict['backend_name']: + logger.warning('Existing task type %s for lister %s has a ' + 'different backend name than current ' + 'code version provides (%s vs. %s)', + task_type, + lister, + current_task_type['backend_name'], + task_type_dict['backend_name'], + ) + else: + logger.info('Create task type %s in scheduler', task_type) + logger.debug(' %s', task_type_dict) + scheduler.create_task_type(task_type_dict) + + @lister.command(name='run', context_settings=CONTEXT_SETTINGS, help='Trigger a full listing run for a particular forge ' 'instance. The output of this listing results in ' '"oneshot" tasks in the scheduler db with a priority ' 'defined by the user') @click.option('--lister', '-l', help='Lister to run', type=click.Choice(SUPPORTED_LISTERS)) @click.option('--priority', '-p', default='high', type=click.Choice(['high', 'medium', 'low']), help='Task priority for the listed repositories to ingest') @click.argument('options', nargs=-1) @click.pass_context def run(ctx, lister, priority, options): from swh.scheduler.cli.utils import parse_options config = deepcopy(ctx.obj['config']) if options: config.update(parse_options(options)[1]) config['priority'] = priority config['policy'] = 'oneshot' get_lister(lister, **config).run() if __name__ == '__main__': lister() diff --git a/swh/lister/cran/tasks.py b/swh/lister/cran/tasks.py index f7098a1..3b449de 100644 --- a/swh/lister/cran/tasks.py +++ b/swh/lister/cran/tasks.py @@ -1,17 +1,18 @@ # Copyright (C) 2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.scheduler.celery_backend.config import app from swh.lister.cran.lister import CRANLister @app.task(name=__name__ + '.CRANListerTask') -def cran_lister(**lister_args): +def list_cran(**lister_args): + '''Lister task for the CRAN registry''' CRANLister(**lister_args).run() @app.task(name=__name__ + '.ping') -def ping(): +def _ping(): return 'OK' diff --git a/swh/lister/debian/tasks.py b/swh/lister/debian/tasks.py index 9f5af90..356b192 100644 --- a/swh/lister/debian/tasks.py +++ b/swh/lister/debian/tasks.py @@ -1,17 +1,18 @@ # Copyright (C) 2017-2018 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.scheduler.celery_backend.config import app from .lister import DebianLister @app.task(name=__name__ + '.DebianListerTask') -def debian_lister(distribution, **lister_args): +def list_debian_distribution(distribution, **lister_args): + '''List a Debian distribution''' DebianLister(**lister_args).run(distribution) @app.task(name=__name__ + '.ping') -def ping(): +def _ping(): return 'OK' diff --git a/swh/lister/github/tasks.py b/swh/lister/github/tasks.py index bc3f8c2..c94db27 100644 --- a/swh/lister/github/tasks.py +++ b/swh/lister/github/tasks.py @@ -1,56 +1,57 @@ # Copyright (C) 2017-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import random from celery import group from swh.scheduler.celery_backend.config import app from swh.lister.github.lister import GitHubLister GROUP_SPLIT = 10000 def new_lister(api_baseurl='https://api.github.com', **kw): return GitHubLister(api_baseurl=api_baseurl, **kw) @app.task(name=__name__ + '.IncrementalGitHubLister') -def incremental_github_lister(**lister_args): +def list_github_incremental(**lister_args): + 'Incremental update of GitHub' lister = new_lister(**lister_args) lister.run(min_bound=lister.db_last_index(), max_bound=None) @app.task(name=__name__ + '.RangeGitHubLister') -def range_github_lister(start, end, **lister_args): +def _range_github_lister(start, end, **lister_args): lister = new_lister(**lister_args) lister.run(min_bound=start, max_bound=end) @app.task(name=__name__ + '.FullGitHubRelister', bind=True) -def full_github_relister(self, split=None, **lister_args): - """Relist from the beginning of what's already been listed. +def list_github_full(self, split=None, **lister_args): + """Full update of GitHub It's not to be called for an initial listing. """ lister = new_lister(**lister_args) ranges = lister.db_partition_indices(split or GROUP_SPLIT) if not ranges: self.log.info('Nothing to list') return random.shuffle(ranges) - promise = group(range_github_lister.s(minv, maxv, **lister_args) + promise = group(_range_github_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges)() self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges))) try: promise.save() # so that we can restore the GroupResult in tests except (NotImplementedError, AttributeError): self.log.info('Unable to call save_group with current result backend.') return promise.id @app.task(name=__name__ + '.ping') -def ping(): +def _ping(): return 'OK' diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py index eff3114..30c4241 100644 --- a/swh/lister/gitlab/tasks.py +++ b/swh/lister/gitlab/tasks.py @@ -1,62 +1,59 @@ # Copyright (C) 2018 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import random from celery import group from swh.scheduler.celery_backend.config import app from .. import utils from .lister import GitLabLister NBPAGES = 10 def new_lister(api_baseurl='https://gitlab.com/api/v4', instance=None, sort='asc', per_page=20): return GitLabLister( api_baseurl=api_baseurl, instance=instance, sort=sort, per_page=per_page) @app.task(name=__name__ + '.IncrementalGitLabLister') -def incremental_gitlab_lister(**lister_args): +def list_gitlab_incremental(**lister_args): + """Incremental update of a GitLab instance""" lister_args['sort'] = 'desc' lister = new_lister(**lister_args) total_pages = lister.get_pages_information()[1] # stopping as soon as existing origins for that instance are detected lister.run(min_bound=1, max_bound=total_pages, check_existence=True) @app.task(name=__name__ + '.RangeGitLabLister') -def range_gitlab_lister(start, end, **lister_args): +def _range_gitlab_lister(start, end, **lister_args): lister = new_lister(**lister_args) lister.run(min_bound=start, max_bound=end) @app.task(name=__name__ + '.FullGitLabRelister', bind=True) -def full_gitlab_relister(self, **lister_args): - """Full lister - - This should be renamed as such. - - """ +def list_gitlab_full(self, **lister_args): + """Full update of a GitLab instance""" lister = new_lister(**lister_args) _, total_pages, _ = lister.get_pages_information() ranges = list(utils.split_range(total_pages, NBPAGES)) random.shuffle(ranges) - promise = group(range_gitlab_lister.s(minv, maxv, **lister_args) + promise = group(_range_gitlab_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges)() self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges))) try: promise.save() except (NotImplementedError, AttributeError): self.log.info('Unable to call save_group with current result backend.') return promise.id @app.task(name=__name__ + '.ping') -def ping(): +def _ping(): return 'OK' diff --git a/swh/lister/gnu/tasks.py b/swh/lister/gnu/tasks.py index 251eccf..7191453 100644 --- a/swh/lister/gnu/tasks.py +++ b/swh/lister/gnu/tasks.py @@ -1,17 +1,18 @@ # Copyright (C) 2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.scheduler.celery_backend.config import app from .lister import GNULister @app.task(name=__name__ + '.GNUListerTask') -def gnu_lister(**lister_args): +def list_gnu_full(**lister_args): + 'List lister for the GNU source code archive' GNULister(**lister_args).run() @app.task(name=__name__ + '.ping') -def ping(): +def _ping(): return 'OK' diff --git a/swh/lister/npm/__init__.py b/swh/lister/npm/__init__.py index 59802d8..77c3d38 100644 --- a/swh/lister/npm/__init__.py +++ b/swh/lister/npm/__init__.py @@ -1,13 +1,20 @@ # Copyright (C) 2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information def register(): from .models import NpmVisitModel, NpmModel from .lister import NpmLister return {'models': [NpmVisitModel, NpmModel], 'lister': NpmLister, 'task_modules': ['%s.tasks' % __name__], + 'task_types': { + 'list-npm-full': { + 'default_interval': '7 days', + 'min_interval': '7 days', + 'max_interval': '7 days', + }, + }, } diff --git a/swh/lister/npm/tasks.py b/swh/lister/npm/tasks.py index 26a243b..18c8374 100644 --- a/swh/lister/npm/tasks.py +++ b/swh/lister/npm/tasks.py @@ -1,60 +1,62 @@ # Copyright (C) 2018 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime from contextlib import contextmanager from swh.scheduler.celery_backend.config import app from swh.lister.npm.lister import NpmLister, NpmIncrementalLister from swh.lister.npm.models import NpmVisitModel @contextmanager def save_registry_state(lister): params = {'headers': lister.request_headers()} registry_state = lister.session.get(lister.api_baseurl, **params) registry_state = registry_state.json() keys = ('doc_count', 'doc_del_count', 'update_seq', 'purge_seq', 'disk_size', 'data_size', 'committed_update_seq', 'compacted_seq') state = {key: registry_state[key] for key in keys} state['visit_date'] = datetime.now() yield npm_visit = NpmVisitModel(**state) lister.db_session.add(npm_visit) lister.db_session.commit() def get_last_update_seq(lister): """Get latest ``update_seq`` value for listing only updated packages. """ query = lister.db_session.query(NpmVisitModel.update_seq) row = query.order_by(NpmVisitModel.uid.desc()).first() if not row: raise ValueError('No npm registry listing previously performed ! ' 'This is required prior to the execution of an ' 'incremental listing.') return row[0] @app.task(name=__name__ + '.NpmListerTask') -def npm_lister(**lister_args): +def list_npm_full(**lister_args): + 'Full lister for the npm (javascript) registry' lister = NpmLister(**lister_args) with save_registry_state(lister): lister.run() @app.task(name=__name__ + '.NpmIncrementalListerTask') -def npm_incremental_lister(**lister_args): +def list_npm_incremental(**lister_args): + 'Incremental lister for the npm (javascript) registry' lister = NpmIncrementalLister(**lister_args) update_seq_start = get_last_update_seq(lister) with save_registry_state(lister): lister.run(min_bound=update_seq_start) @app.task(name=__name__ + '.ping') -def ping(): +def _ping(): return 'OK' diff --git a/swh/lister/packagist/tasks.py b/swh/lister/packagist/tasks.py index e17e892..6db17dc 100644 --- a/swh/lister/packagist/tasks.py +++ b/swh/lister/packagist/tasks.py @@ -1,17 +1,18 @@ # Copyright (C) 2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.scheduler.celery_backend.config import app from .lister import PackagistLister @app.task(name=__name__ + '.PackagistListerTask') -def packagist_lister(**lister_args): +def list_packagist(**lister_args): + 'List the packagist (php) registry' PackagistLister(**lister_args).run() @app.task(name=__name__ + '.ping') -def ping(): +def _ping(): return 'OK' diff --git a/swh/lister/phabricator/tasks.py b/swh/lister/phabricator/tasks.py index e465f69..0b8b77d 100644 --- a/swh/lister/phabricator/tasks.py +++ b/swh/lister/phabricator/tasks.py @@ -1,16 +1,17 @@ # Copyright (C) 2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.scheduler.celery_backend.config import app from swh.lister.phabricator.lister import PhabricatorLister @app.task(name=__name__ + '.FullPhabricatorLister') -def full_phabricator_lister(**lister_args): +def list_phabricator_full(**lister_args): + 'Full update of a Phabricator instance' PhabricatorLister(**lister_args).run() @app.task(name=__name__ + '.ping') -def ping(): +def _ping(): return 'OK' diff --git a/swh/lister/pypi/tasks.py b/swh/lister/pypi/tasks.py index bf210ab..d6206a4 100644 --- a/swh/lister/pypi/tasks.py +++ b/swh/lister/pypi/tasks.py @@ -1,17 +1,18 @@ # Copyright (C) 2018 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.scheduler.celery_backend.config import app from .lister import PyPILister @app.task(name=__name__ + '.PyPIListerTask') -def pypi_lister(**lister_args): +def list_pypi(**lister_args): + 'Full update of the PyPI (python) registry' PyPILister(**lister_args).run() @app.task(name=__name__ + '.ping') -def ping(): +def _ping(): return 'OK' diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py index c99b53b..a526384 100644 --- a/swh/lister/tests/test_cli.py +++ b/swh/lister/tests/test_cli.py @@ -1,66 +1,141 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import glob import pytest +import traceback +from datetime import timedelta + +import yaml + +from swh.core.utils import numfile_sortkey as sortkey +from swh.scheduler import get_scheduler +from swh.scheduler.tests.conftest import DUMP_FILES from swh.lister.core.lister_base import ListerBase -from swh.lister.cli import get_lister, SUPPORTED_LISTERS +from swh.lister.cli import lister as cli, get_lister, SUPPORTED_LISTERS from .test_utils import init_db +from click.testing import CliRunner + + +@pytest.fixture +def swh_scheduler_config(request, postgresql_proc, postgresql): + scheduler_config = { + 'db': 'postgresql://{user}@{host}:{port}/{dbname}'.format( + host=postgresql_proc.host, + port=postgresql_proc.port, + user='postgres', + dbname='tests') + } + + all_dump_files = sorted(glob.glob(DUMP_FILES), key=sortkey) + + cursor = postgresql.cursor() + for fname in all_dump_files: + with open(fname) as fobj: + cursor.execute(fobj.read()) + postgresql.commit() + + return scheduler_config def test_get_lister_wrong_input(): """Unsupported lister should raise""" with pytest.raises(ValueError) as e: get_lister('unknown', 'db-url') assert "Invalid lister" in str(e.value) def test_get_lister(): """Instantiating a supported lister should be ok """ db_url = init_db().url() for lister_name in SUPPORTED_LISTERS: lst = get_lister(lister_name, db_url) assert isinstance(lst, ListerBase) def test_get_lister_override(): """Overriding the lister configuration should populate its config """ db_url = init_db().url() listers = { 'gitlab': ('api_baseurl', 'https://gitlab.uni/api/v4/'), 'phabricator': ( 'api_baseurl', 'https://somewhere.org/api/diffusion.repository.search'), } # check the override ends up defined in the lister for lister_name, (url_key, url_value) in listers.items(): lst = get_lister( lister_name, db_url, **{ url_key: url_value, 'priority': 'high', 'policy': 'oneshot', }) assert getattr(lst, url_key) == url_value assert lst.config['priority'] == 'high' assert lst.config['policy'] == 'oneshot' # check the default urls are used and not the override (since it's not # passed) for lister_name, (url_key, url_value) in listers.items(): lst = get_lister(lister_name, db_url) # no override so this does not end up in lister's configuration assert url_key not in lst.config assert 'priority' not in lst.config assert 'oneshot' not in lst.config + + +def test_task_types(swh_scheduler_config, tmp_path): + db_url = init_db().url() + + configfile = tmp_path / 'config.yml' + configfile.write_text(yaml.dump({'scheduler': { + 'cls': 'local', + 'args': swh_scheduler_config}})) + runner = CliRunner() + result = runner.invoke(cli, [ + '--db-url', db_url, + '--config-file', configfile.as_posix(), + 'register-task-types']) + + assert result.exit_code == 0, traceback.print_exception(*result.exc_info) + + scheduler = get_scheduler(cls='local', args=swh_scheduler_config) + all_tasks = [ + 'list-bitbucket-full', 'list-bitbucket-incremental', + 'list-cran', + 'list-cgit', + 'list-debian-distribution', + 'list-gitlab-full', 'list-gitlab-incremental', + 'list-github-full', 'list-github-incremental', + 'list-gnu-full', + 'list-npm-full', 'list-npm-incremental', + 'list-phabricator-full', + 'list-packagist', + 'list-pypi', + ] + for task in all_tasks: + task_type_desc = scheduler.get_task_type(task) + assert task_type_desc + assert task_type_desc['type'] == task + assert task_type_desc['backoff_factor'] == 1 + + if task == 'list-npm-full': + delay = timedelta(days=7) # overloaded in the plugin registry + elif task.endswith('-full'): + delay = timedelta(days=90) # default value for 'full' lister tasks + else: + delay = timedelta(days=1) # default value for other lister tasks + assert task_type_desc['default_interval'] == delay, task