# Copyright (C) 2018-2019  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

# File: swh/lister/cli.py
# State of the module once `new_lister` has been renamed to `get_lister`.

import logging

import click

from swh.core.cli import CONTEXT_SETTINGS


logger = logging.getLogger(__name__)

SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi',
                     'npm', 'phabricator', 'gnu', 'cran', 'cgit', 'packagist']


# Base urls for most listers
DEFAULT_BASEURLS = {
    'gitlab': 'https://gitlab.com/api/v4/',
    'phabricator': 'https://forge.softwareheritage.org',
    'cgit': (
        'http://git.savannah.gnu.org/cgit/',
        'http://git.savannah.gnu.org/git/'
    ),
}


def get_lister(lister_name, db_url, drop_tables=False, **conf):
    """Instantiate a lister given its name.

    Args:
        lister_name (str): Lister's name
        db_url (str): Db's service url access
        drop_tables (bool): When true, a function dropping the lister's
            tables is returned as well (it is None otherwise)
        conf (dict): Extra configuration (policy, priority for example).
            An 'api_baseurl' entry, when present, is removed from the
            configuration and replaces the DEFAULT_BASEURLS entry of the
            lister. Only the gitlab, phabricator and cgit listers are
            given that url.

    Returns:
        Tuple (instantiated lister, drop_tables function, init schema
        function, insert minimum data function). The drop_tables function
        is None unless drop_tables is true. The insert minimum data
        function is None for every lister but debian and npm. Each
        returned function takes (lister_name, lister) as arguments.

    Raises:
        ValueError: if lister_name is not one of the supported listers

    """
    override_conf = {
        'lister': {
            'cls': 'local',
            'args': {'db': db_url}
        },
        **conf,
    }

    # To allow api_baseurl override per lister: the key must not stay in the
    # configuration handed over to the lister.
    api_baseurl = override_conf.pop(
        'api_baseurl', DEFAULT_BASEURLS.get(lister_name))

    insert_minimum_data_fn = None

    # Lister modules are imported inside their own branch, so only the
    # requested lister's modules get loaded.
    if lister_name == 'github':
        from .github.models import IndexingModelBase as ModelBase
        from .github.lister import GitHubLister

        _lister = GitHubLister(api_baseurl='https://api.github.com',
                               override_config=override_conf)
    elif lister_name == 'bitbucket':
        from .bitbucket.models import IndexingModelBase as ModelBase
        from .bitbucket.lister import BitBucketLister

        _lister = BitBucketLister(api_baseurl='https://api.bitbucket.org/2.0',
                                  override_config=override_conf)
    elif lister_name == 'gitlab':
        from .gitlab.models import ModelBase
        from .gitlab.lister import GitLabLister

        _lister = GitLabLister(api_baseurl=api_baseurl,
                               override_config=override_conf)
    elif lister_name == 'debian':
        from .debian.lister import DebianLister

        ModelBase = DebianLister.MODEL  # noqa
        _lister = DebianLister(override_config=override_conf)

        def insert_minimum_data_fn(lister_name, lister):
            logger.info('Inserting minimal data for %s', lister_name)
            from swh.storage.schemata.distribution import (
                Distribution, Area)

            d = Distribution(
                name='Debian',
                type='deb',
                mirror_uri='http://deb.debian.org/debian/')
            lister.db_session.add(d)

            # One area per (distribution, component) pair
            areas = [
                Area(name='%s/%s' % (distribution_name, area_name),
                     distribution=d)
                for distribution_name in ['stretch']
                for area_name in ['main', 'contrib', 'non-free']
            ]
            lister.db_session.add_all(areas)
            lister.db_session.commit()
    elif lister_name == 'pypi':
        from .pypi.models import ModelBase
        from .pypi.lister import PyPILister

        _lister = PyPILister(override_config=override_conf)
    elif lister_name == 'npm':
        from .npm.models import IndexingModelBase as ModelBase
        from .npm.models import NpmVisitModel
        from .npm.lister import NpmLister

        _lister = NpmLister(override_config=override_conf)

        def insert_minimum_data_fn(lister_name, lister):
            logger.info('Inserting minimal data for %s', lister_name)
            # NpmVisitModel's metadata is handled here, separately from
            # the ModelBase metadata used by the drop/init functions below.
            if drop_tables:
                NpmVisitModel.metadata.drop_all(lister.db_engine)
            NpmVisitModel.metadata.create_all(lister.db_engine)
    elif lister_name == 'phabricator':
        from .phabricator.models import IndexingModelBase as ModelBase
        from .phabricator.lister import PhabricatorLister

        _lister = PhabricatorLister(forge_url=api_baseurl,
                                    override_config=override_conf)
    elif lister_name == 'gnu':
        from .gnu.models import ModelBase
        from .gnu.lister import GNULister

        _lister = GNULister(override_config=override_conf)
    elif lister_name == 'cran':
        from .cran.models import ModelBase
        from .cran.lister import CRANLister

        _lister = CRANLister(override_config=override_conf)
    elif lister_name == 'cgit':
        from .cgit.models import ModelBase
        from .cgit.lister import CGitLister

        if isinstance(api_baseurl, str):
            # A single url (e.g. a user-provided override)
            _lister = CGitLister(url=api_baseurl,
                                 override_config=override_conf)
        else:
            # tuple (url, url_prefix), as in DEFAULT_BASEURLS
            _lister = CGitLister(url=api_baseurl[0],
                                 url_prefix=api_baseurl[1],
                                 override_config=override_conf)
    elif lister_name == 'packagist':
        from .packagist.models import ModelBase  # noqa
        from .packagist.lister import PackagistLister

        _lister = PackagistLister(override_config=override_conf)
    else:
        raise ValueError(
            'Invalid lister %s: only supported listers are %s' %
            (lister_name, SUPPORTED_LISTERS))

    drop_table_fn = None
    if drop_tables:
        def drop_table_fn(lister_name, lister):
            logger.info('Dropping tables for %s', lister_name)
            ModelBase.metadata.drop_all(lister.db_engine)

    def init_schema_fn(lister_name, lister):
        logger.info('Creating tables for %s', lister_name)
        ModelBase.metadata.create_all(lister.db_engine)

    return _lister, drop_table_fn, init_schema_fn, insert_minimum_data_fn
@click.group(name='lister', context_settings=CONTEXT_SETTINGS)
@click.pass_context
def lister(ctx):
    '''Software Heritage Lister tools.'''
    pass


# NOTE(review): the --db-url help text ends at "see " and is followed by an
# empty string literal; a reference url looks to be missing -- confirm.
@lister.command(name='db-init', context_settings=CONTEXT_SETTINGS)
@click.option('--db-url', '-d', default='postgres:///lister',
              help='SQLAlchemy DB URL; see '
              '')  # noqa
@click.argument('listers', required=True, nargs=-1,
                type=click.Choice(SUPPORTED_LISTERS + ['all']))
@click.option('--drop-tables', '-D', is_flag=True, default=False,
              help='Drop tables before creating the database schema')
@click.pass_context
def cli(ctx, db_url, listers, drop_tables):
    """Initialize the database model for given listers.

    """
    # 'all' is a shortcut for every supported lister
    if 'all' in listers:
        listers = SUPPORTED_LISTERS

    for lister_name in listers:
        logger.info('Initializing lister %s', lister_name)
        # Local name chosen so that the `lister` click group is not shadowed
        lister_obj, drop_schema_fn, init_schema_fn, insert_minimum_data_fn = \
            get_lister(lister_name, db_url, drop_tables=drop_tables)

        # Order matters: drop (when requested), create, then populate
        if drop_schema_fn:
            drop_schema_fn(lister_name, lister_obj)

        init_schema_fn(lister_name, lister_obj)

        if insert_minimum_data_fn:
            insert_minimum_data_fn(lister_name, lister_obj)


@lister.command(name='run', context_settings=CONTEXT_SETTINGS,
                help='Trigger a full listing run for a particular forge '
                     'instance. The output of this listing results in '
                     '"oneshot" tasks in the scheduler db with a priority '
                     'defined by the user')
@click.option('--db-url', '-d', default='postgres:///lister',
              help='SQLAlchemy DB URL; see '
              '')  # noqa
@click.option('--lister', '-l', help='Lister to run',
              type=click.Choice(SUPPORTED_LISTERS))
@click.option('--priority', '-p', default='high',
              type=click.Choice(['high', 'medium', 'low']),
              help='Task priority for the listed repositories to ingest')
@click.argument('options', nargs=-1)
@click.pass_context
def run(ctx, db_url, lister, priority, options):
    # `lister` is the name given through --lister; inside this function it
    # hides the `lister` click group defined above.
    from swh.scheduler.cli.utils import parse_options

    if options:
        _, kwargs = parse_options(options)
    else:
        kwargs = {}

    # The extra options come last, so they take precedence over the
    # 'priority' and 'policy' entries set here.
    override_config = {
        'priority': priority,
        'policy': 'oneshot',
        **kwargs,
    }

    lister_obj, _, _, _ = get_lister(lister, db_url, **override_config)
    lister_obj.run()


if __name__ == '__main__':
    # NOTE(review): this calls the `db-init` command directly, not the
    # `lister` group, so `run` is not reachable this way -- confirm intent.
    cli()


# ---------------------------------------------------------------------------
# File: swh/lister/tests/test_cli.py
# State of the test module once `new_lister` has been renamed to `get_lister`.
# ---------------------------------------------------------------------------

# Copyright (C) 2019  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import pytest

from swh.lister.core.lister_base import ListerBase
from swh.lister.cli import get_lister, SUPPORTED_LISTERS, DEFAULT_BASEURLS

from .test_utils import init_db


def test_get_lister_wrong_input():
    """Unsupported lister should raise"""
    with pytest.raises(ValueError) as e:
        get_lister('unknown', 'db-url')

    assert "Invalid lister" in str(e.value)


def test_get_lister():
    """Instantiating a supported lister should be ok

    """
    db_url = init_db().url()
    # Listers for which get_lister also returns an insert-data function
    supported_listers_with_init = {'npm', 'debian'}

    supported_listers = set(SUPPORTED_LISTERS) - supported_listers_with_init
    for lister_name in supported_listers:
        lst, drop_fn, init_fn, insert_data_fn = get_lister(lister_name, db_url)

        assert isinstance(lst, ListerBase)
        assert drop_fn is None
        assert init_fn is not None
        assert insert_data_fn is None

    for lister_name in supported_listers_with_init:
        lst, drop_fn, init_fn, insert_data_fn = get_lister(lister_name, db_url)

        assert isinstance(lst, ListerBase)
        assert drop_fn is None
        assert init_fn is not None
        assert insert_data_fn is not None

    # With drop_tables, a drop function is returned as well
    for lister_name in supported_listers_with_init:
        lst, drop_fn, init_fn, insert_data_fn = get_lister(lister_name, db_url,
                                                           drop_tables=True)

        assert isinstance(lst, ListerBase)
        assert drop_fn is not None
        assert init_fn is not None
        assert insert_data_fn is not None


def test_get_lister_override():
    """Overriding the lister configuration should populate its config

    """
    db_url = init_db().url()

    # lister name -> (lister attribute holding the url, override url)
    listers = {
        'gitlab': ('api_baseurl', 'https://gitlab.uni/api/v4/'),
        'phabricator': ('forge_url', 'https://somewhere.org'),
        'cgit': ('url_prefix', 'https://some-cgit.eu/'),
    }

    # check the override ends up defined in the lister
    for lister_name, (url_key, url_value) in listers.items():
        lst, drop_fn, init_fn, insert_data_fn = get_lister(
            lister_name, db_url, **{
                'api_baseurl': url_value,
                'priority': 'high',
                'policy': 'oneshot',
            })

        assert getattr(lst, url_key) == url_value
        assert lst.config['priority'] == 'high'
        assert lst.config['policy'] == 'oneshot'

    # check the default urls are used and not the override (since it's not
    # passed)
    for lister_name, (url_key, url_value) in listers.items():
        lst, drop_fn, init_fn, insert_data_fn = get_lister(lister_name, db_url)

        # no override so this does not end up in lister's configuration
        assert url_key not in lst.config

        # then the default base url is used
        default_url = DEFAULT_BASEURLS[lister_name]
        if isinstance(default_url, tuple):  # cgit implementation detail...
            default_url = default_url[1]

        assert getattr(lst, url_key) == default_url
        assert 'priority' not in lst.config
        # Check the 'policy' key: 'oneshot' is a value, so looking it up as
        # a key could never fail.
        assert 'policy' not in lst.config