Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/cli.py
| # Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2019 The Software Heritage developers | ||||
| # See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
| # License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
| # See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
| import logging | import logging | ||||
| import pkg_resources | |||||
| from copy import deepcopy | |||||
| import click | import click | ||||
| from sqlalchemy import create_engine | |||||
| from swh.core.cli import CONTEXT_SETTINGS | from swh.core.cli import CONTEXT_SETTINGS | ||||
| from swh.lister.core.models import initialize | |||||
| logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
| SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi', | LISTERS = {entry_point.name.split('.', 1)[1]: entry_point | ||||
| 'npm', 'phabricator', 'gnu', 'cran', 'cgit', 'packagist'] | for entry_point in pkg_resources.iter_entry_points('swh.workers') | ||||
| if entry_point.name.split('.', 1)[0] == 'lister'} | |||||
| SUPPORTED_LISTERS = list(LISTERS) | |||||
| # Base urls for most listers | |||||
| DEFAULT_BASEURLS = { | |||||
| 'gitlab': 'https://gitlab.com/api/v4/', | |||||
| 'phabricator': 'https://forge.softwareheritage.org', | |||||
| } | |||||
| def get_lister(lister_name, db_url, drop_tables=False, **conf): | def get_lister(lister_name, db_url=None, **conf): | ||||
| """Instantiate a lister given its name. | """Instantiate a lister given its name. | ||||
| Args: | Args: | ||||
| lister_name (str): Lister's name | lister_name (str): Lister's name | ||||
| db_url (str): Db's service url access | conf (dict): Configuration dict (lister db cnx, policy, priority...) | ||||
| conf (dict): Extra configuration (policy, priority for example) | |||||
| Returns: | Returns: | ||||
| Tuple (instantiated lister, drop_tables function, init schema function, | Tuple (instantiated lister, drop_tables function, init schema function, | ||||
| insert minimum data function) | insert minimum data function) | ||||
| """ | """ | ||||
| override_conf = { | if lister_name not in LISTERS: | ||||
| 'lister': { | |||||
| 'cls': 'local', | |||||
| 'args': {'db': db_url} | |||||
| }, | |||||
| **conf, | |||||
| } | |||||
| # To allow api_baseurl override per lister | |||||
| if 'api_baseurl' in override_conf: | |||||
| api_baseurl = override_conf.pop('api_baseurl') | |||||
| else: | |||||
| api_baseurl = DEFAULT_BASEURLS.get(lister_name) | |||||
| insert_minimum_data_fn = None | |||||
| if lister_name == 'github': | |||||
| from .github.models import IndexingModelBase as ModelBase | |||||
| from .github.lister import GitHubLister | |||||
| _lister = GitHubLister(api_baseurl='https://api.github.com', | |||||
| override_config=override_conf) | |||||
| elif lister_name == 'bitbucket': | |||||
| from .bitbucket.models import IndexingModelBase as ModelBase | |||||
| from .bitbucket.lister import BitBucketLister | |||||
| _lister = BitBucketLister(api_baseurl='https://api.bitbucket.org/2.0', | |||||
| override_config=override_conf) | |||||
| elif lister_name == 'gitlab': | |||||
| from .gitlab.models import ModelBase | |||||
| from .gitlab.lister import GitLabLister | |||||
| _lister = GitLabLister(api_baseurl=api_baseurl, | |||||
| override_config=override_conf) | |||||
| elif lister_name == 'debian': | |||||
| from .debian.lister import DebianLister | |||||
| ModelBase = DebianLister.MODEL # noqa | |||||
| _lister = DebianLister(override_config=override_conf) | |||||
| def insert_minimum_data_fn(lister_name, lister): | |||||
| logger.info('Inserting minimal data for %s', lister_name) | |||||
| from swh.storage.schemata.distribution import ( | |||||
| Distribution, Area) | |||||
| d = Distribution( | |||||
| name='Debian', | |||||
| type='deb', | |||||
| mirror_uri='http://deb.debian.org/debian/') | |||||
| lister.db_session.add(d) | |||||
| areas = [] | |||||
| for distribution_name in ['stretch']: | |||||
| for area_name in ['main', 'contrib', 'non-free']: | |||||
| areas.append(Area( | |||||
| name='%s/%s' % (distribution_name, area_name), | |||||
| distribution=d, | |||||
| )) | |||||
| lister.db_session.add_all(areas) | |||||
| lister.db_session.commit() | |||||
| elif lister_name == 'pypi': | |||||
| from .pypi.models import ModelBase | |||||
| from .pypi.lister import PyPILister | |||||
| _lister = PyPILister(override_config=override_conf) | |||||
| elif lister_name == 'npm': | |||||
| from .npm.models import IndexingModelBase as ModelBase | |||||
| from .npm.models import NpmVisitModel | |||||
| from .npm.lister import NpmLister | |||||
| _lister = NpmLister(override_config=override_conf) | |||||
| def insert_minimum_data_fn(lister_name, lister): | |||||
| logger.info('Inserting minimal data for %s', lister_name) | |||||
| if drop_tables: | |||||
| NpmVisitModel.metadata.drop_all(lister.db_engine) | |||||
| NpmVisitModel.metadata.create_all(lister.db_engine) | |||||
| elif lister_name == 'phabricator': | |||||
| from .phabricator.models import IndexingModelBase as ModelBase | |||||
| from .phabricator.lister import PhabricatorLister | |||||
| _lister = PhabricatorLister(api_baseurl=api_baseurl, | |||||
| override_config=override_conf) | |||||
| elif lister_name == 'gnu': | |||||
| from .gnu.models import ModelBase | |||||
| from .gnu.lister import GNULister | |||||
| _lister = GNULister(override_config=override_conf) | |||||
| elif lister_name == 'cran': | |||||
| from .cran.models import ModelBase | |||||
| from .cran.lister import CRANLister | |||||
| _lister = CRANLister(override_config=override_conf) | |||||
| elif lister_name == 'cgit': | |||||
| from .cgit.models import ModelBase | |||||
| from .cgit.lister import CGitLister | |||||
| _lister = CGitLister(url=api_baseurl, | |||||
| override_config=override_conf) | |||||
| elif lister_name == 'packagist': | |||||
| from .packagist.models import ModelBase # noqa | |||||
| from .packagist.lister import PackagistLister | |||||
| _lister = PackagistLister(override_config=override_conf) | |||||
| else: | |||||
| raise ValueError( | raise ValueError( | ||||
| 'Invalid lister %s: only supported listers are %s' % | 'Invalid lister %s: only supported listers are %s' % | ||||
| (lister_name, SUPPORTED_LISTERS)) | (lister_name, SUPPORTED_LISTERS)) | ||||
| if db_url: | |||||
| drop_table_fn = None | conf['lister'] = {'cls': 'local', 'args': {'db': db_url}} | ||||
| if drop_tables: | # To allow api_baseurl override per lister | ||||
| def drop_table_fn(lister_name, lister): | registry_entry = LISTERS[lister_name].load()() | ||||
| logger.info('Dropping tables for %s', lister_name) | lister_cls = registry_entry['lister'] | ||||
| ModelBase.metadata.drop_all(lister.db_engine) | lister = lister_cls(override_config=conf) | ||||
| return lister | |||||
| def init_schema_fn(lister_name, lister): | |||||
| logger.info('Creating tables for %s', lister_name) | |||||
| ModelBase.metadata.create_all(lister.db_engine) | |||||
| return _lister, drop_table_fn, init_schema_fn, insert_minimum_data_fn | |||||
| @click.group(name='lister', context_settings=CONTEXT_SETTINGS) | @click.group(name='lister', context_settings=CONTEXT_SETTINGS) | ||||
| @click.option('--config-file', '-C', default=None, | |||||
| type=click.Path(exists=True, dir_okay=False,), | |||||
| help="Configuration file.") | |||||
| @click.option('--db-url', '-d', default=None, | |||||
| help='SQLAlchemy DB URL; see ' | |||||
| '<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>') # noqa | |||||
| @click.pass_context | @click.pass_context | ||||
| def lister(ctx): | def lister(ctx, config_file, db_url): | ||||
| '''Software Heritage Lister tools.''' | '''Software Heritage Lister tools.''' | ||||
| pass | from swh.core import config | ||||
| ctx.ensure_object(dict) | |||||
| override_conf = {} | |||||
| if db_url: | |||||
| override_conf['lister'] = { | |||||
| 'cls': 'local', | |||||
| 'args': {'db': db_url} | |||||
| } | |||||
| conf = config.read(config_file, override_conf) | |||||
| ctx.obj['config'] = conf | |||||
| ctx.obj['override_conf'] = override_conf | |||||
| @lister.command(name='db-init', context_settings=CONTEXT_SETTINGS) | @lister.command(name='db-init', context_settings=CONTEXT_SETTINGS) | ||||
| @click.option('--db-url', '-d', default='postgres:///lister', | |||||
| help='SQLAlchemy DB URL; see ' | |||||
| '<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>') # noqa | |||||
| @click.argument('listers', required=1, nargs=-1, | |||||
| type=click.Choice(SUPPORTED_LISTERS + ['all'])) | |||||
| @click.option('--drop-tables', '-D', is_flag=True, default=False, | @click.option('--drop-tables', '-D', is_flag=True, default=False, | ||||
| help='Drop tables before creating the database schema') | help='Drop tables before creating the database schema') | ||||
| @click.pass_context | @click.pass_context | ||||
| def cli(ctx, db_url, listers, drop_tables): | def db_init(ctx, drop_tables): | ||||
| """Initialize the database model for given listers. | """Initialize the database model for given listers. | ||||
| """ | """ | ||||
| if 'all' in listers: | |||||
| listers = SUPPORTED_LISTERS | |||||
| for lister_name in listers: | |||||
| logger.info('Initializing lister %s', lister_name) | |||||
| lister, drop_schema_fn, init_schema_fn, insert_minimum_data_fn = \ | |||||
| get_lister(lister_name, db_url, drop_tables=drop_tables) | |||||
| if drop_schema_fn: | cfg = ctx.obj['config'] | ||||
| drop_schema_fn(lister_name, lister) | lister_cfg = cfg['lister'] | ||||
| if lister_cfg['cls'] != 'local': | |||||
vlorentz: this looks a lot like the code to generate `SUPPORTED_LISTERS`. | |||||
[Done] douardda: I know... Not sure yet if it is a good idea to avoid it. | |||||
[Not Done] vlorentz: you could do:
```
LISTERS = {entry_point.name.split('.', 1)[1]: entry_point
           for entry_point
           in pkg_resources.iter_entry_points('swh.workers')
           if entry_point.name.split('.', 1)[0] == 'lister'}
SUPPORTED_LISTERS = list(LISTERS)
```
| |||||
[Not Done] ardumont: yeah, I like @vlorentz's improvement proposal | |||||
| init_schema_fn(lister_name, lister) | click.echo('A local lister configuration is required') | ||||
| ctx.exit(1) | |||||
| if insert_minimum_data_fn: | |||||
| insert_minimum_data_fn(lister_name, lister) | db_url = lister_cfg['args']['db'] | ||||
| db_engine = create_engine(db_url) | |||||
| for lister, entrypoint in LISTERS.items(): | |||||
| logger.info('Loading lister %s', lister) | |||||
| registry_entry = entrypoint.load()() | |||||
| logger.info('Initializing database') | |||||
| initialize(db_engine, drop_tables) | |||||
| for lister, entrypoint in LISTERS.items(): | |||||
| init_hook = registry_entry.get('init') | |||||
| if callable(init_hook): | |||||
| logger.info('Calling init hook for %s', lister) | |||||
| init_hook(db_engine) | |||||
| @lister.command(name='run', context_settings=CONTEXT_SETTINGS, | @lister.command(name='run', context_settings=CONTEXT_SETTINGS, | ||||
| help='Trigger a full listing run for a particular forge ' | help='Trigger a full listing run for a particular forge ' | ||||
| 'instance. The output of this listing results in ' | 'instance. The output of this listing results in ' | ||||
| '"oneshot" tasks in the scheduler db with a priority ' | '"oneshot" tasks in the scheduler db with a priority ' | ||||
| 'defined by the user') | 'defined by the user') | ||||
| @click.option('--db-url', '-d', default='postgres:///lister', | |||||
| help='SQLAlchemy DB URL; see ' | |||||
| '<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>') # noqa | |||||
| @click.option('--lister', '-l', help='Lister to run', | @click.option('--lister', '-l', help='Lister to run', | ||||
| type=click.Choice(SUPPORTED_LISTERS)) | type=click.Choice(SUPPORTED_LISTERS)) | ||||
| @click.option('--priority', '-p', default='high', | @click.option('--priority', '-p', default='high', | ||||
| type=click.Choice(['high', 'medium', 'low']), | type=click.Choice(['high', 'medium', 'low']), | ||||
| help='Task priority for the listed repositories to ingest') | help='Task priority for the listed repositories to ingest') | ||||
| @click.argument('options', nargs=-1) | @click.argument('options', nargs=-1) | ||||
| @click.pass_context | @click.pass_context | ||||
| def run(ctx, db_url, lister, priority, options): | def run(ctx, lister, priority, options): | ||||
| from swh.scheduler.cli.utils import parse_options | from swh.scheduler.cli.utils import parse_options | ||||
| config = deepcopy(ctx.obj['config']) | |||||
| if options: | if options: | ||||
| _, kwargs = parse_options(options) | config.update(parse_options(options)[1]) | ||||
| else: | |||||
| kwargs = {} | config['priority'] = priority | ||||
| config['policy'] = 'oneshot' | |||||
| override_config = { | |||||
| 'priority': priority, | |||||
| 'policy': 'oneshot', | |||||
| **kwargs, | |||||
| } | |||||
| lister, _, _, _ = get_lister(lister, db_url, **override_config) | get_lister(lister, **config).run() | ||||
| lister.run() | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| cli() | lister() | ||||
this looks a lot like the code to generate SUPPORTED_LISTERS.