Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/cli.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import logging | import logging | ||||
import pkg_resources | |||||
from copy import deepcopy | |||||
import click | import click | ||||
from sqlalchemy import create_engine | |||||
from swh.core.cli import CONTEXT_SETTINGS | from swh.core.cli import CONTEXT_SETTINGS | ||||
from swh.lister.core.models import initialize | |||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi', | LISTERS = {entry_point.name.split('.', 1)[1]: entry_point | ||||
'npm', 'phabricator', 'gnu', 'cran', 'cgit', 'packagist'] | for entry_point in pkg_resources.iter_entry_points('swh.workers') | ||||
if entry_point.name.split('.', 1)[0] == 'lister'} | |||||
SUPPORTED_LISTERS = list(LISTERS) | |||||
# Base urls for most listers | |||||
DEFAULT_BASEURLS = { | |||||
'gitlab': 'https://gitlab.com/api/v4/', | |||||
'phabricator': 'https://forge.softwareheritage.org', | |||||
} | |||||
def get_lister(lister_name, db_url, drop_tables=False, **conf): | def get_lister(lister_name, db_url=None, **conf): | ||||
"""Instantiate a lister given its name. | """Instantiate a lister given its name. | ||||
Args: | Args: | ||||
lister_name (str): Lister's name | lister_name (str): Lister's name | ||||
db_url (str): Db's service url access | conf (dict): Configuration dict (lister db cnx, policy, priority...) | ||||
conf (dict): Extra configuration (policy, priority for example) | |||||
Returns: | Returns: | ||||
Tuple (instantiated lister, drop_tables function, init schema function, | Tuple (instantiated lister, drop_tables function, init schema function, | ||||
insert minimum data function) | insert minimum data function) | ||||
""" | """ | ||||
override_conf = { | if lister_name not in LISTERS: | ||||
'lister': { | |||||
'cls': 'local', | |||||
'args': {'db': db_url} | |||||
}, | |||||
**conf, | |||||
} | |||||
# To allow api_baseurl override per lister | |||||
if 'api_baseurl' in override_conf: | |||||
api_baseurl = override_conf.pop('api_baseurl') | |||||
else: | |||||
api_baseurl = DEFAULT_BASEURLS.get(lister_name) | |||||
insert_minimum_data_fn = None | |||||
if lister_name == 'github': | |||||
from .github.models import IndexingModelBase as ModelBase | |||||
from .github.lister import GitHubLister | |||||
_lister = GitHubLister(api_baseurl='https://api.github.com', | |||||
override_config=override_conf) | |||||
elif lister_name == 'bitbucket': | |||||
from .bitbucket.models import IndexingModelBase as ModelBase | |||||
from .bitbucket.lister import BitBucketLister | |||||
_lister = BitBucketLister(api_baseurl='https://api.bitbucket.org/2.0', | |||||
override_config=override_conf) | |||||
elif lister_name == 'gitlab': | |||||
from .gitlab.models import ModelBase | |||||
from .gitlab.lister import GitLabLister | |||||
_lister = GitLabLister(api_baseurl=api_baseurl, | |||||
override_config=override_conf) | |||||
elif lister_name == 'debian': | |||||
from .debian.lister import DebianLister | |||||
ModelBase = DebianLister.MODEL # noqa | |||||
_lister = DebianLister(override_config=override_conf) | |||||
def insert_minimum_data_fn(lister_name, lister): | |||||
logger.info('Inserting minimal data for %s', lister_name) | |||||
from swh.storage.schemata.distribution import ( | |||||
Distribution, Area) | |||||
d = Distribution( | |||||
name='Debian', | |||||
type='deb', | |||||
mirror_uri='http://deb.debian.org/debian/') | |||||
lister.db_session.add(d) | |||||
areas = [] | |||||
for distribution_name in ['stretch']: | |||||
for area_name in ['main', 'contrib', 'non-free']: | |||||
areas.append(Area( | |||||
name='%s/%s' % (distribution_name, area_name), | |||||
distribution=d, | |||||
)) | |||||
lister.db_session.add_all(areas) | |||||
lister.db_session.commit() | |||||
elif lister_name == 'pypi': | |||||
from .pypi.models import ModelBase | |||||
from .pypi.lister import PyPILister | |||||
_lister = PyPILister(override_config=override_conf) | |||||
elif lister_name == 'npm': | |||||
from .npm.models import IndexingModelBase as ModelBase | |||||
from .npm.models import NpmVisitModel | |||||
from .npm.lister import NpmLister | |||||
_lister = NpmLister(override_config=override_conf) | |||||
def insert_minimum_data_fn(lister_name, lister): | |||||
logger.info('Inserting minimal data for %s', lister_name) | |||||
if drop_tables: | |||||
NpmVisitModel.metadata.drop_all(lister.db_engine) | |||||
NpmVisitModel.metadata.create_all(lister.db_engine) | |||||
elif lister_name == 'phabricator': | |||||
from .phabricator.models import IndexingModelBase as ModelBase | |||||
from .phabricator.lister import PhabricatorLister | |||||
_lister = PhabricatorLister(api_baseurl=api_baseurl, | |||||
override_config=override_conf) | |||||
elif lister_name == 'gnu': | |||||
from .gnu.models import ModelBase | |||||
from .gnu.lister import GNULister | |||||
_lister = GNULister(override_config=override_conf) | |||||
elif lister_name == 'cran': | |||||
from .cran.models import ModelBase | |||||
from .cran.lister import CRANLister | |||||
_lister = CRANLister(override_config=override_conf) | |||||
elif lister_name == 'cgit': | |||||
from .cgit.models import ModelBase | |||||
from .cgit.lister import CGitLister | |||||
_lister = CGitLister(url=api_baseurl, | |||||
override_config=override_conf) | |||||
elif lister_name == 'packagist': | |||||
from .packagist.models import ModelBase # noqa | |||||
from .packagist.lister import PackagistLister | |||||
_lister = PackagistLister(override_config=override_conf) | |||||
else: | |||||
raise ValueError( | raise ValueError( | ||||
'Invalid lister %s: only supported listers are %s' % | 'Invalid lister %s: only supported listers are %s' % | ||||
(lister_name, SUPPORTED_LISTERS)) | (lister_name, SUPPORTED_LISTERS)) | ||||
if db_url: | |||||
drop_table_fn = None | conf['lister'] = {'cls': 'local', 'args': {'db': db_url}} | ||||
if drop_tables: | # To allow api_baseurl override per lister | ||||
def drop_table_fn(lister_name, lister): | registry_entry = LISTERS[lister_name].load()() | ||||
logger.info('Dropping tables for %s', lister_name) | lister_cls = registry_entry['lister'] | ||||
ModelBase.metadata.drop_all(lister.db_engine) | lister = lister_cls(override_config=conf) | ||||
return lister | |||||
def init_schema_fn(lister_name, lister): | |||||
logger.info('Creating tables for %s', lister_name) | |||||
ModelBase.metadata.create_all(lister.db_engine) | |||||
return _lister, drop_table_fn, init_schema_fn, insert_minimum_data_fn | |||||
@click.group(name='lister', context_settings=CONTEXT_SETTINGS) | @click.group(name='lister', context_settings=CONTEXT_SETTINGS) | ||||
@click.option('--config-file', '-C', default=None, | |||||
type=click.Path(exists=True, dir_okay=False,), | |||||
help="Configuration file.") | |||||
@click.option('--db-url', '-d', default=None, | |||||
help='SQLAlchemy DB URL; see ' | |||||
'<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>') # noqa | |||||
@click.pass_context | @click.pass_context | ||||
def lister(ctx): | def lister(ctx, config_file, db_url): | ||||
'''Software Heritage Lister tools.''' | '''Software Heritage Lister tools.''' | ||||
pass | from swh.core import config | ||||
ctx.ensure_object(dict) | |||||
override_conf = {} | |||||
if db_url: | |||||
override_conf['lister'] = { | |||||
'cls': 'local', | |||||
'args': {'db': db_url} | |||||
} | |||||
conf = config.read(config_file, override_conf) | |||||
ctx.obj['config'] = conf | |||||
ctx.obj['override_conf'] = override_conf | |||||
@lister.command(name='db-init', context_settings=CONTEXT_SETTINGS) | @lister.command(name='db-init', context_settings=CONTEXT_SETTINGS) | ||||
@click.option('--db-url', '-d', default='postgres:///lister', | |||||
help='SQLAlchemy DB URL; see ' | |||||
'<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>') # noqa | |||||
@click.argument('listers', required=1, nargs=-1, | |||||
type=click.Choice(SUPPORTED_LISTERS + ['all'])) | |||||
@click.option('--drop-tables', '-D', is_flag=True, default=False, | @click.option('--drop-tables', '-D', is_flag=True, default=False, | ||||
help='Drop tables before creating the database schema') | help='Drop tables before creating the database schema') | ||||
@click.pass_context | @click.pass_context | ||||
def cli(ctx, db_url, listers, drop_tables): | def db_init(ctx, drop_tables): | ||||
"""Initialize the database model for given listers. | """Initialize the database model for given listers. | ||||
""" | """ | ||||
if 'all' in listers: | |||||
listers = SUPPORTED_LISTERS | |||||
for lister_name in listers: | |||||
logger.info('Initializing lister %s', lister_name) | |||||
lister, drop_schema_fn, init_schema_fn, insert_minimum_data_fn = \ | |||||
get_lister(lister_name, db_url, drop_tables=drop_tables) | |||||
if drop_schema_fn: | cfg = ctx.obj['config'] | ||||
drop_schema_fn(lister_name, lister) | lister_cfg = cfg['lister'] | ||||
if lister_cfg['cls'] != 'local': | |||||
vlorentz: this looks a lot like the code to generate `SUPPORTED_LISTERS`. | |||||
douardda (Done): I know... Not sure yet if it is a good idea to avoid it.
vlorentz (Not Done): you could do:
```
LISTERS = {entry_point.name.split('.', 1)[1]: entry_point
           for entry_point in pkg_resources.iter_entry_points('swh.workers')
           if entry_point.name.split('.', 1)[0] == 'lister'}
SUPPORTED_LISTERS = list(LISTERS)
```
ardumont (Not Done): Yeah, I like @vlorentz's improvement proposal.
init_schema_fn(lister_name, lister) | click.echo('A local lister configuration is required') | ||||
ctx.exit(1) | |||||
if insert_minimum_data_fn: | |||||
insert_minimum_data_fn(lister_name, lister) | db_url = lister_cfg['args']['db'] | ||||
db_engine = create_engine(db_url) | |||||
for lister, entrypoint in LISTERS.items(): | |||||
logger.info('Loading lister %s', lister) | |||||
registry_entry = entrypoint.load()() | |||||
logger.info('Initializing database') | |||||
initialize(db_engine, drop_tables) | |||||
for lister, entrypoint in LISTERS.items(): | |||||
init_hook = registry_entry.get('init') | |||||
if callable(init_hook): | |||||
logger.info('Calling init hook for %s', lister) | |||||
init_hook(db_engine) | |||||
@lister.command(name='run', context_settings=CONTEXT_SETTINGS, | @lister.command(name='run', context_settings=CONTEXT_SETTINGS, | ||||
help='Trigger a full listing run for a particular forge ' | help='Trigger a full listing run for a particular forge ' | ||||
'instance. The output of this listing results in ' | 'instance. The output of this listing results in ' | ||||
'"oneshot" tasks in the scheduler db with a priority ' | '"oneshot" tasks in the scheduler db with a priority ' | ||||
'defined by the user') | 'defined by the user') | ||||
@click.option('--db-url', '-d', default='postgres:///lister', | |||||
help='SQLAlchemy DB URL; see ' | |||||
'<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>') # noqa | |||||
@click.option('--lister', '-l', help='Lister to run', | @click.option('--lister', '-l', help='Lister to run', | ||||
type=click.Choice(SUPPORTED_LISTERS)) | type=click.Choice(SUPPORTED_LISTERS)) | ||||
@click.option('--priority', '-p', default='high', | @click.option('--priority', '-p', default='high', | ||||
type=click.Choice(['high', 'medium', 'low']), | type=click.Choice(['high', 'medium', 'low']), | ||||
help='Task priority for the listed repositories to ingest') | help='Task priority for the listed repositories to ingest') | ||||
@click.argument('options', nargs=-1) | @click.argument('options', nargs=-1) | ||||
@click.pass_context | @click.pass_context | ||||
def run(ctx, db_url, lister, priority, options): | def run(ctx, lister, priority, options): | ||||
from swh.scheduler.cli.utils import parse_options | from swh.scheduler.cli.utils import parse_options | ||||
config = deepcopy(ctx.obj['config']) | |||||
if options: | if options: | ||||
_, kwargs = parse_options(options) | config.update(parse_options(options)[1]) | ||||
else: | |||||
kwargs = {} | config['priority'] = priority | ||||
config['policy'] = 'oneshot' | |||||
override_config = { | |||||
'priority': priority, | |||||
'policy': 'oneshot', | |||||
**kwargs, | |||||
} | |||||
lister, _, _, _ = get_lister(lister, db_url, **override_config) | get_lister(lister, **config).run() | ||||
lister.run() | |||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||
cli() | lister() |
this looks a lot like the code to generate SUPPORTED_LISTERS.