diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,4 @@
 swh.core
-swh.storage >= 0.0.103
-swh.storage[schemata] >= 0.0.76
+swh.storage >= 0.0.122
+swh.storage[schemata]
 swh.scheduler >= 0.0.39
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -51,6 +51,10 @@
     extras_require={'testing': parse_requirements('test')},
     vcversioner={'version_module_paths': ['swh/lister/_version.py']},
     include_package_data=True,
+    entry_points='''
+        [console_scripts]
+        swh-lister=swh.lister.cli:cli
+    ''',
     classifiers=[
         "Programming Language :: Python :: 3",
         "Intended Audience :: Developers",
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
--- a/swh/lister/cli.py
+++ b/swh/lister/cli.py
@@ -3,9 +3,12 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import logging
 import click
 
 
+logger = logging.getLogger(__name__)
+
 SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi', 'npm']
 
 
@@ -14,89 +17,111 @@
     '--db-url', '-d', default='postgres:///lister-gitlab.com',
     help='SQLAlchemy DB URL; see '
     '')  # noqa
-@click.option('--lister', required=1,
-              type=click.Choice(SUPPORTED_LISTERS),
-              help='Lister to act upon')
-@click.option('--create-tables', is_flag=True, default=False,
-              help='create tables')
-@click.option('--drop-tables', is_flag=True, default=False,
-              help='Drop tables')
-@click.option('--with-data', is_flag=True, default=False,
-              help='Insert minimum required data')
-def cli(db_url, lister, create_tables, drop_tables, with_data):
+@click.argument('listers', required=True, nargs=-1,
+                type=click.Choice(SUPPORTED_LISTERS + ['all']))
+@click.option('--drop-tables', '-D', is_flag=True, default=False,
+              help='Drop tables before creating the database schema')
+def cli(db_url, listers, drop_tables):
     """Initialize db model according to lister.
 
     """
-    override_conf = {'lister_db_url': db_url}
+    override_conf = {
+        'lister_db_url': db_url,
+        'lister': {
+            'cls': 'local',
+            'args': {'db': db_url}
+        }
+    }
 
     insert_minimum_data = None
-
-    if lister == 'github':
-        from .github.models import IndexingModelBase as ModelBase
-        from .github.lister import GitHubLister
-
-        _lister = GitHubLister(api_baseurl='https://api.github.com',
-                               override_config=override_conf)
-    elif lister == 'bitbucket':
-        from .bitbucket.models import IndexingModelBase as ModelBase
-        from .bitbucket.lister import BitBucketLister
-        _lister = BitBucketLister(api_baseurl='https://api.bitbucket.org/2.0',
-                                  override_config=override_conf)
-
-    elif lister == 'gitlab':
-        from .gitlab.models import ModelBase
-        from .gitlab.lister import GitLabLister
-        _lister = GitLabLister(api_baseurl='https://gitlab.com/api/v4/',
-                               override_config=override_conf)
-    elif lister == 'debian':
-        from .debian.lister import DebianLister
-        ModelBase = DebianLister.MODEL  # noqa
-        _lister = DebianLister(override_config=override_conf)
-
-        def insert_minimum_data(lister):
-            from swh.storage.schemata.distribution import Distribution, Area
-            d = Distribution(
-                name='Debian',
-                type='deb',
-                mirror_uri='http://deb.debian.org/debian/')
-            lister.db_session.add(d)
-
-            areas = []
-            for distribution_name in ['stretch']:
-                for area_name in ['main', 'contrib', 'non-free']:
-                    areas.append(Area(
-                        name='%s/%s' % (distribution_name, area_name),
-                        distribution=d,
-                    ))
-            lister.db_session.add_all(areas)
-            lister.db_session.commit()
-
-    elif lister == 'pypi':
-        from .pypi.models import ModelBase
-        from .pypi.lister import PyPILister
-        _lister = PyPILister(override_config=override_conf)
-
-    elif lister == 'npm':
-        from .npm.models import IndexingModelBase as ModelBase
-        from .npm.models import NpmVisitModel
-        from .npm.lister import NpmLister
-        _lister = NpmLister(override_config=override_conf)
-        if drop_tables:
-            NpmVisitModel.metadata.drop_all(_lister.db_engine)
-        if create_tables:
+    if 'all' in listers:
+        listers = SUPPORTED_LISTERS
+
+    for lister in listers:
+        logger.info('Initializing lister %s', lister)
+        # Reset per lister so that a hook defined for a previous lister
+        # (e.g. debian) is not run against the following ones.
+        insert_minimum_data = None
+        if lister == 'github':
+            from .github.models import IndexingModelBase as ModelBase
+            from .github.lister import GitHubLister
+
+            _lister = GitHubLister(
+                api_baseurl='https://api.github.com',
+                override_config=override_conf)
+        elif lister == 'bitbucket':
+            from .bitbucket.models import IndexingModelBase as ModelBase
+            from .bitbucket.lister import BitBucketLister
+            _lister = BitBucketLister(
+                api_baseurl='https://api.bitbucket.org/2.0',
+                override_config=override_conf)
+
+        elif lister == 'gitlab':
+            from .gitlab.models import ModelBase
+            from .gitlab.lister import GitLabLister
+            _lister = GitLabLister(
+                api_baseurl='https://gitlab.com/api/v4/',
+                override_config=override_conf)
+        elif lister == 'debian':
+            from .debian.lister import DebianLister
+            ModelBase = DebianLister.MODEL  # noqa
+            _lister = DebianLister(override_config=override_conf)
+
+            def insert_minimum_data(lister):
+                from swh.storage.schemata.distribution import (
+                    Distribution, Area)
+                d = Distribution(
+                    name='Debian',
+                    type='deb',
+                    mirror_uri='http://deb.debian.org/debian/')
+                lister.db_session.add(d)
+
+                areas = []
+                for distribution_name in ['stretch']:
+                    for area_name in ['main', 'contrib', 'non-free']:
+                        areas.append(Area(
+                            name='%s/%s' % (distribution_name, area_name),
+                            distribution=d,
+                        ))
+                lister.db_session.add_all(areas)
+                lister.db_session.commit()
+
+        elif lister == 'pypi':
+            from .pypi.models import ModelBase
+            from .pypi.lister import PyPILister
+            _lister = PyPILister(override_config=override_conf)
+
+        elif lister == 'npm':
+            from .npm.models import IndexingModelBase as ModelBase
+            from .npm.models import NpmVisitModel
+            from .npm.lister import NpmLister
+            _lister = NpmLister(override_config=override_conf)
+            if drop_tables:
+                NpmVisitModel.metadata.drop_all(_lister.db_engine)
             NpmVisitModel.metadata.create_all(_lister.db_engine)
 
-    else:
-        raise ValueError('Only supported listers are %s' % SUPPORTED_LISTERS)
+        else:
+            raise ValueError(
+                'Invalid lister %s: only supported listers are %s' %
+                (lister, SUPPORTED_LISTERS))
 
-    if drop_tables:
-        ModelBase.metadata.drop_all(_lister.db_engine)
+        if drop_tables:
+            logger.info('Dropping tables for %s', lister)
+            ModelBase.metadata.drop_all(_lister.db_engine)
 
-    if create_tables:
+        logger.info('Creating tables for %s', lister)
         ModelBase.metadata.create_all(_lister.db_engine)
 
-    if with_data and insert_minimum_data:
-        insert_minimum_data(_lister)
+        if insert_minimum_data:
+            logger.info('Inserting minimal data for %s', lister)
+            try:
+                insert_minimum_data(_lister)
+            except Exception:
+                # Best effort: keep initializing the remaining listers, but
+                # record the traceback so the failure can be diagnosed.
+                logger.warning(
+                    'Failed to insert minimum data in %s', lister,
+                    exc_info=True)
 
 
 if __name__ == '__main__':
diff --git a/swh/lister/core/lister_base.py b/swh/lister/core/lister_base.py
--- a/swh/lister/core/lister_base.py
+++ b/swh/lister/core/lister_base.py
@@ -223,7 +223,13 @@
             'args': {
                 'url': 'http://localhost:5008/'
             },
-        })
+        }),
+        'lister': ('dict', {
+            'cls': 'local',
+            'args': {
+                'db': 'postgresql:///lister',
+            },
+        }),
     }
 
     @property
@@ -233,8 +239,6 @@
     @property
     def ADDITIONAL_CONFIG(self):  # noqa: N802
         return {
-            'lister_db_url':
-                ('str', 'postgresql:///lister-%s' % self.LISTER_NAME),
             'credentials':
                 ('list[dict]', []),
             'cache_responses':
@@ -264,7 +268,7 @@
         logger.debug('%s CONFIG=%s' % (self, self.config))
         self.storage = get_storage(**self.config['storage'])
         self.scheduler = get_scheduler(**self.config['scheduler'])
-        self.db_engine = create_engine(self.config['lister_db_url'])
+        self.db_engine = create_engine(self.config['lister']['args']['db'])
         self.mk_session = sessionmaker(bind=self.db_engine)
         self.db_session = self.mk_session()
 