diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -56,6 +56,14 @@ swh-lister=swh.lister.cli:cli [swh.cli.subcommands] lister=swh.lister.cli:lister + [swh.workers] + lister.bitbucket=swh.lister.bitbucket:register + lister.debian=swh.lister.debian:register + lister.github=swh.lister.github:register + lister.gitlab=swh.lister.gitlab:register + lister.npm=swh.lister.npm:register + lister.phabricator=swh.lister.phabricator:register + lister.pypi=swh.lister.pypi:register ''', classifiers=[ "Programming Language :: Python :: 3", diff --git a/swh/lister/bitbucket/__init__.py b/swh/lister/bitbucket/__init__.py --- a/swh/lister/bitbucket/__init__.py +++ b/swh/lister/bitbucket/__init__.py @@ -0,0 +1,17 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from functools import partial + +from swh.lister.core.models import initialize + + +def register(): + from .models import BitBucketModel as Model + from .lister import BitBucketLister as Lister + + return {'model': Model, + 'lister': Lister, + 'tasks': '%s.tasks' % __name__, + 'init': partial(initialize, Model)} diff --git a/swh/lister/cli.py b/swh/lister/cli.py --- a/swh/lister/cli.py +++ b/swh/lister/cli.py @@ -4,15 +4,22 @@ # See top-level LICENSE file for more information import logging +import pkg_resources + import click +from sqlalchemy import create_engine + +from swh.core.config import SWHConfig from swh.core.cli import CONTEXT_SETTINGS logger = logging.getLogger(__name__) -SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi', - 'npm', 'phabricator'] +SUPPORTED_LISTERS = [entry_point.name.split('.', 1)[-1] + for entry_point + in pkg_resources.iter_entry_points('swh.workers') + if entry_point.name.split('.', 1)[0] == 'lister'] @click.group(name='lister', context_settings=CONTEXT_SETTINGS) @@ -24,7 +31,7 @@ @lister.command(name='db-init', context_settings=CONTEXT_SETTINGS) @click.option( - '--db-url', '-d', default='postgres:///lister-gitlab.com', + '--db-url', '-d', default=None, help='SQLAlchemy DB URL; see ' '') # noqa @click.argument('listers', required=1, nargs=-1, @@ -36,104 +43,33 @@ """Initialize the database model for given listers. """ - override_conf = { - 'lister': { + override_conf = {} + if db_url: + override_conf['lister'] = { 'cls': 'local', 'args': {'db': db_url} } - } + cfg = SWHConfig.parse_config_file(additional_configs=[override_conf]) + lister_cfg = cfg['lister'] + if lister_cfg['cls'] != 'local': + click.echo('A local lister configuration is required') + ctx.exit(1) + + db_url = lister_cfg['args']['db'] + db_engine = create_engine(db_url) if 'all' in listers: listers = SUPPORTED_LISTERS + plugins = {entry_point.name.split('.', 1)[1]: entry_point + for entry_point + in pkg_resources.iter_entry_points('swh.workers') + if entry_point.name.split('.', 1)[0] == 'lister'} for lister in listers: logger.info('Initializing lister %s', lister) - insert_minimum_data = None - if lister == 'github': - from .github.models import IndexingModelBase as ModelBase - from .github.lister import GitHubLister - - _lister = GitHubLister( - api_baseurl='https://api.github.com', - override_config=override_conf) - elif lister == 'bitbucket': - from .bitbucket.models import IndexingModelBase as ModelBase - from .bitbucket.lister import BitBucketLister - _lister = BitBucketLister( - api_baseurl='https://api.bitbucket.org/2.0', - override_config=override_conf) - - elif lister == 'gitlab': - from .gitlab.models import ModelBase - from .gitlab.lister import GitLabLister - _lister = GitLabLister( - api_baseurl='https://gitlab.com/api/v4/', - override_config=override_conf) - elif lister == 'debian': - from .debian.lister import DebianLister - ModelBase = DebianLister.MODEL # noqa - _lister = DebianLister(override_config=override_conf) - - def insert_minimum_data(lister): - from swh.storage.schemata.distribution import ( - Distribution, Area) - d = Distribution( - name='Debian', - type='deb', - mirror_uri='http://deb.debian.org/debian/') - lister.db_session.add(d) - - areas = [] - for distribution_name in ['stretch']: - for area_name in ['main', 'contrib', 'non-free']: - areas.append(Area( - name='%s/%s' % (distribution_name, area_name), - distribution=d, - )) - lister.db_session.add_all(areas) - lister.db_session.commit() - - elif lister == 'pypi': - from .pypi.models import ModelBase - from .pypi.lister import PyPILister - _lister = PyPILister(override_config=override_conf) - - elif lister == 'npm': - from .npm.models import IndexingModelBase as ModelBase - from .npm.models import NpmVisitModel - from .npm.lister import NpmLister - _lister = NpmLister(override_config=override_conf) - if drop_tables: - NpmVisitModel.metadata.drop_all(_lister.db_engine) - NpmVisitModel.metadata.create_all(_lister.db_engine) - - elif lister == 'phabricator': - from .phabricator.models import IndexingModelBase as ModelBase - from .phabricator.lister import PhabricatorLister - _lister = PhabricatorLister( - forge_url='https://forge.softwareheritage.org', - api_token='', - override_config=override_conf) - - else: - raise ValueError( - 'Invalid lister %s: only supported listers are %s' % - (lister, SUPPORTED_LISTERS)) - - if drop_tables: - logger.info('Dropping tables for %s', lister) - ModelBase.metadata.drop_all(_lister.db_engine) - - logger.info('Creating tables for %s', lister) - ModelBase.metadata.create_all(_lister.db_engine) - - if insert_minimum_data: - logger.info('Inserting minimal data for %s', lister) - try: - insert_minimum_data(_lister) - except Exception: - logger.warning( - 'Failed to insert minimum data in %s', lister) + + registry_entry = plugins[lister].load()() + registry_entry['init'](db_engine, drop_tables) if __name__ == '__main__': diff --git a/swh/lister/core/models.py b/swh/lister/core/models.py --- a/swh/lister/core/models.py +++ b/swh/lister/core/models.py @@ -4,12 +4,15 @@ import abc from datetime import datetime +import logging from sqlalchemy import Column, DateTime, Integer, String from sqlalchemy.ext.declarative import declarative_base, DeclarativeMeta from .abstractattribute import AbstractAttribute +logger = logging.getLogger(__name__) + SQLBase = declarative_base() @@ -48,3 +51,12 @@ # The value used for sorting, segmenting, or api query paging, # because uids aren't always sequential. indexable = AbstractAttribute('Column(, index=True)') + + +def initialize(model, db_engine, drop_tables=False, override_conf=None): + if drop_tables: + logger.info('Dropping tables') + model.metadata.drop_all(db_engine) + + logger.info('Creating tables') + model.metadata.create_all(db_engine) diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py --- a/swh/lister/core/tests/conftest.py +++ b/swh/lister/core/tests/conftest.py @@ -1,15 +1 @@ -import pytest from swh.scheduler.tests.conftest import * # noqa - - -@pytest.fixture(scope='session') -def celery_includes(): - return [ - 'swh.lister.bitbucket.tasks', - 'swh.lister.debian.tasks', - 'swh.lister.github.tasks', - 'swh.lister.gitlab.tasks', - 'swh.lister.npm.tasks', - 'swh.lister.pypi.tasks', - 'swh.lister.phabricator.tasks', - ] diff --git a/swh/lister/debian/__init__.py b/swh/lister/debian/__init__.py --- a/swh/lister/debian/__init__.py +++ b/swh/lister/debian/__init__.py @@ -0,0 +1,42 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +def debian_init(db_engine, drop_tables=False, override_conf=None): + from swh.storage.schemata.distribution import ( + Distribution, Area) + from swh.lister.core.models import initialize + from .lister import DebianLister as Lister + + lister = Lister(override_config=override_conf) + initialize(lister.MODEL, db_engine, drop_tables, override_conf) + + if not lister.db_session\ + .query(Distribution)\ + .filter(Distribution.name == 'Debian')\ + .one_or_none(): + + d = Distribution( + name='Debian', + type='deb', + mirror_uri='http://deb.debian.org/debian/') + lister.db_session.add(d) + + areas = [] + for distribution_name in ['stretch', 'buster']: + for area_name in ['main', 'contrib', 'non-free']: + areas.append(Area( + name='%s/%s' % (distribution_name, area_name), + distribution=d, + )) + lister.db_session.add_all(areas) + lister.db_session.commit() + + +def register(): + from .lister import DebianLister as Lister + return {'model': Lister.MODEL, + 'lister': Lister, + 'tasks': '%s.tasks' % __name__, + 'init': debian_init} diff --git a/swh/lister/github/__init__.py b/swh/lister/github/__init__.py --- a/swh/lister/github/__init__.py +++ b/swh/lister/github/__init__.py @@ -0,0 +1,17 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from functools import partial + +from swh.lister.core.models import initialize + + +def register(): + from .models import GitHubModel as Model + from .lister import GitHubLister as Lister + + return {'model': Model, + 'lister': Lister, + 'tasks': '%s.tasks' % __name__, + 'init': partial(initialize, Model)} diff --git a/swh/lister/gitlab/__init__.py b/swh/lister/gitlab/__init__.py --- a/swh/lister/gitlab/__init__.py +++ b/swh/lister/gitlab/__init__.py @@ -0,0 +1,17 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from functools import partial + +from swh.lister.core.models import initialize + + +def register(): + from .models import GitLabModel as Model + from .lister import GitLabLister as Lister + + return {'model': Model, + 'lister': Lister, + 'tasks': '%s.tasks' % __name__, + 'init': partial(initialize, Model)} diff --git a/swh/lister/npm/__init__.py b/swh/lister/npm/__init__.py --- a/swh/lister/npm/__init__.py +++ b/swh/lister/npm/__init__.py @@ -0,0 +1,17 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from functools import partial + +from swh.lister.core.models import initialize + + +def register(): + from .models import NpmVisitModel as Model + from .lister import NpmLister as Lister + + return {'model': Model, + 'lister': Lister, + 'tasks': '%s.tasks' % __name__, + 'init': partial(initialize, Model)} diff --git a/swh/lister/phabricator/__init__.py b/swh/lister/phabricator/__init__.py --- a/swh/lister/phabricator/__init__.py +++ b/swh/lister/phabricator/__init__.py @@ -0,0 +1,17 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from functools import partial + +from swh.lister.core.models import initialize + + +def register(): + from .models import PhabricatorModel as Model + from .lister import PhabricatorLister as Lister + + return {'model': Model, + 'lister': Lister, + 'tasks': '%s.tasks' % __name__, + 'init': partial(initialize, Model)} diff --git a/swh/lister/pypi/__init__.py b/swh/lister/pypi/__init__.py --- a/swh/lister/pypi/__init__.py +++ b/swh/lister/pypi/__init__.py @@ -0,0 +1,17 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from functools import partial + +from swh.lister.core.models import initialize + + +def register(): + from .models import PyPIModel as Model + from .lister import PyPILister as Lister + + return {'model': Model, + 'lister': Lister, + 'tasks': '%s.tasks' % __name__, + 'init': partial(initialize, Model)}