diff --git a/bin/ghlister b/bin/ghlister index 95b10b5..a18ab64 100755 --- a/bin/ghlister +++ b/bin/ghlister @@ -1,103 +1,101 @@ #!/usr/bin/env python3 # Copyright (C) 2015 Stefano Zacchiroli # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import argparse import logging import sys from swh.lister.github import models from swh.lister.github.lister import GitHubLister DEFAULT_CONF = { 'cache_dir': './cache', 'log_dir': './log', 'cache_json': 'False', } def int_interval(s): """parse an "N-M" string as an interval. Return an (N,M) int (or None) pair """ def not_an_interval(): raise argparse.ArgumentTypeError('not an interval: ' + s) def parse_int(s): if s: return int(s) else: return None if '-' not in s: not_an_interval() parts = s.split('-') if len(parts) > 2: not_an_interval() return tuple([parse_int(p) for p in parts]) def parse_args(): cli = argparse.ArgumentParser( description='list GitHub repositories and load them into a DB') cli.add_argument('--db-url', '-d', metavar='SQLALCHEMY_URL', help='SQLAlchemy DB URL (override conffile); see ' '') # NOQA cli.add_argument('--verbose', '-v', action='store_true', help='be verbose') subcli = cli.add_subparsers(dest='action') subcli.add_parser('createdb', help='initialize DB') subcli.add_parser('dropdb', help='destroy DB') list_cli = subcli.add_parser('list', help='list repositories') list_cli.add_argument('interval', type=int_interval, help='interval of repository IDs to list, ' 'in N-M format; either N or M can be omitted.') list_cli = subcli.add_parser('catchup', help='catchup with new repos since last time') args = cli.parse_args() if not args.action: cli.error('no action given') return args if __name__ == '__main__': logging.basicConfig(level=logging.INFO) # XXX args = parse_args() override_conf = {} - if args.db_url: - override_conf['lister_db_url'] = args.db_url lister = GitHubLister(lister_name='github.com', api_baseurl='https://api.github.com', override_config=override_conf) if args.action == 'createdb': models.ModelBase.metadata.create_all(lister.db_engine) elif args.action == 'dropdb': models.ModelBase.metadata.drop_all(lister.db_engine) elif args.action == 'list': lister.fetch(min_id=args.interval[0], max_id=args.interval[1]) elif args.action == 'catchup': last_known_id = lister.last_repo_id() if last_known_id is not None: logging.info('catching up from last known repo id: %d' % last_known_id) lister.fetch(min_id=last_known_id + 1, max_id=None) else: logging.error('Cannot catchup: no last known id found. Abort.') sys.exit(2) diff --git a/swh/lister/cli.py b/swh/lister/cli.py index 0c16da0..8809025 100644 --- a/swh/lister/cli.py +++ b/swh/lister/cli.py @@ -1,122 +1,121 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import click logger = logging.getLogger(__name__) SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi', 'npm'] @click.command() @click.option( '--db-url', '-d', default='postgres:///lister-gitlab.com', help='SQLAlchemy DB URL; see ' '') # noqa @click.argument('listers', required=1, nargs=-1, type=click.Choice(SUPPORTED_LISTERS + ['all'])) @click.option('--drop-tables', '-D', is_flag=True, default=False, help='Drop tables before creating the database schema') def cli(db_url, listers, drop_tables): """Initialize db model according to lister. """ override_conf = { - 'lister_db_url': db_url, 'lister': { 'cls': 'local', 'args': {'db': db_url} } } if 'all' in listers: listers = SUPPORTED_LISTERS for lister in listers: logger.info('Initializing lister %s', lister) insert_minimum_data = None if lister == 'github': from .github.models import IndexingModelBase as ModelBase from .github.lister import GitHubLister _lister = GitHubLister( api_baseurl='https://api.github.com', override_config=override_conf) elif lister == 'bitbucket': from .bitbucket.models import IndexingModelBase as ModelBase from .bitbucket.lister import BitBucketLister _lister = BitBucketLister( api_baseurl='https://api.bitbucket.org/2.0', override_config=override_conf) elif lister == 'gitlab': from .gitlab.models import ModelBase from .gitlab.lister import GitLabLister _lister = GitLabLister( api_baseurl='https://gitlab.com/api/v4/', override_config=override_conf) elif lister == 'debian': from .debian.lister import DebianLister ModelBase = DebianLister.MODEL # noqa _lister = DebianLister(override_config=override_conf) def insert_minimum_data(lister): from swh.storage.schemata.distribution import ( Distribution, Area) d = Distribution( name='Debian', type='deb', mirror_uri='http://deb.debian.org/debian/') lister.db_session.add(d) areas = [] for distribution_name in ['stretch']: for area_name in ['main', 'contrib', 'non-free']: areas.append(Area( name='%s/%s' % (distribution_name, area_name), distribution=d, )) lister.db_session.add_all(areas) lister.db_session.commit() elif lister == 'pypi': from .pypi.models import ModelBase from .pypi.lister import PyPILister _lister = PyPILister(override_config=override_conf) elif lister == 'npm': from .npm.models import IndexingModelBase as ModelBase from .npm.models import NpmVisitModel from .npm.lister import NpmLister _lister = NpmLister(override_config=override_conf) if drop_tables: NpmVisitModel.metadata.drop_all(_lister.db_engine) NpmVisitModel.metadata.create_all(_lister.db_engine) else: raise ValueError( 'Invalid lister %s: only supported listers are %s' % (lister, SUPPORTED_LISTERS)) if drop_tables: logger.info('Dropping tables for %s', lister) ModelBase.metadata.drop_all(_lister.db_engine) logger.info('Creating tables for %s', lister) ModelBase.metadata.create_all(_lister.db_engine) if insert_minimum_data: logger.info('Inserting minimal data for %s', lister) try: insert_minimum_data(_lister) except Exception: logger.warning( 'Failed to insert minimum data in %s', lister) if __name__ == '__main__': cli()