diff --git a/bin/batch b/bin/batch deleted file mode 100755 --- a/bin/batch +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2015 Stefano Zacchiroli -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -export https_proxy="127.0.0.1:8118" # use Tor -export PYTHONPATH=`pwd` - -DBNAME=github -DBCONN="-p 5433" - -psql="psql $DBCONN --no-psqlrc --pset t --pset format=unaligned ${DBNAME}" - -BATCH_NO="$1" -shift -if [ -z "$BATCH_NO" ] ; then - echo "Usage: batch MILLION_NO [ MIN_ID | continue ]" - exit 2 -fi - -MIN_ID="$1" -shift - -min_id=$[ ($BATCH_NO - 1) * 1000000 + 1 ] -max_id=$[ $BATCH_NO * 1000000 ] - -# allow min_id override on the command line -if [ "$MIN_ID" = "continue" ] ; then - last_id=$(echo "select max(id) from repos where ${min_id} <= id and id <= ${max_id}" | $psql) - if [ "$last_id" -eq "$last_id" ] 2> /dev/null ; then # is an integer? - echo "Continuing from last known id ${last_id}" - min_id=$last_id - fi -elif [ -n "$MIN_ID" ] ; then - min_id=$[ $MIN_ID > $min_id ? $MIN_ID : $min_id ] -fi - -cmd="bin/ghlister list ${min_id}-${max_id}" -echo Running $cmd ... -$cmd diff --git a/bin/ghlister b/bin/ghlister deleted file mode 100755 --- a/bin/ghlister +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (C) 2015 Stefano Zacchiroli -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import argparse -import logging -import sys - -from swh.lister.github import models -from swh.lister.github.lister import GitHubLister - -DEFAULT_CONF = { - 'cache_dir': './cache', - 'log_dir': './log', - 'cache_json': 'False', -} - - -def int_interval(s): - """parse an "N-M" string as an interval. - - Return an (N,M) int (or None) pair - - """ - def not_an_interval(): - raise argparse.ArgumentTypeError('not an interval: ' + s) - - def parse_int(s): - if s: - return int(s) - else: - return None - - if '-' not in s: - not_an_interval() - parts = s.split('-') - if len(parts) > 2: - not_an_interval() - return tuple([parse_int(p) for p in parts]) - - -def parse_args(): - cli = argparse.ArgumentParser( - description='list GitHub repositories and load them into a DB') - cli.add_argument('--db-url', '-d', metavar='SQLALCHEMY_URL', - help='SQLAlchemy DB URL (override conffile); see ' - '') # NOQA - cli.add_argument('--verbose', '-v', action='store_true', - help='be verbose') - - subcli = cli.add_subparsers(dest='action') - subcli.add_parser('createdb', help='initialize DB') - subcli.add_parser('dropdb', help='destroy DB') - - list_cli = subcli.add_parser('list', help='list repositories') - list_cli.add_argument('interval', - type=int_interval, - help='interval of repository IDs to list, ' - 'in N-M format; either N or M can be omitted.') - - list_cli = subcli.add_parser('catchup', - help='catchup with new repos since last time') - - args = cli.parse_args() - - if not args.action: - cli.error('no action given') - - return args - - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) # XXX - - args = parse_args() - - override_conf = {} - - lister = GitHubLister(lister_name='github.com', - api_baseurl='https://api.github.com', - override_config=override_conf) - - if args.action == 'createdb': - models.ModelBase.metadata.create_all(lister.db_engine) - elif args.action == 'dropdb': - models.ModelBase.metadata.drop_all(lister.db_engine) - elif args.action == 'list': - lister.fetch(min_id=args.interval[0], - max_id=args.interval[1]) - elif args.action == 'catchup': - last_known_id = lister.last_repo_id() - if last_known_id is not None: - logging.info('catching up from last known repo id: %d' % - last_known_id) - lister.fetch(min_id=last_known_id + 1, - max_id=None) - else: - logging.error('Cannot catchup: no last known id found. Abort.') - sys.exit(2) diff --git a/bin/reset.sh b/bin/reset.sh deleted file mode 100644 --- a/bin/reset.sh +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2015 Stefano Zacchiroli -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -export PYTHONPATH=`pwd` -dropdb github -createdb github -bin/ghlister createdb -rm cache/* diff --git a/bin/status b/bin/status deleted file mode 100755 --- a/bin/status +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2015 Stefano Zacchiroli -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -# intended usage: watch -n 60 bin/status - -DBNAME="github" -DBCONN="-p 5433" - -psql="psql $DBCONN --no-psqlrc" - -ps auxw | grep bin/batch -echo "SELECT COUNT(*), MAX(id) FROM repos" | $psql "$DBNAME" -echo "\\l+ ${DBNAME}" | $psql "$DBNAME" -du -sh cache/ -zgrep -i --color=auto "'X-RateLimit-Remaining'" cache/$(ls -t cache/ | head -n 4 | tail -n 1) diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -44,7 +44,6 @@ author_email='swh-devel@inria.fr', url='https://forge.softwareheritage.org/diffusion/DLSGH/', packages=find_packages(), - scripts=['bin/ghlister'], install_requires=parse_requirements() + parse_requirements('swh'), tests_require=parse_requirements('test'), setup_requires=['vcversioner'],