Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/bin/batch b/bin/batch
deleted file mode 100755
index 9796387..0000000
--- a/bin/batch
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-
-# Copyright (C) 2015 Stefano Zacchiroli <zack@upsilon.cc>
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-export https_proxy="127.0.0.1:8118" # use Tor
-export PYTHONPATH=`pwd`
-
-DBNAME=github
-DBCONN="-p 5433"
-
-psql="psql $DBCONN --no-psqlrc --pset t --pset format=unaligned ${DBNAME}"
-
-BATCH_NO="$1"
-shift
-if [ -z "$BATCH_NO" ] ; then
- echo "Usage: batch MILLION_NO [ MIN_ID | continue ]"
- exit 2
-fi
-
-MIN_ID="$1"
-shift
-
-min_id=$[ ($BATCH_NO - 1) * 1000000 + 1 ]
-max_id=$[ $BATCH_NO * 1000000 ]
-
-# allow min_id override on the command line
-if [ "$MIN_ID" = "continue" ] ; then
- last_id=$(echo "select max(id) from repos where ${min_id} <= id and id <= ${max_id}" | $psql)
- if [ "$last_id" -eq "$last_id" ] 2> /dev/null ; then # is an integer?
- echo "Continuing from last known id ${last_id}"
- min_id=$last_id
- fi
-elif [ -n "$MIN_ID" ] ; then
- min_id=$[ $MIN_ID > $min_id ? $MIN_ID : $min_id ]
-fi
-
-cmd="bin/ghlister list ${min_id}-${max_id}"
-echo Running $cmd ...
-$cmd
diff --git a/bin/ghlister b/bin/ghlister
deleted file mode 100755
index a18ab64..0000000
--- a/bin/ghlister
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright (C) 2015 Stefano Zacchiroli <zack@upsilon.cc>
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import argparse
-import logging
-import sys
-
-from swh.lister.github import models
-from swh.lister.github.lister import GitHubLister
-
-DEFAULT_CONF = {
- 'cache_dir': './cache',
- 'log_dir': './log',
- 'cache_json': 'False',
-}
-
-
-def int_interval(s):
- """parse an "N-M" string as an interval.
-
- Return an (N,M) int (or None) pair
-
- """
- def not_an_interval():
- raise argparse.ArgumentTypeError('not an interval: ' + s)
-
- def parse_int(s):
- if s:
- return int(s)
- else:
- return None
-
- if '-' not in s:
- not_an_interval()
- parts = s.split('-')
- if len(parts) > 2:
- not_an_interval()
- return tuple([parse_int(p) for p in parts])
-
-
-def parse_args():
- cli = argparse.ArgumentParser(
- description='list GitHub repositories and load them into a DB')
- cli.add_argument('--db-url', '-d', metavar='SQLALCHEMY_URL',
- help='SQLAlchemy DB URL (override conffile); see '
- '<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>') # NOQA
- cli.add_argument('--verbose', '-v', action='store_true',
- help='be verbose')
-
- subcli = cli.add_subparsers(dest='action')
- subcli.add_parser('createdb', help='initialize DB')
- subcli.add_parser('dropdb', help='destroy DB')
-
- list_cli = subcli.add_parser('list', help='list repositories')
- list_cli.add_argument('interval',
- type=int_interval,
- help='interval of repository IDs to list, '
- 'in N-M format; either N or M can be omitted.')
-
- list_cli = subcli.add_parser('catchup',
- help='catchup with new repos since last time')
-
- args = cli.parse_args()
-
- if not args.action:
- cli.error('no action given')
-
- return args
-
-
-if __name__ == '__main__':
- logging.basicConfig(level=logging.INFO) # XXX
-
- args = parse_args()
-
- override_conf = {}
-
- lister = GitHubLister(lister_name='github.com',
- api_baseurl='https://api.github.com',
- override_config=override_conf)
-
- if args.action == 'createdb':
- models.ModelBase.metadata.create_all(lister.db_engine)
- elif args.action == 'dropdb':
- models.ModelBase.metadata.drop_all(lister.db_engine)
- elif args.action == 'list':
- lister.fetch(min_id=args.interval[0],
- max_id=args.interval[1])
- elif args.action == 'catchup':
- last_known_id = lister.last_repo_id()
- if last_known_id is not None:
- logging.info('catching up from last known repo id: %d' %
- last_known_id)
- lister.fetch(min_id=last_known_id + 1,
- max_id=None)
- else:
- logging.error('Cannot catchup: no last known id found. Abort.')
- sys.exit(2)
diff --git a/bin/reset.sh b/bin/reset.sh
deleted file mode 100644
index f5bf69b..0000000
--- a/bin/reset.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright (C) 2015 Stefano Zacchiroli <zack@upsilon.cc>
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-export PYTHONPATH=`pwd`
-dropdb github
-createdb github
-bin/ghlister createdb
-rm cache/*
diff --git a/bin/status b/bin/status
deleted file mode 100755
index 8a3105f..0000000
--- a/bin/status
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-
-# Copyright (C) 2015 Stefano Zacchiroli <zack@upsilon.cc>
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-# intended usage: watch -n 60 bin/status
-
-DBNAME="github"
-DBCONN="-p 5433"
-
-psql="psql $DBCONN --no-psqlrc"
-
-ps auxw | grep bin/batch
-echo "SELECT COUNT(*), MAX(id) FROM repos" | $psql "$DBNAME"
-echo "\\l+ ${DBNAME}" | $psql "$DBNAME"
-du -sh cache/
-zgrep -i --color=auto "'X-RateLimit-Remaining'" cache/$(ls -t cache/ | head -n 4 | tail -n 1)
diff --git a/setup.py b/setup.py
index 521546c..99e74c2 100755
--- a/setup.py
+++ b/setup.py
@@ -1,72 +1,71 @@
#!/usr/bin/env python3
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from setuptools import setup, find_packages
from os import path
from io import open
here = path.abspath(path.dirname(__file__))
# Get the long description from the README file
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
long_description = f.read()
def parse_requirements(name=None):
if name:
reqf = 'requirements-%s.txt' % name
else:
reqf = 'requirements.txt'
requirements = []
if not path.exists(reqf):
return requirements
with open(reqf) as f:
for line in f.readlines():
line = line.strip()
if not line or line.startswith('#'):
continue
requirements.append(line)
return requirements
setup(
name='swh.lister',
description='Software Heritage lister',
long_description=long_description,
long_description_content_type='text/markdown',
author='Software Heritage developers',
author_email='swh-devel@inria.fr',
url='https://forge.softwareheritage.org/diffusion/DLSGH/',
packages=find_packages(),
- scripts=['bin/ghlister'],
install_requires=parse_requirements() + parse_requirements('swh'),
tests_require=parse_requirements('test'),
setup_requires=['vcversioner'],
extras_require={'testing': parse_requirements('test')},
vcversioner={'version_module_paths': ['swh/lister/_version.py']},
include_package_data=True,
entry_points='''
[console_scripts]
swh-lister=swh.lister.cli:cli
[swh.cli.subcommands]
lister=swh.lister.cli:lister
''',
classifiers=[
"Programming Language :: Python :: 3",
"Intended Audience :: Developers",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: OS Independent",
"Development Status :: 5 - Production/Stable",
],
project_urls={
'Bug Reports': 'https://forge.softwareheritage.org/maniphest',
'Funding': 'https://www.softwareheritage.org/donate',
'Source': 'https://forge.softwareheritage.org/source/swh-lister',
},
)

File Metadata

Mime Type
text/x-diff
Expires
Sat, Jun 21, 7:40 PM (3 w, 5 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3247758

Event Timeline