Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9124866
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
8 KB
Subscribers
None
View Options
diff --git a/bin/batch b/bin/batch
deleted file mode 100755
index 9796387..0000000
--- a/bin/batch
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-
-# Copyright (C) 2015 Stefano Zacchiroli <zack@upsilon.cc>
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-export https_proxy="127.0.0.1:8118" # use Tor
-export PYTHONPATH=`pwd`
-
-DBNAME=github
-DBCONN="-p 5433"
-
-psql="psql $DBCONN --no-psqlrc --pset t --pset format=unaligned ${DBNAME}"
-
-BATCH_NO="$1"
-shift
-if [ -z "$BATCH_NO" ] ; then
- echo "Usage: batch MILLION_NO [ MIN_ID | continue ]"
- exit 2
-fi
-
-MIN_ID="$1"
-shift
-
-min_id=$[ ($BATCH_NO - 1) * 1000000 + 1 ]
-max_id=$[ $BATCH_NO * 1000000 ]
-
-# allow min_id override on the command line
-if [ "$MIN_ID" = "continue" ] ; then
- last_id=$(echo "select max(id) from repos where ${min_id} <= id and id <= ${max_id}" | $psql)
- if [ "$last_id" -eq "$last_id" ] 2> /dev/null ; then # is an integer?
- echo "Continuing from last known id ${last_id}"
- min_id=$last_id
- fi
-elif [ -n "$MIN_ID" ] ; then
- min_id=$[ $MIN_ID > $min_id ? $MIN_ID : $min_id ]
-fi
-
-cmd="bin/ghlister list ${min_id}-${max_id}"
-echo Running $cmd ...
-$cmd
diff --git a/bin/ghlister b/bin/ghlister
deleted file mode 100755
index a18ab64..0000000
--- a/bin/ghlister
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright (C) 2015 Stefano Zacchiroli <zack@upsilon.cc>
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import argparse
-import logging
-import sys
-
-from swh.lister.github import models
-from swh.lister.github.lister import GitHubLister
-
-DEFAULT_CONF = {
- 'cache_dir': './cache',
- 'log_dir': './log',
- 'cache_json': 'False',
-}
-
-
-def int_interval(s):
- """parse an "N-M" string as an interval.
-
- Return an (N,M) int (or None) pair
-
- """
- def not_an_interval():
- raise argparse.ArgumentTypeError('not an interval: ' + s)
-
- def parse_int(s):
- if s:
- return int(s)
- else:
- return None
-
- if '-' not in s:
- not_an_interval()
- parts = s.split('-')
- if len(parts) > 2:
- not_an_interval()
- return tuple([parse_int(p) for p in parts])
-
-
-def parse_args():
- cli = argparse.ArgumentParser(
- description='list GitHub repositories and load them into a DB')
- cli.add_argument('--db-url', '-d', metavar='SQLALCHEMY_URL',
- help='SQLAlchemy DB URL (override conffile); see '
- '<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>') # NOQA
- cli.add_argument('--verbose', '-v', action='store_true',
- help='be verbose')
-
- subcli = cli.add_subparsers(dest='action')
- subcli.add_parser('createdb', help='initialize DB')
- subcli.add_parser('dropdb', help='destroy DB')
-
- list_cli = subcli.add_parser('list', help='list repositories')
- list_cli.add_argument('interval',
- type=int_interval,
- help='interval of repository IDs to list, '
- 'in N-M format; either N or M can be omitted.')
-
- list_cli = subcli.add_parser('catchup',
- help='catchup with new repos since last time')
-
- args = cli.parse_args()
-
- if not args.action:
- cli.error('no action given')
-
- return args
-
-
-if __name__ == '__main__':
- logging.basicConfig(level=logging.INFO) # XXX
-
- args = parse_args()
-
- override_conf = {}
-
- lister = GitHubLister(lister_name='github.com',
- api_baseurl='https://api.github.com',
- override_config=override_conf)
-
- if args.action == 'createdb':
- models.ModelBase.metadata.create_all(lister.db_engine)
- elif args.action == 'dropdb':
- models.ModelBase.metadata.drop_all(lister.db_engine)
- elif args.action == 'list':
- lister.fetch(min_id=args.interval[0],
- max_id=args.interval[1])
- elif args.action == 'catchup':
- last_known_id = lister.last_repo_id()
- if last_known_id is not None:
- logging.info('catching up from last known repo id: %d' %
- last_known_id)
- lister.fetch(min_id=last_known_id + 1,
- max_id=None)
- else:
- logging.error('Cannot catchup: no last known id found. Abort.')
- sys.exit(2)
diff --git a/bin/reset.sh b/bin/reset.sh
deleted file mode 100644
index f5bf69b..0000000
--- a/bin/reset.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright (C) 2015 Stefano Zacchiroli <zack@upsilon.cc>
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-export PYTHONPATH=`pwd`
-dropdb github
-createdb github
-bin/ghlister createdb
-rm cache/*
diff --git a/bin/status b/bin/status
deleted file mode 100755
index 8a3105f..0000000
--- a/bin/status
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-
-# Copyright (C) 2015 Stefano Zacchiroli <zack@upsilon.cc>
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-# intended usage: watch -n 60 bin/status
-
-DBNAME="github"
-DBCONN="-p 5433"
-
-psql="psql $DBCONN --no-psqlrc"
-
-ps auxw | grep bin/batch
-echo "SELECT COUNT(*), MAX(id) FROM repos" | $psql "$DBNAME"
-echo "\\l+ ${DBNAME}" | $psql "$DBNAME"
-du -sh cache/
-zgrep -i --color=auto "'X-RateLimit-Remaining'" cache/$(ls -t cache/ | head -n 4 | tail -n 1)
diff --git a/setup.py b/setup.py
index 521546c..99e74c2 100755
--- a/setup.py
+++ b/setup.py
@@ -1,72 +1,71 @@
#!/usr/bin/env python3
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from setuptools import setup, find_packages
from os import path
from io import open
here = path.abspath(path.dirname(__file__))
# Get the long description from the README file
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
long_description = f.read()
def parse_requirements(name=None):
if name:
reqf = 'requirements-%s.txt' % name
else:
reqf = 'requirements.txt'
requirements = []
if not path.exists(reqf):
return requirements
with open(reqf) as f:
for line in f.readlines():
line = line.strip()
if not line or line.startswith('#'):
continue
requirements.append(line)
return requirements
setup(
name='swh.lister',
description='Software Heritage lister',
long_description=long_description,
long_description_content_type='text/markdown',
author='Software Heritage developers',
author_email='swh-devel@inria.fr',
url='https://forge.softwareheritage.org/diffusion/DLSGH/',
packages=find_packages(),
- scripts=['bin/ghlister'],
install_requires=parse_requirements() + parse_requirements('swh'),
tests_require=parse_requirements('test'),
setup_requires=['vcversioner'],
extras_require={'testing': parse_requirements('test')},
vcversioner={'version_module_paths': ['swh/lister/_version.py']},
include_package_data=True,
entry_points='''
[console_scripts]
swh-lister=swh.lister.cli:cli
[swh.cli.subcommands]
lister=swh.lister.cli:lister
''',
classifiers=[
"Programming Language :: Python :: 3",
"Intended Audience :: Developers",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: OS Independent",
"Development Status :: 5 - Production/Stable",
],
project_urls={
'Bug Reports': 'https://forge.softwareheritage.org/maniphest',
'Funding': 'https://www.softwareheritage.org/donate',
'Source': 'https://forge.softwareheritage.org/source/swh-lister',
},
)
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Jun 21, 7:40 PM (3 w, 5 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3247758
Attached To
rDLS Listers
Event Timeline
Log In to Comment