Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9338834
ghlister
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
ghlister
View Options
#!/usr/bin/python3
# Copyright (C) 2015 Stefano Zacchiroli <zack@upsilon.cc>
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import
argparse
import
configparser
import
logging
import
os
import
sys
from
sqlalchemy
import
create_engine
from
sqlalchemy.orm
import
sessionmaker
from
swh.lister.github
import
lister
,
models
from
swh.lister.github.db_utils
import
session_scope
# Fallback option values handed to ConfigParser(defaults=...) in read_conf.
# Values are kept as strings, the way they would appear in an .ini file.
DEFAULT_CONF = dict(
    cache_dir='./cache',
    log_dir='./log',
    cache_json='False',
)
def db_connect(db_url):
    """Set up database access for the given SQLAlchemy URL.

    Return an (engine, session factory) pair, where the factory is the
    result of sessionmaker() bound to the engine.
    """
    db_engine = create_engine(db_url)
    return db_engine, sessionmaker(bind=db_engine)
def int_interval(s):
    """Convert an "N-M" string into a pair of bounds.

    Either side of the dash may be left empty, in which case that bound is
    None.  Return an (N, M) tuple whose items are ints or None.

    Raise argparse.ArgumentTypeError when the string does not contain
    exactly one dash; a non-empty bound that is not an integer makes int()
    raise ValueError.
    """
    low, dash, high = s.partition('-')
    # an empty separator means no dash at all; a dash left over in the
    # right-hand part means there were two or more
    if not dash or '-' in high:
        raise argparse.ArgumentTypeError('not an interval: ' + s)
    return tuple(int(bound) if bound else None for bound in (low, high))
def parse_args(argv=None):
    """Parse the command line of the GitHub lister.

    Args:
        argv: list of argument strings to parse.  When None (the default),
            argparse falls back to sys.argv[1:].

    Returns:
        The argparse.Namespace, carrying `db_url`, `verbose`, `action`
        (one of 'createdb', 'dropdb', 'list', 'catchup') and, for the
        'list' action, `interval` as converted by int_interval.

    Reports an error through the parser (usage message, then exit) when
    no action is given.
    """
    cli = argparse.ArgumentParser(
        description='list GitHub repositories and load them into a DB')
    cli.add_argument(
        '--db-url', '-d', metavar='SQLALCHEMY_URL',
        help='SQLAlchemy DB URL (override conffile); see '
        '<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>')  # NOQA
    cli.add_argument('--verbose', '-v', action='store_true',
                     help='be verbose')

    subcli = cli.add_subparsers(dest='action')
    subcli.add_parser('createdb', help='initialize DB')
    subcli.add_parser('dropdb', help='destroy DB')

    list_cli = subcli.add_parser('list', help='list repositories')
    list_cli.add_argument(
        'interval', type=int_interval,
        help='interval of repository IDs to list, '
        'in N-M format; either N or M can be omitted.')

    # 'catchup' takes no arguments of its own, so the sub-parser object is
    # not kept (it used to be bound to `list_cli`, shadowing the one above).
    subcli.add_parser('catchup',
                      help='catchup with new repos since last time')

    args = cli.parse_args(argv)

    # dest='action' stays None when no sub-command was typed
    if not args.action:
        cli.error('no action given')

    return args
def read_conf(args):
    """Load the lister configuration and apply command-line overrides.

    Settings are read from the [main] section of
    ~/.config/swh/lister-github.ini; options the file does not set fall
    back to DEFAULT_CONF.

    Args:
        args: parsed command line.  Only `args.db_url` is read; when set,
            it replaces the `db_url` option.

    Returns:
        A dict of settings in which `cache_json` is a bool and
        `credentials` is a list of {'username': ..., 'password': ...}
        dicts.  All other values are the raw strings from the file.

    Raises:
        KeyError: if there is no [main] section, or if `credentials` is
            absent and `username` or `password` is missing.
        ValueError: if an entry of `credentials` has no ':' separator.
    """
    config = configparser.ConfigParser(defaults=DEFAULT_CONF)
    config.read(os.path.expanduser('~/.config/swh/lister-github.ini'))

    # Use the public section API instead of the private config._sections:
    # that dict only holds options written in the file, so DEFAULT_CONF was
    # never applied, and mutating it altered the parser's own state.
    # raw=True returns values verbatim (as _sections did), so a '%' in a
    # password is not taken for interpolation syntax.
    main = config['main']
    conf = {option: main.get(option, raw=True) for option in main}

    # overrides
    if args.db_url:
        conf['db_url'] = args.db_url

    # typing
    conf['cache_json'] = conf.get('cache_json', 'false').lower() == 'true'

    if 'credentials' in conf:
        # whitespace-separated "user:password" entries; split on the first
        # ':' only so that passwords may themselves contain colons
        pairs = (entry.split(':', 1)
                 for entry in conf['credentials'].split())
        conf['credentials'] = [
            {'username': username, 'password': password}
            for username, password in pairs
        ]
    else:
        conf['credentials'] = [{
            'username': conf['username'],
            'password': conf['password'],
        }]

    return conf
# Script entry point: set up logging, read the command line and the
# configuration, connect to the DB, then run the requested action.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)  # XXX

    args = parse_args()
    conf = read_conf(args)
    db_engine, mk_session = db_connect(conf['db_url'])

    action = args.action
    if action in ('createdb', 'dropdb'):
        # both schema actions operate on the models' shared metadata
        schema = models.SQLBase.metadata
        if action == 'createdb':
            schema.create_all(db_engine)
        else:
            schema.drop_all(db_engine)
    elif action == 'list':
        first_id, last_id = args.interval
        lister.fetch(conf, mk_session, min_id=first_id, max_id=last_id)
    elif action == 'catchup':
        with session_scope(mk_session) as db_session:
            last_known_id = lister.last_repo_id(db_session)
            if last_known_id is None:
                logging.error(
                    'Cannot catchup: no last known id found. Abort.')
                sys.exit(2)
            logging.info('catching up from last known repo id: %d' %
                         last_known_id)
            # resume right after the last repository already recorded
            lister.fetch(conf, mk_session,
                         min_id=last_known_id + 1, max_id=None)
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Jul 4 2025, 9:10 AM (6 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3436772
Attached To
rDLS Listers
Event Timeline
Log In to Comment