diff --git a/.gitignore b/.gitignore index 67cfee3..47817bb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,11 @@ *.pyc *.sw? *~ .coverage .eggs/ __pycache__ -dist +build/ +dist/ *.egg-info version.txt swh/lister/_version.py diff --git a/PKG-INFO b/PKG-INFO index a836b9f..27f7d11 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,222 @@ -Metadata-Version: 1.0 +Metadata-Version: 2.1 Name: swh.lister -Version: 0.0.17 -Summary: Software Heritage GitHub lister +Version: 0.0.18 +Summary: Software Heritage lister Home-page: https://forge.softwareheritage.org/diffusion/DLSGH/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN -Description: UNKNOWN +Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest +Project-URL: Funding, https://www.softwareheritage.org/donate +Project-URL: Source, https://forge.softwareheritage.org/source/swh-lister +Description: SWH-lister + ============ + + The Software Heritage Lister is both a library module to permit to + centralize lister behaviors, and to provide lister implementations. + + Actual lister implementations are: + + - swh-lister-bitbucket + - swh-lister-debian + - swh-lister-github + - swh-lister-gitlab + - swh-lister-pypi + + Licensing + ---------- + + This program is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free Software + Foundation, either version 3 of the License, or (at your option) any later + version. + + This program is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + PARTICULAR PURPOSE. See the GNU General Public License for more details. + + See top-level LICENSE file for the full text of the GNU General Public License + along with this program. + + + Dependencies + ------------ + + - python3 + - python3-requests + - python3-sqlalchemy + + More details in requirements*.txt + + + Local deployment + ----------- + + ## lister-github + + ### Preparation steps + + 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) + 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/github.com/ + 3. create configuration file ~/.config/swh/lister-github.com.yml + 4. Bootstrap the db instance schema + + $ createdb lister-github + $ python3 -m swh.lister.cli --db-url postgres:///lister-github \ + --lister github \ + --create-tables + + ### Configuration file sample + + Minimalistic configuration: + + $ cat ~/.config/swh/lister-github.com.yml + # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls + lister_db_url: postgres:///lister-github + credentials: [] + cache_responses: True + cache_dir: /home/user/.cache/swh/lister/github.com + + Note: This expects storage (5002) and scheduler (5008) services to run locally + + ### Run + + $ python3 + >>> import logging + >>> logging.basicConfig(level=logging.DEBUG) + >>> from swh.lister.github.tasks import RangeGitHubLister; RangeGitHubLister().run(364, 365) + INFO:root:listing repos starting at 364 + DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.github.com + DEBUG:urllib3.connectionpool:https://api.github.com:443 "GET /repositories?since=364 HTTP/1.1" 200 None + DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost + DEBUG:urllib3.connectionpool:http://localhost:5002 "POST /origin/add HTTP/1.1" 200 1 + + + ## lister-gitlab + + ### preparation steps + + 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) + 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/gitlab/ + 3. create configuration file ~/.config/swh/lister-gitlab.yml + 4. Bootstrap the db instance schema + + $ createdb lister-gitlab + $ python3 -m swh.lister.cli --db-url postgres:///lister-gitlab \ + --lister gitlab \ + --create-tables + + ### Configuration file sample + + $ cat ~/.config/swh/lister-gitlab.yml + # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls + lister_db_url: postgres:///lister-gitlab + credentials: [] + cache_responses: True + cache_dir: /home/user/.cache/swh/lister/gitlab + + Note: This expects storage (5002) and scheduler (5008) services to run locally + + ### Run + + $ python3 + Python 3.6.6 (default, Jun 27 2018, 14:44:17) + [GCC 8.1.0] on linux + Type "help", "copyright", "credits" or "license" for more information. + >>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2, + {'instance': 'debian', 'api_baseurl': 'https://salsa.debian.org/api/v4', 'sort': 'asc', 'per_page': 20}) + >>> from swh.lister.gitlab.tasks import FullGitLabRelister; FullGitLabRelister().run_task( + {'instance':'0xacab', 'api_baseurl':'https://0xacab.org/api/v4', 'sort': 'asc', 'per_page': 20}) + >>> from swh.lister.gitlab.tasks import IncrementalGitLabLister; IncrementalGitLabLister().run_task( + {'instance': 'freedesktop.org', 'api_baseurl': 'https://gitlab.freedesktop.org/api/v4', + 'sort': 'asc', 'per_page': 20}) + + ## lister-debian + + ### preparation steps + + 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) + 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/debian/ + 3. create configuration file ~/.config/swh/lister-debian.yml + 4. Bootstrap the db instance schema + + $ createdb lister-debian + $ python3 -m swh.lister.cli --db-url postgres:///lister-debian \ + --lister debian \ + --create-tables \ + --with-data + + Note: This bootstraps a minimum data set needed for the debian + lister to run (for development) + + ### Configuration file sample + + $ cat ~/.config/swh/lister-debian.yml + # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls + lister_db_url: postgres:///lister-debian + credentials: [] + cache_responses: True + cache_dir: /home/user/.cache/swh/lister/debian + + Note: This expects storage (5002) and scheduler (5008) services to run locally + + ### Run + + $ python3 + Python 3.6.6 (default, Jun 27 2018, 14:44:17) + [GCC 8.1.0] on linux + Type "help", "copyright", "credits" or "license" for more information. + >>> import logging; logging.basicConfig(level=logging.DEBUG); from swh.lister.debian.tasks import DebianListerTask; DebianListerTask().run_task('Debian') + DEBUG:root:Creating snapshot for distribution Distribution(Debian (deb) on http://deb.debian.org/debian/) on date 2018-07-27 09:22:50.461165+00:00 + DEBUG:root:Processing area Area(stretch/main of Debian) + DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): deb.debian.org + DEBUG:urllib3.connectionpool:http://deb.debian.org:80 "GET /debian//dists/stretch/main/source/Sources.xz HTTP/1.1" 302 325 + ... + + + ## lister-pypi + + ### preparation steps + + 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) + 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/pypi/ + 3. create configuration file ~/.config/swh/lister-pypi.yml + 4. Bootstrap the db instance schema + + $ createdb lister-pypi + $ python3 -m swh.lister.cli --db-url postgres:///lister-pypi \ + --lister pypi \ + --create-tables \ + --with-data + + Note: This bootstraps a minimum data set needed for the pypi + lister to run (for development) + + ### Configuration file sample + + $ cat ~/.config/swh/lister-pypi.yml + # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls + lister_db_url: postgres:///lister-pypi + credentials: [] + cache_responses: True + cache_dir: /home/user/.cache/swh/lister/pypi + + Note: This expects storage (5002) and scheduler (5008) services to run locally + + ### Run + + $ python3 + Python 3.6.6 (default, Jun 27 2018, 14:44:17) + [GCC 8.1.0] on linux + Type "help", "copyright", "credits" or "license" for more information. + >>> from swh.lister.pypi.tasks import PyPIListerTask; PyPIListerTask().run_task() + >>> + Platform: UNKNOWN +Classifier: Programming Language :: Python :: 3 +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) +Classifier: Operating System :: OS Independent +Classifier: Development Status :: 5 - Production/Stable +Description-Content-Type: text/markdown +Provides-Extra: testing diff --git a/bin/ghlister b/bin/ghlister index d8e6cf5..95b10b5 100755 --- a/bin/ghlister +++ b/bin/ghlister @@ -1,103 +1,103 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Copyright (C) 2015 Stefano Zacchiroli # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import argparse import logging import sys from swh.lister.github import models from swh.lister.github.lister import GitHubLister DEFAULT_CONF = { 'cache_dir': './cache', 'log_dir': './log', 'cache_json': 'False', } def int_interval(s): """parse an "N-M" string as an interval. Return an (N,M) int (or None) pair """ def not_an_interval(): raise argparse.ArgumentTypeError('not an interval: ' + s) def parse_int(s): if s: return int(s) else: return None if '-' not in s: not_an_interval() parts = s.split('-') if len(parts) > 2: not_an_interval() return tuple([parse_int(p) for p in parts]) def parse_args(): cli = argparse.ArgumentParser( description='list GitHub repositories and load them into a DB') cli.add_argument('--db-url', '-d', metavar='SQLALCHEMY_URL', help='SQLAlchemy DB URL (override conffile); see ' '') # NOQA cli.add_argument('--verbose', '-v', action='store_true', help='be verbose') subcli = cli.add_subparsers(dest='action') subcli.add_parser('createdb', help='initialize DB') subcli.add_parser('dropdb', help='destroy DB') list_cli = subcli.add_parser('list', help='list repositories') list_cli.add_argument('interval', type=int_interval, help='interval of repository IDs to list, ' 'in N-M format; either N or M can be omitted.') list_cli = subcli.add_parser('catchup', help='catchup with new repos since last time') args = cli.parse_args() if not args.action: cli.error('no action given') return args if __name__ == '__main__': logging.basicConfig(level=logging.INFO) # XXX args = parse_args() override_conf = {} if args.db_url: override_conf['lister_db_url'] = args.db_url lister = GitHubLister(lister_name='github.com', api_baseurl='https://api.github.com', override_config=override_conf) if args.action == 'createdb': models.ModelBase.metadata.create_all(lister.db_engine) elif args.action == 'dropdb': models.ModelBase.metadata.drop_all(lister.db_engine) elif args.action == 'list': lister.fetch(min_id=args.interval[0], max_id=args.interval[1]) elif args.action == 'catchup': last_known_id = lister.last_repo_id() if last_known_id is not None: logging.info('catching up from last known repo id: %d' % last_known_id) lister.fetch(min_id=last_known_id + 1, max_id=None) else: logging.error('Cannot catchup: no last known id found. Abort.') sys.exit(2) diff --git a/docs/index.rst b/docs/index.rst index 653b85e..3b76f15 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,22 +1,28 @@ .. _swh-lister: -Software Heritage listers -========================= +Software Heritage - Listers +=========================== + +Collection of listers for source code distribution places like development +forges, FOSS distributions, package managers, etc. Each lister is in charge to +enumerate the software origins (e.g., VCS, packages, etc.) available at a +source code distribution place. + .. toctree:: :maxdepth: 2 :caption: Contents: Overview -------- * :ref:`lister-tutorial` Indices and tables ------------------ * :ref:`genindex` * :ref:`modindex` * :ref:`search` diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index 42e154f..ab5e565 --- a/setup.py +++ b/setup.py @@ -1,41 +1,67 @@ #!/usr/bin/env python3 - -import os +# Copyright (C) 2015-2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information from setuptools import setup, find_packages +from os import path +from io import open + +here = path.abspath(path.dirname(__file__)) + +# Get the long description from the README file +with open(path.join(here, 'README.md'), encoding='utf-8') as f: + long_description = f.read() + def parse_requirements(name=None): if name: reqf = 'requirements-%s.txt' % name else: reqf = 'requirements.txt' requirements = [] - if not os.path.exists(reqf): + if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith('#'): continue requirements.append(line) return requirements setup( name='swh.lister', - description='Software Heritage GitHub lister', + description='Software Heritage lister', + long_description=long_description, + long_description_content_type='text/markdown', author='Software Heritage developers', author_email='swh-devel@inria.fr', url='https://forge.softwareheritage.org/diffusion/DLSGH/', packages=find_packages(), scripts=['bin/ghlister'], install_requires=parse_requirements() + parse_requirements('swh'), test_requires=parse_requirements('test'), test_suite='nose.collector', setup_requires=['vcversioner'], + extras_require={'testing': parse_requirements('test')}, vcversioner={'version_module_paths': ['swh/lister/_version.py']}, include_package_data=True, + classifiers=[ + "Programming Language :: Python :: 3", + "Intended Audience :: Developers", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Operating System :: OS Independent", + "Development Status :: 5 - Production/Stable", + ], + project_urls={ + 'Bug Reports': 'https://forge.softwareheritage.org/maniphest', + 'Funding': 'https://www.softwareheritage.org/donate', + 'Source': 'https://forge.softwareheritage.org/source/swh-lister', + }, ) diff --git a/swh.lister.egg-info/PKG-INFO b/swh.lister.egg-info/PKG-INFO index a836b9f..27f7d11 100644 --- a/swh.lister.egg-info/PKG-INFO +++ b/swh.lister.egg-info/PKG-INFO @@ -1,10 +1,222 @@ -Metadata-Version: 1.0 +Metadata-Version: 2.1 Name: swh.lister -Version: 0.0.17 -Summary: Software Heritage GitHub lister +Version: 0.0.18 +Summary: Software Heritage lister Home-page: https://forge.softwareheritage.org/diffusion/DLSGH/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN -Description: UNKNOWN +Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest +Project-URL: Funding, https://www.softwareheritage.org/donate +Project-URL: Source, https://forge.softwareheritage.org/source/swh-lister +Description: SWH-lister + ============ + + The Software Heritage Lister is both a library module to permit to + centralize lister behaviors, and to provide lister implementations. + + Actual lister implementations are: + + - swh-lister-bitbucket + - swh-lister-debian + - swh-lister-github + - swh-lister-gitlab + - swh-lister-pypi + + Licensing + ---------- + + This program is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free Software + Foundation, either version 3 of the License, or (at your option) any later + version. + + This program is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + PARTICULAR PURPOSE. See the GNU General Public License for more details. + + See top-level LICENSE file for the full text of the GNU General Public License + along with this program. + + + Dependencies + ------------ + + - python3 + - python3-requests + - python3-sqlalchemy + + More details in requirements*.txt + + + Local deployment + ----------- + + ## lister-github + + ### Preparation steps + + 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) + 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/github.com/ + 3. create configuration file ~/.config/swh/lister-github.com.yml + 4. Bootstrap the db instance schema + + $ createdb lister-github + $ python3 -m swh.lister.cli --db-url postgres:///lister-github \ + --lister github \ + --create-tables + + ### Configuration file sample + + Minimalistic configuration: + + $ cat ~/.config/swh/lister-github.com.yml + # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls + lister_db_url: postgres:///lister-github + credentials: [] + cache_responses: True + cache_dir: /home/user/.cache/swh/lister/github.com + + Note: This expects storage (5002) and scheduler (5008) services to run locally + + ### Run + + $ python3 + >>> import logging + >>> logging.basicConfig(level=logging.DEBUG) + >>> from swh.lister.github.tasks import RangeGitHubLister; RangeGitHubLister().run(364, 365) + INFO:root:listing repos starting at 364 + DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.github.com + DEBUG:urllib3.connectionpool:https://api.github.com:443 "GET /repositories?since=364 HTTP/1.1" 200 None + DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost + DEBUG:urllib3.connectionpool:http://localhost:5002 "POST /origin/add HTTP/1.1" 200 1 + + + ## lister-gitlab + + ### preparation steps + + 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) + 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/gitlab/ + 3. create configuration file ~/.config/swh/lister-gitlab.yml + 4. Bootstrap the db instance schema + + $ createdb lister-gitlab + $ python3 -m swh.lister.cli --db-url postgres:///lister-gitlab \ + --lister gitlab \ + --create-tables + + ### Configuration file sample + + $ cat ~/.config/swh/lister-gitlab.yml + # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls + lister_db_url: postgres:///lister-gitlab + credentials: [] + cache_responses: True + cache_dir: /home/user/.cache/swh/lister/gitlab + + Note: This expects storage (5002) and scheduler (5008) services to run locally + + ### Run + + $ python3 + Python 3.6.6 (default, Jun 27 2018, 14:44:17) + [GCC 8.1.0] on linux + Type "help", "copyright", "credits" or "license" for more information. + >>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2, + {'instance': 'debian', 'api_baseurl': 'https://salsa.debian.org/api/v4', 'sort': 'asc', 'per_page': 20}) + >>> from swh.lister.gitlab.tasks import FullGitLabRelister; FullGitLabRelister().run_task( + {'instance':'0xacab', 'api_baseurl':'https://0xacab.org/api/v4', 'sort': 'asc', 'per_page': 20}) + >>> from swh.lister.gitlab.tasks import IncrementalGitLabLister; IncrementalGitLabLister().run_task( + {'instance': 'freedesktop.org', 'api_baseurl': 'https://gitlab.freedesktop.org/api/v4', + 'sort': 'asc', 'per_page': 20}) + + ## lister-debian + + ### preparation steps + + 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) + 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/debian/ + 3. create configuration file ~/.config/swh/lister-debian.yml + 4. Bootstrap the db instance schema + + $ createdb lister-debian + $ python3 -m swh.lister.cli --db-url postgres:///lister-debian \ + --lister debian \ + --create-tables \ + --with-data + + Note: This bootstraps a minimum data set needed for the debian + lister to run (for development) + + ### Configuration file sample + + $ cat ~/.config/swh/lister-debian.yml + # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls + lister_db_url: postgres:///lister-debian + credentials: [] + cache_responses: True + cache_dir: /home/user/.cache/swh/lister/debian + + Note: This expects storage (5002) and scheduler (5008) services to run locally + + ### Run + + $ python3 + Python 3.6.6 (default, Jun 27 2018, 14:44:17) + [GCC 8.1.0] on linux + Type "help", "copyright", "credits" or "license" for more information. + >>> import logging; logging.basicConfig(level=logging.DEBUG); from swh.lister.debian.tasks import DebianListerTask; DebianListerTask().run_task('Debian') + DEBUG:root:Creating snapshot for distribution Distribution(Debian (deb) on http://deb.debian.org/debian/) on date 2018-07-27 09:22:50.461165+00:00 + DEBUG:root:Processing area Area(stretch/main of Debian) + DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): deb.debian.org + DEBUG:urllib3.connectionpool:http://deb.debian.org:80 "GET /debian//dists/stretch/main/source/Sources.xz HTTP/1.1" 302 325 + ... + + + ## lister-pypi + + ### preparation steps + + 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) + 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/pypi/ + 3. create configuration file ~/.config/swh/lister-pypi.yml + 4. Bootstrap the db instance schema + + $ createdb lister-pypi + $ python3 -m swh.lister.cli --db-url postgres:///lister-pypi \ + --lister pypi \ + --create-tables \ + --with-data + + Note: This bootstraps a minimum data set needed for the pypi + lister to run (for development) + + ### Configuration file sample + + $ cat ~/.config/swh/lister-pypi.yml + # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls + lister_db_url: postgres:///lister-pypi + credentials: [] + cache_responses: True + cache_dir: /home/user/.cache/swh/lister/pypi + + Note: This expects storage (5002) and scheduler (5008) services to run locally + + ### Run + + $ python3 + Python 3.6.6 (default, Jun 27 2018, 14:44:17) + [GCC 8.1.0] on linux + Type "help", "copyright", "credits" or "license" for more information. + >>> from swh.lister.pypi.tasks import PyPIListerTask; PyPIListerTask().run_task() + >>> + Platform: UNKNOWN +Classifier: Programming Language :: Python :: 3 +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) +Classifier: Operating System :: OS Independent +Classifier: Development Status :: 5 - Production/Stable +Description-Content-Type: text/markdown +Provides-Extra: testing diff --git a/swh.lister.egg-info/requires.txt b/swh.lister.egg-info/requires.txt index 52dd64b..421835d 100644 --- a/swh.lister.egg-info/requires.txt +++ b/swh.lister.egg-info/requires.txt @@ -1,10 +1,15 @@ SQLAlchemy arrow python_debian requests setuptools swh.core swh.scheduler>=0.0.31 swh.storage>=0.0.103 swh.storage[schemata]>=0.0.76 xmltodict + +[testing] +nose +requests_mock +testing.postgresql diff --git a/swh/lister/_version.py b/swh/lister/_version.py index e25055e..c6eccd0 100644 --- a/swh/lister/_version.py +++ b/swh/lister/_version.py @@ -1,5 +1,5 @@ # This file is automatically generated by setup.py. -__version__ = '0.0.17' -__sha__ = 'g5b20eff' -__revision__ = 'g5b20eff' +__version__ = '0.0.18' +__sha__ = 'g8f5b10b' +__revision__ = 'g8f5b10b' diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py index d24d773..97b8024 100644 --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -1,122 +1,123 @@ # Copyright (C) 2018 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import random import time from ..core.page_by_page_lister import PageByPageHttpLister from .models import GitLabModel class GitLabLister(PageByPageHttpLister): # Template path expecting an integer that represents the page id PATH_TEMPLATE = '/projects?page=%d&order_by=id' MODEL = GitLabModel LISTER_NAME = 'gitlab' def __init__(self, api_baseurl=None, instance=None, override_config=None, sort='asc', per_page=20): super().__init__(api_baseurl=api_baseurl, override_config=override_config) self.instance = instance self.PATH_TEMPLATE = '%s&sort=%s' % (self.PATH_TEMPLATE, sort) if per_page != 20: self.PATH_TEMPLATE = '%s&per_page=%s' % ( self.PATH_TEMPLATE, per_page) @property def ADDITIONAL_CONFIG(self): """Override additional config as the 'credentials' structure change between the ancestor classes and this class. cf. request_params method below """ default_config = super().ADDITIONAL_CONFIG # 'credentials' is a dict of (instance, {username, password}) dict default_config['credentials'] = ('dict', {}) return default_config def request_params(self, identifier): """Get the full parameters passed to requests given the transport_request identifier. For the gitlab lister, the 'credentials' entries is configured per instance. For example: - credentials: - gitlab.com: - username: user0 password: - username: user1 password: - ... - other-gitlab-instance: ... """ params = { 'headers': self.request_headers() or {} } - # Retrieve the credentials per instance - creds = self.config['credentials'] - if creds: - creds_lister = creds[self.instance] - auth = random.choice(creds_lister) if creds else None + creds_lister = self.config['credentials'].get(self.instance) + if creds_lister: + auth = random.choice(creds_lister) if auth: params['auth'] = (auth['username'], auth['password']) return params def uid(self, repo): return '%s/%s' % (self.instance, repo['path_with_namespace']) def get_model_from_repo(self, repo): return { 'instance': self.instance, 'uid': self.uid(repo), 'name': repo['name'], 'full_name': repo['path_with_namespace'], 'html_url': repo['web_url'], 'origin_url': repo['http_url_to_repo'], 'origin_type': 'git', 'description': repo['description'], } def transport_quota_check(self, response): """Deal with rate limit if any. """ # not all gitlab instance have rate limit if 'RateLimit-Remaining' in response.headers: reqs_remaining = int(response.headers['RateLimit-Remaining']) if response.status_code == 403 and reqs_remaining == 0: reset_at = int(response.headers['RateLimit-Reset']) delay = min(reset_at - time.time(), 3600) return True, delay return False, 0 def _get_int(self, headers, key): _val = headers.get(key) if _val: return int(_val) def get_next_target_from_response(self, response): """Determine the next page identifier. """ return self._get_int(response.headers, 'x-next-page') def get_pages_information(self): """Determine pages information. """ response = self.transport_head(identifier=1) + if not response.ok: + raise ValueError( + 'Problem during information fetch: %s' % response.status_code) h = response.headers return (self._get_int(h, 'x-total'), self._get_int(h, 'x-total-pages'), self._get_int(h, 'x-per-page')) def transport_response_simplified(self, response): repos = response.json() return [self.get_model_from_repo(repo) for repo in repos] diff --git a/swh/lister/gitlab/models.py b/swh/lister/gitlab/models.py index 68841bf..2b045a3 100644 --- a/swh/lister/gitlab/models.py +++ b/swh/lister/gitlab/models.py @@ -1,28 +1,28 @@ # Copyright (C) 2018 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from sqlalchemy import Column, Integer, String +from sqlalchemy import Column, String from ..core.models import ModelBase class GitLabModel(ModelBase): """a Gitlab repository from a gitlab instance """ __tablename__ = 'gitlab_repo' uid = Column(String, primary_key=True) instance = Column(String, index=True) def __init__(self, uid=None, indexable=None, name=None, full_name=None, html_url=None, origin_url=None, origin_type=None, description=None, task_id=None, origin_id=None, instance=None): super().__init__(uid=uid, name=name, full_name=full_name, html_url=html_url, origin_url=origin_url, origin_type=origin_type, description=description, task_id=task_id, origin_id=origin_id) self.instance = instance diff --git a/version.txt b/version.txt index d82033b..12470b5 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.17-0-g5b20eff \ No newline at end of file +v0.0.18-0-g8f5b10b \ No newline at end of file