diff --git a/PKG-INFO b/PKG-INFO index 2d0c65e..4bb8d05 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,222 +1,222 @@ Metadata-Version: 2.1 Name: swh.lister -Version: 0.0.19 +Version: 0.0.20 Summary: Software Heritage lister Home-page: https://forge.softwareheritage.org/diffusion/DLSGH/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN -Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest -Project-URL: Source, https://forge.softwareheritage.org/source/swh-lister Project-URL: Funding, https://www.softwareheritage.org/donate +Project-URL: Source, https://forge.softwareheritage.org/source/swh-lister +Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Description: SWH-lister ============ The Software Heritage Lister is both a library module to permit to centralize lister behaviors, and to provide lister implementations. Actual lister implementations are: - swh-lister-bitbucket - swh-lister-debian - swh-lister-github - swh-lister-gitlab - swh-lister-pypi Licensing ---------- This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. See top-level LICENSE file for the full text of the GNU General Public License along with this program. Dependencies ------------ - python3 - python3-requests - python3-sqlalchemy More details in requirements*.txt Local deployment ----------- ## lister-github ### Preparation steps 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/github.com/ 3. create configuration file ~/.config/swh/lister-github.com.yml 4. Bootstrap the db instance schema $ createdb lister-github $ python3 -m swh.lister.cli --db-url postgres:///lister-github \ --lister github \ --create-tables ### Configuration file sample Minimalistic configuration: $ cat ~/.config/swh/lister-github.com.yml # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls lister_db_url: postgres:///lister-github credentials: [] cache_responses: True cache_dir: /home/user/.cache/swh/lister/github.com Note: This expects storage (5002) and scheduler (5008) services to run locally ### Run $ python3 >>> import logging >>> logging.basicConfig(level=logging.DEBUG) >>> from swh.lister.github.tasks import RangeGitHubLister; RangeGitHubLister().run(364, 365) INFO:root:listing repos starting at 364 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.github.com DEBUG:urllib3.connectionpool:https://api.github.com:443 "GET /repositories?since=364 HTTP/1.1" 200 None DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost DEBUG:urllib3.connectionpool:http://localhost:5002 "POST /origin/add HTTP/1.1" 200 1 ## lister-gitlab ### preparation steps 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/gitlab/ 3. create configuration file ~/.config/swh/lister-gitlab.yml 4. Bootstrap the db instance schema $ createdb lister-gitlab $ python3 -m swh.lister.cli --db-url postgres:///lister-gitlab \ --lister gitlab \ --create-tables ### Configuration file sample $ cat ~/.config/swh/lister-gitlab.yml # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls lister_db_url: postgres:///lister-gitlab credentials: [] cache_responses: True cache_dir: /home/user/.cache/swh/lister/gitlab Note: This expects storage (5002) and scheduler (5008) services to run locally ### Run $ python3 Python 3.6.6 (default, Jun 27 2018, 14:44:17) [GCC 8.1.0] on linux Type "help", "copyright", "credits" or "license" for more information. >>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2, {'instance': 'debian', 'api_baseurl': 'https://salsa.debian.org/api/v4', 'sort': 'asc', 'per_page': 20}) >>> from swh.lister.gitlab.tasks import FullGitLabRelister; FullGitLabRelister().run_task( {'instance':'0xacab', 'api_baseurl':'https://0xacab.org/api/v4', 'sort': 'asc', 'per_page': 20}) >>> from swh.lister.gitlab.tasks import IncrementalGitLabLister; IncrementalGitLabLister().run_task( {'instance': 'freedesktop.org', 'api_baseurl': 'https://gitlab.freedesktop.org/api/v4', 'sort': 'asc', 'per_page': 20}) ## lister-debian ### preparation steps 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/debian/ 3. create configuration file ~/.config/swh/lister-debian.yml 4. Bootstrap the db instance schema $ createdb lister-debian $ python3 -m swh.lister.cli --db-url postgres:///lister-debian \ --lister debian \ --create-tables \ --with-data Note: This bootstraps a minimum data set needed for the debian lister to run (for development) ### Configuration file sample $ cat ~/.config/swh/lister-debian.yml # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls lister_db_url: postgres:///lister-debian credentials: [] cache_responses: True cache_dir: /home/user/.cache/swh/lister/debian Note: This expects storage (5002) and scheduler (5008) services to run locally ### Run $ python3 Python 3.6.6 (default, Jun 27 2018, 14:44:17) [GCC 8.1.0] on linux Type "help", "copyright", "credits" or "license" for more information. >>> import logging; logging.basicConfig(level=logging.DEBUG); from swh.lister.debian.tasks import DebianListerTask; DebianListerTask().run_task('Debian') DEBUG:root:Creating snapshot for distribution Distribution(Debian (deb) on http://deb.debian.org/debian/) on date 2018-07-27 09:22:50.461165+00:00 DEBUG:root:Processing area Area(stretch/main of Debian) DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): deb.debian.org DEBUG:urllib3.connectionpool:http://deb.debian.org:80 "GET /debian//dists/stretch/main/source/Sources.xz HTTP/1.1" 302 325 ... ## lister-pypi ### preparation steps 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/pypi/ 3. create configuration file ~/.config/swh/lister-pypi.yml 4. Bootstrap the db instance schema $ createdb lister-pypi $ python3 -m swh.lister.cli --db-url postgres:///lister-pypi \ --lister pypi \ --create-tables \ --with-data Note: This bootstraps a minimum data set needed for the pypi lister to run (for development) ### Configuration file sample $ cat ~/.config/swh/lister-pypi.yml # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls lister_db_url: postgres:///lister-pypi credentials: [] cache_responses: True cache_dir: /home/user/.cache/swh/lister/pypi Note: This expects storage (5002) and scheduler (5008) services to run locally ### Run $ python3 Python 3.6.6 (default, Jun 27 2018, 14:44:17) [GCC 8.1.0] on linux Type "help", "copyright", "credits" or "license" for more information. >>> from swh.lister.pypi.tasks import PyPIListerTask; PyPIListerTask().run_task() >>> Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/swh.lister.egg-info/PKG-INFO b/swh.lister.egg-info/PKG-INFO index 2d0c65e..4bb8d05 100644 --- a/swh.lister.egg-info/PKG-INFO +++ b/swh.lister.egg-info/PKG-INFO @@ -1,222 +1,222 @@ Metadata-Version: 2.1 Name: swh.lister -Version: 0.0.19 +Version: 0.0.20 Summary: Software Heritage lister Home-page: https://forge.softwareheritage.org/diffusion/DLSGH/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN -Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest -Project-URL: Source, https://forge.softwareheritage.org/source/swh-lister Project-URL: Funding, https://www.softwareheritage.org/donate +Project-URL: Source, https://forge.softwareheritage.org/source/swh-lister +Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Description: SWH-lister ============ The Software Heritage Lister is both a library module to permit to centralize lister behaviors, and to provide lister implementations. Actual lister implementations are: - swh-lister-bitbucket - swh-lister-debian - swh-lister-github - swh-lister-gitlab - swh-lister-pypi Licensing ---------- This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. See top-level LICENSE file for the full text of the GNU General Public License along with this program. Dependencies ------------ - python3 - python3-requests - python3-sqlalchemy More details in requirements*.txt Local deployment ----------- ## lister-github ### Preparation steps 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/github.com/ 3. create configuration file ~/.config/swh/lister-github.com.yml 4. Bootstrap the db instance schema $ createdb lister-github $ python3 -m swh.lister.cli --db-url postgres:///lister-github \ --lister github \ --create-tables ### Configuration file sample Minimalistic configuration: $ cat ~/.config/swh/lister-github.com.yml # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls lister_db_url: postgres:///lister-github credentials: [] cache_responses: True cache_dir: /home/user/.cache/swh/lister/github.com Note: This expects storage (5002) and scheduler (5008) services to run locally ### Run $ python3 >>> import logging >>> logging.basicConfig(level=logging.DEBUG) >>> from swh.lister.github.tasks import RangeGitHubLister; RangeGitHubLister().run(364, 365) INFO:root:listing repos starting at 364 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.github.com DEBUG:urllib3.connectionpool:https://api.github.com:443 "GET /repositories?since=364 HTTP/1.1" 200 None DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost DEBUG:urllib3.connectionpool:http://localhost:5002 "POST /origin/add HTTP/1.1" 200 1 ## lister-gitlab ### preparation steps 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/gitlab/ 3. create configuration file ~/.config/swh/lister-gitlab.yml 4. Bootstrap the db instance schema $ createdb lister-gitlab $ python3 -m swh.lister.cli --db-url postgres:///lister-gitlab \ --lister gitlab \ --create-tables ### Configuration file sample $ cat ~/.config/swh/lister-gitlab.yml # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls lister_db_url: postgres:///lister-gitlab credentials: [] cache_responses: True cache_dir: /home/user/.cache/swh/lister/gitlab Note: This expects storage (5002) and scheduler (5008) services to run locally ### Run $ python3 Python 3.6.6 (default, Jun 27 2018, 14:44:17) [GCC 8.1.0] on linux Type "help", "copyright", "credits" or "license" for more information. >>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2, {'instance': 'debian', 'api_baseurl': 'https://salsa.debian.org/api/v4', 'sort': 'asc', 'per_page': 20}) >>> from swh.lister.gitlab.tasks import FullGitLabRelister; FullGitLabRelister().run_task( {'instance':'0xacab', 'api_baseurl':'https://0xacab.org/api/v4', 'sort': 'asc', 'per_page': 20}) >>> from swh.lister.gitlab.tasks import IncrementalGitLabLister; IncrementalGitLabLister().run_task( {'instance': 'freedesktop.org', 'api_baseurl': 'https://gitlab.freedesktop.org/api/v4', 'sort': 'asc', 'per_page': 20}) ## lister-debian ### preparation steps 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/debian/ 3. create configuration file ~/.config/swh/lister-debian.yml 4. Bootstrap the db instance schema $ createdb lister-debian $ python3 -m swh.lister.cli --db-url postgres:///lister-debian \ --lister debian \ --create-tables \ --with-data Note: This bootstraps a minimum data set needed for the debian lister to run (for development) ### Configuration file sample $ cat ~/.config/swh/lister-debian.yml # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls lister_db_url: postgres:///lister-debian credentials: [] cache_responses: True cache_dir: /home/user/.cache/swh/lister/debian Note: This expects storage (5002) and scheduler (5008) services to run locally ### Run $ python3 Python 3.6.6 (default, Jun 27 2018, 14:44:17) [GCC 8.1.0] on linux Type "help", "copyright", "credits" or "license" for more information. >>> import logging; logging.basicConfig(level=logging.DEBUG); from swh.lister.debian.tasks import DebianListerTask; DebianListerTask().run_task('Debian') DEBUG:root:Creating snapshot for distribution Distribution(Debian (deb) on http://deb.debian.org/debian/) on date 2018-07-27 09:22:50.461165+00:00 DEBUG:root:Processing area Area(stretch/main of Debian) DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): deb.debian.org DEBUG:urllib3.connectionpool:http://deb.debian.org:80 "GET /debian//dists/stretch/main/source/Sources.xz HTTP/1.1" 302 325 ... ## lister-pypi ### preparation steps 1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing) 2. mkdir ~/.config/swh/ ~/.cache/swh/lister/pypi/ 3. create configuration file ~/.config/swh/lister-pypi.yml 4. Bootstrap the db instance schema $ createdb lister-pypi $ python3 -m swh.lister.cli --db-url postgres:///lister-pypi \ --lister pypi \ --create-tables \ --with-data Note: This bootstraps a minimum data set needed for the pypi lister to run (for development) ### Configuration file sample $ cat ~/.config/swh/lister-pypi.yml # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls lister_db_url: postgres:///lister-pypi credentials: [] cache_responses: True cache_dir: /home/user/.cache/swh/lister/pypi Note: This expects storage (5002) and scheduler (5008) services to run locally ### Run $ python3 Python 3.6.6 (default, Jun 27 2018, 14:44:17) [GCC 8.1.0] on linux Type "help", "copyright", "credits" or "license" for more information. >>> from swh.lister.pypi.tasks import PyPIListerTask; PyPIListerTask().run_task() >>> Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/swh/lister/_version.py b/swh/lister/_version.py index 8eb674b..b67cb27 100644 --- a/swh/lister/_version.py +++ b/swh/lister/_version.py @@ -1,5 +1,5 @@ # This file is automatically generated by setup.py. -__version__ = '0.0.19' -__sha__ = 'g1756e2e' -__revision__ = 'g1756e2e' +__version__ = '0.0.20' +__sha__ = 'g2a588d2' +__revision__ = 'g2a588d2' diff --git a/swh/lister/cli.py b/swh/lister/cli.py index 9a9a030..26d4322 100644 --- a/swh/lister/cli.py +++ b/swh/lister/cli.py @@ -1,122 +1,122 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import click logger = logging.getLogger(__name__) SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi', 'npm'] @click.command() @click.option( '--db-url', '-d', default='postgres:///lister-gitlab.com', help='SQLAlchemy DB URL; see ' '') # noqa @click.argument('listers', required=1, nargs=-1, type=click.Choice(SUPPORTED_LISTERS + ['all'])) @click.option('--drop-tables', '-D', is_flag=True, default=False, help='Drop tables before creating the database schema') def cli(db_url, listers, drop_tables): """Initialize db model according to lister. """ override_conf = { 'lister_db_url': db_url, 'lister': { 'cls': 'local', 'args': {'db': db_url} } } insert_minimum_data = None if 'all' in listers: listers = SUPPORTED_LISTERS for lister in listers: logger.info('Initializing lister %s', lister) if lister == 'github': from .github.models import IndexingModelBase as ModelBase from .github.lister import GitHubLister _lister = GitHubLister( api_baseurl='https://api.github.com', override_config=override_conf) elif lister == 'bitbucket': from .bitbucket.models import IndexingModelBase as ModelBase from .bitbucket.lister import BitBucketLister _lister = BitBucketLister( api_baseurl='https://api.bitbucket.org/2.0', override_config=override_conf) elif lister == 'gitlab': from .gitlab.models import ModelBase from .gitlab.lister import GitLabLister _lister = GitLabLister( api_baseurl='https://gitlab.com/api/v4/', override_config=override_conf) elif lister == 'debian': from .debian.lister import DebianLister ModelBase = DebianLister.MODEL # noqa _lister = DebianLister(override_config=override_conf) def insert_minimum_data(lister): from swh.storage.schemata.distribution import ( Distribution, Area) d = Distribution( name='Debian', type='deb', mirror_uri='http://deb.debian.org/debian/') lister.db_session.add(d) areas = [] for distribution_name in ['stretch']: for area_name in ['main', 'contrib', 'non-free']: areas.append(Area( name='%s/%s' % (distribution_name, area_name), distribution=d, )) lister.db_session.add_all(areas) lister.db_session.commit() elif lister == 'pypi': from .pypi.models import ModelBase from .pypi.lister import PyPILister _lister = PyPILister(override_config=override_conf) elif lister == 'npm': from .npm.models import IndexingModelBase as ModelBase from .npm.models import NpmVisitModel from .npm.lister import NpmLister _lister = NpmLister(override_config=override_conf) if drop_tables: NpmVisitModel.metadata.drop_all(_lister.db_engine) NpmVisitModel.metadata.create_all(_lister.db_engine) else: raise ValueError( 'Invalid lister %s: only supported listers are %s' % (lister, SUPPORTED_LISTERS)) if drop_tables: logger.info('Dropping tables for %s', lister) ModelBase.metadata.drop_all(_lister.db_engine) logger.info('Creating tables for %s', lister) ModelBase.metadata.create_all(_lister.db_engine) if insert_minimum_data: logger.info('Inserting minimal data for %s', lister) try: insert_minimum_data(_lister) except Exception: logger.warning( - 'Failed to insert minumum data in %s', lister) + 'Failed to insert minimum data in %s', lister) if __name__ == '__main__': cli() diff --git a/version.txt b/version.txt index 00a8745..dd34fe2 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.19-0-g1756e2e \ No newline at end of file +v0.0.20-0-g2a588d2 \ No newline at end of file