Page MenuHomeSoftware Heritage

D352.id1126.diff
No OneTemporary

D352.id1126.diff

diff --git a/swh/lister/cli.py b/swh/lister/cli.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cli.py
@@ -0,0 +1,114 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import click
+
+
+CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
+
+
+def _apply_schema_ops(lister, metadata, createdb, dropdb):
+    """Drop and/or create the tables described by `metadata`.
+
+    Args:
+        lister: lister instance whose `db_engine` is operated on
+        metadata: the `ModelBase.metadata` of that lister's models
+        createdb (bool): create the tables when true
+        dropdb (bool): drop the tables when true
+
+    """
+    # Drop first so that passing both flags recreates the tables.
+    if dropdb:
+        metadata.drop_all(lister.db_engine)
+
+    if createdb:
+        metadata.create_all(lister.db_engine)
+
+
+@click.group(context_settings=CONTEXT_SETTINGS)
+@click.option(
+    # NOTE(review): this default is shared by every subcommand although
+    # the database name is gitlab-specific -- confirm this is intended.
+    '--db-url', '-d', default='postgres:///lister-gitlab.com',
+    help='SQLAlchemy DB URL; see '
+    '<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>')  # noqa
+@click.pass_context
+def cli(ctx, db_url):
+    """Initialize db model according to lister.
+
+    """
+    # Every subcommand reads ctx.obj['db_url'], so reject an empty value
+    # here with a usage error instead of failing later with a KeyError.
+    if not db_url:
+        raise click.UsageError('--db-url must not be empty')
+    ctx.obj = {'db_url': db_url}
+
+
+@cli.command('github')
+@click.option('--createdb', is_flag=True, default=False,
+              help='create db')
+@click.option('--dropdb', is_flag=True, default=False,
+              help='Drop db')
+@click.pass_context
+def github(ctx, createdb, dropdb):
+    """Create and/or drop the db tables of the github lister."""
+    # Imported lazily so only the selected lister's modules get loaded.
+    from .github import models
+    from .github.lister import GitHubLister
+
+    lister = GitHubLister(
+        lister_name='github.com',
+        api_baseurl='https://api.github.com',
+        override_config={'lister_db_url': ctx.obj['db_url']})
+
+    _apply_schema_ops(lister, models.ModelBase.metadata, createdb, dropdb)
+
+
+@cli.command('gitlab')
+@click.option('--createdb', is_flag=True, default=False,
+              help='create db')
+@click.option('--dropdb', is_flag=True, default=False,
+              help='Drop db')
+@click.pass_context
+def gitlab(ctx, createdb, dropdb):
+    """Create and/or drop the db tables of the gitlab lister."""
+    # Imported lazily so only the selected lister's modules get loaded.
+    from .gitlab import models
+    from .gitlab.lister import GitlabLister
+
+    # NOTE(review): trailing slash here, none in gitlab/tasks.py; confirm
+    # which form the base lister expects when joining PATH_TEMPLATE.
+    lister = GitlabLister(
+        lister_name='gitlab.com',
+        api_baseurl='https://gitlab.com/api/v4/',
+        override_config={'lister_db_url': ctx.obj['db_url']})
+
+    _apply_schema_ops(lister, models.ModelBase.metadata, createdb, dropdb)
+
+
+@cli.command('bitbucket')
+@click.option('--createdb', is_flag=True, default=False,
+              help='create db')
+@click.option('--dropdb', is_flag=True, default=False,
+              help='Drop db')
+@click.pass_context
+def bitbucket(ctx, createdb, dropdb):
+    """Create and/or drop the db tables of the bitbucket lister."""
+    # Imported lazily so only the selected lister's modules get loaded.
+    from .bitbucket import models
+    from .bitbucket.lister import BitBucketLister
+
+    lister = BitBucketLister(
+        lister_name='bitbucket.com',
+        api_baseurl='https://api.bitbucket.org/2.0',
+        override_config={'lister_db_url': ctx.obj['db_url']})
+
+    _apply_schema_ops(lister, models.ModelBase.metadata, createdb, dropdb)
+
+
+# NOTE: the subcommands use relative imports, so run this module as
+# `python -m swh.lister.cli` rather than as a plain script.
+if __name__ == '__main__':
+    cli()
diff --git a/swh/lister/gitlab/__init__.py b/swh/lister/gitlab/__init__.py
new file mode 100644
diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gitlab/lister.py
@@ -0,0 +1,118 @@
+# Copyright (C) 2018 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import re
+import time
+
+from ..core.indexing_lister import SWHIndexingHttpLister
+from .models import GitlabModel
+
+
+class GitlabLister(SWHIndexingHttpLister):
+    """Lister for the projects exposed by a gitlab api.
+
+    The listing is paginated: the page number is the value substituted
+    in `PATH_TEMPLATE` and the one `get_next_target_from_response`
+    extracts from the `next` link of a response.
+
+    """
+    # Path of the query, parametrized by the number of the page to fetch
+    PATH_TEMPLATE = '/projects?page=%d'
+    # The gitlab api does not have an indexable identifier, so the page
+    # number is used instead. `page` may be the first query parameter
+    # (`?page=`) as well as a later one (`&page=`).
+    API_URL_INDEX_RE = re.compile(r'^.*/projects.*[?&]page=(\d+).*')
+    # The indexable field, the one we are supposed to use in the api
+    # query, is not part of the lookup query. So, we can neither filter
+    # (method filter_before_inject) nor detect and disable origins
+    # (method disable_deleted_repo_tasks).
+    MODEL = GitlabModel
+
+    def filter_before_inject(self, models_list):
+        """We cannot filter so returns the models_list as is.
+
+        """
+        return models_list
+
+    def get_model_from_repo(self, repo):
+        """Map a project, as returned by the api, to the model's fields.
+
+        Args:
+            repo (dict): one project entry of the api response
+
+        Returns:
+            dict: the values of uid, indexable, name, full_name,
+            html_url, origin_url, origin_type, description and fork
+
+        """
+        return {
+            'uid': repo['id'],
+            'indexable': repo['id'],
+            'name': repo['name'],
+            'full_name': repo['path_with_namespace'],
+            'html_url': repo['web_url'],
+            'origin_url': repo['http_url_to_repo'],
+            'origin_type': 'git',
+            'description': repo['description'],
+            # FIXME: How to determine the fork nature? Do we need that
+            # information? Variable `repo` holds a `forks_count` key
+            # which is the number of forks for that
+            # repository. Default to False for now.
+            'fork': False,
+        }
+
+    def transport_quota_check(self, response):
+        """Deal with rate limit
+
+        Args:
+            response: http response of the api
+
+        Returns:
+            tuple: (True, delay) when the response is a 403 reporting no
+            remaining request, delay being the number of seconds until
+            the quota resets, within [0, 3600]; (False, 0) otherwise.
+
+        """
+        # TODO: need to dig further about the actual returned code
+        # (not seen yet in documentation)
+        if response.status_code != 403:
+            return False, 0
+
+        # The rate limit headers are only needed for refused requests;
+        # a 403 without them is not considered as a quota exhaustion.
+        reqs_remaining = response.headers.get('RateLimit-Remaining')
+        if reqs_remaining is None or int(reqs_remaining) != 0:
+            return False, 0
+
+        reset_at = int(response.headers['RateLimit-Reset'])
+        # Clamp to 0 in case the reset time is already in the past.
+        delay = min(max(reset_at - time.time(), 0), 3600)
+        return True, delay
+
+    def get_next_target_from_response(self, response):
+        """Deal with pagination
+
+        Returns:
+            int: number of the next page, None when the response holds
+            no `next` link.
+
+        Raises:
+            ValueError: the `next` link holds no page number
+
+        """
+        if 'next' not in response.links:
+            return None
+
+        next_url = response.links['next']['url']
+        matched = self.API_URL_INDEX_RE.match(next_url)
+        if matched is None:
+            raise ValueError(
+                'No page number found in next link %r' % next_url)
+        return int(matched.group(1))
+
+    def transport_response_simplified(self, response):
+        """Convert the json body of the response to a list of model dicts.
+
+        """
+        return [self.get_model_from_repo(repo) for repo in response.json()]
diff --git a/swh/lister/gitlab/models.py b/swh/lister/gitlab/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gitlab/models.py
@@ -0,0 +1,27 @@
+# Copyright (C) 2018 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, Boolean, Integer
+
+from ..core.models import ModelBase
+
+
+class GitlabModel(ModelBase):
+    """a Gitlab repository"""
+    __tablename__ = 'main_gitlab_repos'
+
+    # Identifier of the repository, primary key of the table
+    uid = Column(Integer, primary_key=True)
+    # Indexed integer value (the gitlab lister stores the project id)
+    indexable = Column(Integer, index=True)
+    # Fork flag of the repository
+    fork = Column(Boolean)
+
+    def __init__(self, *args, **kwargs):
+        """Set `fork` (False when not given), then defer to ModelBase.
+
+        """
+        is_fork = kwargs.pop('fork', False)
+        self.fork = is_fork
+        super().__init__(*args, **kwargs)
diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gitlab/tasks.py
@@ -0,0 +1,37 @@
+# Copyright (C) 2018 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.lister.core.tasks import (IndexingDiscoveryListerTask,
+                                   IndexingRangeListerTask,
+                                   IndexingRefreshListerTask, ListerTaskBase)
+
+from .lister import GitlabLister
+
+
+class GitlabDotComListerTask(ListerTaskBase):
+    """Common base of the gitlab.com tasks, providing their lister."""
+
+    def new_lister(self):
+        """Build the lister targeting the gitlab.com api."""
+        lister = GitlabLister(
+            lister_name='gitlab.com',
+            api_baseurl='https://gitlab.com/api/v4',
+        )
+        return lister
+
+
+class IncrementalGitlabDotComLister(GitlabDotComListerTask,
+                                    IndexingDiscoveryListerTask):
+    """Discovery task for gitlab.com, with its own queue."""
+    task_queue = 'swh_lister_gitlab_discover'
+
+
+class RangeGitlabLister(GitlabDotComListerTask, IndexingRangeListerTask):
+    """Range task for gitlab.com, on the same queue as the refresh task."""
+    task_queue = 'swh_lister_gitlab_refresh'
+
+
+class FullGitlabRelister(GitlabDotComListerTask, IndexingRefreshListerTask):
+    """Refresh task for gitlab.com."""
+    task_queue = 'swh_lister_gitlab_refresh'

File Metadata

Mime Type
text/plain
Expires
Wed, Dec 18, 1:47 AM (2 d, 5 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3231609

Event Timeline