Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123180
D352.id1126.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D352.id1126.diff
View Options
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cli.py
@@ -0,0 +1,98 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import click
+
+
+# click context settings: help is reachable through both -h and --help
+CONTEXT_SETTINGS = {'help_option_names': ['-h', '--help']}
+
+
+@click.group(context_settings=CONTEXT_SETTINGS)
+@click.option(
+    '--db-url', '-d', default='postgres:///lister-gitlab.com',
+    help='SQLAlchemy DB URL; see '
+    '<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>')  # noqa
+@click.pass_context
+def cli(ctx, db_url):
+    """Initialize db model according to lister.
+
+    """
+    # What the subcommands get through ctx.obj: the 'db_url' entry only
+    # exists when a non-empty url was provided.
+    ctx.obj = {'db_url': db_url} if db_url else {}
+
+
+@cli.command('github')
+@click.option('--createdb', is_flag=True, default=False,
+              help='create db')
+@click.option('--dropdb', is_flag=True, default=False,
+              help='Drop db')
+@click.pass_context
+def github(ctx, createdb, dropdb):
+    """Drop and/or create the db model used by the github lister."""
+    # Imported lazily so that only the selected lister's modules get loaded.
+    from .github import models
+    from .github.lister import GitHubLister
+
+    # The `cli` group leaves 'db_url' out of ctx.obj when the url is
+    # empty: only override the lister's db url when there is one, instead
+    # of failing with a KeyError.
+    override_conf = {}
+    db_url = ctx.obj.get('db_url')
+    if db_url:
+        override_conf['lister_db_url'] = db_url
+
+    # The lister is only instantiated to get at its db engine.
+    lister = GitHubLister(lister_name='github.com',
+                          api_baseurl='https://api.github.com',
+                          override_config=override_conf)
+
+    # Dropping comes first so that `--dropdb --createdb` resets the model.
+    if dropdb:
+        models.ModelBase.metadata.drop_all(lister.db_engine)
+
+    if createdb:
+        models.ModelBase.metadata.create_all(lister.db_engine)
+
+
+@cli.command('gitlab')
+@click.option('--createdb', is_flag=True, default=False,
+              help='create db')
+@click.option('--dropdb', is_flag=True, default=False,
+              help='Drop db')
+@click.pass_context
+def gitlab(ctx, createdb, dropdb):
+    """Drop and/or create the db model used by the gitlab lister."""
+    # Imported lazily so that only the selected lister's modules get loaded.
+    from .gitlab import models
+    from .gitlab.lister import GitlabLister
+
+    # The `cli` group leaves 'db_url' out of ctx.obj when the url is
+    # empty: only override the lister's db url when there is one, instead
+    # of failing with a KeyError.
+    override_conf = {}
+    db_url = ctx.obj.get('db_url')
+    if db_url:
+        override_conf['lister_db_url'] = db_url
+
+    # No trailing slash on the base url: the lister's path template
+    # already starts with '/', and the gitlab tasks module uses the same
+    # form. NOTE(review): assumes the base class concatenates base url and
+    # path -- confirm.
+    lister = GitlabLister(lister_name='gitlab.com',
+                          api_baseurl='https://gitlab.com/api/v4',
+                          override_config=override_conf)
+
+    # Dropping comes first so that `--dropdb --createdb` resets the model.
+    if dropdb:
+        models.ModelBase.metadata.drop_all(lister.db_engine)
+
+    if createdb:
+        models.ModelBase.metadata.create_all(lister.db_engine)
+
+
+@cli.command('bitbucket')
+@click.option('--createdb', is_flag=True, default=False,
+              help='create db')
+@click.option('--dropdb', is_flag=True, default=False,
+              help='Drop db')
+@click.pass_context
+def bitbucket(ctx, createdb, dropdb):
+    """Drop and/or create the db model used by the bitbucket lister."""
+    # Imported lazily so that only the selected lister's modules get loaded.
+    from .bitbucket import models
+    from .bitbucket.lister import BitBucketLister
+
+    # The `cli` group leaves 'db_url' out of ctx.obj when the url is
+    # empty: only override the lister's db url when there is one, instead
+    # of failing with a KeyError.
+    override_conf = {}
+    db_url = ctx.obj.get('db_url')
+    if db_url:
+        override_conf['lister_db_url'] = db_url
+
+    # The lister is only instantiated to get at its db engine.
+    lister = BitBucketLister(lister_name='bitbucket.com',
+                             api_baseurl='https://api.bitbucket.org/2.0',
+                             override_config=override_conf)
+
+    # Dropping comes first so that `--dropdb --createdb` resets the model.
+    if dropdb:
+        models.ModelBase.metadata.drop_all(lister.db_engine)
+
+    if createdb:
+        models.ModelBase.metadata.create_all(lister.db_engine)
+
+
+# Run the `cli` command group when this module is executed as a script.
+if __name__ == '__main__':
+    cli()
diff --git a/swh/lister/gitlab/__init__.py b/swh/lister/gitlab/__init__.py
new file mode 100644
diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gitlab/lister.py
@@ -0,0 +1,71 @@
+# Copyright (C) 2018 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import re
+import time
+
+from ..core.indexing_lister import SWHIndexingHttpLister
+from .models import GitlabModel
+
+
+class GitlabLister(SWHIndexingHttpLister):
+    """Lister of gitlab projects, using the api's page number as index.
+
+    """
+    # Path of the projects listing; %d is the number of the page to fetch
+    PATH_TEMPLATE = '/projects?page=%d'
+    # The gitlab api does not have an indexable identifier, so the page
+    # id is used instead. `page` is accepted as first query parameter
+    # (`?page=`, the PATH_TEMPLATE shape) as well as a later one
+    # (`&page=`); requiring the separator keeps `per_page=` out.
+    API_URL_INDEX_RE = re.compile(r'^.*/projects.*[?&]page=(\d+).*')
+    # The indexable field, the one we are supposed to use in the api
+    # query, is not part of the lookup query. So, we can neither filter
+    # (method filter_before_inject), nor detect and disable origins
+    # (method disable_deleted_repo_tasks)
+    MODEL = GitlabModel
+
+    def filter_before_inject(self, models_list):
+        """We cannot filter so returns the models_list as is.
+
+        """
+        return models_list
+
+    def get_model_from_repo(self, repo):
+        """Map a project, as returned by the api, to the model's fields.
+
+        Args:
+            repo (dict): one project of the api's response; the keys
+                'id', 'name', 'path_with_namespace', 'web_url',
+                'http_url_to_repo' and 'description' are read
+
+        Returns:
+            dict of the values to store for that project
+
+        Raises:
+            KeyError: when one of the keys read is missing
+
+        """
+        return {
+            'uid': repo['id'],
+            'indexable': repo['id'],
+            'name': repo['name'],
+            'full_name': repo['path_with_namespace'],
+            'html_url': repo['web_url'],
+            'origin_url': repo['http_url_to_repo'],
+            'origin_type': 'git',
+            'description': repo['description'],
+            # FIXME: How to determine the fork nature? Do we need that
+            # information? Variable `repo` holds a `count_fork` key
+            # which is the number of forks for that
+            # repository. Default to False for now.
+            'fork': False,
+        }
+
+    def transport_quota_check(self, response):
+        """Deal with rate limit
+
+        Args:
+            response: the api's http response; its status code and
+                `RateLimit-*` headers are read
+
+        Returns:
+            tuple (True, delay in seconds, at most 3600) when the
+            response is a 403 with no request remaining, (False, 0)
+            otherwise
+
+        """
+        # TODO: need to dig further about the actual returned code
+        # (not seen yet in documentation)
+        if response.status_code != 403:
+            # The rate limit headers are only needed for a 403; not
+            # reading them here avoids a KeyError on responses which do
+            # not carry them.
+            return False, 0
+
+        headers = response.headers
+        # A 403 without the header is not considered a quota issue
+        if int(headers.get('RateLimit-Remaining', 1)) != 0:
+            return False, 0
+
+        reset_at = int(headers['RateLimit-Reset'])
+        # Never more than an hour, and never negative (reset time
+        # already in the past)
+        delay = min(reset_at - time.time(), 3600)
+        return True, max(delay, 0)
+
+    def get_next_target_from_response(self, response):
+        """Deal with pagination
+
+        Args:
+            response: the api's http response; its `links` are read
+
+        Returns:
+            the next page's number (int), None when the response has no
+            'next' link
+
+        Raises:
+            ValueError: when no page number can be read from the 'next'
+                link's url
+
+        """
+        if 'next' not in response.links:
+            return None
+
+        next_url = response.links['next']['url']
+        found = self.API_URL_INDEX_RE.match(next_url)
+        if found is None:
+            # Explicit failure rather than an AttributeError on None
+            raise ValueError(
+                'No page number found in next url %r' % (next_url, ))
+        return int(found.group(1))
+
+    def transport_response_simplified(self, response):
+        """Convert the response's json body (an iterable of projects) into
+        the list of their model values.
+
+        """
+        repos = response.json()
+        return [self.get_model_from_repo(repo) for repo in repos]
diff --git a/swh/lister/gitlab/models.py b/swh/lister/gitlab/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gitlab/models.py
@@ -0,0 +1,20 @@
+# Copyright (C) 2018 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, Boolean, Integer
+
+from ..core.models import ModelBase
+
+
+class GitlabModel(ModelBase):
+    """a Gitlab repository"""
+    __tablename__ = 'main_gitlab_repos'
+
+    # primary key of the table
+    uid = Column(Integer, primary_key=True)
+    # indexed integer column
+    indexable = Column(Integer, index=True)
+    # boolean column, set from the `fork` keyword at construction
+    fork = Column(Boolean)
+
+    def __init__(self, *args, **kwargs):
+        """Set `fork` from the keyword of the same name (False when not
+        provided), then pass the remaining arguments to the parent's
+        constructor.
+
+        """
+        is_fork = kwargs.pop('fork', False)
+        self.fork = is_fork
+        super().__init__(*args, **kwargs)
diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gitlab/tasks.py
@@ -0,0 +1,28 @@
+# Copyright (C) 2018 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.lister.core.tasks import (IndexingDiscoveryListerTask,
+ IndexingRangeListerTask,
+ IndexingRefreshListerTask, ListerTaskBase)
+
+from .lister import GitlabLister
+
+
+class GitlabDotComListerTask(ListerTaskBase):
+    """Lister task base providing a lister for gitlab.com."""
+
+    def new_lister(self):
+        """Return a GitlabLister named 'gitlab.com' targeting
+        https://gitlab.com/api/v4.
+
+        """
+        lister_kwargs = {
+            'lister_name': 'gitlab.com',
+            'api_baseurl': 'https://gitlab.com/api/v4',
+        }
+        return GitlabLister(**lister_kwargs)
+
+
+class IncrementalGitlabDotComLister(
+        GitlabDotComListerTask, IndexingDiscoveryListerTask):
+    """gitlab.com flavor of IndexingDiscoveryListerTask, bound to the
+    'swh_lister_gitlab_discover' queue.
+
+    """
+    task_queue = 'swh_lister_gitlab_discover'
+
+
+class RangeGitlabLister(
+        GitlabDotComListerTask, IndexingRangeListerTask):
+    """gitlab.com flavor of IndexingRangeListerTask, bound to the
+    'swh_lister_gitlab_refresh' queue.
+
+    """
+    task_queue = 'swh_lister_gitlab_refresh'
+
+
+class FullGitlabRelister(
+        GitlabDotComListerTask, IndexingRefreshListerTask):
+    """gitlab.com flavor of IndexingRefreshListerTask, bound to the
+    'swh_lister_gitlab_refresh' queue.
+
+    """
+    task_queue = 'swh_lister_gitlab_refresh'
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 18, 1:47 AM (1 d, 23 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3231609
Attached To
D352: Bootstrap gitlab lister
Event Timeline
Log In to Comment