# diff --git a/swh/lister/gitlab/__init__.py b/swh/lister/gitlab/__init__.py
# new file mode 100644

# diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py
# new file mode 100644

# Copyright (C) 2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import re
import time

from ..core.indexing_lister import SWHIndexingHttpLister
from .models import GitlabModel


class GitlabLister(SWHIndexingHttpLister):
    """Lister for the public projects exposed by a GitLab API endpoint.

    Builds on SWHIndexingHttpLister and provides the GitLab-specific pieces:
    the request path template, the ORM model, rate-limit detection,
    pagination and the conversion of API results into model dicts.
    """
    # Path of the listing request; the %d placeholder receives the index
    # (last seen project id) from which the next page starts.
    PATH_TEMPLATE = '/projects?since=%d&visibility=public'
    # base orm model
    MODEL = GitlabModel
    # Extracts the `since` index from a URL shaped like PATH_TEMPLATE.
    API_URL_INDEX_RE = re.compile(
        r'^.*/projects\?since=(\d+)&visibility=public')

    def get_model_from_repo(self, repo):
        """Convert one project entry of the API response into model fields.

        Args:
            repo (dict): a decoded project entry; must provide the keys
                'id', 'name', 'path_with_namespace', 'web_url' and
                'description'.

        Returns:
            dict: the keyword values describing that project.

        Raises:
            KeyError: if one of the expected keys is missing from `repo`.
        """
        return {
            'uid': repo['id'],
            'indexable': repo['id'],
            'name': repo['name'],
            'full_name': repo['path_with_namespace'],
            'html_url': repo['web_url'],
            'origin_url': repo['web_url'],
            'origin_type': 'git',
            'description': repo['description'],
            # FIXME: How to determine the fork nature?
            'fork': False,
        }

    def transport_quota_check(self, response):
        """Deal with ratelimit

        The rate-limit headers are only read for a 403 response that
        actually carries 'RateLimit-Remaining', so responses without
        rate-limit headers no longer raise KeyError.

        Args:
            response: the HTTP response to inspect (`status_code` and
                `headers` are read).

        Returns:
            tuple: (True, delay in seconds) when the response is a 403 with
            no request remaining, (False, 0) otherwise. The delay is never
            negative and never above 3600 seconds.
        """
        headers = response.headers
        if response.status_code != 403:
            return False, 0
        if 'RateLimit-Remaining' not in headers:
            # No rate-limit information on this response.
            return False, 0
        if int(headers['RateLimit-Remaining']) != 0:
            return False, 0

        reset_at = int(headers['RateLimit-Reset'])
        # Cap the wait at one hour, and never return a negative delay when
        # the reset time is already in the past.
        delay = max(0, min(reset_at - time.time(), 3600))
        return True, delay

    def get_next_target_from_response(self, response):
        """Deal with pagination

        Args:
            response: the HTTP response whose `links` mapping is inspected.

        Returns:
            int or None: the index found in the 'next' link URL, or None
            when the response has no 'next' link.

        Raises:
            ValueError: if the 'next' link URL does not match
                API_URL_INDEX_RE.
        """
        if 'next' not in response.links:
            return None

        next_url = response.links['next']['url']
        matched = self.API_URL_INDEX_RE.match(next_url)
        if matched is None:
            # Fail with an explicit message instead of an AttributeError on
            # None.
            raise ValueError(
                'Unable to extract the next index from url %r' % (next_url,))
        return int(matched.group(1))

    def transport_response_simplified(self, response):
        """Decode the JSON body and convert every entry into model fields.

        Args:
            response: the HTTP response; its `json()` result is iterated.

        Returns:
            list: one dict per entry, built by `get_model_from_repo`.
        """
        repos = response.json()
        return [self.get_model_from_repo(repo) for repo in repos]


# diff --git a/swh/lister/gitlab/models.py b/swh/lister/gitlab/models.py
# new file mode 100644

# Copyright (C) 2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from sqlalchemy import Column, Boolean, Integer

from ..core.models import ModelBase


class GitlabModel(ModelBase):
    """a Gitlab repository"""
    __tablename__ = 'main_gitlab_repos'

    # Primary key of the row.
    uid = Column(Integer, primary_key=True)
    # Indexed integer column.
    indexable = Column(Integer, index=True)
    # Boolean flag; defaults to False at construction time (see __init__).
    fork = Column(Boolean)

    def __init__(self, *args, **kwargs):
        """Set `fork` from the keyword arguments, defaulting to False.

        The 'fork' keyword is removed from `kwargs` before the remaining
        arguments are forwarded to the parent constructor.
        """
        self.fork = kwargs.pop('fork', False)
        super().__init__(*args, **kwargs)


# diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py
# new file mode 100644

# Copyright (C) 2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from swh.lister.core.tasks import (IndexingDiscoveryListerTask,
                                   IndexingRangeListerTask,
                                   IndexingRefreshListerTask, ListerTaskBase)

from .lister import GitlabLister
+ +class GitlabDotComListerTask(ListerTaskBase): + def new_lister(self): + return GitlabLister(lister_name='gitlab.com', + api_baseurl='https://gitlab.com/api/v4') + + +class IncrementalGitlabDotComLister(GitlabDotComListerTask, + IndexingDiscoveryListerTask): + task_queue = 'swh_lister_gitlab_discover' + + +class RangeGitlabLister(GitlabDotComListerTask, IndexingRangeListerTask): + task_queue = 'swh_lister_gitlab_refresh' + + +class FullGitlabRelister(GitlabDotComListerTask, IndexingRefreshListerTask): + task_queue = 'swh_lister_gitlab_refresh'