# Patch summary: spell "GitLab" with a capital L in the class names of the
# GitLab lister. Only identifiers change; every other line is context.
#
#   swh/lister/gitlab/models.py
#     GitlabModel                   -> GitLabModel
#   swh/lister/gitlab/lister.py
#     GitlabLister                  -> GitLabLister
#     (import and MODEL attribute follow the GitLabModel rename)
#   swh/lister/gitlab/tasks.py
#     GitlabDotComListerTask        -> GitLabDotComListerTask
#     IncrementalGitlabDotComLister -> IncrementalGitLabDotComLister
#     RangeGitlabLister             -> RangeGitLabLister
#     FullGitlabRelister            -> FullGitLabRelister
#     (import and new_lister() follow the GitLabLister rename)
#
# Left untouched by this patch: the table name 'main_gitlab_repos', the
# task_queue values, lister_name='gitlab.com', and the model docstring
# ("a Gitlab repository").
#
# NOTE(review): line breaks and indentation below were restored from a
# whitespace-collapsed copy of the patch. Blank-line placement is inferred;
# it is consistent with the hunk line counts (71, 20, 28) -- confirm against
# the repository before applying.
#
# NOTE(review), out of scope for a pure rename but visible in the context
# lines of lister.py:
#   * API_URL_INDEX_RE only matches '&page=<n>', while PATH_TEMPLATE builds
#     '?page=<n>'. get_next_target_from_response() calls .group(1) on the
#     match result without checking it, so a "next" URL where page is the
#     first query parameter would presumably raise AttributeError -- verify
#     against real Link headers.
#   * transport_quota_check() subscripts response.headers for
#     'RateLimit-Remaining' before looking at the status code; presumably
#     this raises KeyError on responses that omit the header -- confirm.
diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py
index 3f0ed96..494a5d3 100644
--- a/swh/lister/gitlab/lister.py
+++ b/swh/lister/gitlab/lister.py
@@ -1,71 +1,71 @@
 # Copyright (C) 2018 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import re
 import time
 
 from ..core.indexing_lister import SWHIndexingHttpLister
-from .models import GitlabModel
+from .models import GitLabModel
 
 
-class GitlabLister(SWHIndexingHttpLister):
+class GitLabLister(SWHIndexingHttpLister):
     # Path to give and mentioning the last id for the next page
     PATH_TEMPLATE = '/projects?page=%d'
     # gitlab api do not have an indexable identifier so using the page
     # id
     API_URL_INDEX_RE = re.compile(r'^.*/projects.*\&page=(\d+).*')
     # The indexable field, the one we are supposed to use in the api
     # query is not part of the lookup query. So, we cannot filter
     # (method filter_before_inject), nor detect and disable origins
     # (method disable_deleted_repo_tasks)
-    MODEL = GitlabModel
+    MODEL = GitLabModel
 
     def filter_before_inject(self, models_list):
         """We cannot filter so returns the models_list as is.
 
         """
         return models_list
 
     def get_model_from_repo(self, repo):
         return {
             'uid': repo['id'],
             'indexable': repo['id'],
             'name': repo['name'],
             'full_name': repo['path_with_namespace'],
             'html_url': repo['web_url'],
             'origin_url': repo['http_url_to_repo'],
             'origin_type': 'git',
             'description': repo['description'],
             # FIXME: How to determine the fork nature? Do we need that
             # information? Variable `repo` holds a `count_fork` key
             # which is the number of forks for that
             # repository. Default to False for now.
             'fork': False,
         }
 
     def transport_quota_check(self, response):
         """Deal with rate limit
 
         """
         reqs_remaining = int(response.headers['RateLimit-Remaining'])
         # TODO: need to dig further about the actual returned code
         # (not seen yet in documentation)
         if response.status_code == 403 and reqs_remaining == 0:
             reset_at = int(response.headers['RateLimit-Reset'])
             delay = min(reset_at - time.time(), 3600)
             return True, delay
         return False, 0
 
     def get_next_target_from_response(self, response):
         """Deal with pagination
 
         """
         if 'next' in response.links:
             next_url = response.links['next']['url']
             return int(self.API_URL_INDEX_RE.match(next_url).group(1))
         return None
 
     def transport_response_simplified(self, response):
         repos = response.json()
         return [self.get_model_from_repo(repo) for repo in repos]
diff --git a/swh/lister/gitlab/models.py b/swh/lister/gitlab/models.py
index b1c538b..65b7287 100644
--- a/swh/lister/gitlab/models.py
+++ b/swh/lister/gitlab/models.py
@@ -1,20 +1,20 @@
 # Copyright (C) 2018 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from sqlalchemy import Column, Boolean, Integer
 
 from ..core.models import ModelBase
 
 
-class GitlabModel(ModelBase):
+class GitLabModel(ModelBase):
     """a Gitlab repository"""
     __tablename__ = 'main_gitlab_repos'
 
     uid = Column(Integer, primary_key=True)
     indexable = Column(Integer, index=True)
     fork = Column(Boolean)
 
     def __init__(self, *args, **kwargs):
         self.fork = kwargs.pop('fork', False)
         super().__init__(*args, **kwargs)
diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py
index 6686104..a8ca30a 100644
--- a/swh/lister/gitlab/tasks.py
+++ b/swh/lister/gitlab/tasks.py
@@ -1,28 +1,28 @@
 # Copyright (C) 2018 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from swh.lister.core.tasks import (IndexingDiscoveryListerTask,
                                    IndexingRangeListerTask,
                                    IndexingRefreshListerTask, ListerTaskBase)
 
-from .lister import GitlabLister
+from .lister import GitLabLister
 
 
-class GitlabDotComListerTask(ListerTaskBase):
+class GitLabDotComListerTask(ListerTaskBase):
     def new_lister(self):
-        return GitlabLister(lister_name='gitlab.com',
+        return GitLabLister(lister_name='gitlab.com',
                             api_baseurl='https://gitlab.com/api/v4')
 
 
-class IncrementalGitlabDotComLister(GitlabDotComListerTask,
+class IncrementalGitLabDotComLister(GitLabDotComListerTask,
                                     IndexingDiscoveryListerTask):
     task_queue = 'swh_lister_gitlab_discover'
 
 
-class RangeGitlabLister(GitlabDotComListerTask, IndexingRangeListerTask):
+class RangeGitLabLister(GitLabDotComListerTask, IndexingRangeListerTask):
     task_queue = 'swh_lister_gitlab_refresh'
 
 
-class FullGitlabRelister(GitlabDotComListerTask, IndexingRefreshListerTask):
+class FullGitLabRelister(GitLabDotComListerTask, IndexingRefreshListerTask):
     task_queue = 'swh_lister_gitlab_refresh'