class GitLabLister(PageByPageHttpLister):
    """Page-by-page lister for the projects of a GitLab instance.

    Args:
        url: base API url, forwarded to the parent lister.
        instance: name of the GitLab instance; when None, the host part
            of the lister url is used.
        override_config: configuration override forwarded to the parent
            lister.
        sort: value of the ``sort`` query parameter appended to the
            listing path.
        per_page: value of the ``per_page`` query parameter appended to
            the listing path.

    """
    # Template path expecting an integer that represents the page id
    PATH_TEMPLATE = '/projects?page=%d&order_by=id'
    DEFAULT_URL = 'https://gitlab.com/api/v4/'
    MODEL = GitLabModel
    LISTER_NAME = 'gitlab'

    def __init__(self, url=None, instance=None,
                 override_config=None, sort='asc', per_page=20):
        super().__init__(url=url, override_config=override_config)
        if instance is None:
            instance = parse_url(self.url).host
        self.instance = instance
        # Shadow the class-level template on the instance so that the
        # sort/per_page settings end up in every page request; the '%d'
        # page placeholder is kept intact for later substitution.
        self.PATH_TEMPLATE = '%s&sort=%s&per_page=%s' % (
            self.PATH_TEMPLATE, sort, per_page)

    def uid(self, repo):
        """Return the identifier of a repo: '<instance>/<path_with_namespace>'.

        """
        return '%s/%s' % (self.instance, repo['path_with_namespace'])

    def get_model_from_repo(self, repo):
        """Map one project entry of the API response to a model dict.

        """
        return {
            'instance': self.instance,
            'uid': self.uid(repo),
            'name': repo['name'],
            'full_name': repo['path_with_namespace'],
            'html_url': repo['web_url'],
            'origin_url': repo['http_url_to_repo'],
            'origin_type': 'git',
        }

    def transport_quota_check(self, response):
        """Deal with rate limit if any.

        Returns:
            A ``(must_retry, delay)`` pair. ``must_retry`` is True only
            when the response is a 403 with no request remaining; in that
            case ``delay`` is the number of seconds until the advertised
            reset time, clamped to the range [0, 3600]. Otherwise the
            pair is ``(False, 0)``.

        """
        headers = response.headers
        # not all gitlab instances have rate limit
        if 'RateLimit-Remaining' in headers:
            reqs_remaining = int(headers['RateLimit-Remaining'])
            if response.status_code == 403 and reqs_remaining == 0:
                reset_at = int(headers['RateLimit-Reset'])
                # Upper bound: never wait more than one hour.
                # Lower bound: a reset time already in the past (clock
                # skew, slow response) must not produce a negative delay,
                # which time.sleep() rejects.
                delay = max(0, min(reset_at - time.time(), 3600))
                return True, delay
        return False, 0

    def _get_int(self, headers, key):
        """Return the header `key` as an int, or None if missing or empty.

        """
        _val = headers.get(key)
        if _val:
            return int(_val)
        return None

    def get_next_target_from_response(self, response):
        """Determine the next page identifier.

        Returns:
            The 'x-next-page' header as an int, or None when the header
            is missing or empty.

        """
        return self._get_int(response.headers, 'x-next-page')

    def get_pages_information(self):
        """Determine pages information.

        Queries the first page through ``transport_head`` and reads the
        pagination headers of the response.

        Returns:
            A tuple of the 'x-total', 'x-total-pages' and 'x-per-page'
            headers, each as an int or None when missing or empty.

        Raises:
            ValueError: when the response is not ok.

        """
        response = self.transport_head(identifier=1)
        if not response.ok:
            raise ValueError(
                'Problem during information fetch: %s' % response.status_code)
        h = response.headers
        return (self._get_int(h, 'x-total'),
                self._get_int(h, 'x-total-pages'),
                self._get_int(h, 'x-per-page'))

    def transport_response_simplified(self, response):
        """Decode the JSON body and convert every entry to a model dict.

        """
        repos = response.json()
        return [self.get_model_from_repo(repo) for repo in repos]
class GitLabListerTester(HttpListerTesterBase, unittest.TestCase):
    """Run the shared HTTP lister test harness against GitLabLister."""

    Lister = GitLabLister
    test_re = re.compile(r'^.*/projects.*page=(\d+).*')
    lister_subdir = 'gitlab'
    good_api_response_file = 'data/gitlab.com/api_response.json'
    bad_api_response_file = 'data/gitlab.com/api_empty_response.json'
    first_index = 1
    entries_per_page = 10
    convert_type = int

    def response_headers(self, request):
        """Build the mocked response headers for `request`.

        One request is always reported as remaining; the request for the
        first index additionally advertises page 3 as the next page.
        """
        mocked = {'RateLimit-Remaining': '1'}
        is_first_page = self.request_index(request) == self.first_index
        if is_first_page:
            mocked['x-next-page'] = '3'
        return mocked

    def mock_rate_quota(self, n, request, context):
        """Answer with a rate-limited (403) response and count the hit.

        The advertised reset time is 1.5 seconds from now, truncated to a
        whole epoch second.
        """
        self.rate_limit += 1
        context.status_code = 403
        context.headers['RateLimit-Remaining'] = '0'
        reset_moment = datetime.now() + timedelta(seconds=1.5)
        reset_epoch = int(reset_moment.timestamp())
        context.headers['RateLimit-Reset'] = str(reset_epoch)
        return '{"error":"dummy"}'


def test_lister_gitlab(swh_listers, requests_mock_datadir):
    """Run the gitlab lister and check the 'load-git' tasks it scheduled."""
    gitlab_lister = swh_listers['gitlab']
    gitlab_lister.run()

    tasks = gitlab_lister.scheduler.search_tasks(task_type='load-git')
    assert len(tasks) == 10

    for task in tasks:
        assert task['type'] == 'load-git'

        # positional arguments: exactly one, the origin url
        arguments = task['arguments']
        positional = arguments['args']
        assert len(positional) == 1
        origin_url = positional[0]
        assert origin_url.startswith('https://gitlab.com')

        # no keyword arguments expected
        keyword = arguments['kwargs']
        assert keyword == {}

        assert task['policy'] == 'recurring'
        assert task['priority'] is None