diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py
index ac3cb31..3824b23 100644
--- a/swh/lister/gitlab/lister.py
+++ b/swh/lister/gitlab/lister.py
@@ -1,124 +1,124 @@
 # Copyright (C) 2018 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import random
 import time
 
 from .. import utils
 from ..core.paging_lister import PageByPageHttpLister
 from .models import GitLabModel
 
 
 class GitLabLister(PageByPageHttpLister):
     # Template path expecting an integer that represents the page id
     PATH_TEMPLATE = '/projects?page=%d&order_by=id'
     MODEL = GitLabModel
     LISTER_NAME = 'gitlab'
 
     def __init__(self, api_baseurl=None, instance=None,
                  override_config=None, sort='asc'):
         super().__init__(api_baseurl=api_baseurl,
                          override_config=override_config)
         self.instance = instance
         self.PATH_TEMPLATE = '%s&sort=%s' % (self.PATH_TEMPLATE, sort)
 
     @property
     def ADDITIONAL_CONFIG(self):
         """Override additional config as the 'credentials' structure change
            between the ancestor classes and this class.
 
            cf. request_params method below
 
         """
         default_config = super().ADDITIONAL_CONFIG
         # 'credentials' is a dict of (instance, {username, password}) dict
         default_config['credentials'] = ('dict', {})
         return default_config
 
     def request_params(self, identifier):
         """Get the full parameters passed to requests given the
         transport_request identifier.
 
         For the gitlab lister, the 'credentials' entries is configured
         per instance. For example:
 
         - credentials:
           - gitlab.com:
             - username: user0
               password:
             - username: user1
               password:
             - ...
           - other-gitlab-instance:
             ...
 
         """
         params = {
             'headers': self.request_headers() or {}
         }
         # Retrieve the credentials per instance
         creds = self.config['credentials']
         if creds:
             creds_lister = creds[self.instance]
             auth = random.choice(creds_lister) if creds else None
             if auth:
                 params['auth'] = (auth['username'], auth['password'])
         return params
 
     def uid(self, repo):
         return '%s/%s' % (self.instance, repo['path_with_namespace'])
 
     def get_model_from_repo(self, repo):
         return {
             'instance': self.instance,
             'uid': self.uid(repo),
             'name': repo['name'],
             'full_name': repo['path_with_namespace'],
             'html_url': repo['web_url'],
             'origin_url': repo['http_url_to_repo'],
             'origin_type': 'git',
             'description': repo['description'],
         }
 
     def transport_quota_check(self, response):
         """Deal with rate limit if any.
 
         """
         # not all gitlab instance have rate limit
         if 'RateLimit-Remaining' in response.headers:
             reqs_remaining = int(response.headers['RateLimit-Remaining'])
             if response.status_code == 403 and reqs_remaining == 0:
                 reset_at = int(response.headers['RateLimit-Reset'])
                 delay = min(reset_at - time.time(), 3600)
                 return True, delay
         return False, 0
 
+    def _get_int(self, headers, key):
+        """Return the value of the `key` header converted to int, or None
+        when that header is absent or empty.
+
+        """
+        _val = headers.get(key)
+        if _val:
+            return int(_val)
+
     def get_next_target_from_response(self, response):
         """Determine the next page identifier.
 
         """
-        _next = utils.get(response.headers, ['X-Next-Page', 'x-next-page'])
-        if _next:
-            return int(_next)
+        return self._get_int(response.headers, 'x-next-page')
 
     def get_pages_information(self):
         """Determine pages information.
 
         """
         response = self.transport_head(identifier=1)
         h = response.headers
-        total = utils.get(h, ['X-Total', 'x-total'])
-        total_pages = utils.get(h, ['X-Total-Pages', 'x-total-pages'])
-        per_page = utils.get(h, ['X-Per-Page', 'x-per-page'])
-        if total is not None:
-            total = int(total)
-        if total_pages is not None:
-            total_pages = int(total_pages)
-        if per_page is not None:
-            per_page = int(per_page)
-        return total, total_pages, per_page
+        return (self._get_int(h, 'x-total'),
+                self._get_int(h, 'x-total-pages'),
+                self._get_int(h, 'x-per-page'))
 
     def transport_response_simplified(self, response):
         repos = response.json()
         return [self.get_model_from_repo(repo) for repo in repos]
diff --git a/swh/lister/tests/test_utils.py b/swh/lister/tests/test_utils.py
index 3bd8939..978127a 100644
--- a/swh/lister/tests/test_utils.py
+++ b/swh/lister/tests/test_utils.py
@@ -1,67 +1,28 @@
 # Copyright (C) 2018 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import unittest
 
 from nose.tools import istest
 
 from swh.lister import utils
 
 
 class UtilsTest(unittest.TestCase):
 
-    @istest
-    def get(self):
-        data = {
-            'X-Next-Page': None,
-            'x-next-page': 1,
-        }
-        actual_value = utils.get(data, ['X-Next-Page', 'x-next-page'])
-
-        self.assertEqual(actual_value, 1)
-
-        data = {
-            'X-Next-Page': 10,
-            'x-next-page': 1,
-        }
-        actual_value = utils.get(data, ['X-Next-Page', 'x-next-page'])
-
-        self.assertEqual(actual_value, 10)
-
-        data = {
-            'x-next-page': 100,
-        }
-        actual_value = utils.get(data, ['X-Next-Page', 'x-next-page'])
-
-        self.assertEqual(actual_value, 100)
-
-    @istest
-    def get_empty(self):
-        self.assertIsNone(utils.get({}, []))
-        self.assertIsNone(utils.get({'a': 1}, ['b']))
-        self.assertIsNone(utils.get({'b': 2}, []))
-        self.assertIsNone(utils.get({'b': 2}, []))
-
-    @istest
-    def get_errors(self):
-        with self.assertRaises(TypeError):
-            self.assertIsNone(utils.get({}, None))
-        with self.assertRaises(AttributeError):
-            self.assertIsNone(utils.get(None, ['a']))
-
     @istest
     def split_range(self):
         actual_ranges = list(utils.split_range(14, 5))
         self.assertEqual(actual_ranges, [(0, 5), (5, 10), (10, 14)])
 
         actual_ranges = list(utils.split_range(19, 10))
         self.assertEqual(actual_ranges, [(0, 10), (10, 19)])
 
     @istest
     def split_range_errors(self):
         with self.assertRaises(TypeError):
             list(utils.split_range(None, 1))
 
         with self.assertRaises(TypeError):
             list(utils.split_range(100, None))
diff --git a/swh/lister/utils.py b/swh/lister/utils.py
index fba2d23..68e8b82 100644
--- a/swh/lister/utils.py
+++ b/swh/lister/utils.py
@@ -1,25 +1,14 @@
 # Copyright (C) 2018 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 
-def get(d, keys):
-    """Given a dict, lookup in order for keys with values not None.
-
-    """
-    for key in keys:
-        v = d.get(key)
-        if v is not None:
-            return v
-    return None
-
-
 def split_range(total_pages, nb_pages):
     prev_index = None
     for index in range(0, total_pages, nb_pages):
         if index is not None and prev_index is not None:
             yield prev_index, index
         prev_index = index
 
     if index != total_pages:
         yield index, total_pages