diff --git a/swh/lister/core/lister_transports.py b/swh/lister/core/lister_transports.py --- a/swh/lister/core/lister_transports.py +++ b/swh/lister/core/lister_transports.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 the Software Heritage developers +# Copyright (C) 2017-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -12,7 +12,7 @@ import requests import xmltodict -from typing import Optional, Union +from typing import Any, Dict, Mapping, Optional, Tuple, Union from swh.lister import USER_AGENT_TEMPLATE, __version__ @@ -37,7 +37,11 @@ ' the API-specific class inheriting this.' ) # type: Union[AbstractAttribute, Optional[str]] - EXPECTED_STATUS_CODES = (200, 429, 403, 404) + EXPECTED_STATUS_CODES = (200, 429, 401, 403, 404) + # All instance credentials to use + creds: Optional[Dict[str, Any]] = None + # The current elected authentication tuple + auth: Optional[Tuple[str, str]] = None def request_headers(self): """Returns dictionary of any request headers needed by the server. @@ -48,7 +52,7 @@ 'User-Agent': USER_AGENT_TEMPLATE % self.lister_version } - def request_instance_credentials(self): + def request_instance_credentials(self) -> Mapping[str, Any]: """Returns dictionary of any credentials configuration needed by the forge instance to list. @@ -71,22 +75,37 @@ riseup: # has many instances - username: someone password: ... - - ... + - username: another + ... gitlab: - username: someone password: ... - - ... Returns: - list of credential dicts for the current lister. + Dict of credential dicts (key: login, value: dict) for the current + lister. For example, for lister 'gitlab', instance 'riseup': + + { + someone: {'username': 'someone', "password": ...}, + another: {'username': 'another', "password": ...}, + ... + } + """ - all_creds = self.config.get('credentials') - if not all_creds: - return [] - lister_creds = all_creds.get(self.LISTER_NAME, {}) - creds = lister_creds.get(self.instance, []) - return creds + if self.creds is None: + all_creds = self.config.get('credentials') # type: ignore + if not all_creds: + return {} + lister_creds = all_creds.get(self.LISTER_NAME, {}) # type: ignore + instance_creds = lister_creds.get( + self.instance, []) # type: ignore + creds = {} + for cred in instance_creds: + creds[cred['username']] = cred + self.creds = creds + + return self.creds def request_uri(self, identifier): """Get the full request URI given the transport_request identifier. @@ -113,11 +132,33 @@ creds = self.request_instance_credentials() if not creds: return params - auth = random.choice(creds) if creds else None - if auth: - params['auth'] = (auth['username'], auth['password']) + if self.auth: # Already elected token, reuse + params['auth'] = self.auth + else: + login = random.choice(list(creds)) if creds else None + if login: + auth = creds[login] + self.auth = (auth['username'], auth['password']) + params['auth'] = self.auth return params + def reset_authentication_token(self) -> bool: + """Reset the current authentication token if any. + + Returns + True if the reset is actually effective (token has been removed), + False otherwise. + + """ + effective_reset = False + if self.auth is not None and self.creds: + login = self.auth[0] + actual_token = self.creds.get(login) + if actual_token == self.auth: + self.creds.pop(login) + effective_reset = True + return effective_reset + def transport_quota_check(self, response): """Implements ListerBase.transport_quota_check with standard 429 code check for HTTP with Requests library. diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py --- a/swh/lister/github/lister.py +++ b/swh/lister/github/lister.py @@ -34,14 +34,31 @@ } def transport_quota_check(self, response): + """Check for rate limit usage + + """ + def delay(response) -> float: + """Compute next delay query tryout given the current response""" + reset_at = int(response.headers['X-RateLimit-Reset']) + delay = min(reset_at - time.time(), 3600) + return delay + x_rate_limit_remaining = response.headers.get('X-RateLimit-Remaining') if not x_rate_limit_remaining: return False, 0 reqs_remaining = int(x_rate_limit_remaining) if response.status_code == 403 and reqs_remaining == 0: - reset_at = int(response.headers['X-RateLimit-Reset']) - delay = min(reset_at - time.time(), 3600) - return True, delay + return True, delay(response) + if response.status_code == 401: + data = response.json() + if data['message'] == 'Bad Credentials': + # The authentication token used is possibly expired + effective_reset = self.reset_authentication_token() + if effective_reset: + # Remove it from the configuration and try again + # immediately with another + return False, 0 + return True, delay(response) return False, 0 def get_next_target_from_response(self, response): diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py --- a/swh/lister/phabricator/lister.py +++ b/swh/lister/phabricator/lister.py @@ -47,8 +47,9 @@ if not creds: raise ValueError( 'Phabricator forge needs authentication credential to list.') - api_token = random.choice(creds)['password'] + login = random.choice(list(creds)) + api_token = creds[login]['password'] return {'headers': self.request_headers() or {}, 'params': {'api.token': api_token}} diff --git a/swh/lister/phabricator/tests/conftest.py b/swh/lister/phabricator/tests/conftest.py --- a/swh/lister/phabricator/tests/conftest.py +++ b/swh/lister/phabricator/tests/conftest.py @@ -18,6 +18,7 @@ 'credentials': { 'phabricator': { lister.instance: [{ + 'username': 'user', 'password': 'foo' }] }} diff --git a/swh/lister/phabricator/tests/test_lister.py b/swh/lister/phabricator/tests/test_lister.py --- a/swh/lister/phabricator/tests/test_lister.py +++ b/swh/lister/phabricator/tests/test_lister.py @@ -49,7 +49,7 @@ """ if override_config or self.fl is None: credentials = {'phabricator': {'fake': [ - {'password': 'toto'} + {'username': 'fake', 'password': 'toto'} ]}} override_config = dict(credentials=credentials, **(override_config or {})) @@ -96,6 +96,7 @@ fl.config['credentials'] = { 'phabricator': { 'other_fake': [{ + 'username': 'fake-user', 'password': 'foo' }] }