diff --git a/swh/lister/core/lister_transports.py b/swh/lister/core/lister_transports.py --- a/swh/lister/core/lister_transports.py +++ b/swh/lister/core/lister_transports.py @@ -12,7 +12,7 @@ import requests import xmltodict -from typing import Optional, Union +from typing import Any, Mapping, Optional, Union from swh.lister import USER_AGENT_TEMPLATE, __version__ @@ -37,7 +37,8 @@ ' the API-specific class inheriting this.' ) # type: Union[AbstractAttribute, Optional[str]] - EXPECTED_STATUS_CODES = (200, 429, 403, 404) + EXPECTED_STATUS_CODES = (200, 429, 401, 403, 404) + creds: Optional[Mapping[str, Any]] = None def request_headers(self): """Returns dictionary of any request headers needed by the server. @@ -48,7 +49,7 @@ 'User-Agent': USER_AGENT_TEMPLATE % self.lister_version } - def request_instance_credentials(self): + def request_instance_credentials(self) -> Mapping[str, Any]: """Returns dictionary of any credentials configuration needed by the forge instance to list. @@ -71,22 +72,37 @@ riseup: # has many instances - username: someone password: ... - - ... + - username: another + ... gitlab: - username: someone password: ... - - ... Returns: - list of credential dicts for the current lister. + Dict of credential dicts (key: login, value: dict) for the current + lister. For example, for lister 'gitlab', instance 'riseup': + + { + someone: {'username': 'someone', "password": ...}, + another: {'username': 'another', "password": ...}, + ... + } + """ - all_creds = self.config.get('credentials') - if not all_creds: - return [] - lister_creds = all_creds.get(self.LISTER_NAME, {}) - creds = lister_creds.get(self.instance, []) - return creds + if not self.creds: + all_creds = self.config.get('credentials') # type: ignore + if not all_creds: + return {} + lister_creds = all_creds.get(self.LISTER_NAME, {}) # type: ignore + instance_creds = lister_creds.get( + self.instance, []) # type: ignore + creds = {} + for cred in instance_creds: + creds[cred['username']] = cred + self.creds = creds + + return self.creds def request_uri(self, identifier): """Get the full request URI given the transport_request identifier. @@ -113,8 +129,9 @@ creds = self.request_instance_credentials() if not creds: return params - auth = random.choice(creds) if creds else None - if auth: + login = random.choice(list(creds)) if creds else None + if login: + auth = creds[login] params['auth'] = (auth['username'], auth['password']) return params diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py --- a/swh/lister/github/lister.py +++ b/swh/lister/github/lister.py @@ -6,7 +6,7 @@ import re import time -from typing import Any +from typing import Any, Optional from swh.lister.core.indexing_lister import IndexingHttpLister from swh.lister.github.models import GitHubModel @@ -34,14 +34,44 @@ } def transport_quota_check(self, response): + """Check for rate limit usage + + """ + def delay(response) -> float: + """Compute next delay query tryout given the current response""" + reset_at = int(response.headers['X-RateLimit-Reset']) + delay = min(reset_at - time.time(), 3600) + return delay + + def credential_used(response) -> Optional[str]: + """Compute the current credential used given the current response + + """ + authorization = response.request.headers.get('Authorization') + if not authorization: + return None + authorization = authorization.split('Basic ') + from base64 import b64decode + cred_tuple = b64decode(authorization).decode('utf-8').split(':') + return cred_tuple[0] # the credential login + x_rate_limit_remaining = response.headers.get('X-RateLimit-Remaining') if not x_rate_limit_remaining: return False, 0 reqs_remaining = int(x_rate_limit_remaining) if response.status_code == 403 and reqs_remaining == 0: - reset_at = int(response.headers['X-RateLimit-Reset']) - delay = min(reset_at - time.time(), 3600) - return True, delay + return True, delay(response) + if response.status_code == 401: + data = response.json() + if data['message'] == 'Bad Credentials': + # The authentication token used is expired. Remove it from the + # configuration and try again immediately with another + if hasattr(self, 'creds'): + login = credential_used(response) + if login in self.creds: + self.creds.pop(login) + return False, 0 + return True, delay(response) return False, 0 def get_next_target_from_response(self, response): diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py --- a/swh/lister/phabricator/lister.py +++ b/swh/lister/phabricator/lister.py @@ -47,8 +47,9 @@ if not creds: raise ValueError( 'Phabricator forge needs authentication credential to list.') - api_token = random.choice(creds)['password'] + login = random.choice(list(creds)) + api_token = creds[login]['password'] return {'headers': self.request_headers() or {}, 'params': {'api.token': api_token}} diff --git a/swh/lister/phabricator/tests/conftest.py b/swh/lister/phabricator/tests/conftest.py --- a/swh/lister/phabricator/tests/conftest.py +++ b/swh/lister/phabricator/tests/conftest.py @@ -18,6 +18,7 @@ 'credentials': { 'phabricator': { lister.instance: [{ + 'username': 'user', 'password': 'foo' }] }} diff --git a/swh/lister/phabricator/tests/test_lister.py b/swh/lister/phabricator/tests/test_lister.py --- a/swh/lister/phabricator/tests/test_lister.py +++ b/swh/lister/phabricator/tests/test_lister.py @@ -49,7 +49,7 @@ """ if override_config or self.fl is None: credentials = {'phabricator': {'fake': [ - {'password': 'toto'} + {'username': 'fake', 'password': 'toto'} ]}} override_config = dict(credentials=credentials, **(override_config or {})) @@ -96,6 +96,7 @@ fl.config['credentials'] = { 'phabricator': { 'other_fake': [{ + 'username': 'fake-user', 'password': 'foo' }] }