Changeset View
Standalone View
swh/lister/core/lister_transports.py
# Copyright (C) 2017-2018 the Software Heritage developers | # Copyright (C) 2017-2018 the Software Heritage developers | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import abc | import abc | ||||
import random | import random | ||||
from datetime import datetime | from datetime import datetime | ||||
from email.utils import parsedate | from email.utils import parsedate | ||||
from pprint import pformat | from pprint import pformat | ||||
import logging | import logging | ||||
import requests | import requests | ||||
import xmltodict | import xmltodict | ||||
from typing import Optional, Union | from typing import Any, Mapping, Optional, Union | ||||
from swh.lister import USER_AGENT_TEMPLATE, __version__ | from swh.lister import USER_AGENT_TEMPLATE, __version__ | ||||
from .abstractattribute import AbstractAttribute | from .abstractattribute import AbstractAttribute | ||||
from .lister_base import FetchError | from .lister_base import FetchError | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
class ListerHttpTransport(abc.ABC): | class ListerHttpTransport(abc.ABC): | ||||
"""Use the Requests library for making Lister endpoint requests. | """Use the Requests library for making Lister endpoint requests. | ||||
To be used in conjunction with ListerBase or a subclass of it. | To be used in conjunction with ListerBase or a subclass of it. | ||||
""" | """ | ||||
DEFAULT_URL = None # type: Optional[str] | DEFAULT_URL = None # type: Optional[str] | ||||
PATH_TEMPLATE = \ | PATH_TEMPLATE = \ | ||||
AbstractAttribute( | AbstractAttribute( | ||||
'string containing a python string format pattern that produces' | 'string containing a python string format pattern that produces' | ||||
' the API endpoint path for listing stored repositories when given' | ' the API endpoint path for listing stored repositories when given' | ||||
' an index, e.g., "/repositories?after=%s". To be implemented in' | ' an index, e.g., "/repositories?after=%s". To be implemented in' | ||||
' the API-specific class inheriting this.' | ' the API-specific class inheriting this.' | ||||
) # type: Union[AbstractAttribute, Optional[str]] | ) # type: Union[AbstractAttribute, Optional[str]] | ||||
EXPECTED_STATUS_CODES = (200, 429, 403, 404) | EXPECTED_STATUS_CODES = (200, 429, 401, 403, 404) | ||||
creds: Optional[Mapping[str, Any]] = None | |||||
def request_headers(self): | def request_headers(self): | ||||
"""Returns dictionary of any request headers needed by the server. | """Returns dictionary of any request headers needed by the server. | ||||
MAY BE OVERRIDDEN if request headers are needed. | MAY BE OVERRIDDEN if request headers are needed. | ||||
""" | """ | ||||
return { | return { | ||||
'User-Agent': USER_AGENT_TEMPLATE % self.lister_version | 'User-Agent': USER_AGENT_TEMPLATE % self.lister_version | ||||
} | } | ||||
def request_instance_credentials(self): | def request_instance_credentials(self) -> Mapping[str, Any]: | ||||
"""Returns dictionary of any credentials configuration needed by the | """Returns dictionary of any credentials configuration needed by the | ||||
forge instance to list. | forge instance to list. | ||||
The 'credentials' configuration is expected to be a dict of multiple | The 'credentials' configuration is expected to be a dict of multiple | ||||
levels. The first level is the lister's name, the second is the | levels. The first level is the lister's name, the second is the | ||||
lister's instance name, which value is expected to be a list of | lister's instance name, which value is expected to be a list of | ||||
credential structures (typically a couple username/password). | credential structures (typically a couple username/password). | ||||
For example: | For example: | ||||
credentials: | credentials: | ||||
github: # github lister | github: # github lister | ||||
github: # has only one instance (so far) | github: # has only one instance (so far) | ||||
- username: some | - username: some | ||||
password: somekey | password: somekey | ||||
- username: one | - username: one | ||||
password: onekey | password: onekey | ||||
- ... | - ... | ||||
gitlab: # gitlab lister | gitlab: # gitlab lister | ||||
riseup: # has many instances | riseup: # has many instances | ||||
- username: someone | - username: someone | ||||
password: ... | password: ... | ||||
- ... | - username: another | ||||
... | |||||
gitlab: | gitlab: | ||||
- username: someone | - username: someone | ||||
password: ... | password: ... | ||||
- ... | |||||
Returns: | Returns: | ||||
list of credential dicts for the current lister. | Dict of credential dicts (key: login, value: dict) for the current | ||||
lister. For example, for lister 'gitlab', instance 'riseup': | |||||
{ | |||||
someone: {'username': 'someone', "password": ...}, | |||||
another: {'username': 'another', "password": ...}, | |||||
... | |||||
} | |||||
""" | """ | ||||
all_creds = self.config.get('credentials') | if not self.creds: | ||||
ardumont: `type: ignore` because otherwise it complains about the `config` not being declared for that… | |||||
all_creds = self.config.get('credentials') # type: ignore | |||||
if not all_creds: | if not all_creds: | ||||
return [] | return {} | ||||
lister_creds = all_creds.get(self.LISTER_NAME, {}) | lister_creds = all_creds.get(self.LISTER_NAME, {}) # type: ignore | ||||
creds = lister_creds.get(self.instance, []) | instance_creds = lister_creds.get( | ||||
return creds | self.instance, []) # type: ignore | ||||
creds = {} | |||||
for cred in instance_creds: | |||||
creds[cred['username']] = cred | |||||
self.creds = creds | |||||
return self.creds | |||||
def request_uri(self, identifier): | def request_uri(self, identifier): | ||||
"""Get the full request URI given the transport_request identifier. | """Get the full request URI given the transport_request identifier. | ||||
MAY BE OVERRIDDEN if something more complex than the PATH_TEMPLATE is | MAY BE OVERRIDDEN if something more complex than the PATH_TEMPLATE is | ||||
required. | required. | ||||
""" | """ | ||||
path = self.PATH_TEMPLATE % identifier | path = self.PATH_TEMPLATE % identifier | ||||
Show All 10 Lines | def request_params(self, identifier): | ||||
is needed. | is needed. | ||||
""" | """ | ||||
params = {} | params = {} | ||||
params['headers'] = self.request_headers() or {} | params['headers'] = self.request_headers() or {} | ||||
creds = self.request_instance_credentials() | creds = self.request_instance_credentials() | ||||
if not creds: | if not creds: | ||||
return params | return params | ||||
auth = random.choice(creds) if creds else None | login = random.choice(list(creds)) if creds else None | ||||
if auth: | if login: | ||||
auth = creds[login] | |||||
params['auth'] = (auth['username'], auth['password']) | params['auth'] = (auth['username'], auth['password']) | ||||
return params | return params | ||||
Not Done Inline ActionsI don't really understand the semantic of 'reset' in the method (at first glance). Why this 'reset' does remove a token? Remove from what? Why/when is this useful or needed? I mean credentials are loaded by the 'request_instance_credentials' method. Now this method (I guess called when a credential usage generated errors ; not yet read the remaining of the code), will gradually remove entries from the list of known credentials. So each time an error occurs this list will be pop'd. Not sure I like this behavior very much. It might be better to be able to temporarily disable a credential. Then check on a regular base if the credential can be enabled again. Also, the credential 'status' is kept local to the class. Whereas we have many workers. It might be useful to think about a solution for this state to be shared among workers (using memcache, etcd or similar). Not to be done right now in this diff, obviously :D douardda: I don't really understand the semantic of 'reset' in the method (at first glance).
Why this… | |||||
Done Inline Actions
yes, remove_authentication_token sounds better now.
From the current setup initially loaded (from the configuration file) at the task startup.
That'd be the task linked in the description T2099. ardumont: > Why this 'reset' does remove a token?
yes,` remove_authentication_token` sounds better now. | |||||
def transport_quota_check(self, response): | def transport_quota_check(self, response): | ||||
"""Implements ListerBase.transport_quota_check with standard 429 | """Implements ListerBase.transport_quota_check with standard 429 | ||||
code check for HTTP with Requests library. | code check for HTTP with Requests library. | ||||
MAY BE OVERRIDDEN if the server notifies about rate limits in a | MAY BE OVERRIDDEN if the server notifies about rate limits in a | ||||
non-standard way that doesn't use HTTP 429 and the Retry-After | non-standard way that doesn't use HTTP 429 and the Retry-After | ||||
response header. ( https://tools.ietf.org/html/rfc6585#section-4 ) | response header. ( https://tools.ietf.org/html/rfc6585#section-4 ) | ||||
▲ Show 20 Lines • Show All 104 Lines • Show Last 20 Lines |
type: ignore because otherwise it complains about the config not being declared for that class...
which is also true...
but...
let's refactor this another day.
(same goes for the other type: ignore below)