Changeset View
Standalone View
swh/lister/bitbucket/lister.py
# Copyright (C) 2017-2019 The Software Heritage developers | # Copyright (C) 2017-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import logging | import logging | ||||
import iso8601 | import iso8601 | ||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
from typing import Any | from typing import Any, Dict, List, Optional, Union | ||||
from urllib import parse | from urllib import parse | ||||
from requests import Response | |||||
from swh.lister.bitbucket.models import BitBucketModel | from swh.lister.bitbucket.models import BitBucketModel | ||||
from swh.lister.core.indexing_lister import IndexingHttpLister | from swh.lister.core.indexing_lister import IndexingHttpLister | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
class BitBucketLister(IndexingHttpLister): | class BitBucketLister(IndexingHttpLister): | ||||
PATH_TEMPLATE = '/repositories?after=%s' | PATH_TEMPLATE = '/repositories?after=%s' | ||||
MODEL = BitBucketModel | MODEL = BitBucketModel | ||||
LISTER_NAME = 'bitbucket' | LISTER_NAME = 'bitbucket' | ||||
DEFAULT_URL = 'https://api.bitbucket.org/2.0' | DEFAULT_URL = 'https://api.bitbucket.org/2.0' | ||||
instance = 'bitbucket' | instance = 'bitbucket' | ||||
default_min_bound = datetime.fromtimestamp(0, timezone.utc) # type: Any | default_min_bound = datetime.fromtimestamp(0, timezone.utc) # type: Any | ||||
def __init__(self, url=None, override_config=None, per_page=100): | def __init__(self, url: str = None, | ||||
override_config=None, per_page: int = 100) -> None: | |||||
ardumont: If it can be None, it must be `Optional`.
So here it's `Optional[str]`.
Same goes for `override_config`, `Optional[bool]`. | |||||
super().__init__(url=url, override_config=override_config) | super().__init__(url=url, override_config=override_config) | ||||
per_page = self.config.get('per_page', per_page) | per_page = self.config.get('per_page', per_page) | ||||
self.PATH_TEMPLATE = '%s&pagelen=%s' % ( | self.PATH_TEMPLATE = '%s&pagelen=%s' % ( | ||||
self.PATH_TEMPLATE, per_page) | self.PATH_TEMPLATE, per_page) | ||||
def get_model_from_repo(self, repo): | def get_model_from_repo(self, repo: Dict) -> Dict[str, Any]: | ||||
return { | return { | ||||
'uid': repo['uuid'], | 'uid': repo['uuid'], | ||||
'indexable': iso8601.parse_date(repo['created_on']), | 'indexable': iso8601.parse_date(repo['created_on']), | ||||
'name': repo['name'], | 'name': repo['name'], | ||||
'full_name': repo['full_name'], | 'full_name': repo['full_name'], | ||||
'html_url': repo['links']['html']['href'], | 'html_url': repo['links']['html']['href'], | ||||
'origin_url': repo['links']['clone'][0]['href'], | 'origin_url': repo['links']['clone'][0]['href'], | ||||
'origin_type': repo['scm'], | 'origin_type': repo['scm'], | ||||
} | } | ||||
def get_next_target_from_response(self, response): | def get_next_target_from_response(self, response: Response | ||||
Not Done — Inline Actions — vlorentz: Use `Optional[datetime]` here instead of `Union[None, datetime]`. | |||||
) -> Union[None, datetime]: | |||||
"""This will read the 'next' link from the api response if any | """This will read the 'next' link from the api response if any | ||||
and return it as a datetime. | and return it as a datetime. | ||||
Args: | Args: | ||||
response (Response): requests' response from api call | response (Response): requests' response from api call | ||||
Returns: | Returns: | ||||
next date as a datetime | next date as a datetime | ||||
""" | """ | ||||
body = response.json() | body = response.json() | ||||
next_ = body.get('next') | next_ = body.get('next') | ||||
if next_ is not None: | if next_ is not None: | ||||
next_ = parse.urlparse(next_) | next_ = parse.urlparse(next_) | ||||
return iso8601.parse_date(parse.parse_qs(next_.query)['after'][0]) | return iso8601.parse_date(parse.parse_qs(next_.query)['after'][0]) | ||||
Not Done — Inline Actions — vlorentz: not needed | |||||
Not Done — Inline Actions — yanng23: I added this return case because when I annotate the function, mypy raises an error about a missing return statement. So I think it's better practice to handle all return cases, but if you prefer I can remove it and just add a "type: ignore". | |||||
Not Done — Inline Actions — vlorentz: Indeed. Forget my comment then. | |||||
return None | |||||
def transport_response_simplified(self, response): | def transport_response_simplified(self, response: Response | ||||
) -> List[Dict[str, Any]]: | |||||
repos = response.json()['values'] | repos = response.json()['values'] | ||||
return [self.get_model_from_repo(repo) for repo in repos] | return [self.get_model_from_repo(repo) for repo in repos] | ||||
def request_uri(self, identifier): | def request_uri(self, identifier: datetime) -> str: | ||||
identifier = parse.quote(identifier.isoformat()) | identifier_str = parse.quote(identifier.isoformat()) | ||||
return super().request_uri(identifier or '1970-01-01') | return super().request_uri(identifier_str or '1970-01-01') | ||||
Not Done — Inline Actions — ardumont: Why the `type: ignore`?
It's possible some implementations are inconsistent.
In that case, you could use `Union[datetime, <other-type>, etc...]`. | |||||
def is_within_bounds(self, inner, lower=None, upper=None): | def is_within_bounds(self, inner: int, lower: Optional[int] = None, | ||||
upper: Optional[int] = None) -> bool: | |||||
# values are expected to be datetimes | # values are expected to be datetimes | ||||
if lower is None and upper is None: | if lower is None and upper is None: | ||||
ret = True | ret = True | ||||
elif lower is None: | elif lower is None: | ||||
ret = inner <= upper | ret = inner <= upper # type: ignore | ||||
Not Done — Inline Actions — vlorentz: mypy found an error here; you should fix it instead of ignoring it. | |||||
Done — Inline Actions — yanng23: I don't know how to fix it. Mypy says "Unsupported operand types for <= ("int" and "None")", but because of the if branches this error cannot occur, which is why I chose to ignore it. How can I fix it? | |||||
Not Done — Inline Actions — vlorentz: Oh indeed, I misunderstood the conditionals. You could do it like this to work around mypy:
    if lower is None:
        if upper is None:
            ...
        else:
            ...
    else:
        if upper is None:
            ...
        else:
            ...
(I also find this more readable, personally.) | |||||
elif upper is None: | elif upper is None: | ||||
ret = inner >= lower | ret = inner >= lower | ||||
else: | else: | ||||
ret = lower <= inner <= upper | ret = lower <= inner <= upper | ||||
return ret | return ret |
If it can be None, it must be `Optional`.
So here it's `Optional[str]`.
Same goes for `override_config`, `Optional[bool]`.