Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/pypi/lister.py
# Copyright (C) 2018-2021 The Software Heritage developers | # Copyright (C) 2018-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from dataclasses import asdict, dataclass | from dataclasses import asdict, dataclass | ||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
import logging | import logging | ||||
from time import sleep | from time import sleep | ||||
from typing import Any, Dict, Iterator, List, Optional, Tuple | from typing import Any, Dict, Iterator, List, Optional, Tuple | ||||
from xmlrpc.client import Fault, ServerProxy | from xmlrpc.client import Fault, ServerProxy | ||||
from tenacity.before_sleep import before_sleep_log | from tenacity.before_sleep import before_sleep_log | ||||
from swh.lister.utils import throttling_retry | from swh.lister.utils import http_retry | ||||
from swh.scheduler.interface import SchedulerInterface | from swh.scheduler.interface import SchedulerInterface | ||||
from swh.scheduler.model import ListedOrigin | from swh.scheduler.model import ListedOrigin | ||||
from ..pattern import CredentialsType, Lister | from ..pattern import CredentialsType, Lister | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
# Type returned by the XML-RPC changelog call: | # Type returned by the XML-RPC changelog call: | ||||
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | ): | ||||
self.last_processed_serial: Optional[int] = None | self.last_processed_serial: Optional[int] = None | ||||
def state_from_dict(self, d: Dict[str, Any]) -> PyPIListerState:
    """Rebuild the lister state from its dictionary form.

    Only the ``"last_serial"`` key is read; when it is absent, the state is
    created with ``last_serial=None``.
    """
    last_serial = d.get("last_serial")
    return PyPIListerState(last_serial=last_serial)
def state_to_dict(self, state: PyPIListerState) -> Dict[str, Any]:
    """Convert the lister state into a dictionary via ``dataclasses.asdict``."""
    state_dict: Dict[str, Any] = asdict(state)
    return state_dict
@http_retry(
    retry=_if_rate_limited, before_sleep=before_sleep_log(logger, logging.WARNING)
)
def _changelog_last_serial(self, client: ServerProxy) -> int:
    """Return the serial given by the ``changelog_last_serial`` XML-RPC call.

    The call lives in its own method so that it can be retried: the decorator
    uses the ``_if_rate_limited`` predicate to decide whether to retry, and a
    message is logged at WARNING level before each retry sleep.

    Args:
        client: XML-RPC proxy on which ``changelog_last_serial`` is called.

    Returns:
        The serial returned by the server.

    Raises:
        TypeError: if the server's answer is not an integer.
    """
    serial = client.changelog_last_serial()
    # Explicit check rather than ``assert``: assertions are stripped when
    # Python runs with ``-O``, which would let a malformed answer through.
    if not isinstance(serial, int):
        raise TypeError(
            "changelog_last_serial returned %s instead of an integer"
            % type(serial).__name__
        )
    return serial
@http_retry(
    retry=_if_rate_limited, before_sleep=before_sleep_log(logger, logging.WARNING)
)
def _changelog_since_serial(
    self, client: ServerProxy, serial: int
) -> List[ChangelogEntry]:
    """Return the result of the ``changelog_since_serial`` XML-RPC call.

    The call lives in its own method so that it can be retried: the decorator
    uses the ``_if_rate_limited`` predicate to decide whether to retry, and a
    message is logged at WARNING level before each retry sleep.
    """
    # Wait one second before every call, to avoid the initial warning about
    # throttling.
    sleep(1)
    entries = client.changelog_since_serial(serial)
    return entries  # type: ignore
▲ Show 20 Lines • Show All 69 Lines • Show Last 20 Lines |