Changeset View
Changeset View
Standalone View
Standalone View
docs/new_lister_template.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from dataclasses import asdict, dataclass | from dataclasses import asdict, dataclass | ||||
import logging | import logging | ||||
from typing import Any, Dict, Iterator, List | from typing import Any, Dict, Iterator, List | ||||
from urllib.parse import urljoin | from urllib.parse import urljoin | ||||
import requests | import requests | ||||
from tenacity.before_sleep import before_sleep_log | from tenacity.before_sleep import before_sleep_log | ||||
from swh.lister.utils import throttling_retry | from swh.lister.utils import http_retry | ||||
from swh.scheduler.interface import SchedulerInterface | from swh.scheduler.interface import SchedulerInterface | ||||
from swh.scheduler.model import ListedOrigin | from swh.scheduler.model import ListedOrigin | ||||
from .. import USER_AGENT | from .. import USER_AGENT | ||||
from ..pattern import CredentialsType, Lister | from ..pattern import CredentialsType, Lister | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines | ): | ||||
) | ) | ||||
def state_from_dict(self, d: Dict[str, Any]) -> NewForgeListerState: | def state_from_dict(self, d: Dict[str, Any]) -> NewForgeListerState: | ||||
return NewForgeListerState(**d) | return NewForgeListerState(**d) | ||||
def state_to_dict(self, state: NewForgeListerState) -> Dict[str, Any]: | def state_to_dict(self, state: NewForgeListerState) -> Dict[str, Any]: | ||||
return asdict(state) | return asdict(state) | ||||
@throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) | @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) | ||||
def page_request(self, url, params) -> requests.Response: | def page_request(self, url, params) -> requests.Response: | ||||
# Do the network resource request under a retrying decorator | # Do the network resource request under a retrying decorator | ||||
# to handle rate limiting and transient errors up to a limit. | # to handle rate limiting and transient errors up to a limit. | ||||
# `throttling_retry` by default uses the `requests` library to check | # `http_retry` by default uses the `requests` library to check | ||||
# only for rate-limit and a base-10 exponential waiting strategy. | # only for rate-limit and a base-10 exponential waiting strategy. | ||||
# This can be customized by passing waiting, retrying and logging strategies | # This can be customized by passing waiting, retrying and logging strategies | ||||
# as functions. See the `tenacity` library documentation. | # as functions. See the `tenacity` library documentation. | ||||
# Log listed URL to ease debugging | # Log listed URL to ease debugging | ||||
logger.debug("Fetching URL %s with params %s", url, params) | logger.debug("Fetching URL %s with params %s", url, params) | ||||
response = self.session.get(url, params=params) | response = self.session.get(url, params=params) | ||||
▲ Show 20 Lines • Show All 73 Lines • Show Last 20 Lines |