Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/maven/lister.py
Show All 10 Lines | |||||
from urllib.parse import urljoin | from urllib.parse import urljoin | ||||
from bs4 import BeautifulSoup | from bs4 import BeautifulSoup | ||||
import lxml | import lxml | ||||
import requests | import requests | ||||
from tenacity.before_sleep import before_sleep_log | from tenacity.before_sleep import before_sleep_log | ||||
from swh.core.github.utils import GitHubSession | from swh.core.github.utils import GitHubSession | ||||
from swh.lister.utils import throttling_retry | from swh.lister.utils import http_retry | ||||
from swh.scheduler.interface import SchedulerInterface | from swh.scheduler.interface import SchedulerInterface | ||||
from swh.scheduler.model import ListedOrigin | from swh.scheduler.model import ListedOrigin | ||||
from .. import USER_AGENT | from .. import USER_AGENT | ||||
from ..pattern import CredentialsType, Lister | from ..pattern import CredentialsType, Lister | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines | ): | ||||
) | ) | ||||
def state_from_dict(self, d: Dict[str, Any]) -> MavenListerState: | def state_from_dict(self, d: Dict[str, Any]) -> MavenListerState: | ||||
return MavenListerState(**d) | return MavenListerState(**d) | ||||
def state_to_dict(self, state: MavenListerState) -> Dict[str, Any]: | def state_to_dict(self, state: MavenListerState) -> Dict[str, Any]: | ||||
return asdict(state) | return asdict(state) | ||||
@throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) | @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) | ||||
def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response: | def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response: | ||||
logger.info("Fetching URL %s with params %s", url, params) | logger.info("Fetching URL %s with params %s", url, params) | ||||
response = self.session.get(url, params=params) | response = self.session.get(url, params=params) | ||||
if response.status_code != 200: | if response.status_code != 200: | ||||
logger.warning( | logger.warning( | ||||
"Unexpected HTTP status code %s on %s: %s", | "Unexpected HTTP status code %s on %s: %s", | ||||
▲ Show 20 Lines • Show All 305 Lines • Show Last 20 Lines |