Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/sourceforge/lister.py
Show All 12 Lines | |||||
from bs4 import BeautifulSoup | from bs4 import BeautifulSoup | ||||
import iso8601 | import iso8601 | ||||
import lxml | import lxml | ||||
import requests | import requests | ||||
from tenacity.before_sleep import before_sleep_log | from tenacity.before_sleep import before_sleep_log | ||||
from swh.core.api.classes import stream_results | from swh.core.api.classes import stream_results | ||||
from swh.lister.utils import retry_policy_generic, throttling_retry | from swh.lister.utils import http_retry | ||||
from swh.scheduler.interface import SchedulerInterface | from swh.scheduler.interface import SchedulerInterface | ||||
from swh.scheduler.model import ListedOrigin | from swh.scheduler.model import ListedOrigin | ||||
from .. import USER_AGENT | from .. import USER_AGENT | ||||
from ..pattern import CredentialsType, Lister | from ..pattern import CredentialsType, Lister | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
▲ Show 20 Lines • Show All 173 Lines • ▼ Show 20 Lines | def projects_last_modified(self) -> ProjectsLastModifiedCache: | ||||
# within a project or subproject. An assertion here would be overkill. | # within a project or subproject. An assertion here would be overkill. | ||||
last_modified = origin.last_update | last_modified = origin.last_update | ||||
assert last_modified is not None | assert last_modified is not None | ||||
listed_origins[(namespace, project)] = last_modified.date() | listed_origins[(namespace, project)] = last_modified.date() | ||||
self._project_last_modified = listed_origins | self._project_last_modified = listed_origins | ||||
return listed_origins | return listed_origins | ||||
@throttling_retry( | @http_retry( | ||||
retry=retry_policy_generic, | |||||
before_sleep=before_sleep_log(logger, logging.WARNING), | before_sleep=before_sleep_log(logger, logging.WARNING), | ||||
) | ) | ||||
def page_request(self, url, params) -> requests.Response: | def page_request(self, url, params) -> requests.Response: | ||||
# Log listed URL to ease debugging | # Log listed URL to ease debugging | ||||
logger.debug("Fetching URL %s with params %s", url, params) | logger.debug("Fetching URL %s with params %s", url, params) | ||||
response = self.session.get(url, params=params) | response = self.session.get(url, params=params) | ||||
if response.status_code != 200: | if response.status_code != 200: | ||||
▲ Show 20 Lines • Show All 237 Lines • Show Last 20 Lines |