Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/maven/lister.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from dataclasses import asdict, dataclass | from dataclasses import asdict, dataclass | ||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
import logging | import logging | ||||
import re | import re | ||||
from typing import Any, Dict, Iterator, Optional | from typing import Any, Dict, Iterator, Optional | ||||
from urllib.parse import urljoin | from urllib.parse import urljoin | ||||
import requests | import requests | ||||
from tenacity.before_sleep import before_sleep_log | from tenacity.before_sleep import before_sleep_log | ||||
from urllib3.util import parse_url | |||||
import xmltodict | import xmltodict | ||||
from swh.lister.utils import throttling_retry | from swh.lister.utils import throttling_retry | ||||
from swh.scheduler.interface import SchedulerInterface | from swh.scheduler.interface import SchedulerInterface | ||||
from swh.scheduler.model import ListedOrigin | from swh.scheduler.model import ListedOrigin | ||||
from .. import USER_AGENT | from .. import USER_AGENT | ||||
from ..pattern import CredentialsType, Lister | from ..pattern import CredentialsType, Lister | ||||
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | ): | ||||
incremental: bool, defaults to True. Defines if incremental listing | incremental: bool, defaults to True. Defines if incremental listing | ||||
is activated or not. | is activated or not. | ||||
""" | """ | ||||
self.BASE_URL = url | self.BASE_URL = url | ||||
self.INDEX_URL = index_url | self.INDEX_URL = index_url | ||||
self.incremental = incremental | self.incremental = incremental | ||||
if instance is None: | |||||
instance = parse_url(url).host | |||||
super().__init__( | super().__init__( | ||||
scheduler=scheduler, | scheduler=scheduler, | ||||
credentials=credentials, | credentials=credentials, | ||||
url=url, | url=url, | ||||
instance=instance, | instance=instance, | ||||
) | ) | ||||
self.session = requests.Session() | self.session = requests.Session() | ||||
▲ Show 20 Lines • Show All 286 Lines • Show Last 20 Lines |