diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py --- a/swh/lister/cgit/lister.py +++ b/swh/lister/cgit/lister.py @@ -58,16 +58,12 @@ Args: url: main URL of the CGit instance, i.e. url of the index of published git repositories on this instance. - instance: Name of cgit instance. Defaults to url's hostname + instance: Name of cgit instance. Defaults to url's network location if unset. base_git_url: Optional base git url which allows the origin url computations. """ - if not instance: - instance = urlparse(url).hostname - assert instance is not None # Make mypy happy - super().__init__( scheduler=scheduler, url=url, instance=instance, credentials=credentials, ) diff --git a/swh/lister/gitea/lister.py b/swh/lister/gitea/lister.py --- a/swh/lister/gitea/lister.py +++ b/swh/lister/gitea/lister.py @@ -11,7 +11,6 @@ import iso8601 import requests from tenacity.before_sleep import before_sleep_log -from urllib3.util import parse_url from swh.lister.utils import throttling_retry from swh.scheduler.interface import SchedulerInterface @@ -47,9 +46,6 @@ page_size: int = 50, credentials: CredentialsType = None, ): - if instance is None: - instance = parse_url(url).host - super().__init__( scheduler=scheduler, credentials=credentials, url=url, instance=instance, ) diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -14,7 +14,6 @@ from requests.exceptions import HTTPError from requests.status_codes import codes from tenacity.before_sleep import before_sleep_log -from urllib3.util import parse_url from swh.lister import USER_AGENT from swh.lister.pattern import CredentialsType, Lister @@ -88,7 +87,8 @@ scheduler: a scheduler instance url: the api v4 url of the gitlab instance to visit (e.g. https://gitlab.com/api/v4/) - instance: a specific instance name (e.g. gitlab, tor, git-kernel, ...) + instance: a specific instance name (e.g. gitlab, tor, git-kernel, ...), + url network location will be used if not provided incremental: defines if incremental listing is activated or not """ @@ -103,8 +103,6 @@ credentials: Optional[CredentialsType] = None, incremental: bool = False, ): - if instance is None: - instance = parse_url(url).host super().__init__( scheduler=scheduler, url=url.rstrip("/"), diff --git a/swh/lister/pattern.py b/swh/lister/pattern.py --- a/swh/lister/pattern.py +++ b/swh/lister/pattern.py @@ -1,10 +1,13 @@ -# Copyright (C) 2020 The Software Heritage developers +# Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from __future__ import annotations + from dataclasses import dataclass from typing import Any, Dict, Generic, Iterable, Iterator, List, Optional, TypeVar +from urllib.parse import urlparse from swh.core.config import load_from_envvar from swh.core.utils import grouper @@ -17,10 +20,10 @@ pages: int = 0 origins: int = 0 - def __add__(self, other: "ListerStats") -> "ListerStats": + def __add__(self, other: ListerStats) -> ListerStats: return self.__class__(self.pages + other.pages, self.origins + other.origins) - def __iadd__(self, other: "ListerStats"): + def __iadd__(self, other: ListerStats): self.pages += other.pages self.origins += other.origins @@ -65,8 +68,8 @@ scheduler: the instance of the Scheduler being used to register the origins listed by this lister url: a URL representing this lister, e.g. the API's base URL - instance: the instance name used, in conjunction with :attr:`LISTER_NAME`, to - uniquely identify this lister instance. + instance: the instance name, to uniquely identify this lister instance, + if not provided the URL network location will be used credentials: dictionary of credentials for all listers. The first level identifies the :attr:`LISTER_NAME`, the second level the lister :attr:`instance`. The final level is a list of dicts containing the @@ -86,14 +89,17 @@ self, scheduler: SchedulerInterface, url: str, - instance: str, + instance: Optional[str] = None, credentials: CredentialsType = None, ): if not self.LISTER_NAME: raise ValueError("Must set the LISTER_NAME attribute on Lister classes") self.url = url - self.instance = instance + if instance is not None: + self.instance = instance + else: + self.instance = urlparse(url).netloc self.scheduler = scheduler diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py --- a/swh/lister/phabricator/lister.py +++ b/swh/lister/phabricator/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2020 the Software Heritage developers +# Copyright (C) 2019-2021 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import defaultdict @@ -27,7 +27,8 @@ Args: url: base URL of a phabricator forge (for instance https://forge.softwareheritage.org) - instance: string identifier for the listed forge + instance: string identifier for the listed forge, + URL network location will be used if not provided api_token: authentication token for Conduit API """ @@ -38,7 +39,7 @@ self, scheduler: SchedulerInterface, url: str, - instance: str, + instance: Optional[str] = None, api_token: Optional[str] = None, credentials: CredentialsType = None, ): diff --git a/swh/lister/tests/test_pattern.py b/swh/lister/tests/test_pattern.py --- a/swh/lister/tests/test_pattern.py +++ b/swh/lister/tests/test_pattern.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 The Software Heritage developers +# Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -39,6 +39,18 @@ lister.run() +def test_lister_instance_name(swh_scheduler): + lister = InstantiableLister( + scheduler=swh_scheduler, url="https://example.org", instance="example" + ) + + assert lister.instance == "example" + + lister = InstantiableLister(scheduler=swh_scheduler, url="https://example.org") + + assert lister.instance == "example.org" + + def test_instantiation_from_configfile(swh_scheduler, mocker): mock_load_from_envvar = mocker.patch("swh.lister.pattern.load_from_envvar") mock_get_scheduler = mocker.patch("swh.lister.pattern.get_scheduler") diff --git a/swh/lister/tuleap/lister.py b/swh/lister/tuleap/lister.py --- a/swh/lister/tuleap/lister.py +++ b/swh/lister/tuleap/lister.py @@ -10,7 +10,6 @@ import iso8601 import requests from tenacity.before_sleep import before_sleep_log -from urllib3.util import parse_url from swh.lister.utils import throttling_retry from swh.scheduler.interface import SchedulerInterface @@ -51,9 +50,6 @@ instance: Optional[str] = None, credentials: CredentialsType = None, ): - if instance is None: - instance = parse_url(url).host - super().__init__( scheduler=scheduler, credentials=credentials, url=url, instance=instance, )