Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/gitlab/lister.py
Show All 17 Lines | |||||
from swh.lister import USER_AGENT | from swh.lister import USER_AGENT | ||||
from swh.lister.pattern import CredentialsType, Lister | from swh.lister.pattern import CredentialsType, Lister | ||||
from swh.lister.utils import is_retryable_exception, retry_attempt, throttling_retry | from swh.lister.utils import is_retryable_exception, retry_attempt, throttling_retry | ||||
from swh.scheduler.model import ListedOrigin | from swh.scheduler.model import ListedOrigin | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
# Some instances provide an hg_git type, which can be ingested as hg origins | |||||
VCS_MAPPING = {"hg_git": "hg"} | |||||
@dataclass | @dataclass | ||||
class GitLabListerState: | class GitLabListerState: | ||||
"""State of the GitLabLister""" | """State of the GitLabLister""" | ||||
last_seen_next_link: Optional[str] = None | last_seen_next_link: Optional[str] = None | ||||
"""Last link header (not visited yet) during an incremental pass | """Last link header (not visited yet) during an incremental pass | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | Args: | ||||
url: the api v4 url of the gitlab instance to visit (e.g. | url: the api v4 url of the gitlab instance to visit (e.g. | ||||
https://gitlab.com/api/v4/) | https://gitlab.com/api/v4/) | ||||
instance: a specific instance name (e.g. gitlab, tor, git-kernel, ...), | instance: a specific instance name (e.g. gitlab, tor, git-kernel, ...), | ||||
url network location will be used if not provided | url network location will be used if not provided | ||||
incremental: defines if incremental listing is activated or not | incremental: defines if incremental listing is activated or not | ||||
""" | """ | ||||
LISTER_NAME = "gitlab" | |||||
def __init__( | def __init__( | ||||
self, | self, | ||||
scheduler, | scheduler, | ||||
url: str, | url: str, | ||||
name: Optional[str] = "gitlab", | |||||
instance: Optional[str] = None, | instance: Optional[str] = None, | ||||
credentials: Optional[CredentialsType] = None, | credentials: Optional[CredentialsType] = None, | ||||
incremental: bool = False, | incremental: bool = False, | ||||
): | ): | ||||
if name is not None: | |||||
self.LISTER_NAME = name | |||||
super().__init__( | super().__init__( | ||||
scheduler=scheduler, | scheduler=scheduler, | ||||
url=url.rstrip("/"), | url=url.rstrip("/"), | ||||
instance=instance, | instance=instance, | ||||
credentials=credentials, | credentials=credentials, | ||||
) | ) | ||||
self.incremental = incremental | self.incremental = incremental | ||||
self.last_page: Optional[str] = None | self.last_page: Optional[str] = None | ||||
▲ Show 20 Lines • Show All 84 Lines • ▼ Show 20 Lines | def get_pages(self) -> Iterator[PageResult]: | ||||
yield page_result | yield page_result | ||||
next_page = page_result.next_page | next_page = page_result.next_page | ||||
def get_origins_from_page(self, page_result: PageResult) -> Iterator[ListedOrigin]: | def get_origins_from_page(self, page_result: PageResult) -> Iterator[ListedOrigin]: | ||||
assert self.lister_obj.id is not None | assert self.lister_obj.id is not None | ||||
repositories = page_result.repositories if page_result.repositories else [] | repositories = page_result.repositories if page_result.repositories else [] | ||||
for repo in repositories: | for repo in repositories: | ||||
visit_type = repo.get("vcs_type", "git") | |||||
visit_type = VCS_MAPPING.get(visit_type, visit_type) | |||||
yield ListedOrigin( | yield ListedOrigin( | ||||
lister_id=self.lister_obj.id, | lister_id=self.lister_obj.id, | ||||
url=repo["http_url_to_repo"], | url=repo["http_url_to_repo"], | ||||
visit_type="git", | visit_type=visit_type, | ||||
last_update=iso8601.parse_date(repo["last_activity_at"]), | last_update=iso8601.parse_date(repo["last_activity_at"]), | ||||
) | ) | ||||
def commit_page(self, page_result: PageResult) -> None: | def commit_page(self, page_result: PageResult) -> None: | ||||
"""Update currently stored state using the latest listed "next" page if relevant. | """Update currently stored state using the latest listed "next" page if relevant. | ||||
Relevancy is determined by the next_page link whose 'page' id must be strictly | Relevancy is determined by the next_page link whose 'page' id must be strictly | ||||
greater than the currently stored one. | greater than the currently stored one. | ||||
▲ Show 20 Lines • Show All 41 Lines • Show Last 20 Lines |