diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -6,7 +6,7 @@ from dataclasses import asdict, dataclass import logging import random -from typing import Any, Dict, Iterator, Optional, Tuple +from typing import Any, Dict, Iterator, List, Optional, Tuple from urllib.parse import parse_qs, urlencode, urlparse import iso8601 @@ -92,6 +92,7 @@ instance: a specific instance name (e.g. gitlab, tor, git-kernel, ...), url network location will be used if not provided incremental: defines if incremental listing is activated or not + ignored_project_prefixes: List of prefixes of project paths to ignore """ @@ -103,6 +104,7 @@ instance: Optional[str] = None, credentials: Optional[CredentialsType] = None, incremental: bool = False, + ignored_project_prefixes: Optional[List[str]] = None, ): if name is not None: self.LISTER_NAME = name @@ -115,6 +117,9 @@ self.incremental = incremental self.last_page: Optional[str] = None self.per_page = 100 + self.ignored_project_prefixes: Optional[Tuple[str, ...]] = None + if ignored_project_prefixes: + self.ignored_project_prefixes = tuple(ignored_project_prefixes) self.session.headers.update({"Accept": "application/json"}) @@ -203,6 +208,10 @@ repositories = page_result.repositories if page_result.repositories else [] for repo in repositories: + if self.ignored_project_prefixes and repo["path_with_namespace"].startswith( + self.ignored_project_prefixes + ): + continue visit_type = repo.get("vcs_type", "git") visit_type = VCS_MAPPING.get(visit_type, visit_type) yield ListedOrigin( diff --git a/swh/lister/gitlab/tests/test_lister.py b/swh/lister/gitlab/tests/test_lister.py --- a/swh/lister/gitlab/tests/test_lister.py +++ b/swh/lister/gitlab/tests/test_lister.py @@ -356,3 +356,38 @@ ) def test__parse_id_after(url, expected_result): assert _parse_id_after(url) == expected_result + + +def test_lister_gitlab_ignored_project_prefixes(datadir, swh_scheduler, requests_mock): + """Gitlab lister supports listing with ignored project prefixes""" + instance = "gitlab.com" + lister = GitLabLister( + swh_scheduler, + url=api_url(instance), + instance=instance, + ignored_project_prefixes=["jonan/"], + ) + + response = gitlab_page_response(datadir, instance, 1) + + requests_mock.get( + lister.page_url(), + [{"json": response}], + additional_matcher=_match_request, + ) + + listed_result = lister.run() + # 2 origins start with jonan/ + expected_nb_origins = len(response) - 2 + assert listed_result == ListerStats(pages=1, origins=expected_nb_origins) + + scheduler_origins = lister.scheduler.get_listed_origins( + lister.lister_obj.id + ).results + assert len(scheduler_origins) == expected_nb_origins + + for listed_origin in scheduler_origins: + assert listed_origin.visit_type == "git" + assert listed_origin.url.startswith(f"https://{instance}") + assert not listed_origin.url.startswith(f"https://{instance}/jonan/") + assert listed_origin.last_update is not None