Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/gitlab/lister.py
Show All 17 Lines | |||||
from swh.lister import USER_AGENT | from swh.lister import USER_AGENT | ||||
from swh.lister.pattern import CredentialsType, Lister | from swh.lister.pattern import CredentialsType, Lister | ||||
from swh.lister.utils import is_retryable_exception, retry_attempt, throttling_retry | from swh.lister.utils import is_retryable_exception, retry_attempt, throttling_retry | ||||
from swh.scheduler.model import ListedOrigin | from swh.scheduler.model import ListedOrigin | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
IGNORED_DVCS = ("hg_git",) | # Some instances provide an hg_git type, which can be ingested as hg origins | ||||
anlambert: I would remove the hg to hg mapping here. | |||||
Done Inline Actions You can't, otherwise hg are mapped to git. > assert VCS_MAPPING[entry["vcs_type"]] == "hg" E KeyError: 'hg' ardumont: You can't, otherwise hg are mapped to git.
```
> assert VCS_MAPPING[entry… | |||||
Done Inline Actions Never mind this; I dropped it indeed. ardumont: Never mind this; I dropped it indeed. | |||||
VCS_MAPPING = {"hg_git": "hg"} | |||||
Done Inline Actions Not sure if we need this global variable here. anlambert: Not sure if we need this global variable here. | |||||
Done Inline Actions OK, dropped. ardumont: OK, dropped. | |||||
@dataclass | @dataclass | ||||
class GitLabListerState: | class GitLabListerState: | ||||
"""State of the GitLabLister""" | """State of the GitLabLister""" | ||||
last_seen_next_link: Optional[str] = None | last_seen_next_link: Optional[str] = None | ||||
"""Last link header (not visited yet) during an incremental pass | """Last link header (not visited yet) during an incremental pass | ||||
▲ Show 20 Lines • Show All 166 Lines • ▼ Show 20 Lines | def get_pages(self) -> Iterator[PageResult]: | ||||
next_page = page_result.next_page | next_page = page_result.next_page | ||||
def get_origins_from_page(self, page_result: PageResult) -> Iterator[ListedOrigin]: | def get_origins_from_page(self, page_result: PageResult) -> Iterator[ListedOrigin]: | ||||
assert self.lister_obj.id is not None | assert self.lister_obj.id is not None | ||||
repositories = page_result.repositories if page_result.repositories else [] | repositories = page_result.repositories if page_result.repositories else [] | ||||
for repo in repositories: | for repo in repositories: | ||||
visit_type = repo.get("vcs_type", "git") | visit_type = repo.get("vcs_type", "git") | ||||
url = repo["http_url_to_repo"] | visit_type = VCS_MAPPING.get(visit_type, visit_type) | ||||
if visit_type in IGNORED_DVCS: | |||||
logger.warning( | |||||
"Ignoring origin %s with type '%s'", url, visit_type, | |||||
) | |||||
continue | |||||
yield ListedOrigin( | yield ListedOrigin( | ||||
Done Inline Actions How about splitting this into two instructions, something like: visit_type = repo.get("vcs_type", "git") visit_type = VCS_MAPPING.get(visit_type, visit_type) It is more readable this way, IMHO. anlambert: How about splitting this into two instructions, something like:
```lang=python
visit_type =… | |||||
Done Inline Actions OK ardumont: OK | |||||
lister_id=self.lister_obj.id, | lister_id=self.lister_obj.id, | ||||
url=url, | url=repo["http_url_to_repo"], | ||||
visit_type=visit_type, | visit_type=visit_type, | ||||
last_update=iso8601.parse_date(repo["last_activity_at"]), | last_update=iso8601.parse_date(repo["last_activity_at"]), | ||||
) | ) | ||||
def commit_page(self, page_result: PageResult) -> None: | def commit_page(self, page_result: PageResult) -> None: | ||||
"""Update currently stored state using the latest listed "next" page if relevant. | """Update currently stored state using the latest listed "next" page if relevant. | ||||
Relevancy is determined by the next_page link whose 'page' id must be strictly | Relevancy is determined by the next_page link whose 'page' id must be strictly | ||||
▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines |
I would remove the hg to hg mapping here.