Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/launchpad/lister.py
# Copyright (C) 2020-2021 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from dataclasses import dataclass | from dataclasses import dataclass | ||||
from datetime import datetime | from datetime import datetime | ||||
import logging | import logging | ||||
from typing import Any, Dict, Iterator, Optional | from typing import Any, Dict, Iterable, Iterator, Optional | ||||
import iso8601 | import iso8601 | ||||
from launchpadlib.launchpad import Launchpad | from launchpadlib.launchpad import Launchpad | ||||
from lazr.restfulclient.resource import Collection | from lazr.restfulclient.resource import Collection | ||||
from swh.core.utils import grouper | |||||
from swh.scheduler.interface import SchedulerInterface | from swh.scheduler.interface import SchedulerInterface | ||||
from swh.scheduler.model import ListedOrigin | from swh.scheduler.model import ListedOrigin | ||||
from ..pattern import CredentialsType, Lister | from ..pattern import CredentialsType, Lister | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
LaunchpadPageType = Iterator[Collection] | LaunchpadPageType = Iterator[Collection] | ||||
▲ Show 20 Lines • Show All 91 Lines • ▼ Show 20 Lines | def get_origins_from_page(self, page: LaunchpadPageType) -> Iterator[ListedOrigin]: | ||||
yield ListedOrigin( | yield ListedOrigin( | ||||
lister_id=self.lister_obj.id, | lister_id=self.lister_obj.id, | ||||
visit_type="git", | visit_type="git", | ||||
url=origin_url, | url=origin_url, | ||||
last_update=last_update, | last_update=last_update, | ||||
) | ) | ||||
def send_origins(self, origins: Iterable[ListedOrigin]) -> int:
    """Record a list of :class:`model.ListedOrigin` in the scheduler.

    The origins are not sent in a single call: they are split into groups of
    100 records and each group is flushed to the scheduler separately, as
    some pages can be quite large.

    Args:
        origins: the listed origins to record

    Returns:
        the number of listed origins recorded in the scheduler

    """
    batch_size = 100
    recorded = 0
    for batch in grouper(origins, n=batch_size):
        # The scheduler hands back what it recorded for this batch; only the
        # number of entries is needed here.
        recorded += len(self.scheduler.record_listed_origins(batch))
    return recorded
vlorentz: Isn't this exactly the same code?
ardumont (author, marked as done): Yes, I did not put too much effort into this as I was not totally sold on it. D4973 got accepted instead.
Anyhow, this can be…
def finalize(self) -> None:
    """Fold the date gathered during this run into the lister state.

    Does nothing when ``self.date_last_modified`` is ``None``. Otherwise, in
    incremental mode, ``self.state.date_last_modified`` is advanced to
    ``self.date_last_modified`` when the state holds no date yet or holds an
    older one. In both modes ``self.updated`` is then set to ``True``.
    """
    latest = self.date_last_modified
    if latest is None:
        # No date was collected during this run: leave state and the
        # ``updated`` flag untouched.
        return

    if self.incremental:
        known = self.state.date_last_modified
        # Never move the stored date backwards.
        if known is None or latest > known:
            self.state.date_last_modified = latest

    # NOTE(review): presumably tells the base lister that the state should be
    # persisted -- confirm against the Lister pattern class.
    self.updated = True
isn't this exactly the same code?