Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/pattern.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from dataclasses import dataclass | from dataclasses import dataclass | ||||
from typing import Any, Dict, Generic, Iterable, Iterator, List, Optional, TypeVar | from typing import Any, Dict, Generic, Iterable, Iterator, List, Optional, TypeVar | ||||
from swh.core.config import load_from_envvar | from swh.core.config import load_from_envvar | ||||
from swh.core.utils import grouper | |||||
from swh.scheduler import get_scheduler, model | from swh.scheduler import get_scheduler, model | ||||
from swh.scheduler.interface import SchedulerInterface | from swh.scheduler.interface import SchedulerInterface | ||||
@dataclass | @dataclass | ||||
class ListerStats: | class ListerStats: | ||||
pages: int = 0 | pages: int = 0 | ||||
origins: int = 0 | origins: int = 0 | ||||
▲ Show 20 Lines • Show All 198 Lines • ▼ Show 20 Lines | def commit_page(self, page: PageType) -> None: | ||||
pass | pass | ||||
def send_origins(self, origins: Iterable[model.ListedOrigin]) -> int: | def send_origins(self, origins: Iterable[model.ListedOrigin]) -> int: | ||||
"""Record a list of :class:`model.ListedOrigin` in the scheduler. | """Record a list of :class:`model.ListedOrigin` in the scheduler. | ||||
Returns: | Returns: | ||||
the number of listed origins recorded in the scheduler | the number of listed origins recorded in the scheduler | ||||
""" | """ | ||||
count = 0 | ret = self.scheduler.record_listed_origins(origins) | ||||
for batch_origins in grouper(origins, n=100): | return len(ret) | ||||
ret = self.scheduler.record_listed_origins(batch_origins) | |||||
count += len(ret) | |||||
return count | |||||
@classmethod | @classmethod | ||||
def from_config(cls, scheduler: Dict[str, Any], **config: Any): | def from_config(cls, scheduler: Dict[str, Any], **config: Any): | ||||
"""Instantiate a lister from a configuration dict. | """Instantiate a lister from a configuration dict. | ||||
This is basically a backwards-compatibility shim for the CLI. | This is basically a backwards-compatibility shim for the CLI. | ||||
Args: | Args: | ||||
Show All 39 Lines |