diff --git a/swh/scheduler/cli/__init__.py b/swh/scheduler/cli/__init__.py --- a/swh/scheduler/cli/__init__.py +++ b/swh/scheduler/cli/__init__.py @@ -3,6 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +# WARNING: do not import unnecessary things here to keep cli startup time under +# control import logging import click diff --git a/swh/scheduler/cli/admin.py b/swh/scheduler/cli/admin.py --- a/swh/scheduler/cli/admin.py +++ b/swh/scheduler/cli/admin.py @@ -3,6 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +# WARNING: do not import unnecessary things here to keep cli startup time under +# control import logging import time diff --git a/swh/scheduler/cli/celery_monitor.py b/swh/scheduler/cli/celery_monitor.py --- a/swh/scheduler/cli/celery_monitor.py +++ b/swh/scheduler/cli/celery_monitor.py @@ -3,8 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from ast import literal_eval -import csv +# WARNING: do not import unnecessary things here to keep cli startup time under +# control import logging import sys import time @@ -89,6 +89,9 @@ @click.pass_context def list_running(ctx: click.Context, format: str): """List running tasks on the lister workers""" + from ast import literal_eval + import csv + response_times = {} def active_callback(response): diff --git a/swh/scheduler/cli/task.py b/swh/scheduler/cli/task.py --- a/swh/scheduler/cli/task.py +++ b/swh/scheduler/cli/task.py @@ -3,24 +3,22 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import datetime -import json -import itertools +# WARNING: do not import unnecessary things here to keep cli startup time under +# control import locale import logging -import arrow -import csv import click -from typing import Any, Dict, Optional, Iterator -from itertools import islice - -from swh.model.model import Origin -from swh.storage.interface import StorageInterface +from typing import Any, Dict, Optional, Iterator, TYPE_CHECKING from . import cli +if TYPE_CHECKING: + # importing swh.storage.interface triggers the load of 300+ modules, so... + from swh.model.model import Origin + from swh.storage.interface import StorageInterface + locale.setlocale(locale.LC_ALL, "") ARROW_LOCALE = locale.getlocale(locale.LC_TIME)[0] @@ -30,6 +28,8 @@ name = "time and date" def convert(self, value, param, ctx): + import arrow + if not isinstance(value, arrow.Arrow): value = arrow.get(value) @@ -41,6 +41,9 @@ def format_dict(d): + import datetime + import arrow + ret = {} for k, v in d.items(): if isinstance(v, (arrow.Arrow, datetime.date, datetime.datetime)): @@ -77,6 +80,7 @@ If 'full' is True, also print the status and priority fields. + >>> import datetime >>> task = { ... 'id': 1234, ... 'arguments': { @@ -121,6 +125,8 @@ key2: 42 """ + import arrow + next_run = arrow.get(task["next_run"]) lines = [ "%s %s\n" % (click.style("Task", bold=True), task["id"]), @@ -204,6 +210,10 @@ --delimiter ';' - """ + import csv + import json + import arrow + tasks = [] now = arrow.utcnow() scheduler = ctx.obj["scheduler"] @@ -262,6 +272,8 @@ Note: if the priority is not given, the task won't have the priority set, which is considered as the lowest priority level. """ + import arrow + from .utils import parse_options scheduler = ctx.obj["scheduler"] @@ -289,9 +301,9 @@ click.echo("\n".join(output)) -def iter_origins( - storage: StorageInterface, page_token: Optional[str] = None -) -> Iterator[Origin]: +def iter_origins( # use string annotations to prevent some pkg loading + storage: "StorageInterface", page_token: "Optional[str]" = None, +) -> "Iterator[Origin]": """Iterate over origins in the storage. Optionally starting from page_token. This logs regularly an info message during pagination with the page_token. This, in @@ -357,6 +369,7 @@ swh-scheduler --database 'service=swh-scheduler' \ task schedule_origins index-origin-metadata """ + from itertools import islice from swh.storage import get_storage from .utils import parse_options, schedule_origin_batches @@ -571,6 +584,8 @@ swh-scheduler task respawn 1 3 12 """ + import arrow + scheduler = ctx.obj["scheduler"] if not scheduler: raise ValueError("Scheduler class (local/remote) must be instantiated") @@ -658,6 +673,8 @@ With --dry-run flag set (default), only list those. """ + import arrow + from itertools import groupby from swh.core.utils import grouper from swh.scheduler.backend_es import ElasticSearchBackend @@ -711,7 +728,7 @@ after, before, page_token=page_token, limit=batch_index ) tasks_sorted = sorted(result["tasks"], key=get_index_name) - groups = itertools.groupby(tasks_sorted, key=get_index_name) + groups = groupby(tasks_sorted, key=get_index_name) for index_name, tasks_group in groups: logger.debug("Index tasks to %s" % index_name) if dry_run: diff --git a/swh/scheduler/cli/task_type.py b/swh/scheduler/cli/task_type.py --- a/swh/scheduler/cli/task_type.py +++ b/swh/scheduler/cli/task_type.py @@ -3,7 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import celery.app.task +# WARNING: do not import unnecessary things here to keep cli startup time under +# control import click import logging @@ -106,6 +107,8 @@ ...) plugins. """ + import celery.app.task + scheduler = ctx.obj["scheduler"] if plugins == ("all",): diff --git a/swh/scheduler/cli/utils.py b/swh/scheduler/cli/utils.py --- a/swh/scheduler/cli/utils.py +++ b/swh/scheduler/cli/utils.py @@ -3,17 +3,19 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import itertools +# WARNING: do not import unnecessary things here to keep cli startup time under +# control import click -import yaml -from swh.scheduler.utils import create_task_dict TASK_BATCH_SIZE = 1000 # Number of tasks per query to the scheduler def schedule_origin_batches(scheduler, task_type, origins, origin_batch_size, kwargs): + from itertools import islice + from swh.scheduler.utils import create_task_dict + nb_origins = 0 nb_tasks = 0 @@ -22,7 +24,7 @@ for _ in range(TASK_BATCH_SIZE): # Group origins origin_batch = [] - for origin in itertools.islice(origins, origin_batch_size): + for origin in islice(origins, origin_batch_size): origin_batch.append(origin) nb_origins += len(origin_batch) if not origin_batch: @@ -49,6 +51,8 @@ def parse_argument(option): + import yaml + try: return yaml.safe_load(option) except Exception: