diff --git a/PKG-INFO b/PKG-INFO
index 9a92675..b74769d 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,33 +1,33 @@
 Metadata-Version: 2.1
 Name: swh.scheduler
-Version: 1.2.1
+Version: 1.2.2
 Summary: Software Heritage Scheduler
 Home-page: https://forge.softwareheritage.org/diffusion/DSCH/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-scheduler
 Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-scheduler/
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 5 - Production/Stable
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Provides-Extra: testing
 Provides-Extra: journal
 Provides-Extra: simulator
 License-File: LICENSE
 License-File: LICENSE.Celery
 License-File: AUTHORS
 
 swh-scheduler
 =============
 
 Job scheduler for the Software Heritage project.
 
 Task manager for asynchronous/delayed tasks, used for both recurrent (e.g.,
 listing a forge, loading new stuff from a Git repository) and one-off
 activities (e.g., loading a specific version of a source package).
diff --git a/sql/Makefile b/sql/Makefile
index cc836d5..1a81be5 100644
--- a/sql/Makefile
+++ b/sql/Makefile
@@ -1,73 +1,68 @@
 # Depends: postgresql-client, postgresql-autodoc
 
 DBNAME = softwareheritage-scheduler-dev
 DOCDIR = autodoc
 
-SQL_SCHEMA  = 30-swh-schema.sql
-SQL_FUNC    = 40-swh-func.sql
-SQL_DATA    = 50-swh-data.sql
-SQL_INDEXES = 60-swh-indexes.sql
-SQLS = $(SQL_SCHEMA) $(SQL_FUNC) $(SQL_DATA) $(SQL_INDEXES)
-SQL_FILES = $(abspath $(addprefix $(CURDIR)/../swh/scheduler/sql/,$(SQLS)))
+SQL_FILES = $(abspath $(shell ls $(CURDIR)/../swh/scheduler/sql/*.sql))
 
 PSQL_BIN = psql
 PSQL_FLAGS = --echo-all -X -v ON_ERROR_STOP=1
 PSQL = $(PSQL_BIN) $(PSQL_FLAGS)
 
 PIFPAF=$(findstring postgresql://,$(PIFPAF_URLS))
 
 all:
 
 createdb: createdb-stamp
 createdb-stamp: $(SQL_FILES)
 ifeq ($(PIFPAF),)
 	-dropdb $(DBNAME)
 endif
 	createdb $(DBNAME)
 ifeq ($(PIFPAF),)
 	touch $@
 else
 	rm -f $@
 endif
 
 filldb: filldb-stamp
 filldb-stamp: createdb-stamp
 	cat $(SQL_FILES) | $(PSQL) $(DBNAME)
 ifeq ($(PIFPAF),)
 	touch $@
 else
 	rm -f $@
 endif
 
 dropdb:
 	-dropdb $(DBNAME)
 
 dumpdb: swh-scheduler.dump
 swh-scheduler.dump: filldb-stamp
 	pg_dump -Fc $(DBNAME) > $@
 
 $(DOCDIR):
 	test -d $(DOCDIR)/ || mkdir $(DOCDIR)
 
 doc: autodoc-stamp $(DOCDIR)/swh-scheduler.pdf
 autodoc-stamp: filldb-stamp $(DOCDIR)
 	postgresql_autodoc -d $(DBNAME) -f $(DOCDIR)/swh-scheduler
 	cp -a $(DOCDIR)/swh-scheduler.dot $(DOCDIR)/swh-scheduler.dot.orig
 ifeq ($(PIFPAF),)
 	touch $@
 else
 	rm -f $@
 endif
 
 $(DOCDIR)/swh-scheduler.pdf: $(DOCDIR)/swh-scheduler.dot autodoc-stamp
 	dot -T pdf $< > $@
 $(DOCDIR)/swh-scheduler.svg: $(DOCDIR)/swh-scheduler.dot autodoc-stamp
 	dot -T svg $< > $@
 
 clean:
 	rm -rf *-stamp $(DOCDIR)/
 
 distclean: clean dropdb
 	rm -f swh-scheduler.dump
 
 .PHONY: all initdb createdb dropdb doc clean
diff --git a/swh.scheduler.egg-info/PKG-INFO b/swh.scheduler.egg-info/PKG-INFO
index 9a92675..b74769d 100644
--- a/swh.scheduler.egg-info/PKG-INFO
+++ b/swh.scheduler.egg-info/PKG-INFO
@@ -1,33 +1,33 @@
 Metadata-Version: 2.1
 Name: swh.scheduler
-Version: 1.2.1
+Version: 1.2.2
 Summary: Software Heritage Scheduler
 Home-page: https://forge.softwareheritage.org/diffusion/DSCH/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-scheduler
 Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-scheduler/
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 5 - Production/Stable
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Provides-Extra: testing
 Provides-Extra: journal
 Provides-Extra: simulator
 License-File: LICENSE
 License-File: LICENSE.Celery
 License-File: AUTHORS
 
 swh-scheduler
 =============
 
 Job scheduler for the Software Heritage project.
 
 Task manager for asynchronous/delayed tasks, used for both recurrent (e.g.,
 listing a forge, loading new stuff from a Git repository) and one-off
 activities (e.g., loading a specific version of a source package).
diff --git a/swh/scheduler/api/server.py b/swh/scheduler/api/server.py
index abc3ab8..a067589 100644
--- a/swh/scheduler/api/server.py
+++ b/swh/scheduler/api/server.py
@@ -1,150 +1,151 @@
-# Copyright (C) 2018-2019  The Software Heritage developers
+# Copyright (C) 2018-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import logging
 import os
+from typing import Dict
 
 from swh.core import config
 from swh.core.api import JSONFormatter, MsgpackFormatter, RPCServerApp
 from swh.core.api import encode_data_server as encode_data
 from swh.core.api import error_handler, negotiate
 from swh.scheduler import get_scheduler
 from swh.scheduler.exc import SchedulerException
 from swh.scheduler.interface import SchedulerInterface
 
 from .serializers import DECODERS, ENCODERS
 
 scheduler = None
 
 
 def get_global_scheduler():
     global scheduler
     if not scheduler:
         scheduler = get_scheduler(**app.config["scheduler"])
     return scheduler
 
 
 class SchedulerServerApp(RPCServerApp):
     extra_type_decoders = DECODERS
     extra_type_encoders = ENCODERS
 
 
 app = SchedulerServerApp(
     __name__, backend_class=SchedulerInterface, backend_factory=get_global_scheduler
 )
 
 
 @app.errorhandler(SchedulerException)
 def argument_error_handler(exception):
     return error_handler(exception, encode_data, status_code=400)
 
 
 @app.errorhandler(Exception)
 def my_error_handler(exception):
     return error_handler(exception, encode_data)
 
 
 def has_no_empty_params(rule):
     return len(rule.defaults or ()) >= len(rule.arguments or ())
 
 
 @app.route("/")
 def index():
     return """<html>
 <head><title>Software Heritage scheduler RPC server</title></head>
 <body>
 <p>You have reached the
 <a href="https://www.softwareheritage.org/">Software Heritage</a>
 scheduler RPC server.<br />
 See its
 <a href="https://docs.softwareheritage.org/devel/swh-scheduler/">documentation
 and API</a> for more information</p>
 </body>
 </html>"""
 
 
 @app.route("/site-map")
 @negotiate(MsgpackFormatter)
 @negotiate(JSONFormatter)
 def site_map():
     links = []
     for rule in app.url_map.iter_rules():
         if has_no_empty_params(rule) and hasattr(SchedulerInterface, rule.endpoint):
             links.append(
                 dict(
                     rule=rule.rule,
                     description=getattr(SchedulerInterface, rule.endpoint).__doc__,
                 )
             )
     # links is now a list of url, endpoint tuples
     return links
 
 
-def load_and_check_config(config_path, type="local"):
+def load_and_check_config(config_path: str, type: str = "postgresql") -> Dict:
     """Check the minimal configuration is set to run the api or raise an
        error explanation.
 
     Args:
-        config_path (str): Path to the configuration file to load
-        type (str): configuration type. For 'local' type, more
-                    checks are done.
+        config_path: Configuration file path to load
+        type: Configuration type, for 'postgresql' type (the default), more checks are
+              done.
 
     Raises:
         Error if the setup is not as expected
 
     Returns:
         configuration as a dict
 
     """
     if not config_path:
         raise EnvironmentError("Configuration file must be defined")
 
     if not os.path.exists(config_path):
         raise FileNotFoundError(f"Configuration file {config_path} does not exist")
 
     cfg = config.read(config_path)
 
     vcfg = cfg.get("scheduler")
     if not vcfg:
         raise KeyError("Missing '%scheduler' configuration")
 
-    if type == "local":
+    if type == "postgresql":
         cls = vcfg.get("cls")
         if cls not in ("local", "postgresql"):
             raise ValueError(
                 "The scheduler backend can only be started with a 'postgresql' "
                 "configuration"
             )
 
         db = vcfg.get("db")
         if not db:
             raise KeyError("Invalid configuration; missing 'db' config entry")
 
     return cfg
 
 
 api_cfg = None
 
 
 def make_app_from_configfile():
     """Run the WSGI app from the webserver, loading the configuration from
     a configuration file.
 
     SWH_CONFIG_FILENAME environment variable defines the
     configuration path to load.
 
     """
     global api_cfg
     if not api_cfg:
         config_path = os.environ.get("SWH_CONFIG_FILENAME")
         api_cfg = load_and_check_config(config_path)
         app.config.update(api_cfg)
     handler = logging.StreamHandler()
     app.logger.addHandler(handler)
     return app
 
 
 if __name__ == "__main__":
     print('Please use the "swh-scheduler api-server" command')
diff --git a/swh/scheduler/celery_backend/recurrent_visits.py b/swh/scheduler/celery_backend/recurrent_visits.py
index 0ec6110..62389f1 100644
--- a/swh/scheduler/celery_backend/recurrent_visits.py
+++ b/swh/scheduler/celery_backend/recurrent_visits.py
@@ -1,365 +1,369 @@
 # Copyright (C) 2021-2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 """This schedules the recurrent visits, for listed origins, in Celery.
 
 For "oneshot" (save code now, lister) tasks, check the
 :mod:`swh.scheduler.celery_backend.runner` and
 :mod:`swh.scheduler.celery_backend.pika_listener` modules.
 
 """
 
 from __future__ import annotations
 
 from itertools import chain
 import logging
 from queue import Empty, Queue
 import random
 from threading import Thread
 import time
 from typing import TYPE_CHECKING, Any, Dict, List, Tuple
 
 from kombu.utils.uuid import uuid
 
 from swh.scheduler.celery_backend.config import get_available_slots
 from swh.scheduler.utils import create_origin_task_dicts
 
 if TYPE_CHECKING:
     from ..interface import SchedulerInterface
     from ..model import ListedOrigin
 
 logger = logging.getLogger(__name__)
 
 
 DEFAULT_POLICY = [
     {"policy": "already_visited_order_by_lag", "weight": 50},
     {"policy": "never_visited_oldest_update_first", "weight": 50},
 ]
 DEFAULT_DVCS_POLICY = [
     {"policy": "already_visited_order_by_lag", "weight": 49},
     {"policy": "never_visited_oldest_update_first", "weight": 49},
     {"policy": "origins_without_last_update", "weight": 2},
 ]
 DEFAULT_GIT_POLICY = [
     {"policy": "already_visited_order_by_lag", "weight": 49, "tablesample": 0.1},
     {"policy": "never_visited_oldest_update_first", "weight": 49, "tablesample": 0.1},
     {"policy": "origins_without_last_update", "weight": 2, "tablesample": 0.1},
 ]
 
 DEFAULT_POLICY_CONFIG: Dict[str, List[Dict[str, Any]]] = {
     "default": DEFAULT_POLICY,
     "hg": DEFAULT_DVCS_POLICY,
     "svn": DEFAULT_DVCS_POLICY,
     "cvs": DEFAULT_DVCS_POLICY,
     "bzr": DEFAULT_DVCS_POLICY,
     "git": DEFAULT_GIT_POLICY,
 }
 
 """Scheduling policies to use to retrieve visits for the given visit types, with their
 relative weights"""
 
 MIN_SLOTS_RATIO = 0.05
 """Quantity of slots that need to be available (with respect to max_queue_length) for
 :py:func:`~swh.scheduler.interface.SchedulerInterface.grab_next_visits` to trigger"""
 
 QUEUE_FULL_BACKOFF = 60
 """Backoff time (in seconds) if there's fewer than :py:data:`MIN_SLOTS_RATIO` slots
 available in the queue."""
 
-NO_ORIGINS_SCHEDULED_BACKOFF = 20 * 60
+DEFAULT_NO_ORIGINS_SCHEDULED_BACKOFF = 20 * 60
 """Backoff time (in seconds) if no origins have been scheduled in the current
 iteration"""
 
 BACKOFF_SPLAY = 5.0
 """Amplitude of the fuzziness between backoffs"""
 
 TERMINATE = object()
 """Termination request received from command queue (singleton used for identity
 comparison)"""
 
 
 def grab_next_visits_policy_weights(
     scheduler: SchedulerInterface,
     visit_type: str,
     num_visits: int,
     policy_cfg: List[Dict[str, Any]],
 ) -> List[ListedOrigin]:
     """Get the next ``num_visits`` for the given ``visit_type`` using the corresponding
     set of scheduling policies.
 
     The :py:data:`POLICY_CFG` list sets, for the current visit type, the
     scheduling policies used to pull the next tasks. Each policy config entry
     in the list should at least have a 'policy' (policy name) and a 'weight'
     entry. For each policy in this policy_cfg list, the number of returned
     origins to visit will be weighted using this weight config option so that
     the total number of returned origins is around num_visits. Any other
     key/value entry in the policy configuration will be passed to the
     scheduler.grab_next_visit() method.
 
     This function emits a warning if the ratio of retrieved origins is off of
     the requested ratio by more than 5%.
 
     Returns:
        at most ``num_visits`` :py:class:`~swh.scheduler.model.ListedOrigin` objects
 
     """
 
     policies = [cfg["policy"] for cfg in policy_cfg]
     if len(set(policies)) != len(policies):
         raise ValueError(
             "A policy weights can only appear once; check your policy "
             f"configuration for visit type {visit_type}"
         )
 
     weights = [cfg["weight"] for cfg in policy_cfg]
     total_weight = sum(weights)
     if not total_weight:
         raise ValueError(f"No policy weights set for visit type {visit_type}")
 
     ratios = [weight / total_weight for weight in weights]
 
     extra_kws = [
         {k: v for k, v in cfg.items() if k not in ("weight", "policy")}
         for cfg in policy_cfg
     ]
 
     fetched_origins: Dict[str, List[ListedOrigin]] = {}
     for policy, ratio, extra_kw in zip(policies, ratios, extra_kws):
         num_tasks_to_send = int(num_visits * ratio)
         fetched_origins[policy] = scheduler.grab_next_visits(
             visit_type,
             num_tasks_to_send,
             policy=policy,
             **extra_kw,
         )
 
     all_origins: List[ListedOrigin] = list(
         chain.from_iterable(fetched_origins.values())
     )
     if not all_origins:
         return []
 
     # Check whether the ratios of origins fetched are skewed with respect to the
     # ones we requested
     fetched_origin_ratios = {
         policy: len(origins) / len(all_origins)
         for policy, origins in fetched_origins.items()
     }
 
     for policy, expected_ratio in zip(policies, ratios):
         # 5% of skew with respect to request
         if abs(fetched_origin_ratios[policy] - expected_ratio) / expected_ratio > 0.05:
             logger.info(
                 "Skewed fetch for visit type %s with policy %s: fetched %s, "
                 "requested %s",
                 visit_type,
                 policy,
                 fetched_origin_ratios[policy],
                 expected_ratio,
             )
 
     return all_origins
 
 
 def splay():
     """Return a random short interval by which to vary the backoffs for the visit
     scheduling threads"""
     return random.uniform(0, BACKOFF_SPLAY)
 
 
 def send_visits_for_visit_type(
     scheduler: SchedulerInterface,
     app,
     visit_type: str,
     task_type: Dict,
     policy_cfg: List[Dict[str, Any]],
+    no_origins_scheduled_backoff: int = DEFAULT_NO_ORIGINS_SCHEDULED_BACKOFF,
 ) -> float:
     """Schedule the next batch of visits for the given ``visit_type``.
 
     First, we determine the number of available slots by introspecting the RabbitMQ
     queue.
 
     If there's fewer than :py:data:`MIN_SLOTS_RATIO` slots available in the queue, we
     wait for :py:data:`QUEUE_FULL_BACKOFF` seconds. This avoids running the expensive
     :py:func:`~swh.scheduler.interface.SchedulerInterface.grab_next_visits` queries when
     there's not many jobs to queue.
 
     Once there's more than :py:data:`MIN_SLOTS_RATIO` slots available, we run
     :py:func:`grab_next_visits_policy_weights` to retrieve the next set of origin visits
     to schedule, and we send them to celery.
 
-    If the last scheduling attempt didn't return any origins, we sleep for
-    :py:data:`NO_ORIGINS_SCHEDULED_BACKOFF` seconds. This avoids running the expensive
+    If the last scheduling attempt didn't return any origins, we sleep by default for
+    :py:data:`DEFAULT_NO_ORIGINS_SCHEDULED_BACKOFF` seconds. This avoids running the expensive
     :py:func:`~swh.scheduler.interface.SchedulerInterface.grab_next_visits` queries too
     often if there's nothing left to schedule.
 
     The :py:data:`POLICY_CFG` argument is the policy configuration used to
     choose the next origins to visit. It is passed directly to the
     :py:func:`grab_next_visits_policy_weights()` function.
 
     Returns:
        the earliest :py:func:`time.monotonic` value at which to run the next iteration
        of the loop.
 
     """
     queue_name = task_type["backend_name"]
     max_queue_length = task_type.get("max_queue_length") or 0
     min_available_slots = max_queue_length * MIN_SLOTS_RATIO
 
     current_iteration_start = time.monotonic()
 
     # Check queue level
     available_slots = get_available_slots(app, queue_name, max_queue_length)
     logger.debug(
         "%s available slots for visit type %s in queue %s",
         available_slots,
         visit_type,
         queue_name,
     )
     if available_slots < min_available_slots:
         return current_iteration_start + QUEUE_FULL_BACKOFF
 
     origins = grab_next_visits_policy_weights(
         scheduler, visit_type, available_slots, policy_cfg
     )
 
     if not origins:
         logger.debug("No origins to visit for type %s", visit_type)
-        return current_iteration_start + NO_ORIGINS_SCHEDULED_BACKOFF
+        return current_iteration_start + no_origins_scheduled_backoff
 
     # Try to smooth the ingestion load, origins pulled by different
     # scheduling policies have different resource usage patterns
     random.shuffle(origins)
 
     for task_dict in create_origin_task_dicts(origins, scheduler):
         app.send_task(
             queue_name,
             task_id=uuid(),
             args=task_dict["arguments"]["args"],
             kwargs=task_dict["arguments"]["kwargs"],
             queue=queue_name,
         )
 
     logger.info(
         "%s: %s visits scheduled in queue %s",
         visit_type,
         len(origins),
         queue_name,
     )
 
     # When everything worked, we can try to schedule origins again ASAP.
     return time.monotonic()
 
 
 def visit_scheduler_thread(
     config: Dict,
     visit_type: str,
     command_queue: Queue[object],
     exc_queue: Queue[Tuple[str, BaseException]],
 ):
     """Target function for the visit sending thread, which initializes local connections
     and handles exceptions by sending them back to the main thread."""
 
     from swh.scheduler import get_scheduler
     from swh.scheduler.celery_backend.config import build_app
 
     try:
         # We need to reinitialize these connections because they're not generally
         # thread-safe
         app = build_app(config.get("celery"))
         scheduler = get_scheduler(**config["scheduler"])
         task_type = scheduler.get_task_type(f"load-{visit_type}")
         if task_type is None:
             raise ValueError(f"Unknown task type: load-{visit_type}")
 
         policy_cfg = config.get("scheduling_policy", DEFAULT_POLICY_CONFIG)
         for policies in policy_cfg.values():
             for policy in policies:
                 if "weight" not in policy or "policy" not in policy:
                     raise ValueError(
                         "Each policy configuration needs at least a 'policy' "
                         "and a 'weight' entry"
                     )
         policy_cfg = {**DEFAULT_POLICY_CONFIG, **policy_cfg}
 
         next_iteration = time.monotonic()
 
         while True:
             # vary the next iteration time a little bit
             next_iteration = next_iteration + splay()
             while time.monotonic() < next_iteration:
                 # Wait for next iteration to start. Listen for termination message.
                 try:
                     msg = command_queue.get(block=True, timeout=1)
                 except Empty:
                     continue
 
                 if msg is TERMINATE:
                     return
                 else:
                     logger.warn("Received unexpected message %s in command queue", msg)
 
             next_iteration = send_visits_for_visit_type(
                 scheduler,
                 app,
                 visit_type,
                 task_type,
                 policy_cfg.get(visit_type, policy_cfg["default"]),
+                config.get(
+                    "no_origins_scheduled_backoff", DEFAULT_NO_ORIGINS_SCHEDULED_BACKOFF
+                ),
             )
 
     except BaseException as e:
         exc_queue.put((visit_type, e))
 
 
 VisitSchedulerThreads = Dict[str, Tuple[Thread, Queue]]
 """Dict storing the visit scheduler threads and their command queues"""
 
 
 def spawn_visit_scheduler_thread(
     threads: VisitSchedulerThreads,
     exc_queue: Queue[Tuple[str, BaseException]],
     config: Dict[str, Any],
     visit_type: str,
 ):
     """Spawn a new thread to schedule the visits of type ``visit_type``."""
     command_queue: Queue[object] = Queue()
     thread = Thread(
         target=visit_scheduler_thread,
         kwargs={
             "config": config,
             "visit_type": visit_type,
             "command_queue": command_queue,
             "exc_queue": exc_queue,
         },
     )
     threads[visit_type] = (thread, command_queue)
     thread.start()
 
 
 def terminate_visit_scheduler_threads(threads: VisitSchedulerThreads) -> List[str]:
     """Terminate all visit scheduler threads"""
     logger.info("Termination requested...")
     for _, command_queue in threads.values():
         command_queue.put(TERMINATE)
 
     loops = 0
     while threads and loops < 10:
         logger.info(
             "Terminating visit scheduling threads: %s", ", ".join(sorted(threads))
         )
         loops += 1
         for visit_type, (thread, _) in list(threads.items()):
             thread.join(timeout=1)
             if not thread.is_alive():
                 logger.debug("Thread %s terminated", visit_type)
                 del threads[visit_type]
 
     if threads:
         logger.warn(
             "Could not reap the following threads after 10 attempts: %s",
             ", ".join(sorted(threads)),
         )
 
     return list(sorted(threads))
diff --git a/swh/scheduler/tests/test_cli.py b/swh/scheduler/tests/test_cli.py
index 5152656..6a55071 100644
--- a/swh/scheduler/tests/test_cli.py
+++ b/swh/scheduler/tests/test_cli.py
@@ -1,946 +1,946 @@
 # Copyright (C) 2019-2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import datetime
 from itertools import islice
 import logging
 import random
 import re
 import tempfile
 from unittest.mock import patch
 
 from click.testing import CliRunner
 import pytest
 
 from swh.core.api.classes import stream_results
 from swh.model.model import Origin
 from swh.scheduler.cli import cli
 from swh.scheduler.utils import create_task_dict, utcnow
 
 CLI_CONFIG = """
 scheduler:
     cls: foo
     args: {}
 """
 
 
-def invoke(scheduler, catch_exceptions, args):
+def invoke(scheduler, catch_exceptions, args, config=CLI_CONFIG):
     runner = CliRunner()
     with patch(
         "swh.scheduler.get_scheduler"
     ) as get_scheduler_mock, tempfile.NamedTemporaryFile(
         "a", suffix=".yml"
     ) as config_fd:
-        config_fd.write(CLI_CONFIG)
+        config_fd.write(config)
         config_fd.seek(0)
         get_scheduler_mock.return_value = scheduler
         args = [
             "-C" + config_fd.name,
         ] + args
         result = runner.invoke(cli, args, obj={"log_level": logging.WARNING})
     if not catch_exceptions and result.exception:
         print(result.output)
         raise result.exception
     return result
 
 
 def test_schedule_tasks(swh_scheduler):
     csv_data = (
         b'swh-test-ping;[["arg1", "arg2"]];{"key": "value"};'
         + utcnow().isoformat().encode()
         + b"\n"
         + b'swh-test-ping;[["arg3", "arg4"]];{"key": "value"};'
         + utcnow().isoformat().encode()
         + b"\n"
     )
     with tempfile.NamedTemporaryFile(suffix=".csv") as csv_fd:
         csv_fd.write(csv_data)
         csv_fd.seek(0)
         result = invoke(
             swh_scheduler, False, ["task", "schedule", "-d", ";", csv_fd.name]
         )
     expected = r"""
 Created 2 tasks
 
 Task 1
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: recurring
   Args:
     \['arg1', 'arg2'\]
   Keyword args:
     key: 'value'
 
 Task 2
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: recurring
   Args:
     \['arg3', 'arg4'\]
   Keyword args:
     key: 'value'
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def test_schedule_tasks_columns(swh_scheduler):
     with tempfile.NamedTemporaryFile(suffix=".csv") as csv_fd:
         csv_fd.write(b'swh-test-ping;oneshot;["arg1", "arg2"];{"key": "value"}\n')
         csv_fd.seek(0)
         result = invoke(
             swh_scheduler,
             False,
             [
                 "task",
                 "schedule",
                 "-c",
                 "type",
                 "-c",
                 "policy",
                 "-c",
                 "args",
                 "-c",
                 "kwargs",
                 "-d",
                 ";",
                 csv_fd.name,
             ],
         )
     expected = r"""
 Created 1 tasks
 
 Task 1
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Args:
     'arg1'
     'arg2'
   Keyword args:
     key: 'value'
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def test_schedule_task(swh_scheduler):
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "add",
             "swh-test-ping",
             "arg1",
             "arg2",
             "key=value",
         ],
     )
     expected = r"""
 Created 1 tasks
 
 Task 1
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: recurring
   Args:
     'arg1'
     'arg2'
   Keyword args:
     key: 'value'
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def test_list_pending_tasks_none(swh_scheduler):
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "list-pending",
             "swh-test-ping",
         ],
     )
 
     expected = r"""
 Found 0 swh-test-ping tasks
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def test_list_pending_tasks(swh_scheduler):
     task1 = create_task_dict("swh-test-ping", "oneshot", key="value1")
     task2 = create_task_dict("swh-test-ping", "oneshot", key="value2")
     task2["next_run"] += datetime.timedelta(days=1)
     swh_scheduler.create_tasks([task1, task2])
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "list-pending",
             "swh-test-ping",
         ],
     )
 
     expected = r"""
 Found 1 swh-test-ping tasks
 
 Task 1
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Args:
   Keyword args:
     key: 'value1'
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
     swh_scheduler.grab_ready_tasks("swh-test-ping")
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "list-pending",
             "swh-test-ping",
         ],
     )
 
     expected = r"""
 Found 0 swh-test-ping tasks
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def test_list_pending_tasks_filter(swh_scheduler):
     task = create_task_dict("swh-test-multiping", "oneshot", key="value")
     swh_scheduler.create_tasks([task])
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "list-pending",
             "swh-test-ping",
         ],
     )
 
     expected = r"""
 Found 0 swh-test-ping tasks
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def test_list_pending_tasks_filter_2(swh_scheduler):
     swh_scheduler.create_tasks(
         [
             create_task_dict("swh-test-multiping", "oneshot", key="value"),
             create_task_dict("swh-test-ping", "oneshot", key="value2"),
         ]
     )
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "list-pending",
             "swh-test-ping",
         ],
     )
 
     expected = r"""
 Found 1 swh-test-ping tasks
 
 Task 2
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Args:
   Keyword args:
     key: 'value2'
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 # Fails because "task list-pending --limit 3" only returns 2 tasks, because
 # of how compute_nb_tasks_from works.
 @pytest.mark.xfail
 def test_list_pending_tasks_limit(swh_scheduler):
     swh_scheduler.create_tasks(
         [
             create_task_dict("swh-test-ping", "oneshot", key="value%d" % i)
             for i in range(10)
         ]
     )
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "list-pending",
             "swh-test-ping",
             "--limit",
             "3",
         ],
     )
 
     expected = r"""
 Found 2 swh-test-ping tasks
 
 Task 1
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Args:
   Keyword args:
     key: 'value0'
 
 Task 2
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Args:
   Keyword args:
     key: 'value1'
 
 Task 3
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Args:
   Keyword args:
     key: 'value2'
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def test_list_pending_tasks_before(swh_scheduler):
     task1 = create_task_dict("swh-test-ping", "oneshot", key="value")
     task2 = create_task_dict("swh-test-ping", "oneshot", key="value2")
     task1["next_run"] += datetime.timedelta(days=3)
     task2["next_run"] += datetime.timedelta(days=1)
     swh_scheduler.create_tasks([task1, task2])
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "list-pending",
             "swh-test-ping",
             "--before",
             (datetime.date.today() + datetime.timedelta(days=2)).isoformat(),
         ],
     )
 
     expected = r"""
 Found 1 swh-test-ping tasks
 
 Task 2
   Next run: tomorrow \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Args:
   Keyword args:
     key: 'value2'
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def test_list_tasks(swh_scheduler):
     task1 = create_task_dict("swh-test-ping", "oneshot", key="value1")
     task2 = create_task_dict("swh-test-ping", "oneshot", key="value2")
     task1["next_run"] += datetime.timedelta(days=3, hours=2)
     swh_scheduler.create_tasks([task1, task2])
 
     swh_scheduler.grab_ready_tasks("swh-test-ping")
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "list",
         ],
     )
 
     expected = r"""
 Found 2 tasks
 
 Task 1
   Next run: .+ \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Status: next_run_not_scheduled
   Priority:\x20
   Args:
   Keyword args:
     key: 'value1'
 
 Task 2
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Status: next_run_scheduled
   Priority:\x20
   Args:
   Keyword args:
     key: 'value2'
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def test_list_tasks_id(swh_scheduler):
     task1 = create_task_dict("swh-test-ping", "oneshot", key="value1")
     task2 = create_task_dict("swh-test-ping", "oneshot", key="value2")
     task3 = create_task_dict("swh-test-ping", "oneshot", key="value3")
     swh_scheduler.create_tasks([task1, task2, task3])
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "list",
             "--task-id",
             "2",
         ],
     )
 
     expected = r"""
 Found 1 tasks
 
 Task 2
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Status: next_run_not_scheduled
   Priority:\x20
   Args:
   Keyword args:
     key: 'value2'
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def test_list_tasks_id_2(swh_scheduler):
     task1 = create_task_dict("swh-test-ping", "oneshot", key="value1")
     task2 = create_task_dict("swh-test-ping", "oneshot", key="value2")
     task3 = create_task_dict("swh-test-ping", "oneshot", key="value3")
     swh_scheduler.create_tasks([task1, task2, task3])
 
     result = invoke(
         swh_scheduler, False, ["task", "list", "--task-id", "2", "--task-id", "3"]
     )
 
     expected = r"""
 Found 2 tasks
 
 Task 2
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Status: next_run_not_scheduled
   Priority:\x20
   Args:
   Keyword args:
     key: 'value2'
 
 Task 3
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Status: next_run_not_scheduled
   Priority:\x20
   Args:
   Keyword args:
     key: 'value3'
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def test_list_tasks_type(swh_scheduler):
     task1 = create_task_dict("swh-test-ping", "oneshot", key="value1")
     task2 = create_task_dict("swh-test-multiping", "oneshot", key="value2")
     task3 = create_task_dict("swh-test-ping", "oneshot", key="value3")
     swh_scheduler.create_tasks([task1, task2, task3])
 
     result = invoke(
         swh_scheduler, False, ["task", "list", "--task-type", "swh-test-ping"]
     )
 
     expected = r"""
 Found 2 tasks
 
 Task 1
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Status: next_run_not_scheduled
   Priority:\x20
   Args:
   Keyword args:
     key: 'value1'
 
 Task 3
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Status: next_run_not_scheduled
   Priority:\x20
   Args:
   Keyword args:
     key: 'value3'
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def test_list_tasks_limit(swh_scheduler):
     task1 = create_task_dict("swh-test-ping", "oneshot", key="value1")
     task2 = create_task_dict("swh-test-ping", "oneshot", key="value2")
     task3 = create_task_dict("swh-test-ping", "oneshot", key="value3")
     swh_scheduler.create_tasks([task1, task2, task3])
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "list",
             "--limit",
             "2",
         ],
     )
 
     expected = r"""
 Found 2 tasks
 
 Task 1
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Status: next_run_not_scheduled
   Priority:\x20
   Args:
   Keyword args:
     key: 'value1'
 
 Task 2
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Status: next_run_not_scheduled
   Priority:\x20
   Args:
   Keyword args:
     key: 'value2'
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def test_list_tasks_before(swh_scheduler):
     task1 = create_task_dict("swh-test-ping", "oneshot", key="value1")
     task2 = create_task_dict("swh-test-ping", "oneshot", key="value2")
     task1["next_run"] += datetime.timedelta(days=3, hours=2)
     swh_scheduler.create_tasks([task1, task2])
 
     swh_scheduler.grab_ready_tasks("swh-test-ping")
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "list",
             "--before",
             (datetime.date.today() + datetime.timedelta(days=2)).isoformat(),
         ],
     )
 
     expected = r"""
 Found 1 tasks
 
 Task 2
   Next run: today \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Status: next_run_scheduled
   Priority:\x20
   Args:
   Keyword args:
     key: 'value2'
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def test_list_tasks_after(swh_scheduler):
     task1 = create_task_dict("swh-test-ping", "oneshot", key="value1")
     task2 = create_task_dict("swh-test-ping", "oneshot", key="value2")
     task1["next_run"] += datetime.timedelta(days=3, hours=2)
     swh_scheduler.create_tasks([task1, task2])
 
     swh_scheduler.grab_ready_tasks("swh-test-ping")
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "list",
             "--after",
             (datetime.date.today() + datetime.timedelta(days=2)).isoformat(),
         ],
     )
 
     expected = r"""
 Found 1 tasks
 
 Task 1
   Next run: .+ \(.*\)
   Interval: 1 day, 0:00:00
   Type: swh-test-ping
   Policy: oneshot
   Status: next_run_not_scheduled
   Priority:\x20
   Args:
   Keyword args:
     key: 'value1'
 
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
 
 
 def _fill_storage_with_origins(storage, nb_origins):
     origins = [Origin(url=f"http://example.com/{i}") for i in range(nb_origins)]
     storage.origin_add(origins)
     return origins
 
 
 @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3)
 def test_task_schedule_origins_dry_run(swh_scheduler, storage):
     """Tests the scheduling when origin_batch_size*task_batch_size is a
     divisor of nb_origins."""
     _fill_storage_with_origins(storage, 90)
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "schedule_origins",
             "--dry-run",
             "swh-test-ping",
         ],
     )
 
     # Check the output
     expected = r"""
 Scheduled 3 tasks \(30 origins\).
 Scheduled 6 tasks \(60 origins\).
 Scheduled 9 tasks \(90 origins\).
 Done.
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), repr(result.output)
 
     # Check scheduled tasks
     tasks = swh_scheduler.search_tasks()
     assert len(tasks) == 0
 
 
 def _assert_origin_tasks_contraints(tasks, max_tasks, max_task_size, expected_origins):
     # check there are not too many tasks
     assert len(tasks) <= max_tasks
 
     # check tasks are not too large
     assert all(len(task["arguments"]["args"][0]) <= max_task_size for task in tasks)
 
     # check the tasks are exhaustive
     assert sum([len(task["arguments"]["args"][0]) for task in tasks]) == len(
         expected_origins
     )
     assert set.union(*(set(task["arguments"]["args"][0]) for task in tasks)) == {
         origin.url for origin in expected_origins
     }
 
 
 @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3)
 def test_task_schedule_origins(swh_scheduler, storage):
     """Tests the scheduling when neither origin_batch_size or
     task_batch_size is a divisor of nb_origins."""
     origins = _fill_storage_with_origins(storage, 70)
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "schedule_origins",
             "swh-test-ping",
             "--batch-size",
             "20",
         ],
     )
 
     # Check the output
     expected = r"""
 Scheduled 3 tasks \(60 origins\).
 Scheduled 4 tasks \(70 origins\).
 Done.
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), repr(result.output)
 
     # Check tasks
     tasks = swh_scheduler.search_tasks()
     _assert_origin_tasks_contraints(tasks, 4, 20, origins)
     assert all(task["arguments"]["kwargs"] == {} for task in tasks)
 
 
 def test_task_schedule_origins_kwargs(swh_scheduler, storage):
     """Tests support of extra keyword-arguments."""
     origins = _fill_storage_with_origins(storage, 30)
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "schedule_origins",
             "swh-test-ping",
             "--batch-size",
             "20",
             'key1="value1"',
             'key2="value2"',
         ],
     )
 
     # Check the output
     expected = r"""
 Scheduled 2 tasks \(30 origins\).
 Done.
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), repr(result.output)
 
     # Check tasks
     tasks = swh_scheduler.search_tasks()
     _assert_origin_tasks_contraints(tasks, 2, 20, origins)
     assert all(
         task["arguments"]["kwargs"] == {"key1": "value1", "key2": "value2"}
         for task in tasks
     )
 
 
 def test_task_schedule_origins_with_limit(swh_scheduler, storage):
     """Tests support of extra keyword-arguments."""
     _fill_storage_with_origins(storage, 50)
     limit = 20
     expected_origins = list(islice(stream_results(storage.origin_list), limit))
     nb_origins = len(expected_origins)
 
     assert nb_origins == limit
     max_task_size = 5
     nb_tasks, remainder = divmod(nb_origins, max_task_size)
     assert remainder == 0  # made the numbers go round
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "schedule_origins",
             "swh-test-ping",
             "--batch-size",
             max_task_size,
             "--limit",
             limit,
         ],
     )
 
     # Check the output
     expected = rf"""
 Scheduled {nb_tasks} tasks \({nb_origins} origins\).
 Done.
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), repr(result.output)
 
     tasks = swh_scheduler.search_tasks()
     _assert_origin_tasks_contraints(tasks, max_task_size, nb_origins, expected_origins)
 
 
 def test_task_schedule_origins_with_page_token(swh_scheduler, storage):
     """Tests support of extra keyword-arguments."""
     nb_total_origins = 50
     origins = _fill_storage_with_origins(storage, nb_total_origins)
 
     # prepare page_token and origins result expectancy
     page_result = storage.origin_list(limit=10)
     assert len(page_result.results) == 10
     page_token = page_result.next_page_token
     assert page_token is not None
 
     # remove the first 10 origins listed as we won't see those in tasks
     expected_origins = [o for o in origins if o not in page_result.results]
     nb_origins = len(expected_origins)
     assert nb_origins == nb_total_origins - len(page_result.results)
 
     max_task_size = 10
     nb_tasks, remainder = divmod(nb_origins, max_task_size)
     assert remainder == 0
 
     result = invoke(
         swh_scheduler,
         False,
         [
             "task",
             "schedule_origins",
             "swh-test-ping",
             "--batch-size",
             max_task_size,
             "--page-token",
             page_token,
         ],
     )
 
     # Check the output
     expected = rf"""
 Scheduled {nb_tasks} tasks \({nb_origins} origins\).
 Done.
 """.lstrip()
     assert result.exit_code == 0, result.output
     assert re.fullmatch(expected, result.output, re.MULTILINE), repr(result.output)
 
     # Check tasks
     tasks = swh_scheduler.search_tasks()
     _assert_origin_tasks_contraints(tasks, max_task_size, nb_origins, expected_origins)
 
 
 def test_cli_task_runner_unknown_task_types(swh_scheduler, storage):
     """When passing at least one unknown task type, the runner should fail."""
 
     task_types = swh_scheduler.get_task_types()
     task_type_names = [t["type"] for t in task_types]
     known_task_type = random.choice(task_type_names)
     unknown_task_type = "unknown-task-type"
     assert unknown_task_type not in task_type_names
 
     with pytest.raises(ValueError, match="Unknown"):
         invoke(
             swh_scheduler,
             False,
             [
                 "start-runner",
                 "--task-type",
                 known_task_type,
                 "--task-type",
                 unknown_task_type,
             ],
         )
 
 
 @pytest.mark.parametrize("flag_priority", ["--with-priority", "--without-priority"])
 def test_cli_task_runner_with_known_tasks(
     swh_scheduler, storage, caplog, flag_priority
 ):
     """Trigger runner with known tasks runs smoothly."""
 
     task_types = swh_scheduler.get_task_types()
     task_type_names = [t["type"] for t in task_types]
     task_type_name = random.choice(task_type_names)
     task_type_name2 = random.choice(task_type_names)
 
     # The runner will just iterate over the following known tasks and do noop. We are
     # just checking the runner does not explode here.
     result = invoke(
         swh_scheduler,
         False,
         [
             "start-runner",
             flag_priority,
             "--task-type",
             task_type_name,
             "--task-type",
             task_type_name2,
         ],
     )
 
     assert result.exit_code == 0, result.output
 
 
 def test_cli_task_runner_no_task(swh_scheduler, storage):
     """Trigger runner with no parameter should run as before."""
 
     # The runner will just iterate over the existing tasks from the scheduler and do
     # noop. We are just checking the runner does not explode here.
     result = invoke(
         swh_scheduler,
         False,
         [
             "start-runner",
         ],
     )
 
     assert result.exit_code == 0, result.output
diff --git a/swh/scheduler/tests/test_recurrent_visits.py b/swh/scheduler/tests/test_recurrent_visits.py
index ad3cfd2..484176f 100644
--- a/swh/scheduler/tests/test_recurrent_visits.py
+++ b/swh/scheduler/tests/test_recurrent_visits.py
@@ -1,216 +1,271 @@
 # Copyright (C) 2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from datetime import timedelta
 import logging
 from queue import Queue
-from unittest.mock import MagicMock
+import time
 
 import pytest
+import yaml
 
 from swh.scheduler.celery_backend.recurrent_visits import (
     DEFAULT_DVCS_POLICY,
     VisitSchedulerThreads,
     grab_next_visits_policy_weights,
     send_visits_for_visit_type,
     spawn_visit_scheduler_thread,
     terminate_visit_scheduler_threads,
     visit_scheduler_thread,
 )
 
 from .test_cli import invoke
 
 TEST_MAX_QUEUE = 10000
 MODULE_NAME = "swh.scheduler.celery_backend.recurrent_visits"
 
 
 def _compute_backend_name(visit_type: str) -> str:
     "Build a dummy reproducible backend name"
     return f"swh.loader.{visit_type}.tasks"
 
 
 @pytest.fixture
 def swh_scheduler(swh_scheduler):
     """Override default fixture of the scheduler to install some more task types."""
     for visit_type in ["test-git", "test-hg", "test-svn"]:
         task_type = f"load-{visit_type}"
         swh_scheduler.create_task_type(
             {
                 "type": task_type,
                 "max_queue_length": TEST_MAX_QUEUE,
                 "description": "The {} testing task".format(task_type),
                 "backend_name": _compute_backend_name(visit_type),
                 "default_interval": timedelta(days=1),
                 "min_interval": timedelta(hours=6),
                 "max_interval": timedelta(days=12),
             }
         )
     return swh_scheduler
 
 
+@pytest.fixture
+def all_task_types(swh_scheduler):
+    return {
+        task_type_d["type"]: task_type_d
+        for task_type_d in swh_scheduler.get_task_types()
+    }
+
+
 def test_cli_schedule_recurrent_unknown_visit_type(swh_scheduler):
     """When passed an unknown visit type, the recurrent visit scheduler should refuse
     to start."""
 
     with pytest.raises(ValueError, match="Unknown"):
         invoke(
             swh_scheduler,
             False,
             [
                 "schedule-recurrent",
                 "--visit-type",
                 "unknown",
                 "--visit-type",
                 "test-git",
             ],
         )
 
 
 def test_cli_schedule_recurrent_noop(swh_scheduler, mocker):
     """When passing no visit types, the recurrent visit scheduler should start."""
 
     spawn_visit_scheduler_thread = mocker.patch(
         f"{MODULE_NAME}.spawn_visit_scheduler_thread"
     )
     spawn_visit_scheduler_thread.side_effect = SystemExit
     # The actual scheduling threads won't spawn, they'll immediately terminate. This
     # only exercises the logic to pull task types out of the database
 
     result = invoke(swh_scheduler, False, ["schedule-recurrent"])
     assert result.exit_code == 0, result.output
 
 
+def test_send_visits_for_type_no_origin_scheduled_backoff(
+    swh_scheduler, all_task_types, mocker
+):
+    visit_type = "test-git"
+    task_type = f"load-{visit_type}"
+    mocker.patch.object(time, "monotonic", lambda: 0)
+    no_origins_scheduled_backoff = 60
+    assert (
+        send_visits_for_visit_type(
+            swh_scheduler,
+            mocker.MagicMock(),
+            visit_type,
+            all_task_types[task_type],
+            DEFAULT_DVCS_POLICY,
+            no_origins_scheduled_backoff,
+        )
+        == no_origins_scheduled_backoff
+    )
+
+
+def test_cli_schedule_recurrent_no_origins_scheduled_backoff_in_config(
+    swh_scheduler, mocker
+):
+    """When passing no visit types, the recurrent visit scheduler should start."""
+
+    config = """
+    scheduler:
+        cls: foo
+        args: {}
+    no_origins_scheduled_backoff: 60
+    """
+
+    spawn_visit_scheduler_thread = mocker.patch(
+        f"{MODULE_NAME}.spawn_visit_scheduler_thread"
+    )
+    spawn_visit_scheduler_thread.side_effect = SystemExit
+    # The actual scheduling threads won't spawn, they'll immediately terminate. This
+    # only exercises the logic to pull task types out of the database
+
+    result = invoke(
+        swh_scheduler,
+        False,
+        ["schedule-recurrent", "--visit-type", "test-git"],
+        config=config,
+    )
+    assert result.exit_code == 0, result.output
+
+    assert yaml.safe_load(config) in spawn_visit_scheduler_thread.call_args[0]
+
+
 def test_recurrent_visit_scheduling(
     swh_scheduler,
     caplog,
     listed_origins_by_type,
+    all_task_types,
     mocker,
 ):
     """Scheduling known tasks is ok."""
 
     caplog.set_level(logging.DEBUG, MODULE_NAME)
     nb_origins = 1000
 
-    mock_celery_app = MagicMock()
+    mock_celery_app = mocker.MagicMock()
     mock_available_slots = mocker.patch(f"{MODULE_NAME}.get_available_slots")
     mock_available_slots.return_value = nb_origins  # Slots available in queue
 
     # Make sure the scheduler is properly configured in terms of visit/task types
-    all_task_types = {
-        task_type_d["type"]: task_type_d
-        for task_type_d in swh_scheduler.get_task_types()
-    }
-
     visit_types = list(listed_origins_by_type.keys())
     assert len(visit_types) > 0
 
     task_types = []
     origins = []
     for visit_type, _origins in listed_origins_by_type.items():
         origins.extend(swh_scheduler.record_listed_origins(_origins))
         task_type_name = f"load-{visit_type}"
         assert task_type_name in all_task_types.keys()
         task_type = all_task_types[task_type_name]
         task_type["visit_type"] = visit_type
         # we'll limit the orchestrator to the origins' type we know
         task_types.append(task_type)
 
     for visit_type in ["test-git", "test-svn"]:
         task_type = f"load-{visit_type}"
         send_visits_for_visit_type(
             swh_scheduler,
             mock_celery_app,
             visit_type,
             all_task_types[task_type],
             DEFAULT_DVCS_POLICY,
         )
 
     assert mock_available_slots.called, "The available slots functions should be called"
 
     records = [record.message for record in caplog.records]
 
     # Mapping over the dict ratio/policies entries can change overall order so let's
     # check the set of records
     expected_records = set()
     for task_type in task_types:
         visit_type = task_type["visit_type"]
         queue_name = task_type["backend_name"]
         msg = (
             f"{nb_origins} available slots for visit type {visit_type} "
             f"in queue {queue_name}"
         )
         expected_records.add(msg)
 
     for expected_record in expected_records:
         assert expected_record in set(records)
 
 
 @pytest.mark.parametrize(
     "visit_type, extras",
     [("test-hg", {}), ("test-git", {"tablesample": 0.1})],
 )
 def test_recurrent_visit_additional_parameters(
     swh_scheduler, mocker, visit_type, extras
 ):
     """Testing additional policy parameters"""
 
     mock_grab_next_visits = mocker.patch.object(swh_scheduler, "grab_next_visits")
     mock_grab_next_visits.return_value = []
     policy_cfg = DEFAULT_DVCS_POLICY[:]
     for policy in policy_cfg:
         policy.update(extras)
     grab_next_visits_policy_weights(swh_scheduler, visit_type, 10, policy_cfg)
 
     for call in mock_grab_next_visits.call_args_list:
         assert call[1].get("tablesample") == extras.get("tablesample")
 
 
 @pytest.fixture
 def scheduler_config(swh_scheduler_config):
     return {"scheduler": {"cls": "postgresql", **swh_scheduler_config}, "celery": {}}
 
 
 def test_visit_scheduler_thread_unknown_task(
     swh_scheduler,
     scheduler_config,
 ):
     """Starting a thread with unknown task type reports the error"""
 
     unknown_visit_type = "unknown"
     command_queue = Queue()
     exc_queue = Queue()
 
     visit_scheduler_thread(
         scheduler_config, unknown_visit_type, command_queue, exc_queue
     )
 
     assert command_queue.empty() is True
     assert exc_queue.empty() is False
     assert len(exc_queue.queue) == 1
     result = exc_queue.queue.pop()
     assert result[0] == unknown_visit_type
     assert isinstance(result[1], ValueError)
 
 
 def test_spawn_visit_scheduler_thread_noop(scheduler_config, visit_types, mocker):
     """Spawning and terminating threads runs smoothly"""
 
     threads: VisitSchedulerThreads = {}
     exc_queue = Queue()
     mock_build_app = mocker.patch("swh.scheduler.celery_backend.config.build_app")
-    mock_build_app.return_value = MagicMock()
+    mock_build_app.return_value = mocker.MagicMock()
 
     assert len(threads) == 0
     for visit_type in visit_types:
         spawn_visit_scheduler_thread(threads, exc_queue, scheduler_config, visit_type)
 
     # This actually only checks the spawning and terminating logic is sound
 
     assert len(threads) == len(visit_types)
 
     actual_threads = terminate_visit_scheduler_threads(threads)
     assert not len(actual_threads)
 
     assert mock_build_app.called
diff --git a/swh/scheduler/tests/test_server.py b/swh/scheduler/tests/test_server.py
index 50c5b41..4156fac 100644
--- a/swh/scheduler/tests/test_server.py
+++ b/swh/scheduler/tests/test_server.py
@@ -1,100 +1,97 @@
 # Copyright (C) 2019-2020  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import pytest
 import yaml
 
 from swh.scheduler.api.server import load_and_check_config
 
 
 def prepare_config_file(tmpdir, content, name="config.yml"):
     """Prepare configuration file in `$tmpdir/name` with content `content`.
 
     Args:
         tmpdir (LocalPath): root directory
         content (str/dict): Content of the file either as string or as a dict.
                             If a dict, converts the dict into a yaml string.
         name (str): configuration filename
 
     Returns
         path (str) of the configuration file prepared.
 
     """
     config_path = tmpdir / name
     if isinstance(content, dict):  # convert if needed
         content = yaml.dump(content)
     config_path.write_text(content, encoding="utf-8")
     # pytest on python3.5 does not support LocalPath manipulation, so
     # convert path to string
     return str(config_path)
 
 
 @pytest.mark.parametrize("scheduler_class", [None, ""])
 def test_load_and_check_config_no_configuration(scheduler_class):
     """Inexistent configuration files raises"""
     with pytest.raises(EnvironmentError, match="Configuration file must be defined"):
         load_and_check_config(scheduler_class)
 
 
 def test_load_and_check_config_inexistent_fil():
     """Inexistent config filepath should raise"""
     config_path = "/some/inexistent/config.yml"
     expected_error = f"Configuration file {config_path} does not exist"
     with pytest.raises(FileNotFoundError, match=expected_error):
         load_and_check_config(config_path)
 
 
 def test_load_and_check_config_wrong_configuration(tmpdir):
     """Wrong configuration raises"""
     config_path = prepare_config_file(tmpdir, "something: useless")
     with pytest.raises(KeyError, match="Missing '%scheduler' configuration"):
         load_and_check_config(config_path)
 
 
 def test_load_and_check_config_remote_config_local_type_raise(tmpdir):
-    """Configuration without 'local' storage is rejected"""
-    config = {"scheduler": {"cls": "remote"}}
-    config_path = prepare_config_file(tmpdir, config)
-    expected_error = (
-        "The scheduler backend can only be started with a 'postgresql'" " configuration"
-    )
-    with pytest.raises(ValueError, match=expected_error):
-        load_and_check_config(config_path, type="local")
+    """Configuration without 'postgresql' storage is rejected"""
+    config_path = prepare_config_file(tmpdir, {"scheduler": {"cls": "remote"}})
+    with pytest.raises(ValueError, match="'postgresql'"):
+        load_and_check_config(config_path)
 
 
 def test_load_and_check_config_local_incomplete_configuration(tmpdir):
     """Incomplete 'local' configuration should raise"""
     config = {
         "scheduler": {
             "cls": "postgresql",
             "something": "needed-for-test",
         }
     }
 
     config_path = prepare_config_file(tmpdir, config)
     expected_error = "Invalid configuration; missing 'db' config entry"
     with pytest.raises(KeyError, match=expected_error):
         load_and_check_config(config_path)
 
 
-def test_load_and_check_config_local_config_fine(tmpdir):
+@pytest.mark.parametrize("clazz", ["local", "postgresql"])
+def test_load_and_check_config_local_config_fine(tmpdir, clazz):
     """Local configuration is fine"""
     config = {
         "scheduler": {
-            "cls": "postgresql",
+            "cls": clazz,
             "db": "db",
         }
     }
     config_path = prepare_config_file(tmpdir, config)
-    cfg = load_and_check_config(config_path, type="local")
+    cfg = load_and_check_config(config_path, type="postgresql")
     assert cfg == config
 
 
 def test_load_and_check_config_remote_config_fine(tmpdir):
     """Remote configuration is fine"""
     config = {"scheduler": {"cls": "remote"}}
     config_path = prepare_config_file(tmpdir, config)
     cfg = load_and_check_config(config_path, type="any")
     assert cfg == config