def fill_test_data(
    scheduler: SchedulerInterface,
    num_origins: int = 100000,
    batch_size: int = 10_000,
):
    """Fills the database with mock data to test the simulator.

    Gets or creates a lister named ``example``, then records ``num_origins``
    mock git origins attached to it and creates one recurring task per
    origin. Origins and tasks are sent to the scheduler in batches so that a
    large ``num_origins`` never requires building a single huge list.

    Args:
        scheduler: scheduler backend that receives the lister, the listed
            origins and the tasks.
        num_origins: total number of mock origins to generate; values lower
            than or equal to zero generate nothing.
        batch_size: maximum number of origins (and tasks) sent to the
            scheduler per call.

    Raises:
        ValueError: if ``batch_size`` is not strictly positive.
    """
    from random import randint

    from swh.scheduler.utils import utcnow

    # Checked before touching the scheduler so that a bad argument has no
    # side effect; a non-positive batch size could never make progress.
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    stored_lister = scheduler.get_or_create_lister(name="example")
    assert stored_lister.id is not None

    # Upper bound for the random ``last_update`` values: the current time,
    # as a whole number of seconds since the epoch.
    maxts = int(utcnow().timestamp())

    # Count down on a local so the ``num_origins`` argument is left intact.
    remaining = num_origins
    while remaining > 0:
        current_batch = min(remaining, batch_size)
        origins = [
            ListedOrigin(
                lister_id=stored_lister.id,
                url=f"https://example.com/{i:04d}.git",
                visit_type="git",
                # Random instant between the epoch and now, in UTC.
                last_update=datetime.fromtimestamp(randint(0, maxts), tz=timezone.utc),
            )
            # Indices run downwards from ``remaining``; over all batches the
            # generated URLs are numbered from ``num_origins`` down to 1.
            for i in range(remaining, remaining - current_batch, -1)
        ]
        scheduler.record_listed_origins(origins)

        scheduler.create_tasks(
            [
                {
                    **origin.as_task_dict(),
                    "policy": "recurring",
                    # The first run is scheduled at the origin's last update.
                    "next_run": origin.last_update,
                    "interval": timedelta(days=64),
                }
                for origin in origins
            ]
        )
        remaining -= current_batch