diff --git a/swh/scheduler/cli/simulator.py b/swh/scheduler/cli/simulator.py
--- a/swh/scheduler/cli/simulator.py
+++ b/swh/scheduler/cli/simulator.py
@@ -57,8 +57,11 @@
     "showplots",
     help="Show results as plots (with plotille)",
 )
+@click.option(
+    "--csv", "-o", "csvfile", type=click.File("w"), help="Export results in a CSV file"
+)
 @click.pass_context
-def run_command(ctx, scheduler, policy, runtime, showplots):
+def run_command(ctx, scheduler, policy, runtime, showplots, csvfile):
     """Run the scheduler simulator.
 
     By default, the simulation runs forever. You can cap the simulated runtime
@@ -80,3 +83,5 @@
     )
 
     print(report.format(with_plots=showplots))
+    if csvfile is not None:
+        report.metrics_csv(csvfile)
diff --git a/swh/scheduler/simulator/common.py b/swh/scheduler/simulator/common.py
--- a/swh/scheduler/simulator/common.py
+++ b/swh/scheduler/simulator/common.py
@@ -3,10 +3,11 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import csv
 from dataclasses import dataclass, field
 from datetime import datetime, timedelta
 import textwrap
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional, TextIO, Tuple
 import uuid
 
 import plotille
@@ -32,7 +33,11 @@
     scheduler_metrics: List[Tuple[datetime, List[SchedulerMetrics]]] = field(
         default_factory=list
     )
-    """Collected scheduler metrics for every timestamp"""
+    """Collected scheduler metrics
+
+    This is a list of couples (timestamp, [SchedulerMetrics,]): the list of
+    scheduler metrics collected at given timestamp.
+    """
 
     visit_metrics: List[Tuple[datetime, int]] = field(default_factory=list)
     """Collected visit metrics over time"""
@@ -90,6 +95,33 @@
 
         return figure.show(legend=True)
 
+    def metrics_csv(self, fobj: TextIO) -> None:
+        """Export scheduling metrics in a csv file"""
+        csv_writer = csv.writer(fobj)
+        csv_writer.writerow(
+            [
+                "timestamp",
+                "known_origins",
+                "enabled_origins",
+                "never_visited_origins",
+                "origins_with_pending_changes",
+            ]
+        )
+
+        timestamps, metric_lists = zip(*self.scheduler_metrics)
+        known = (sum(m.origins_known for m in metrics) for metrics in metric_lists)
+        enabled = (sum(m.origins_enabled for m in metrics) for metrics in metric_lists)
+        never_visited = (
+            sum(m.origins_never_visited for m in metrics) for metrics in metric_lists
+        )
+        pending_changes = (
+            sum(m.origins_with_pending_changes for m in metrics)
+            for metrics in metric_lists
+        )
+        csv_writer.writerows(
+            zip(timestamps, known, enabled, never_visited, pending_changes)
+        )
+
     def format(self, with_plots=True):
         full_visits = self.visit_runtimes.get(("full", True), [])
         long_tasks = sum(runtime > self.DURATION_THRESHOLD for runtime in full_visits)
diff --git a/swh/scheduler/tests/test_simulator.py b/swh/scheduler/tests/test_simulator.py
--- a/swh/scheduler/tests/test_simulator.py
+++ b/swh/scheduler/tests/test_simulator.py
@@ -3,6 +3,8 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import io
+
 import pytest
 
 from swh.core.api.classes import stream_results
@@ -62,3 +64,6 @@
     # just check these SimulationReport methods do not crash
     assert report.format(with_plots=True)
     assert report.format(with_plots=False)
+    fobj = io.StringIO()
+    report.metrics_csv(fobj=fobj)
+    assert fobj.getvalue()