diff --git a/swh/scheduler/cli/simulator.py b/swh/scheduler/cli/simulator.py
--- a/swh/scheduler/cli/simulator.py
+++ b/swh/scheduler/cli/simulator.py
@@ -46,8 +46,11 @@
 )
 @click.option("--runtime", "-t", type=float, help="Simulated runtime")
 @click.option("--histogram/--no-histogram", "-H", help="Show histograms results")
+@click.option(
+    "--csv", "-o", "csvfile", type=click.File("w"), help="Export results in a CSV file"
+)
 @click.pass_context
-def run_command(ctx, scheduler, policy, runtime, histogram):
+def run_command(ctx, scheduler, policy, runtime, histogram, csvfile):
     """Run the scheduler simulator.
 
     By default, the simulation runs forever. You can cap the simulated runtime
@@ -69,3 +72,5 @@
     )
 
     print(report.format(with_histogram=histogram))
+    if csvfile is not None:
+        report.metrics_csv(csvfile)
diff --git a/swh/scheduler/simulator/common.py b/swh/scheduler/simulator/common.py
--- a/swh/scheduler/simulator/common.py
+++ b/swh/scheduler/simulator/common.py
@@ -3,10 +3,11 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import csv
 from dataclasses import dataclass, field
 from datetime import datetime, timedelta
 import textwrap
-from typing import Dict, List, Tuple
+from typing import Dict, List, TextIO, Tuple
 import uuid
 
 import plotille
@@ -30,7 +31,12 @@
     """Collected visit runtimes for each (status, eventful) tuple"""
 
     metrics: List[Tuple[datetime, List[SchedulerMetrics]]] = field(default_factory=list)
-    """Collected scheduler metrics for every timestamp"""
+    """Collected scheduler metrics
+
+    This is a list of couples (timestamp, [SchedulerMetrics,]): the list of
+    metrics collected at given timestamp.
+
+    """
 
     def record_visit(self, duration: float, eventful: bool, status: str) -> None:
         self.total_visits += 1
@@ -65,6 +71,41 @@
 
         return figure.show(legend=True)
 
+    def metrics_csv(self, fobj: TextIO) -> None:
+        """Export the collected scheduler metrics as CSV.
+
+        Writes a header row followed by one row per recorded timestamp; each
+        counter is summed over all the SchedulerMetrics collected at that
+        timestamp. When no metrics have been recorded, only the header row is
+        written.
+
+        Args:
+            fobj: text file object, opened for writing, that receives the rows
+        """
+        csv_writer = csv.writer(fobj)
+        csv_writer.writerow(
+            [
+                "timestamp",
+                "known_origins",
+                "enabled_origins",
+                "never_visited_origins",
+                "origins_with_pending_changes",
+            ]
+        )
+
+        # Build each row straight from its (timestamp, metrics) pair: unpacking
+        # zip(*self.metrics) raises ValueError when no metrics were recorded.
+        csv_writer.writerows(
+            (
+                timestamp,
+                sum(m.origins_known for m in metrics),
+                sum(m.origins_enabled for m in metrics),
+                sum(m.origins_never_visited for m in metrics),
+                sum(m.origins_with_pending_changes for m in metrics),
+            )
+            for timestamp, metrics in self.metrics
+        )
+
     def format(self, with_histogram=True):
         full_visits = self.visit_runtimes.get(("full", True), [])
         long_tasks = sum(runtime > self.DURATION_THRESHOLD for runtime in full_visits)