Changeset View
Changeset View
Standalone View
Standalone View
swh/scheduler/simulator/common.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import csv | |||||
from dataclasses import dataclass, field | from dataclasses import dataclass, field | ||||
from datetime import datetime, timedelta | from datetime import datetime, timedelta | ||||
import textwrap | import textwrap | ||||
from typing import Dict, List, Optional, Tuple | from typing import Dict, List, Optional, TextIO, Tuple | ||||
import uuid | import uuid | ||||
import plotille | import plotille | ||||
from simpy import Environment as _Environment | from simpy import Environment as _Environment | ||||
from simpy import Store | from simpy import Store | ||||
from swh.model.model import OriginVisitStatus | from swh.model.model import OriginVisitStatus | ||||
from swh.scheduler.interface import SchedulerInterface | from swh.scheduler.interface import SchedulerInterface | ||||
Show All 9 Lines | class SimulationReport: | ||||
"""Total count of finished visits""" | """Total count of finished visits""" | ||||
visit_runtimes: Dict[Tuple[str, bool], List[float]] = field(default_factory=dict) | visit_runtimes: Dict[Tuple[str, bool], List[float]] = field(default_factory=dict) | ||||
"""Collected visit runtimes for each (status, eventful) tuple""" | """Collected visit runtimes for each (status, eventful) tuple""" | ||||
scheduler_metrics: List[Tuple[datetime, List[SchedulerMetrics]]] = field( | scheduler_metrics: List[Tuple[datetime, List[SchedulerMetrics]]] = field( | ||||
default_factory=list | default_factory=list | ||||
) | ) | ||||
"""Collected scheduler metrics for every timestamp""" | """Collected scheduler metrics | ||||
This is a list of couples (timestamp, [SchedulerMetrics,]): the list of | |||||
scheduler metrics collected at given timestamp. | |||||
""" | |||||
visit_metrics: List[Tuple[datetime, int]] = field(default_factory=list) | visit_metrics: List[Tuple[datetime, int]] = field(default_factory=list) | ||||
"""Collected visit metrics over time""" | """Collected visit metrics over time""" | ||||
latest_snapshots: Dict[Tuple[str, str], bytes] = field(default_factory=dict) | latest_snapshots: Dict[Tuple[str, str], bytes] = field(default_factory=dict) | ||||
"""Collected latest snapshots for origins""" | """Collected latest snapshots for origins""" | ||||
def record_visit( | def record_visit( | ||||
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines | def metrics_plot(self) -> str: | ||||
figure.scatter(timestamps, known, label="Known origins") | figure.scatter(timestamps, known, label="Known origins") | ||||
figure.scatter(timestamps, never_visited, label="Origins never visited") | figure.scatter(timestamps, never_visited, label="Origins never visited") | ||||
visit_timestamps, n_visits = zip(*self.visit_metrics) | visit_timestamps, n_visits = zip(*self.visit_metrics) | ||||
figure.scatter(visit_timestamps, n_visits, label="Visits over time") | figure.scatter(visit_timestamps, n_visits, label="Visits over time") | ||||
return figure.show(legend=True) | return figure.show(legend=True) | ||||
def metrics_csv(self, fobj: TextIO) -> None:
    """Export the collected scheduler metrics as CSV.

    Writes a header row, then one row per entry of ``self.scheduler_metrics``.
    Each entry is a ``(timestamp, [SchedulerMetrics, ...])`` couple; the row
    holds the timestamp followed by the sum, over all metrics collected at
    that timestamp, of the known, enabled, never-visited and
    pending-changes origin counts.

    Args:
        fobj: writable text stream the CSV rows are written to.

    When no metrics have been collected, only the header row is written.
    """
    csv_writer = csv.writer(fobj)
    csv_writer.writerow(
        [
            "timestamp",
            "known_origins",
            "enabled_origins",
            "never_visited_origins",
            "origins_with_pending_changes",
        ]
    )
    # Iterate the (timestamp, metrics) couples directly rather than
    # transposing with zip(*...): unpacking the transposed result raises
    # ValueError when scheduler_metrics is empty.
    csv_writer.writerows(
        (
            timestamp,
            sum(m.origins_known for m in metrics),
            sum(m.origins_enabled for m in metrics),
            sum(m.origins_never_visited for m in metrics),
            sum(m.origins_with_pending_changes for m in metrics),
        )
        for timestamp, metrics in self.scheduler_metrics
    )
Not Done Inline Actions | olasd: Instead of zipping twice, we can probably just do the sums in a loop for each timestamp. | |||||
Done Inline Actions | douardda: well everything in this pipeline is a generator, so I see no harm in "double zipping" there. | |||||
def format(self, with_plots=True): | def format(self, with_plots=True): | ||||
full_visits = self.visit_runtimes.get(("full", True), []) | full_visits = self.visit_runtimes.get(("full", True), []) | ||||
long_tasks = sum(runtime > self.DURATION_THRESHOLD for runtime in full_visits) | long_tasks = sum(runtime > self.DURATION_THRESHOLD for runtime in full_visits) | ||||
output = textwrap.dedent( | output = textwrap.dedent( | ||||
f"""\ | f"""\ | ||||
Total visits: {self.total_visits} | Total visits: {self.total_visits} | ||||
Uneventful visits: {self.uneventful_visits} | Uneventful visits: {self.uneventful_visits} | ||||
▲ Show 20 Lines • Show All 59 Lines • Show Last 20 Lines |
known