diff --git a/grafanalib-dashboards/Makefile b/grafanalib-dashboards/Makefile
new file mode 100644
index 0000000..736d811
--- /dev/null
+++ b/grafanalib-dashboards/Makefile
@@ -0,0 +1,24 @@
+# Generate json dashboards
+
+TARGETS= \
+	grafanalib.cpu.usage.json \
+	grafanalib.disk.iops.json \
+	grafanalib.disk.latency.json \
+	grafanalib.disk.throughput.json \
+	grafanalib.filesystem.sizes.json \
+	grafanalib.loadavg.json \
+	grafanalib.memory.usage.json \
+	grafanalib.network.traffic.json \
+	grafanalib.swap.json \
+	grafanalib.uptime.json
+
+# default and clean are commands, not files to be built.
+.PHONY: default clean
+
+default: $(TARGETS)
+
+%.json: %.py
+	generate-dashboard -o $@ $<
+
+clean:
+	rm -f $(TARGETS)
diff --git a/grafanalib-dashboards/README.txt b/grafanalib-dashboards/README.txt
new file mode 100644
index 0000000..1ea3fd4
--- /dev/null
+++ b/grafanalib-dashboards/README.txt
@@ -0,0 +1,25 @@
+Task: Grafana dashboards
+========================
+
+Goal: replace Munin graphs
+
+
+1. Grafanalib installation
+--------------------------
+
+    pip3 install --user grafanalib
+    export PATH=$PATH:~/.local/bin
+
+
+2. Define Grafana graphs programmatically
+-----------------------------------------
+
+Grafanalib invocation:
+
+    generate-dashboard -o frontend.json frontend.dashboard.py
+
+
+3. Import dashboard into Grafana
+--------------------------------
+
+    "+" (cross) icon at the top of the left panel => "Import"
diff --git a/grafanalib-dashboards/grafanalib.cpu.usage.py b/grafanalib-dashboards/grafanalib.cpu.usage.py
new file mode 100644
index 0000000..8ae0d44
--- /dev/null
+++ b/grafanalib-dashboards/grafanalib.cpu.usage.py
@@ -0,0 +1,91 @@
+"""Per-CPU usage dashboard, generated with grafanalib."""
+
+from grafanalib.core import (
+    Dashboard, Graph, Row, Target, Template, Templating, YAxis, SHORT_FORMAT,
+)
+
+DATASOURCE = "Prometheus (Pergamon)"
+
+# Modes of node_cpu_seconds_total to plot, as (mode, legend) pairs.
+CPU_MODES = [
+    ("system", "system time"),
+    ("irq", "irq"),
+    ("softirq", "softirq"),
+    ("user", "user time"),
+    ("nice", "nice"),
+    ("idle", "idle"),
+    ("iowait", "iowait"),
+    ("steal", "steal"),
+]
+
+
+def cpu_usage_graph(target, cpu=0, time_from=None):
+    """Build a stacked graph of the time one CPU spends in each mode.
+
+    target and cpu are substituted into the "instance" and "cpu" label
+    matchers of every query. time_from, when not None, is converted to a
+    string and passed to the Graph as timeFrom.
+    """
+    return Graph(
+        title='CPU %s usage' % (cpu,),
+        dataSource=DATASOURCE,
+        timeFrom=None if time_from is None else str(time_from),
+        stack=True,
+        percentage=True,
+        targets=[
+            Target(
+                expr='irate(node_cpu_seconds_total'
+                     '{instance="%s",cpu="%s",mode="%s"}[5m])'
+                     % (target, cpu, mode),
+                legendFormat=legend,
+                # refIds run A, B, C, ... in CPU_MODES order.
+                refId=chr(ord('A') + index),
+            )
+            for index, (mode, legend) in enumerate(CPU_MODES)
+        ],
+        yAxes=[
+            YAxis(format='percentunit'),
+            YAxis(format=SHORT_FORMAT),
+        ],
+    )
+
+
+dashboard = Dashboard(
+    title="CPU usage auto-generated",
+    templating=Templating(list=[
+        Template(
+            name="host",
+            label="",
+            query='node_uname_info{instance="$target"}',
+            regex='/nodename="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="target",
+            label="",
+            query='node_uname_info',
+            regex='/instance="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="cpu",
+            label="",
+            query='node_cpu_seconds_total{instance="$target"}',
+            regex='/cpu="([^"]*)"/',
+            dataSource=DATASOURCE,
+            includeAll=True,
+            default="All",
+            hide=2,
+        ),
+    ]),
+    rows=[
+        Row(
+            title='CPU $cpu',
+            panels=[
+                cpu_usage_graph('$target', '$cpu'),
+                cpu_usage_graph('$target', '$cpu', "1y"),
+            ],
+            repeat='cpu',
+        ),
+    ],
+).auto_panel_ids()
diff --git a/grafanalib-dashboards/grafanalib.disk.iops.py b/grafanalib-dashboards/grafanalib.disk.iops.py
new file mode 100644
index 0000000..17a65d8
--- /dev/null
+++ b/grafanalib-dashboards/grafanalib.disk.iops.py
@@ -0,0 +1,84 @@
+"""Per-device disk IOPS dashboard, generated with grafanalib."""
+
+from grafanalib.core import (
+    Dashboard, Graph, Legend, Row, Target, Template, Templating, YAxis,
+    SHORT_FORMAT,
+)
+
+DATASOURCE = "Prometheus (Pergamon)"
+
+
+def device_iops_graph(target, device, time_from=None):
+    """Build a graph of read and write operations per second for a device.
+
+    target and device are substituted into the "instance" and "device"
+    label matchers. time_from, when not None, is converted to a string and
+    passed to the Graph as timeFrom.
+    """
+    return Graph(
+        title='IOs for %s' % (device,),
+        dataSource=DATASOURCE,
+        timeFrom=None if time_from is None else str(time_from),
+        targets=[
+            Target(
+                expr='rate(node_disk_reads_completed_total'
+                     '{instance="%s",device="%s"}[5m])' % (target, device),
+                legendFormat="reads per second",
+                refId='A',
+            ),
+            Target(
+                expr='rate(node_disk_writes_completed_total'
+                     '{instance="%s",device="%s"}[5m])' % (target, device),
+                legendFormat="writes per second",
+                refId='B',
+            ),
+        ],
+        yAxes=[
+            YAxis(format=SHORT_FORMAT),
+            YAxis(format=SHORT_FORMAT),
+        ],
+        legend=Legend(max=True, min=True, avg=True, current=True),
+    )
+
+
+dashboard = Dashboard(
+    title="Diskstat iops auto-generated",
+    templating=Templating(list=[
+        Template(
+            name="host",
+            label="",
+            query='node_uname_info{instance="$target"}',
+            regex='/nodename="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="target",
+            label="",
+            query='node_disk_io_now',
+            regex='/instance="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="device",
+            label="",
+            query='node_disk_io_now{instance="$target"}',
+            # Only sd*, vd*, dm* and md* devices. A character class such as
+            # [sd|vd|dm|md] matches one character, not one of the prefixes.
+            regex='/device="((?:sd|vd|dm|md)[a-z0-9-]*)"/',
+            dataSource=DATASOURCE,
+            includeAll=True,
+            default="All",
+            hide=2,
+        ),
+    ]),
+    rows=[
+        Row(
+            title='$device device',
+            panels=[
+                device_iops_graph('$target', '$device'),
+                device_iops_graph('$target', '$device', "1y"),
+            ],
+            repeat='device',
+        ),
+    ],
+).auto_panel_ids()
diff --git a/grafanalib-dashboards/grafanalib.disk.latency.py b/grafanalib-dashboards/grafanalib.disk.latency.py
new file mode 100644
index 0000000..5d534fd
--- /dev/null
+++ b/grafanalib-dashboards/grafanalib.disk.latency.py
@@ -0,0 +1,80 @@
+"""Per-device disk latency dashboard, generated with grafanalib."""
+
+from grafanalib.core import (
+    Dashboard, Graph, Legend, Row, Target, Template, Templating, YAxis,
+    SHORT_FORMAT,
+)
+
+DATASOURCE = "Prometheus (Pergamon)"
+
+
+def device_latency_graph(target, device, time_from=None):
+    """Build a graph of the weighted I/O time rate for a device.
+
+    target and device are substituted into the "instance" and "device"
+    label matchers. time_from, when not None, is converted to a string and
+    passed to the Graph as timeFrom.
+    """
+    return Graph(
+        title='Average latency for %s' % (device,),
+        dataSource=DATASOURCE,
+        timeFrom=None if time_from is None else str(time_from),
+        targets=[
+            Target(
+                expr='rate(node_disk_io_time_weighted_seconds_total'
+                     '{instance="%s",device="%s"}[5m])' % (target, device),
+                legendFormat="io_time_weighted_seconds rate",
+                refId='A',
+            ),
+        ],
+        yAxes=[
+            # NOTE(review): the metric is in seconds but the axis unit is
+            # 'ms' -- confirm which one is intended.
+            YAxis(format='ms'),
+            YAxis(format=SHORT_FORMAT),
+        ],
+        legend=Legend(max=True, min=True, avg=True, current=True),
+    )
+
+
+dashboard = Dashboard(
+    title="Diskstat latency auto-generated",
+    templating=Templating(list=[
+        Template(
+            name="host",
+            label="",
+            query='node_uname_info{instance="$target"}',
+            regex='/nodename="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="target",
+            label="",
+            query='node_disk_io_now',
+            regex='/instance="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="device",
+            label="",
+            query='node_disk_io_now{instance="$target"}',
+            # Only sd*, vd*, dm* and md* devices. A character class such as
+            # [sd|vd|dm|md] matches one character, not one of the prefixes.
+            regex='/device="((?:sd|vd|dm|md)[a-z0-9-]*)"/',
+            dataSource=DATASOURCE,
+            includeAll=True,
+            default="All",
+            hide=2,
+        ),
+    ]),
+    rows=[
+        Row(
+            title='$device device',
+            panels=[
+                device_latency_graph('$target', '$device'),
+                device_latency_graph('$target', '$device', "1y"),
+            ],
+            repeat='device',
+        ),
+    ],
+).auto_panel_ids()
diff --git a/grafanalib-dashboards/grafanalib.disk.throughput.py b/grafanalib-dashboards/grafanalib.disk.throughput.py
new file mode 100644
index 0000000..577f982
--- /dev/null
+++ b/grafanalib-dashboards/grafanalib.disk.throughput.py
@@ -0,0 +1,85 @@
+"""Per-device disk throughput dashboard, generated with grafanalib."""
+
+from grafanalib.core import (
+    Dashboard, Graph, Legend, Row, Target, Template, Templating, YAxis,
+    SHORT_FORMAT,
+)
+
+DATASOURCE = "Prometheus (Pergamon)"
+
+
+def device_tput_graph(target, device, time_from=None):
+    """Build a graph of bytes read and written per second for a device.
+
+    target and device are substituted into the "instance" and "device"
+    label matchers. time_from, when not None, is converted to a string and
+    passed to the Graph as timeFrom.
+    """
+    return Graph(
+        title='Throughput for %s' % (device,),
+        dataSource=DATASOURCE,
+        timeFrom=None if time_from is None else str(time_from),
+        targets=[
+            Target(
+                expr='irate(node_disk_read_bytes_total'
+                     '{instance="%s",device="%s"}[5m])' % (target, device),
+                legendFormat="bytes read per second",
+                refId='A',
+            ),
+            Target(
+                expr='irate(node_disk_written_bytes_total'
+                     '{instance="%s",device="%s"}[5m])' % (target, device),
+                legendFormat="bytes written per second",
+                refId='B',
+            ),
+        ],
+        yAxes=[
+            # The series are per-second rates, hence the bytes/sec unit.
+            YAxis(format='Bps'),
+            YAxis(format=SHORT_FORMAT),
+        ],
+        legend=Legend(max=True, min=True, avg=True, current=True),
+    )
+
+
+dashboard = Dashboard(
+    title="Diskstat throughput auto-generated",
+    templating=Templating(list=[
+        Template(
+            name="host",
+            label="",
+            query='node_uname_info{instance="$target"}',
+            regex='/nodename="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="target",
+            label="",
+            query='node_disk_io_now',
+            regex='/instance="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="device",
+            label="",
+            query='node_disk_io_now{instance="$target"}',
+            # Only sd*, vd*, dm* and md* devices. A character class such as
+            # [sd|vd|dm|md] matches one character, not one of the prefixes.
+            regex='/device="((?:sd|vd|dm|md)[a-z0-9-]*)"/',
+            dataSource=DATASOURCE,
+            includeAll=True,
+            default="All",
+            hide=2,
+        ),
+    ]),
+    rows=[
+        Row(
+            title='$device device',
+            panels=[
+                device_tput_graph('$target', '$device'),
+                device_tput_graph('$target', '$device', "1y"),
+            ],
+            repeat='device',
+        ),
+    ],
+).auto_panel_ids()
diff --git a/grafanalib-dashboards/grafanalib.filesystem.sizes.py b/grafanalib-dashboards/grafanalib.filesystem.sizes.py
new file mode 100644
index 0000000..a2abf7c
--- /dev/null
+++ b/grafanalib-dashboards/grafanalib.filesystem.sizes.py
@@ -0,0 +1,80 @@
+"""Per-mountpoint filesystem size dashboard, generated with grafanalib."""
+
+from grafanalib.core import (
+    Dashboard, Graph, Row, Target, Template, Templating, YAxis, SHORT_FORMAT,
+)
+
+DATASOURCE = "Prometheus (Pergamon)"
+
+
+def filesystem_graph(target, mountpoint, time_from=None):
+    """Build a graph of the total size and used space of one filesystem.
+
+    target and mountpoint are substituted into the "instance" and
+    "mountpoint" label matchers. time_from, when not None, is converted to
+    a string and passed to the Graph as timeFrom.
+    """
+    labels = 'instance="%s",mountpoint="%s"' % (target, mountpoint)
+    return Graph(
+        title='Size of %s' % (mountpoint,),
+        dataSource=DATASOURCE,
+        timeFrom=None if time_from is None else str(time_from),
+        targets=[
+            Target(
+                expr='node_filesystem_size_bytes{%s}' % (labels,),
+                legendFormat="Filesystem size",
+                refId='A',
+            ),
+            Target(
+                expr='node_filesystem_size_bytes{%s} - '
+                     'node_filesystem_avail_bytes{%s}' % (labels, labels),
+                legendFormat="Used space",
+                refId='B',
+            ),
+        ],
+        yAxes=[
+            YAxis(format='bytes'),
+            YAxis(format=SHORT_FORMAT),
+        ],
+    )
+
+
+dashboard = Dashboard(
+    title="Filesystem sizes auto-generated",
+    templating=Templating(list=[
+        Template(
+            name="host",
+            label="",
+            query='node_uname_info{instance="$target"}',
+            regex='/nodename="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="target",
+            label="",
+            query='node_filesystem_size_bytes',
+            regex='/instance="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="filesystem",
+            label="",
+            query='node_filesystem_size_bytes{instance="$target"}',
+            regex='/mountpoint="([^"]*)"/',
+            dataSource=DATASOURCE,
+            includeAll=True,
+            default="All",
+            hide=2,
+        ),
+    ]),
+    rows=[
+        Row(
+            title='$filesystem',
+            panels=[
+                filesystem_graph('$target', '$filesystem'),
+                filesystem_graph('$target', '$filesystem', "1y"),
+            ],
+            repeat='filesystem',
+        ),
+    ],
+).auto_panel_ids()
diff --git a/grafanalib-dashboards/grafanalib.loadavg.py b/grafanalib-dashboards/grafanalib.loadavg.py
new file mode 100644
index 0000000..f48b6ec
--- /dev/null
+++ b/grafanalib-dashboards/grafanalib.loadavg.py
@@ -0,0 +1,62 @@
+"""Load average dashboard, generated with grafanalib."""
+
+from grafanalib.core import (
+    Dashboard, Graph, Row, Target, Template, Templating, YAxis, SHORT_FORMAT,
+)
+
+DATASOURCE = "Prometheus (Pergamon)"
+
+
+def loadavg_graph(target, time_from=None):
+    """Build a graph of the 5-minute load average (node_load5).
+
+    target is substituted into the "instance" label matcher. time_from,
+    when not None, is converted to a string and passed to the Graph as
+    timeFrom.
+    """
+    return Graph(
+        title='Load average',
+        dataSource=DATASOURCE,
+        timeFrom=None if time_from is None else str(time_from),
+        targets=[
+            Target(
+                expr='node_load5{instance="%s"}' % (target,),
+                legendFormat="5m load average",
+                refId='A',
+            ),
+        ],
+        yAxes=[
+            YAxis(format=SHORT_FORMAT),
+            YAxis(format=SHORT_FORMAT),
+        ],
+    )
+
+
+dashboard = Dashboard(
+    title="Loadavg auto-generated",
+    templating=Templating(list=[
+        Template(
+            name="host",
+            label="",
+            query='node_uname_info{instance="$target"}',
+            regex='/nodename="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="target",
+            label="",
+            query='node_load5',
+            regex='/instance="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+    ]),
+    rows=[
+        Row(
+            title='Load average',
+            panels=[
+                loadavg_graph('$target'),
+                loadavg_graph('$target', "1y"),
+            ],
+        ),
+    ],
+).auto_panel_ids()
diff --git a/grafanalib-dashboards/grafanalib.memory.usage.py b/grafanalib-dashboards/grafanalib.memory.usage.py
new file mode 100644
index 0000000..637d4a7
--- /dev/null
+++ b/grafanalib-dashboards/grafanalib.memory.usage.py
@@ -0,0 +1,91 @@
+"""Memory usage dashboard, generated with grafanalib."""
+
+from grafanalib.core import (
+    Dashboard, Graph, Legend, Row, Target, Template, Templating, Tooltip,
+    YAxis, INDIVIDUAL, SHORT_FORMAT,
+)
+
+DATASOURCE = "Prometheus (Pergamon)"
+
+
+def memory_graph(target, time_from=None):
+    """Build a graph of total, used and cached memory plus used swap.
+
+    target is substituted into the "instance" label matcher of every
+    query. time_from, when not None, is converted to a string and passed
+    to the Graph as timeFrom.
+    """
+    return Graph(
+        title='Memory usage',
+        dataSource=DATASOURCE,
+        timeFrom=None if time_from is None else str(time_from),
+        stack=True,
+        tooltip=Tooltip(valueType=INDIVIDUAL),
+        targets=[
+            Target(
+                expr='node_memory_MemTotal_bytes{instance="%s"}' % (target,),
+                legendFormat="Total memory",
+                refId='A',
+            ),
+            Target(
+                expr='node_memory_MemTotal_bytes{instance="%s"} - '
+                     'node_memory_MemAvailable_bytes{instance="%s"}'
+                     % (target, target),
+                legendFormat="Used memory",
+                refId='B',
+            ),
+            Target(
+                expr='node_memory_Cached_bytes{instance="%s"}' % (target,),
+                legendFormat="File cache",
+                refId='C',
+            ),
+            Target(
+                expr='node_memory_SwapTotal_bytes{instance="%s"} - '
+                     'node_memory_SwapFree_bytes{instance="%s"}'
+                     % (target, target),
+                legendFormat="Used swap",
+                refId='D',
+            ),
+        ],
+        # Booleans and numbers, not strings: a non-empty string such as
+        # "false" is not a false value once serialized to JSON.
+        seriesOverrides=[
+            {"alias": "Total memory", "stack": False},
+            {"alias": "Used swap", "stack": False, "fill": 10},
+        ],
+        yAxes=[
+            YAxis(format='bytes'),
+            YAxis(format=SHORT_FORMAT),
+        ],
+        legend=Legend(min=True, max=True, avg=True, current=True),
+    )
+
+
+dashboard = Dashboard(
+    title="Memory usage auto-generated",
+    templating=Templating(list=[
+        Template(
+            name="host",
+            label="",
+            query='node_uname_info{instance="$target"}',
+            regex='/nodename="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="target",
+            label="",
+            query='node_uname_info',
+            regex='/instance="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+    ]),
+    rows=[
+        Row(
+            title='Memory',
+            panels=[
+                memory_graph('$target'),
+                memory_graph('$target', "1y"),
+            ],
+        ),
+    ],
+).auto_panel_ids()
diff --git a/grafanalib-dashboards/grafanalib.network.traffic.py b/grafanalib-dashboards/grafanalib.network.traffic.py
new file mode 100644
index 0000000..3511350
--- /dev/null
+++ b/grafanalib-dashboards/grafanalib.network.traffic.py
@@ -0,0 +1,85 @@
+"""Per-interface network traffic dashboard, generated with grafanalib."""
+
+from grafanalib.core import (
+    Dashboard, Graph, Legend, Row, Target, Template, Templating, Tooltip,
+    YAxis, INDIVIDUAL, SHORT_FORMAT,
+)
+
+DATASOURCE = "Prometheus (Pergamon)"
+
+
+def network_graph(target, device, time_from=None):
+    """Build a stacked graph of receive and transmit bit rates for a device.
+
+    target and device are substituted into the "instance" and "device"
+    label matchers. time_from, when not None, is converted to a string and
+    passed to the Graph as timeFrom.
+    """
+    return Graph(
+        title='Network traffic',
+        dataSource=DATASOURCE,
+        timeFrom=None if time_from is None else str(time_from),
+        stack=True,
+        tooltip=Tooltip(valueType=INDIVIDUAL),
+        targets=[
+            # Byte counters are multiplied by 8, so the series are in bits.
+            Target(
+                expr='irate(node_network_receive_bytes_total'
+                     '{instance="%s",device="%s"}[5m]) * 8' % (target, device),
+                legendFormat="bits per second in",
+                refId='A',
+            ),
+            Target(
+                expr='irate(node_network_transmit_bytes_total'
+                     '{instance="%s",device="%s"}[5m]) * 8' % (target, device),
+                legendFormat="bits per second out",
+                refId='B',
+            ),
+        ],
+        yAxes=[
+            YAxis(format='bps'),
+            YAxis(format=SHORT_FORMAT),
+        ],
+        legend=Legend(max=True, min=True, avg=True, current=True),
+    )
+
+
+dashboard = Dashboard(
+    title="Network traffic auto-generated",
+    templating=Templating(list=[
+        Template(
+            name="host",
+            label="",
+            query='node_uname_info{instance="$target"}',
+            regex='/nodename="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="target",
+            label="",
+            query='node_network_receive_bytes_total',
+            regex='/instance="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="interface",
+            label="",
+            query='node_network_receive_bytes_total{instance="$target"}',
+            regex='/device="([^"]*)"/',
+            dataSource=DATASOURCE,
+            includeAll=True,
+            default="All",
+            hide=2,
+        ),
+    ]),
+    rows=[
+        Row(
+            title='$interface traffic',
+            panels=[
+                network_graph('$target', "$interface"),
+                network_graph('$target', "$interface", "1y"),
+            ],
+            repeat='interface',
+        ),
+    ],
+).auto_panel_ids()
diff --git a/grafanalib-dashboards/grafanalib.swap.py b/grafanalib-dashboards/grafanalib.swap.py
new file mode 100644
index 0000000..8f556b4
--- /dev/null
+++ b/grafanalib-dashboards/grafanalib.swap.py
@@ -0,0 +1,67 @@
+"""Swap activity dashboard, generated with grafanalib."""
+
+from grafanalib.core import (
+    Dashboard, Graph, Row, Target, Template, Templating, YAxis, SHORT_FORMAT,
+)
+
+DATASOURCE = "Prometheus (Pergamon)"
+
+
+def swap_activity_graph(target, time_from=None):
+    """Build a graph of pages swapped in and out per second.
+
+    target is substituted into the "instance" label matcher. time_from,
+    when not None, is converted to a string and passed to the Graph as
+    timeFrom.
+    """
+    return Graph(
+        title='Swap activity',
+        dataSource=DATASOURCE,
+        timeFrom=None if time_from is None else str(time_from),
+        targets=[
+            Target(
+                expr='irate(node_vmstat_pswpin{instance="%s"}[5m])' % (target,),
+                legendFormat="pages per second in",
+                refId='A',
+            ),
+            Target(
+                expr='irate(node_vmstat_pswpout{instance="%s"}[5m])' % (target,),
+                legendFormat="pages per second out",
+                refId='B',
+            ),
+        ],
+        yAxes=[
+            YAxis(format=SHORT_FORMAT),
+            YAxis(format=SHORT_FORMAT),
+        ],
+    )
+
+
+dashboard = Dashboard(
+    title="Swap usage auto-generated",
+    templating=Templating(list=[
+        Template(
+            name="host",
+            label="",
+            query='node_uname_info{instance="$target"}',
+            regex='/nodename="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="target",
+            label="",
+            query='node_memory_SwapTotal_bytes',
+            regex='/instance="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+    ]),
+    rows=[
+        Row(
+            title='Swap',
+            panels=[
+                swap_activity_graph('$target'),
+                swap_activity_graph('$target', "1y"),
+            ],
+        ),
+    ],
+).auto_panel_ids()
diff --git a/grafanalib-dashboards/grafanalib.uptime.py b/grafanalib-dashboards/grafanalib.uptime.py
new file mode 100644
index 0000000..4e0000b
--- /dev/null
+++ b/grafanalib-dashboards/grafanalib.uptime.py
@@ -0,0 +1,63 @@
+"""Uptime dashboard, generated with grafanalib."""
+
+from grafanalib.core import (
+    Dashboard, Graph, Row, Target, Template, Templating, YAxis,
+    SECONDS_FORMAT, SHORT_FORMAT,
+)
+
+DATASOURCE = "Prometheus (Pergamon)"
+
+
+def uptime_graph(target, time_from=None):
+    """Build a graph of the seconds elapsed since the host booted.
+
+    target is substituted into the "instance" label matcher. time_from,
+    when not None, is converted to a string and passed to the Graph as
+    timeFrom.
+    """
+    return Graph(
+        title='Uptime',
+        dataSource=DATASOURCE,
+        timeFrom=None if time_from is None else str(time_from),
+        targets=[
+            Target(
+                expr='time() - node_boot_time_seconds{instance="%s"}' % (target,),
+                legendFormat="uptime",
+                refId='A',
+            ),
+        ],
+        yAxes=[
+            YAxis(format=SECONDS_FORMAT),
+            YAxis(format=SHORT_FORMAT),
+        ],
+    )
+
+
+dashboard = Dashboard(
+    title="Uptime auto-generated",
+    templating=Templating(list=[
+        Template(
+            name="host",
+            label="",
+            query='node_uname_info{instance="$target"}',
+            regex='/nodename="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+        Template(
+            name="target",
+            label="",
+            query='node_boot_time_seconds',
+            regex='/instance="([^"]*)"/',
+            dataSource=DATASOURCE,
+        ),
+    ]),
+    rows=[
+        Row(
+            title='Uptime',
+            panels=[
+                uptime_graph('$target'),
+                uptime_graph('$target', "1y"),
+            ],
+        ),
+    ],
+).auto_panel_ids()