diff --git a/site-modules/profile/files/icinga2/plugins/check_prometheus_metric.sh b/site-modules/profile/files/icinga2/plugins/check_prometheus_metric
similarity index 100%
rename from site-modules/profile/files/icinga2/plugins/check_prometheus_metric.sh
rename to site-modules/profile/files/icinga2/plugins/check_prometheus_metric
diff --git a/site-modules/profile/manifests/icinga2/objects/agent_checks.pp b/site-modules/profile/manifests/icinga2/objects/agent_checks.pp
index 5d33d3fc..071fc7fa 100644
--- a/site-modules/profile/manifests/icinga2/objects/agent_checks.pp
+++ b/site-modules/profile/manifests/icinga2/objects/agent_checks.pp
@@ -1,159 +1,159 @@
 # Checks that need to be supported on icinga2 agents
 class profile::icinga2::objects::agent_checks {
   $prometheus_host = lookup('prometheus::server::fqdn')
   $prometheus_port = lookup('prometheus::server::listen_port')
   $prometheus_url = "http://${prometheus_host}:${prometheus_port}"
   $plugins = {
     'check_journal' => {
       arguments => {
         '-f' => {
           'value' => '$journal_cursor_file$',
           'set_if' => '{{ var filename = macro("$journal_cursor_file$"); return len(filename) > 0 }}',
         },
         '-w' => '$journal_lag_warn$',
         '-c' => '$journal_lag_crit$',
         '-wn' => {
           'value' => '$journal_lag_entries_warn$',
           'set_if' => '$journal_lag_entries_warn$',
         },
         '-cn' => {
           'value' => '$journal_lag_entries_crit$',
           'set_if' => '$journal_lag_entries_crit$',
         },
       },
       vars => {
         'journal_lag_warn' => 1200,
         'journal_lag_crit' => 3600,
       },
       sudo => true,
       sudo_user => 'root',
     },
     'check_newest_file_age' => {
       arguments => {
         '-d' => '$check_directory$',
         '-w' => '$check_directory_warn_age$',
         '-c' => '$check_directory_crit_age$',
         '-W' => {
           'set_if' => '$check_directory_missing_warn$',
         },
         '-C' => {
           'set_if' => '$check_directory_missing_crit$',
         },
       },
       vars => {
         'check_directory_warn_age' => 26,
         'check_directory_crit_age' => 52,
         'check_directory_missing_warn' => false,
         'check_directory_missing_crit' => true,
       },
       sudo => true,
       sudo_user => 'root',
     },
-    'check_prometheus_metric.sh' => {
+    'check_prometheus_metric' => {
       arguments => {
         '-H' => '$check_prometheus_metric_url$',
         '-q' => '$check_prometheus_metric_query$',
         '-w' => '$check_prometheus_metric_warning$',
         '-c' => '$check_prometheus_metric_critical$',
         '-n' => '$check_prometheus_metric_name$',
       },
       vars => {
         'check_prometheus_metric_url' => $prometheus_url,
       }
     },
   }

   $plugin_dir = '/usr/lib/nagios/plugins'
   $swh_plugin_dir = "${plugin_dir}/swh"
   $swh_plugin_configfile = '/etc/icinga2/conf.d/swh-plugins.conf'

   $packages = [
     'python3-nagiosplugin',
     'python3-systemd',
     'monitoring-plugins-basic',
     'monitoring-plugins-systemd', # in swh repository
   ]

   package {$packages:
     ensure => present,
   }

   file {$swh_plugin_dir:
     ensure => 'directory',
     owner => 'root',
     group => 'root',
     mode => '0755',
     recurse => true,
     purge => true,
     require => Package[$packages],
   }

   $plugins.each |$command, $plugin| {
     $command_path = "${swh_plugin_dir}/${command}"

     file {$command_path:
       ensure => present,
       owner => 'root',
       group => 'root',
       mode => '0755',
       source => "puppet:///modules/profile/icinga2/plugins/${command}",
       require => Package[$packages],
     }

     if $plugin['sudo'] {
       $sudo_user = $plugin['sudo_user']
       $icinga_command = ['sudo', '-u', $sudo_user, $command_path]

       ::sudo::conf { "icinga-${command}":
         ensure => present,
         content => "nagios ALL=(${sudo_user}) NOPASSWD: ${command_path}",
         priority => 50,
       }
     } else {
       $icinga_command = [$command_path]

       ::sudo::conf { "icinga-${command}":
         ensure => absent,
       }
     }

     ::icinga2::object::checkcommand {$command:
       import => ['plugin-check-command'],
       command => $icinga_command,
       arguments => $plugin['arguments'],
       vars => $plugin['vars'],
       target => $swh_plugin_configfile,
     }
   }

   $check_command = "check_systemd"
   $check_command_path = "${plugin_dir}/${check_command}"

   ::icinga2::object::checkcommand {$check_command:
     import => ['plugin-check-command'],
     command => [ $check_command_path ],
     arguments => {
       '--unit' => {
         value => '$systemd_units$',
         description => 'Name of the systemd units that are being tested.',
         repeat_key => true,
       },
       '--exclude' => {
         value => '$systemd_excludes$',
         description => '-:"Name of the systemd units to exclude from checks can be a regular expression)."',
         repeat_key => true,
       },
       '--no-startup-time' => {
         set_if => '{{ !macro("$systemd_check_startup_time$") }}',
         description => 'Whether to check the system startup time'
       },
     },
     vars => {
       systemd_units => [],
       systemd_excludes => [],
       systemd_check_startup_time => false,
     },
     target => $swh_plugin_configfile,
     require => Package[$packages],
   }
 }
diff --git a/site-modules/profile/manifests/icinga2/objects/static_checks.pp b/site-modules/profile/manifests/icinga2/objects/static_checks.pp
index 2e320048..04d22a69 100644
--- a/site-modules/profile/manifests/icinga2/objects/static_checks.pp
+++ b/site-modules/profile/manifests/icinga2/objects/static_checks.pp
@@ -1,139 +1,141 @@
 # Static checks on the icinga master
 class profile::icinga2::objects::static_checks {
   $checks_file = '/etc/icinga2/conf.d/static-checks.conf'

   ::icinga2::object::host {'www.softwareheritage.org':
     import => ['generic-host'],
     check_command => 'dummy',
     address => 'www.softwareheritage.org',
     target => $checks_file,
     vars => {
       dummy_state => 0, # up
       dummy_text => "HTTP-only host",
     },
   }

   ::icinga2::object::host {'softwareheritage.org':
     import => ['generic-host'],
     check_command => 'dummy',
     address => 'softwareheritage.org',
     target => $checks_file,
     vars => {
       dummy_state => 0, # up
       dummy_text => "HTTP-only host",
     },
   }

   ::icinga2::object::host {'graphql.staging.swh.network':
     import => ['generic-host'],
     check_command => 'dummy',
     address => 'graphql.staging.swh.network',
     target => $checks_file,
     vars => {
       dummy_state => 0, # up
       dummy_text => "HTTP-only host",
     },
   }

   ::icinga2::object::service {'Software Heritage Homepage':
     import => ['generic-service'],
     host_name => 'www.softwareheritage.org',
     check_command => 'http',
     target => $checks_file,
     vars => {
       http_vhost => 'www.softwareheritage.org',
       http_uri => '/',
       http_ssl => true,
       http_sni => true,
       http_string => 'Software Heritage',
     },
   }

   ::icinga2::object::service {'Software Heritage Homepage (redirect to www)':
     import => ['generic-service'],
     host_name => 'softwareheritage.org',
     check_command => 'http',
     target => $checks_file,
     vars => {
       http_vhost => 'softwareheritage.org',
       http_uri => '/',
       http_ssl => true,
       http_sni => true,
     },
   }

   ::icinga2::object::host {'swh-logging-prod':
     check_command => 'dummy',
     address => '127.0.0.1',
     target => $checks_file,
     vars => {
       dummy_state => 0, # up
       dummy_text => "virtual host for clustered checks",
     },
   }

   ::icinga2::object::service {'swh-logging-prod cluster':
     host_name => 'swh-logging-prod',
     check_command => 'check_escluster',
     target => $checks_file,
   }

   ::icinga2::object::checkcommand {'check_escluster':
     import => ['plugin-check-command'],
     command => '/usr/lib/nagios/plugins/icinga_check_elasticsearch.sh',
     target => $checks_file,
   }

   ::icinga2::object::host {'DNS resolvers':
     check_command => 'dummy',
     address => '127.0.0.1',
     target => $checks_file,
     vars => {
       dummy_state => 0, # up
       dummy_text => "virtual host for clustered checks",
     },
   }

   ::icinga2::object::service {'SOA':
     host_name => 'DNS resolvers',
     check_command => 'check_resolvers',
     target => $checks_file,
   }

   ::icinga2::object::checkcommand {'check_resolvers':
     import => ['plugin-check-command'],
     command => [
       '/usr/lib/nagios/plugins/dsa-nagios-checks_checks_dsa-check-soas.txt',
       'internal.softwareheritage.org',
     ],
     target => $checks_file,
   }

+  $prometheus_host = lookup('prometheus::server::fqdn')
   ::icinga2::object::service {'Postgresql replication lag (belvedere -> somerset)':
-    check_command => 'check_prometheus_metric.sh',
+    check_command => 'check_prometheus_metric',
     target => $checks_file,
     host_name => 'belvedere.internal.softwareheritage.org',
     vars => {
       check_prometheus_metric_name => 'pg replication_lag belvedere somerset',
-      check_prometheus_query => profile::icinga2::literal_var(
+      # Key must be check_prometheus_metric_query: the check command passes $check_prometheus_metric_query$ as -q.
+      check_prometheus_metric_query => profile::icinga2::literal_var(
         'sum(sql_pg_stat_replication{instance="belvedere.internal.softwareheritage.org", host=":5433", application_name="softwareheritage_replica"})'
       ),
       check_prometheus_metric_warning => '1073741824', # 1GiB 1*1024*1024*1024
       check_prometheus_metric_critical => '2147483648', # 2GiB 2*1024*1024*1024
     },
   }

   ::icinga2::object::service {'Software Heritage Staging Graphql Instance':
     import => ['generic-service'],
     host_name => 'graphql.staging.swh.network',
     check_command => 'http',
     target => $checks_file,
     vars => {
       http_vhost => 'graphql.staging.swh.network',
       http_uri => '/',
       http_ssl => true,
       http_sni => true,
       http_string => 'GraphQL Playground',
     },
   }
 }