icinga2: let check_systemd take unit lists, exclusions and a startup-time toggle

The check_systemd CheckCommand now reads the systemd_units and
systemd_excludes custom variables (both default to an empty list) and
passes --no-startup-time whenever systemd_check_startup_time is false,
which is its default. The check_postfix service and the exported
scheduler journal client service switch from check_systemd_unit to the
list-valued systemd_units.

NOTE(review): indentation and blank-line placement in the hunks below
were reconstructed from a flattened copy of this patch; regenerate it
against the tree before applying.

diff --git a/site-modules/profile/manifests/icinga2/objects/agent_checks.pp b/site-modules/profile/manifests/icinga2/objects/agent_checks.pp
index ad88aee2..923f2c05 100644
--- a/site-modules/profile/manifests/icinga2/objects/agent_checks.pp
+++ b/site-modules/profile/manifests/icinga2/objects/agent_checks.pp
@@ -1,154 +1,169 @@
 # Checks that need to be supported on icinga2 agents
 class profile::icinga2::objects::agent_checks {
   $prometheus_port = lookup('prometheus::server::listen_port')
   $prometheus_url = "pergamon.internal.softwareheritage.org:${prometheus_port}"
 
   $plugins = {
     'check_journal' => {
       arguments => {
         '-f' => {
           'value' => '$journal_cursor_file$',
           'set_if' => '{{ var filename = macro("$journal_cursor_file$"); return len(filename) > 0 }}',
         },
         '-w' => '$journal_lag_warn$',
         '-c' => '$journal_lag_crit$',
         '-wn' => {
           'value' => '$journal_lag_entries_warn$',
           'set_if' => '$journal_lag_entries_warn$',
         },
         '-cn' => {
           'value' => '$journal_lag_entries_crit$',
           'set_if' => '$journal_lag_entries_crit$',
         },
       },
       vars => {
         'journal_lag_warn' => 1200,
         'journal_lag_crit' => 3600,
       },
       sudo => true,
       sudo_user => 'journalbeat',
     },
     'check_newest_file_age' => {
       arguments => {
         '-d' => '$check_directory$',
         '-w' => '$check_directory_warn_age$',
         '-c' => '$check_directory_crit_age$',
         '-W' => {
           'set_if' => '$check_directory_missing_warn$',
         },
         '-C' => {
           'set_if' => '$check_directory_missing_crit$',
         },
       },
       vars => {
         'check_directory_warn_age' => 26,
         'check_directory_crit_age' => 52,
         'check_directory_missing_warn' => false,
         'check_directory_missing_crit' => true,
       },
       sudo => true,
       sudo_user => 'root',
     },
     'check_prometheus_metric.sh' => {
       arguments => {
         '-H' => '$check_prometheus_metric_url$',
         '-q' => '$check_prometheus_metric_query$',
         '-w' => '$check_prometheus_metric_warning$',
         '-c' => '$check_prometheus_metric_critical$',
         '-n' => '$check_prometheus_metric_name$',
       },
       vars => {
         'check_prometheus_metric_url' => $prometheus_url,
       }
     },
     'check_belvedere_replication_lag.sh' => {
       arguments => {
         '-H' => '$check_prometheus_metric_url$',
         '-w' => '$check_prometheus_metric_warning$',
         '-c' => '$check_prometheus_metric_critical$',
         '-n' => '$check_prometheus_metric_name$',
       },
       vars => {
         'check_prometheus_metric_url' => $prometheus_url,
       }
     }
   }
 
   $plugin_dir = '/usr/lib/nagios/plugins'
   $swh_plugin_dir = "${plugin_dir}/swh"
   $swh_plugin_configfile = '/etc/icinga2/conf.d/swh-plugins.conf'
 
   $packages = [
     'python3-nagiosplugin',
     'python3-systemd',
     'monitoring-plugins-basic',
     'monitoring-plugins-systemd', # in swh repository
   ]
 
   package {$packages:
     ensure => present,
   }
 
   file {$swh_plugin_dir:
     ensure  => 'directory',
     owner   => 'root',
     group   => 'root',
     mode    => '0755',
     recurse => true,
     purge   => true,
     require => Package[$packages],
   }
 
   $plugins.each |$command, $plugin| {
     $command_path = "${swh_plugin_dir}/${command}"
 
     file {$command_path:
       ensure  => present,
       owner   => 'root',
       group   => 'root',
       mode    => '0755',
       source  => "puppet:///modules/profile/icinga2/plugins/${command}",
       require => Package[$packages],
     }
 
     if $plugin['sudo'] {
       $sudo_user = $plugin['sudo_user']
       $icinga_command = ['sudo', '-u', $sudo_user, $command_path]
       ::sudo::conf { "icinga-${command}":
         ensure   => present,
         content  => "nagios ALL=(${sudo_user}) NOPASSWD: ${command_path}",
         priority => 50,
       }
     } else {
       $icinga_command = [$command_path]
       ::sudo::conf { "icinga-${command}":
         ensure => absent,
       }
     }
 
     ::icinga2::object::checkcommand {$command:
       import    => ['plugin-check-command'],
       command   => $icinga_command,
       arguments => $plugin['arguments'],
       vars      => $plugin['vars'],
       target    => $swh_plugin_configfile,
     }
   }
 
   $check_command = "check_systemd"
   $check_command_path = "${plugin_dir}/${check_command}"
   ::icinga2::object::checkcommand {$check_command:
     import    => ['plugin-check-command'],
     command   => [
       $check_command_path
     ],
     arguments => {
-      '--unit' => {
-        value       => '$check_systemd_unit$',
-        description => 'Name of the systemd unit that is being tested.',
+      '--unit'            => { # fed from the list-valued systemd_units custom variable
+        value       => '$systemd_units$',
+        description => 'Name of the systemd units that are being tested.',
+        repeat_key  => true, # array value: the key is emitted once per element
       },
+      '--exclude'         => { # fed from the list-valued systemd_excludes custom variable
+        value       => '$systemd_excludes$',
+        description => 'Name of the systemd units to exclude from checks (can be a regular expression).',
+        repeat_key  => true, # array value: the key is emitted once per element
+      },
+      '--no-startup-time' => {
+        set_if      => '{{ !macro("$systemd_check_startup_time$") }}', # macro names need both the opening and the closing $
+        description => 'Whether to check the system startup time' # the flag is passed when systemd_check_startup_time is false
+      },
+    },
+    vars      => { # defaults; services override them through their own vars
+      systemd_units              => [],
+      systemd_excludes           => [],
+      systemd_check_startup_time => false, # false => --no-startup-time is passed
     },
     target    => $swh_plugin_configfile,
     require   => Package[$packages],
   }
 }
diff --git a/site-modules/profile/manifests/icinga2/objects/common_checks.pp b/site-modules/profile/manifests/icinga2/objects/common_checks.pp
index 089f3013..2ab78f7b 100644
--- a/site-modules/profile/manifests/icinga2/objects/common_checks.pp
+++ b/site-modules/profile/manifests/icinga2/objects/common_checks.pp
@@ -1,149 +1,149 @@
 # Icinga2 common check definitions
 class profile::icinga2::objects::common_checks {
 
   $service_configuration = lookup('icinga2::service_configuration')
 
   $target_file = '/etc/icinga2/zones.d/global-templates/services.conf'
 
   # Done locally on the master
 
   ::icinga2::object::service {'ping4':
     import        => ['generic-service'],
     apply         => true,
     check_command => 'ping4',
     assign        => ['host.address'],
     target        => $target_file,
   }
 
   ::icinga2::object::service {'linux-ssh':
     import        => ['generic-service'],
     apply         => true,
     check_command => 'ssh',
     assign        => ['host.vars.os == Linux'],
     target        => $target_file,
   }
 
   # Done remotely on the client: command_endpoint = host.name.
 
   each($service_configuration['load']) |$name, $vars| {
     if $name == 'default' {
       $assign = 'host.vars.os == Linux'
       $ignore = 'host.vars.noagent || host.vars.load'
     } else {
       $assign = "host.vars.os == Linux && host.vars.load == ${name}"
       $ignore = 'host.vars.noagent'
     }
 
     ::icinga2::object::service {"linux_load_${name}":
       import           => ['generic-service'],
       service_name     => 'load',
       apply            => true,
       check_command    => 'load',
       command_endpoint => 'host.name',
       assign           => [$assign],
       ignore           => [$ignore],
       target           => $target_file,
       vars             => $vars,
     }
   }
 
   ::icinga2::object::service {'linux_disks':
     import           => ['generic-service'],
     apply            => 'disk_name => config in host.vars.disks',
     check_command    => 'disk',
     command_endpoint => 'host.name',
     vars             => 'vars + config',
     assign           => ['host.vars.os == Linux'],
     ignore           => ['host.vars.noagent'],
     target           => $target_file,
   }
 
   ::icinga2::object::service {'apt':
     import           => ['generic-service'],
     apply            => true,
     check_command    => 'apt',
     command_endpoint => 'host.name',
     check_interval   => '3h',
     vars             => {
       apt_timeout       => '120',
       apt_only_critical => 'true',
     },
     assign           => ['host.vars.os == Linux'],
     ignore           => ['host.vars.noagent'],
     target           => $target_file,
   }
 
   ::icinga2::object::service {'ntp':
     import           => ['generic-service'],
     apply            => true,
     check_command    => 'ntp_peer',
     command_endpoint => 'host.name',
     vars             => {
       ntp_address => 'localhost',
     },
     assign           => ['host.vars.os == Linux'],
     ignore           => ['host.vars.noagent'],
     target           => $target_file,
   }
 
   ::icinga2::object::service {'journalbeat':
     import           => ['generic-service'],
     apply            => true,
     check_command    => 'check_journal',
     command_endpoint => 'host.name',
     assign           => ['host.vars.os == Linux'],
     ignore           => ['-:"check_journal" !in host.vars.plugins', 'host.vars.noagent'],
     target           => $target_file,
   }
 
   ::icinga2::object::service {'puppet_agent':
     import           => ['generic-service'],
     apply            => true,
     check_command    => 'file_age',
     command_endpoint => 'host.name',
     vars             => {
       file_age_file          => '/var/lib/puppet/state/agent_disabled.lock',
       file_age_warning_time  => '14400', # in seconds, warning after 4h
       file_age_critical_time => '86400', # in seconds, critical after 24h
       file_age_ignoremissing => 'true',
     },
     assign           => ['host.vars.os == Linux'],
     ignore           => ['host.vars.noagent'],
     target           => $target_file,
   }
 
   ::icinga2::object::service {'check_systemd':
     import           => ['generic-service'],
     apply            => true,
     name             => "Check systemd state",
     check_command    => "check_systemd",
     command_endpoint => 'host.name',
     assign           => ['host.vars.os == Linux'],
     ignore           => ['host.vars.noagent'],
     target           => $target_file,
   }
 
   ::icinga2::object::service {'check_postfix':
     import           => ['generic-service'],
     apply            => true,
     name             => "Check postfix service",
     check_command    => "check_systemd",
     command_endpoint => 'host.name',
     assign           => ['host.vars.os == Linux'],
     vars             => {
-      check_systemd_unit => 'postfix@-.service',
+      systemd_units => ['postfix@-.service'], # list-valued replacement for check_systemd_unit
     },
     ignore           => ['host.vars.noagent'],
     target           => $target_file,
   }
 
   ::icinga2::object::service {'logstash_errors':
     import           => ['generic-service'],
     apply            => true,
     check_command    => 'check_logstash_errors.sh',
     command_endpoint => 'host.name',
     assign           => ['check_logstash_errors.sh in host.vars.plugins'],
     ignore           => ['host.vars.noagent'],
     target           => $target_file,
   }
 }
diff --git a/site-modules/profile/manifests/swh/deploy/scheduler/journal_client.pp b/site-modules/profile/manifests/swh/deploy/scheduler/journal_client.pp
index 83f3f0d2..b97a405d 100644
--- a/site-modules/profile/manifests/swh/deploy/scheduler/journal_client.pp
+++ b/site-modules/profile/manifests/swh/deploy/scheduler/journal_client.pp
@@ -1,56 +1,56 @@
 # Deployment of the swh.search.journal_client
 class profile::swh::deploy::scheduler::journal_client {
 
   include ::profile::swh::deploy::base_scheduler
   include ::profile::swh::deploy::journal
 
   $config_file = lookup('swh::deploy::scheduler::journal_client::config_file')
   $config = lookup('swh::deploy::scheduler::journal_client::config')
 
   $user = lookup('swh::deploy::scheduler::journal_client::user')
   $group = lookup('swh::deploy::scheduler::journal_client::group')
 
   $service_name = 'swh-scheduler-journal-client'
   $unit_name = "${service_name}.service"
 
   $sentry_dsn = lookup("swh::deploy::scheduler::sentry_dsn", Optional[String], 'first', undef)
   $sentry_environment = lookup("swh::deploy::scheduler::sentry_environment", Optional[String], 'first', undef)
   $sentry_swh_package = lookup("swh::deploy::scheduler::sentry_swh_package", Optional[String], 'first', undef)
 
   file {$config_file:
     ensure  => present,
     owner   => 'root',
     group   => $group,
     mode    => '0640',
     content => inline_template("<%= @config.to_yaml %>\n"),
     notify  => Service[$service_name],
   }
 
   # Template uses variables
   #  - $user
   #  - $group
   #  - $sentry_dsn
   #  - $sentry_environment
   #  - $sentry_package
   #
   ::systemd::unit_file {$unit_name:
     ensure  => present,
     content => template("profile/swh/deploy/journal/${unit_name}.erb"),
   } ~> service {$service_name:
     ensure => running,
     enable => true,
   }
 
   @@::icinga2::object::service {"check_scheduler_journal_client_${::fqdn}":
     import           => ['generic-service'],
     name             => "Check swh scheduler journal client service ${::fqdn}",
     check_command    => "check_systemd",
     host_name        => $::fqdn,
     command_endpoint => $::fqdn,
     vars             => {
-      check_systemd_unit => $unit_name,
+      systemd_units => [$unit_name], # list-valued replacement for check_systemd_unit
     },
     target           => '/etc/icinga2/zones.d/master/exported-checks.conf',
     tag              => 'icinga2::exported',
   }
 }