diff --git a/site-modules/profile/files/icinga2/plugins/check_logstash_errors.sh b/site-modules/profile/files/icinga2/plugins/check_logstash_errors.sh
new file mode 100644
index 00000000..f54eb602
--- /dev/null
+++ b/site-modules/profile/files/icinga2/plugins/check_logstash_errors.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+#
+# Icinga plugin: checks the local logstash node statistics and reports
+# CRITICAL as soon as an output of the "main" pipeline has recorded bulk
+# requests with errors or non retryable document failures.
+#
+# Exit codes: 0 (OK), 2 (CRITICAL).
+
+CODE_CRITICAL=2
+CODE_OK=0
+
+LOGSTASH_STATS_URL=http://localhost:9600/_node/stats
+
+# Each filter sums one counter over all the outputs of the main pipeline.
+# A counter missing from an output counts as 0, so a filter always yields
+# a single number, whatever the number of configured outputs.
+# JPATH_FAILURE_COUNT='.pipelines.main.plugins.outputs | map(.bulk_requests.failures // 0) | add // 0'
+JPATH_ERROR_COUNT='.pipelines.main.plugins.outputs | map(.bulk_requests.with_errors // 0) | add // 0'
+JPATH_NON_RETRYABLE_FAILURE_COUNT='.pipelines.main.plugins.outputs | map(.documents.non_retryable_failures // 0) | add // 0'
+
+# Print the result of the jq filter $2 applied to the json file $1.
+# The exit status is the one of jq: this function is called from a command
+# substitution (a subshell), so the caller has to check the status itself,
+# an "exit" here would not stop the plugin.
+get_value_from_json() {
+    local json=$1
+    local jpath=$2
+
+    jq -r "${jpath}" "${json}"
+}
+
+# Print the plugin output $1 and exit with the CRITICAL status.
+critical() {
+    echo "$1"
+    exit "${CODE_CRITICAL}"
+}
+
+TMP_FILE=$(mktemp) || critical "CRITICAL - Unable to create a temporary file"
+
+# Single quotes: the file name is expanded when the trap runs.
+trap 'rm -f "${TMP_FILE}"' EXIT
+
+if ! curl -f -s -o "${TMP_FILE}" "${LOGSTASH_STATS_URL}"; then
+    critical "CRITICAL - Unable to retrieve logstash statistics"
+fi
+
+if ! NON_RETRYABLE_FAILURES="$(get_value_from_json "${TMP_FILE}" "${JPATH_NON_RETRYABLE_FAILURE_COUNT}")"; then
+    critical "CRITICAL: unable to parse json file"
+fi
+
+if ! ERRORS="$(get_value_from_json "${TMP_FILE}" "${JPATH_ERROR_COUNT}")"; then
+    critical "CRITICAL: unable to parse json file"
+fi
+
+# Anything but an explicit 0 (including an unexpected empty or non numeric
+# value) is reported as an error.
+if [ "${ERRORS}" != "0" ] || [ "${NON_RETRYABLE_FAILURES}" != "0" ]; then
+    critical "CRITICAL - Logstash has detected some errors in outputs errors=${ERRORS} non_retryable_errors=${NON_RETRYABLE_FAILURES}"
+fi
+
+echo "OK - No errors detected"
+exit "${CODE_OK}"
diff --git a/site-modules/profile/manifests/icinga2/objects/common_checks.pp b/site-modules/profile/manifests/icinga2/objects/common_checks.pp
index 50fffc5f..0d963bb3 100644
--- a/site-modules/profile/manifests/icinga2/objects/common_checks.pp
+++ b/site-modules/profile/manifests/icinga2/objects/common_checks.pp
@@ -1,111 +1,122 @@
 # Icinga2 common check definitions
 class profile::icinga2::objects::common_checks {
 
   $service_configuration = lookup('icinga2::service_configuration')
 
   # Done locally on the master
   ::icinga2::object::service {'ping4':
     import        => ['generic-service'],
     apply         => true,
     check_command => 'ping4',
     assign        => ['host.address'],
     target        => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
 
   ::icinga2::object::service {'linux-ssh':
     import        => ['generic-service'],
     apply         => true,
     check_command => 'ssh',
     assign        => ['host.vars.os == Linux'],
     target        => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
 
   # Done remotely on the client: command_endpoint = host.name.
 
   each($service_configuration['load']) |$name, $vars| {
     if $name == 'default' {
       $assign = 'host.vars.os == Linux'
       $ignore = 'host.vars.noagent || host.vars.load'
     } else {
       $assign = "host.vars.os == Linux && host.vars.load == ${name}"
       $ignore = 'host.vars.noagent'
     }
 
     ::icinga2::object::service {"linux_load_${name}":
       import           => ['generic-service'],
       service_name     => 'load',
       apply            => true,
       check_command    => 'load',
       command_endpoint => 'host.name',
       assign           => [$assign],
       ignore           => [$ignore],
       target           => '/etc/icinga2/zones.d/global-templates/services.conf',
       vars             => $vars,
     }
   }
 
   ::icinga2::object::service {'linux_disks':
     import           => ['generic-service'],
     apply            => 'disk_name => config in host.vars.disks',
     check_command    => 'disk',
     command_endpoint => 'host.name',
     vars             => 'vars + config',
     assign           => ['host.vars.os == Linux'],
     ignore           => ['host.vars.noagent'],
     target           => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
 
   ::icinga2::object::service {'apt':
     import           => ['generic-service'],
     apply            => true,
     check_command    => 'apt',
     command_endpoint => 'host.name',
     check_interval   => '3h',
     vars             => {
       apt_timeout       => '120',
       apt_only_critical => 'true',
     },
     assign           => ['host.vars.os == Linux'],
     ignore           => ['host.vars.noagent'],
     target           => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
 
   ::icinga2::object::service {'ntp':
     import           => ['generic-service'],
     apply            => true,
     check_command    => 'ntp_peer',
     command_endpoint => 'host.name',
     vars             => {
       ntp_address => 'localhost',
     },
     assign           => ['host.vars.os == Linux'],
     ignore           => ['host.vars.noagent'],
     target           => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
 
   ::icinga2::object::service {'journalbeat':
     import           => ['generic-service'],
     apply            => true,
     check_command    => 'check_journal',
     command_endpoint => 'host.name',
     assign           => ['host.vars.os == Linux'],
     ignore           => ['-:"check_journal" !in host.vars.plugins', 'host.vars.noagent'],
     target           => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
 
   ::icinga2::object::service {'puppet_agent':
     import           => ['generic-service'],
     apply            => true,
     check_command    => 'file_age',
     command_endpoint => 'host.name',
     vars             => {
       file_age_file          => '/var/lib/puppet/state/agent_disabled.lock',
       file_age_warning_time  => '14400', # in seconds, warning after 4h
       file_age_critical_time => '86400', # in seconds, critical after 24h
       file_age_ignoremissing => 'true',
     },
     assign           => ['host.vars.os == Linux'],
     ignore           => ['host.vars.noagent'],
     target           => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
+
+  ::icinga2::object::service {'logstash_errors':
+    import           => ['generic-service'],
+    apply            => true,
+    check_command    => 'check_logstash_errors.sh',
+    command_endpoint => 'host.name',
+    assign           => ['"check_logstash_errors.sh" in host.vars.plugins'],
+    ignore           => ['host.vars.noagent'],
+    target           => '/etc/icinga2/zones.d/global-templates/services.conf',
+  }
+
 }
diff --git a/site-modules/profile/manifests/icinga2/objects/logstash_checks.pp b/site-modules/profile/manifests/icinga2/objects/logstash_checks.pp
new file mode 100644
index 00000000..4ef5453b
--- /dev/null
+++ b/site-modules/profile/manifests/icinga2/objects/logstash_checks.pp
@@ -0,0 +1,25 @@
+# Check the status of the logstash service.
+# This is an agent check: the plugin is deployed on the host including this class.
+class profile::icinga2::objects::logstash_checks {
+  $swh_plugin_dir = '/usr/lib/nagios/plugins/swh'
+  $check_command = 'check_logstash_errors.sh'
+  $check_command_path = "${swh_plugin_dir}/${check_command}"
+
+  $swh_plugin_configfile = '/etc/icinga2/conf.d/swh-plugins.conf'
+
+  file {$check_command_path:
+    ensure  => present,
+    owner   => 'root',
+    group   => 'root',
+    mode    => '0755',
+    source  => "puppet:///modules/profile/icinga2/plugins/${check_command}",
+    require => File[$swh_plugin_dir],
+  }
+
+  ::icinga2::object::checkcommand {$check_command:
+    import  => ['plugin-check-command'],
+    command => $check_command_path,
+    target  => $swh_plugin_configfile,
+    require => File[$check_command_path],
+  }
+}
diff --git a/site-modules/profile/manifests/icinga2/objects/static_checks.pp b/site-modules/profile/manifests/icinga2/objects/static_checks.pp
index 77664dd9..bb992a4b 100644
--- a/site-modules/profile/manifests/icinga2/objects/static_checks.pp
+++ b/site-modules/profile/manifests/icinga2/objects/static_checks.pp
@@ -1,101 +1,108 @@
 # Static checks on the icinga master
 class profile::icinga2::objects::static_checks {
 
   $checks_file = '/etc/icinga2/conf.d/static-checks.conf'
 
   ::icinga2::object::host {'www.softwareheritage.org':
     import        => ['generic-host'],
     check_command => 'dummy',
     address       => 'www.softwareheritage.org',
     target        => $checks_file,
     vars          => {
       dummy_state => 0, # up
       dummy_text  => "HTTP-only host",
     },
   }
 
   ::icinga2::object::host {'softwareheritage.org':
     import        => ['generic-host'],
     check_command => 'dummy',
     address       => 'softwareheritage.org',
     target        => $checks_file,
     vars          => {
       dummy_state => 0, # up
       dummy_text  => "HTTP-only host",
     },
   }
 
   ::icinga2::object::service {'Software Heritage Homepage':
     import        => ['generic-service'],
     host_name     => 'www.softwareheritage.org',
     check_command => 'http',
     target        => $checks_file,
     vars          => {
       http_vhost  => 'www.softwareheritage.org',
       http_uri    => '/',
       http_ssl    => true,
       http_sni    => true,
       http_string => 'Software Heritage',
     },
   }
 
   ::icinga2::object::service {'Software Heritage Homepage (redirect to www)':
     import        => ['generic-service'],
     host_name     => 'softwareheritage.org',
     check_command => 'http',
     target        => $checks_file,
     vars          => {
       http_vhost => 'softwareheritage.org',
       http_uri   => '/',
       http_ssl   => true,
       http_sni   => true,
     },
   }
 
   ::icinga2::object::host {'swh-logging-prod':
     check_command => 'dummy',
     address       => '127.0.0.1',
     target        => $checks_file,
     vars          => {
       dummy_state => 0, # up
       dummy_text  => "virtual host for clustered checks",
     },
   }
 
   ::icinga2::object::service {'swh-logging-prod cluster':
     host_name     => 'swh-logging-prod',
     check_command => 'check_escluster',
     target        => $checks_file,
   }
 
   ::icinga2::object::checkcommand {'check_escluster':
     import  => ['plugin-check-command'],
     command => '/usr/lib/nagios/plugins/icinga_check_elasticsearch.sh',
     target  => $checks_file,
   }
 
+  # Same plugin path as the one deployed by profile::icinga2::objects::logstash_checks
+  ::icinga2::object::checkcommand {'check_logstash_errors.sh':
+    import  => ['plugin-check-command'],
+    command => '/usr/lib/nagios/plugins/swh/check_logstash_errors.sh',
+    target  => $checks_file,
+  }
+
   ::icinga2::object::host {'DNS resolvers':
     check_command => 'dummy',
     address       => '127.0.0.1',
     target        => $checks_file,
     vars          => {
       dummy_state => 0, # up
       dummy_text  => "virtual host for clustered checks",
     },
   }
 
   ::icinga2::object::service {'SOA':
     host_name     => 'DNS resolvers',
     check_command => 'check_resolvers',
     target        => $checks_file,
   }
 
   ::icinga2::object::checkcommand {'check_resolvers':
     import  => ['plugin-check-command'],
     command => [
       '/usr/lib/nagios/plugins/dsa-nagios-checks_checks_dsa-check-soas.txt',
       'internal.softwareheritage.org',
     ],
     target  => $checks_file,
   }
 }
diff --git a/site-modules/profile/manifests/logstash.pp b/site-modules/profile/manifests/logstash.pp
index 0896f712..e9f25e62 100644
--- a/site-modules/profile/manifests/logstash.pp
+++ b/site-modules/profile/manifests/logstash.pp
@@ -1,53 +1,55 @@
 # Install and configure logstash
 
 class profile::logstash {
   include ::java
   include ::profile::elastic::apt_config
 
   $version = sprintf('1:%s-1', lookup('elastic::elk_version'))
   $elasticsearch_hosts = lookup('logstash::elasticsearch::hosts')
   $listen_address = ip_for_network(lookup('kibana::listen_network'))
 
   package { 'logstash':
     ensure  => $version,
     require => Class['java'],
   }
 
   apt::pin { 'logstash':
     packages => 'logstash',
     version  => $version,
     priority => 1001,
   }
 
   file { '/etc/logstash/conf.d/input.conf':
     ensure  => 'file',
     content => template('profile/logstash/input.conf.erb'),
     require => Package['logstash'],
     notify  => Service['logstash'],
   }
 
   file { '/etc/logstash/conf.d/output.conf':
     ensure  => 'file',
     content => template('profile/logstash/output.conf.erb'),
     require => Package['logstash'],
     notify  => Service['logstash'],
   }
 
   file { '/etc/logstash/conf.d/filter.conf':
     ensure  => 'file',
     content => template('profile/logstash/filter.conf.erb'),
     require => Package['logstash'],
     notify  => Service['logstash'],
   }
 
   service { 'logstash':
     ensure  => running,
     enable  => true,
     require => [Package['logstash'],
                 File['/etc/logstash/conf.d/input.conf'],
                 File['/etc/logstash/conf.d/output.conf'],
                 File['/etc/logstash/conf.d/filter.conf']
     ],
   }
 
+  include profile::icinga2::objects::logstash_checks
+
 }