diff --git a/data/hostname/logstash0.internal.softwareheritage.org.yaml b/data/hostname/logstash0.internal.softwareheritage.org.yaml
new file mode 100644
index 00000000..5a4f6f16
--- /dev/null
+++ b/data/hostname/logstash0.internal.softwareheritage.org.yaml
@@ -0,0 +1,2 @@
+icinga2::host::local_plugins:
+  - check_logstash_errors.sh
diff --git a/site-modules/profile/files/icinga2/plugins/check_logstash_errors.sh b/site-modules/profile/files/icinga2/plugins/check_logstash_errors.sh
index f54eb602..0cb55b7f 100644
--- a/site-modules/profile/files/icinga2/plugins/check_logstash_errors.sh
+++ b/site-modules/profile/files/icinga2/plugins/check_logstash_errors.sh
@@ -1,54 +1,98 @@
 #!/bin/bash
+#
+# File managed by puppet. All modifications will be lost.
+#
+# Check if logstash encountered errors when sending messages
+# to its output.
+#
+# Copyright (c) 2017 The Software Heritage Developers
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
 
 CODE_CRITICAL=2
 CODE_OK=0
 
 STATE_CRITICAL=false
 
 LOGSTASH_STATS_URL=http://localhost:9600/_node/stats
 
 ERROR_CODE=0
 
 # JPATH_FAILURE_COUNT=".pipelines.main.plugins.outputs[].bulk_requests.failures"
 JPATH_ERROR_COUNT=".pipelines.main.plugins.outputs[].bulk_requests.with_errors"
 JPATH_NON_RETRYABLE_FAILURE_COUNT=".pipelines.main.plugins.outputs[].documents.non_retryable_failures"
 
+# Print the sum of all non-null values selected by a jq path.
+#
+# Arguments:
+#   $1 - path of the JSON file to read
+#   $2 - jq path expression; it may yield one value per logstash output
+# Output:
+#   the summed value, or "null" when no output reported the counter
+# Returns:
+#   jq's exit status. Callers run this in a command substitution, so the
+#   function must not call exit itself: that would only end the subshell.
 get_value_from_json() {
-    json=$1
-    jpath=$2
+    local json=$1
+    local jpath=$2
 
-    if ! jq -r "${jpath}" "${json}"; then
-        echo "CRITICAL: unable to parse json file"
-        exit ${CODE_CRITICAL}
-    fi
+    # Collapse the per-output values into a single scalar so that several
+    # outputs without errors still compare equal to "null" below.
+    jq -r "[${jpath}] | map(select(. != null)) | add" "${json}"
 }
 
+# Report an unreadable statistics document and abort the check.
+fail_unparsable_json() {
+    echo "CRITICAL: unable to parse json file"
+    exit ${CODE_CRITICAL}
+}
+
 TMP_FILE=$(mktemp)
 
-trap "rm -f ${TMP_FILE}" EXIT
+# Single quotes defer expansion until the trap fires.
+trap 'rm -f "${TMP_FILE}"' EXIT
 
-if ! curl -f -s -o ${TMP_FILE} ${LOGSTASH_STATS_URL}; then
+if ! curl -f -s -o "${TMP_FILE}" "${LOGSTASH_STATS_URL}"; then
     echo "CRITICAL - Unable to retrieve logstash statistics"
     exit ${CODE_CRITICAL}
 fi
 
-NON_RETRYABLE_FAILURES="$(get_value_from_json ${TMP_FILE} ${JPATH_NON_RETRYABLE_FAILURE_COUNT})"
-ERRORS="$(get_value_from_json ${TMP_FILE} ${JPATH_ERROR_COUNT})"
+# An assignment returns the status of its command substitution, so a jq
+# failure is handled here, in the main shell, where exit really exits.
+NON_RETRYABLE_FAILURES="$(get_value_from_json "${TMP_FILE}" "${JPATH_NON_RETRYABLE_FAILURE_COUNT}")" || fail_unparsable_json
+ERRORS="$(get_value_from_json "${TMP_FILE}" "${JPATH_ERROR_COUNT}")" || fail_unparsable_json
 
 if [ "${NON_RETRYABLE_FAILURES}" != "null" ]; then
     STATE_CRITICAL=true
 fi
 
 if [ "${ERRORS}" != null ]; then
     STATE_CRITICAL=true
 fi
 
 if ${STATE_CRITICAL}; then
     echo "CRITICAL - Logstash has detected some errors in outputs errors=${ERRORS} non_retryable_errors=${NON_RETRYABLE_FAILURES}"
     ERROR_CODE=${CODE_CRITICAL}
 else
     echo "OK - No errors detected"
     ERROR_CODE=${CODE_OK}
 fi
 
 exit ${ERROR_CODE}
diff --git a/site-modules/profile/manifests/icinga2/objects/common_checks.pp b/site-modules/profile/manifests/icinga2/objects/common_checks.pp
index 0d963bb3..e8e8c3e2 100644
--- a/site-modules/profile/manifests/icinga2/objects/common_checks.pp
+++ b/site-modules/profile/manifests/icinga2/objects/common_checks.pp
@@ -1,122 +1,122 @@
 # Icinga2 common check definitions
 class profile::icinga2::objects::common_checks {
 
   $service_configuration = lookup('icinga2::service_configuration')
 
   # Done locally on the master
 
   ::icinga2::object::service {'ping4':
     import => ['generic-service'],
     apply => true,
     check_command => 'ping4',
     assign => ['host.address'],
     target => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
 
   ::icinga2::object::service {'linux-ssh':
     import => ['generic-service'],
     apply => true,
     check_command => 'ssh',
     assign => ['host.vars.os == Linux'],
     target => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
 
   # Done remotely on the client: command_endpoint = host.name.
 
   each($service_configuration['load']) |$name, $vars| {
     if $name == 'default' {
       $assign = 'host.vars.os == Linux'
       $ignore = 'host.vars.noagent || host.vars.load'
     } else {
       $assign = "host.vars.os == Linux && host.vars.load == ${name}"
       $ignore = 'host.vars.noagent'
     }
 
     ::icinga2::object::service {"linux_load_${name}":
       import => ['generic-service'],
       service_name => 'load',
       apply => true,
       check_command => 'load',
       command_endpoint => 'host.name',
       assign => [$assign],
       ignore => [$ignore],
       target => '/etc/icinga2/zones.d/global-templates/services.conf',
       vars => $vars,
     }
   }
 
   ::icinga2::object::service {'linux_disks':
     import => ['generic-service'],
     apply => 'disk_name => config in host.vars.disks',
     check_command => 'disk',
     command_endpoint => 'host.name',
     vars => 'vars + config',
     assign => ['host.vars.os == Linux'],
     ignore => ['host.vars.noagent'],
     target => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
 
   ::icinga2::object::service {'apt':
     import => ['generic-service'],
     apply => true,
     check_command => 'apt',
     command_endpoint => 'host.name',
     check_interval => '3h',
     vars => {
       apt_timeout => '120',
       apt_only_critical => 'true',
     },
     assign => ['host.vars.os == Linux'],
     ignore => ['host.vars.noagent'],
     target => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
 
   ::icinga2::object::service {'ntp':
     import => ['generic-service'],
     apply => true,
     check_command => 'ntp_peer',
     command_endpoint => 'host.name',
     vars => {
       ntp_address => 'localhost',
     },
     assign => ['host.vars.os == Linux'],
     ignore => ['host.vars.noagent'],
     target => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
 
   ::icinga2::object::service {'journalbeat':
     import => ['generic-service'],
     apply => true,
     check_command => 'check_journal',
     command_endpoint => 'host.name',
     assign => ['host.vars.os == Linux'],
     ignore => ['-:"check_journal" !in host.vars.plugins', 'host.vars.noagent'],
     target => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
 
   ::icinga2::object::service {'puppet_agent':
     import => ['generic-service'],
     apply => true,
     check_command => 'file_age',
     command_endpoint => 'host.name',
     vars => {
       file_age_file => '/var/lib/puppet/state/agent_disabled.lock',
       file_age_warning_time => '14400', # in seconds, warning after 4h
       file_age_critical_time => '86400', # in seconds, critical after 24h
       file_age_ignoremissing => 'true',
     },
     assign => ['host.vars.os == Linux'],
     ignore => ['host.vars.noagent'],
     target => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
 
   ::icinga2::object::service {'logstash_errors':
     import => ['generic-service'],
     apply => true,
     check_command => 'check_logstash_errors.sh',
     command_endpoint => 'host.name',
-    assign => ['"check_logstash_errors.sh" in host.vars.plugins'],
+    assign => ['check_logstash_errors.sh in host.vars.plugins'],
     ignore => ['host.vars.noagent'],
     target => '/etc/icinga2/zones.d/global-templates/services.conf',
   }
 }
diff --git a/site-modules/profile/manifests/icinga2/objects/logstash_checks.pp b/site-modules/profile/manifests/icinga2/objects/logstash_checks.pp
index 4ef5453b..092ba228 100644
--- a/site-modules/profile/manifests/icinga2/objects/logstash_checks.pp
+++ b/site-modules/profile/manifests/icinga2/objects/logstash_checks.pp
@@ -1,27 +1,26 @@
 # Check the status of logstash service
 # this is an agent check
 class profile::icinga2::objects::logstash_checks {
 
   $swh_plugin_dir = '/usr/lib/nagios/plugins/swh'
   $check_command = 'check_logstash_errors.sh'
   $check_command_path = "${swh_plugin_dir}/${check_command}"
   $swh_plugin_configfile = '/etc/icinga2/conf.d/swh-plugins.conf'
 
   file {$check_command_path:
     ensure => present,
     owner => 'root',
     group => 'root',
     mode => '0755',
     source => "puppet:///modules/profile/icinga2/plugins/${check_command}",
     require => File[$swh_plugin_dir]
   }
 
-
   ::icinga2::object::checkcommand {$check_command:
     import => ['plugin-check-command'],
     command => $check_command_path,
     target => $swh_plugin_configfile,
     require => File[$check_command_path]
   }
 
 }
diff --git a/site-modules/profile/manifests/icinga2/objects/static_checks.pp b/site-modules/profile/manifests/icinga2/objects/static_checks.pp
index bb992a4b..77664dd9 100644
--- a/site-modules/profile/manifests/icinga2/objects/static_checks.pp
+++ b/site-modules/profile/manifests/icinga2/objects/static_checks.pp
@@ -1,107 +1,101 @@
 # Static checks on the icinga master
 class profile::icinga2::objects::static_checks {
 
   $checks_file = '/etc/icinga2/conf.d/static-checks.conf'
 
   ::icinga2::object::host {'www.softwareheritage.org':
     import => ['generic-host'],
     check_command => 'dummy',
     address => 'www.softwareheritage.org',
     target => $checks_file,
     vars => {
       dummy_state => 0, # up
       dummy_text => "HTTP-only host",
     },
   }
 
   ::icinga2::object::host {'softwareheritage.org':
     import => ['generic-host'],
     check_command => 'dummy',
     address => 'softwareheritage.org',
     target => $checks_file,
     vars => {
       dummy_state => 0, # up
       dummy_text => "HTTP-only host",
     },
   }
 
   ::icinga2::object::service {'Software Heritage Homepage':
     import => ['generic-service'],
     host_name => 'www.softwareheritage.org',
     check_command => 'http',
     target => $checks_file,
     vars => {
       http_vhost => 'www.softwareheritage.org',
       http_uri => '/',
       http_ssl => true,
       http_sni => true,
       http_string => 'Software Heritage',
     },
   }
 
   ::icinga2::object::service {'Software Heritage Homepage (redirect to www)':
     import => ['generic-service'],
     host_name => 'softwareheritage.org',
     check_command => 'http',
     target => $checks_file,
     vars => {
       http_vhost => 'softwareheritage.org',
       http_uri => '/',
       http_ssl => true,
       http_sni => true,
     },
   }
 
   ::icinga2::object::host {'swh-logging-prod':
     check_command => 'dummy',
     address => '127.0.0.1',
     target => $checks_file,
     vars => {
       dummy_state => 0, # up
       dummy_text => "virtual host for clustered checks",
     },
   }
 
   ::icinga2::object::service {'swh-logging-prod cluster':
     host_name => 'swh-logging-prod',
     check_command => 'check_escluster',
     target => $checks_file,
   }
 
   ::icinga2::object::checkcommand {'check_escluster':
     import => ['plugin-check-command'],
     command => '/usr/lib/nagios/plugins/icinga_check_elasticsearch.sh',
     target => $checks_file,
   }
 
-  ::icinga2::object::checkcommand {'check_logstash_errors.sh':
-    import => ['plugin-check-command'],
-    command => '/usr/lib/nagios/plugins/icinga_check_logstash.sh',
-    target => $checks_file,
-  }
-
   ::icinga2::object::host {'DNS resolvers':
     check_command => 'dummy',
     address => '127.0.0.1',
     target => $checks_file,
     vars => {
       dummy_state => 0, # up
       dummy_text => "virtual host for clustered checks",
     },
   }
 
   ::icinga2::object::service {'SOA':
     host_name => 'DNS resolvers',
     check_command => 'check_resolvers',
     target => $checks_file,
   }
 
   ::icinga2::object::checkcommand {'check_resolvers':
     import => ['plugin-check-command'],
     command => [
       '/usr/lib/nagios/plugins/dsa-nagios-checks_checks_dsa-check-soas.txt',
       'internal.softwareheritage.org',
     ],
     target => $checks_file,
   }
 }