diff --git a/site-modules/profile/manifests/icinga2/objects/static_checks.pp b/site-modules/profile/manifests/icinga2/objects/static_checks.pp index 04d22a69..b455f430 100644 --- a/site-modules/profile/manifests/icinga2/objects/static_checks.pp +++ b/site-modules/profile/manifests/icinga2/objects/static_checks.pp @@ -1,142 +1,142 @@ # Static checks on the icinga master class profile::icinga2::objects::static_checks { $checks_file = '/etc/icinga2/conf.d/static-checks.conf' ::icinga2::object::host {'www.softwareheritage.org': import => ['generic-host'], check_command => 'dummy', address => 'www.softwareheritage.org', target => $checks_file, vars => { dummy_state => 0, # up dummy_text => "HTTP-only host", }, } ::icinga2::object::host {'softwareheritage.org': import => ['generic-host'], check_command => 'dummy', address => 'softwareheritage.org', target => $checks_file, vars => { dummy_state => 0, # up dummy_text => "HTTP-only host", }, } ::icinga2::object::host {'graphql.staging.swh.network': import => ['generic-host'], check_command => 'dummy', address => 'graphql.staging.swh.network', target => $checks_file, vars => { dummy_state => 0, # up dummy_text => "HTTP-only host", }, } ::icinga2::object::service {'Software Heritage Homepage': import => ['generic-service'], host_name => 'www.softwareheritage.org', check_command => 'http', target => $checks_file, vars => { http_vhost => 'www.softwareheritage.org', http_uri => '/', http_ssl => true, http_sni => true, http_string => 'Software Heritage', }, } ::icinga2::object::service {'Software Heritage Homepage (redirect to www)': import => ['generic-service'], host_name => 'softwareheritage.org', check_command => 'http', target => $checks_file, vars => { http_vhost => 'softwareheritage.org', http_uri => '/', http_ssl => true, http_sni => true, }, } ::icinga2::object::host {'swh-logging-prod': check_command => 'dummy', address => '127.0.0.1', target => $checks_file, vars => { dummy_state => 0, # up dummy_text => "virtual host for clustered checks", }, } ::icinga2::object::service {'swh-logging-prod cluster': host_name => 'swh-logging-prod', check_command => 'check_escluster', target => $checks_file, } ::icinga2::object::checkcommand {'check_escluster': import => ['plugin-check-command'], command => '/usr/lib/nagios/plugins/icinga_check_elasticsearch.sh', target => $checks_file, } ::icinga2::object::host {'DNS resolvers': check_command => 'dummy', address => '127.0.0.1', target => $checks_file, vars => { dummy_state => 0, # up dummy_text => "virtual host for clustered checks", }, } ::icinga2::object::service {'SOA': host_name => 'DNS resolvers', check_command => 'check_resolvers', target => $checks_file, } ::icinga2::object::checkcommand {'check_resolvers': import => ['plugin-check-command'], command => [ '/usr/lib/nagios/plugins/dsa-nagios-checks_checks_dsa-check-soas.txt', 'internal.softwareheritage.org', ], target => $checks_file, } $prometheus_host = lookup('prometheus::server::fqdn') ::icinga2::object::service {'Postgresql replication lag (belvedere -> somerset)': check_command => 'check_prometheus_metric', target => $checks_file, host_name => 'belvedere.internal.softwareheritage.org', vars => { check_prometheus_metric_name => 'pg replication_lag belvedere somerset', - check_prometheus_query => profile::icinga2::literal_var( + check_prometheus_metric_query => profile::icinga2::literal_var( 'sum(sql_pg_stat_replication{instance="belvedere.internal.softwareheritage.org", host=":5433", application_name="softwareheritage_replica"})' ), check_prometheus_metric_warning => '1073741824', # 1GiB 1*1024*1024*1024 check_prometheus_metric_critical => '2147483648', # 2GiB 2*1024*1024*1024 }, } ::icinga2::object::service {'Software Heritage Staging Graphql Instance': import => ['generic-service'], host_name => 'graphql.staging.swh.network', check_command => 'http', target => $checks_file, vars => { http_vhost => 'graphql.staging.swh.network', http_uri => '/', http_ssl => true, http_sni => true, http_string => 'GraphQL Playground', }, } } diff --git a/site-modules/profile/manifests/thanos/prometheus_sidecar.pp b/site-modules/profile/manifests/thanos/prometheus_sidecar.pp index e759cda4..68251a8c 100644 --- a/site-modules/profile/manifests/thanos/prometheus_sidecar.pp +++ b/site-modules/profile/manifests/thanos/prometheus_sidecar.pp @@ -1,109 +1,109 @@ # Thanos prometheus sidecar class profile::thanos::prometheus_sidecar { include profile::thanos::base include profile::thanos::tls_certificate $service_name = 'thanos-sidecar' $unit_name = "${service_name}.service" $objstore_config = lookup('thanos::objstore::config') $objstore_config_file = "${::profile::thanos::base::config_dir}/objstore.yml" $port_http = lookup('thanos::sidecar::port_http') $port_grpc = lookup('thanos::sidecar::port_grpc') $internal_ip = ip_for_network(lookup('internal_network')) $grpc_address = "${internal_ip}:${port_grpc}" $grpc_target = "${swh_hostname['internal_fqdn']}:${port_grpc}" $cert_paths = $::profile::thanos::tls_certificate::cert_paths $sidecar_arguments = { tsdb => { path => '/var/lib/prometheus/metrics2', }, prometheus => { # use the listen address for the prometheus server url => "http://${::profile::prometheus::server::target}/", }, objstore => { 'config-file' => $objstore_config_file, }, shipper => { 'upload-compacted' => true, }, 'grpc-server-tls-cert' => $cert_paths['fullchain'], 'grpc-server-tls-key' => $cert_paths['privkey'], 'http-address' => "${internal_ip}:${port_http}", 'grpc-address' => $grpc_address, } file {$objstore_config_file: ensure => present, owner => 'root', group => 'prometheus', mode => '0640', content => inline_yaml($objstore_config), require => File[$::profile::thanos::base::config_dir], } # Template uses: # $sidecar_arguments systemd::unit_file {$unit_name: ensure => present, content => template('profile/thanos/thanos-sidecar.service.erb'), require => Class['profile::thanos::base'], notify => Service[$service_name] } service {$service_name: ensure => 'running', enable => true, require => [ Service['prometheus'], File[$cert_paths['fullchain']], File[$cert_paths['privkey']], ], tag => 'thanos', } # Ensure prometheus is configured properly before starting the sidecar Exec['restart-prometheus'] -> Service[$service_name] # Ensure service is restarted when the certs are renewed File[$cert_paths['fullchain']] ~> Service[$service_name] File[$cert_paths['privkey']] ~> Service[$service_name] ::profile::thanos::export_query_endpoint {"thanos-sidecar-${::fqdn}": grpc_address => $grpc_target } $http_target = "${swh_hostname['internal_fqdn']}:${port_http}" ::profile::prometheus::export_scrape_config {"thanos-sidecar-${::fqdn}": target => $http_target, job => 'thanos_sidecar', } $icinga_checks_file = lookup('icinga2::exported_checks::filename') @@::icinga2::object::service {"thanos sidecar on ${::fqdn}": service_name => 'thanos sidecar', import => ['generic-service'], host_name => $::fqdn, check_command => 'check_prometheus_metric', vars => { - 'check_prometheus_query' => profile::icinga2::literal_var( + 'check_prometheus_metric_query' => profile::icinga2::literal_var( join([ 'time() - thanos_objstore_bucket_last_successful_upload_time{job="thanos_sidecar", instance="', $swh_hostname['internal_fqdn'], '"}', ]) ), 'check_prometheus_metric_name' => 'thanos_sidecar_upload_lag', # We expect an upload every 2 hours 'check_prometheus_metric_warning' => 3 * 3600, 'check_prometheus_metric_critical' => 24 * 3600, }, target => $icinga_checks_file, tag => 'icinga2::exported', } }