diff --git a/site-modules/profile/manifests/icinga2/objects/static_checks.pp b/site-modules/profile/manifests/icinga2/objects/static_checks.pp
index 04d22a69..b455f430 100644
--- a/site-modules/profile/manifests/icinga2/objects/static_checks.pp
+++ b/site-modules/profile/manifests/icinga2/objects/static_checks.pp
@@ -1,142 +1,142 @@
# Static checks on the icinga master
#
# Declares a fixed set of icinga2 host, service and checkcommand objects.
# Every object except the Postgresql replication lag service is written to
# the same target file ($checks_file).
class profile::icinga2::objects::static_checks {
# Single configuration file that receives the objects declared below.
$checks_file = '/etc/icinga2/conf.d/static-checks.conf'
# Hosts that exist only to carry HTTP services: they use the 'dummy' check
# command with dummy_state 0, so the host itself is always reported as up.
::icinga2::object::host {'www.softwareheritage.org':
import => ['generic-host'],
check_command => 'dummy',
address => 'www.softwareheritage.org',
target => $checks_file,
vars => {
dummy_state => 0, # up
dummy_text => "HTTP-only host",
},
}
::icinga2::object::host {'softwareheritage.org':
import => ['generic-host'],
check_command => 'dummy',
address => 'softwareheritage.org',
target => $checks_file,
vars => {
dummy_state => 0, # up
dummy_text => "HTTP-only host",
},
}
::icinga2::object::host {'graphql.staging.swh.network':
import => ['generic-host'],
check_command => 'dummy',
address => 'graphql.staging.swh.network',
target => $checks_file,
vars => {
dummy_state => 0, # up
dummy_text => "HTTP-only host",
},
}
# 'http' check of '/' on the www vhost, with http_ssl and http_sni enabled.
# http_string is presumably the text the check expects in the response.
# NOTE(review): the http_string literal below spans two lines, so its value
# starts with a newline; this looks like leading markup may have been lost --
# confirm the intended expected string.
::icinga2::object::service {'Software Heritage Homepage':
import => ['generic-service'],
host_name => 'www.softwareheritage.org',
check_command => 'http',
target => $checks_file,
vars => {
http_vhost => 'www.softwareheritage.org',
http_uri => '/',
http_ssl => true,
http_sni => true,
http_string => '
Software Heritage',
},
}
# Same 'http' check on the bare domain; no http_string is set here, so no
# expected content is configured for this service.
::icinga2::object::service {'Software Heritage Homepage (redirect to www)':
import => ['generic-service'],
host_name => 'softwareheritage.org',
check_command => 'http',
target => $checks_file,
vars => {
http_vhost => 'softwareheritage.org',
http_uri => '/',
http_ssl => true,
http_sni => true,
},
}
# Virtual host (address 127.0.0.1, always-up dummy check) that the
# 'swh-logging-prod cluster' service below is attached to. Unlike the HTTP
# hosts above, it does not import 'generic-host'.
::icinga2::object::host {'swh-logging-prod':
check_command => 'dummy',
address => '127.0.0.1',
target => $checks_file,
vars => {
dummy_state => 0, # up
dummy_text => "virtual host for clustered checks",
},
}
# Service bound to the virtual host; runs the 'check_escluster' command
# defined right after it.
::icinga2::object::service {'swh-logging-prod cluster':
host_name => 'swh-logging-prod',
check_command => 'check_escluster',
target => $checks_file,
}
# Check command wrapping the icinga_check_elasticsearch.sh plugin script.
::icinga2::object::checkcommand {'check_escluster':
import => ['plugin-check-command'],
command => '/usr/lib/nagios/plugins/icinga_check_elasticsearch.sh',
target => $checks_file,
}
# Second virtual host (127.0.0.1, always-up dummy check), carrying the 'SOA'
# service below.
::icinga2::object::host {'DNS resolvers':
check_command => 'dummy',
address => '127.0.0.1',
target => $checks_file,
vars => {
dummy_state => 0, # up
dummy_text => "virtual host for clustered checks",
},
}
# Service bound to the 'DNS resolvers' host; runs 'check_resolvers'.
::icinga2::object::service {'SOA':
host_name => 'DNS resolvers',
check_command => 'check_resolvers',
target => $checks_file,
}
# Check command: the command array is the plugin path followed by
# 'internal.softwareheritage.org' as its single argument.
::icinga2::object::checkcommand {'check_resolvers':
import => ['plugin-check-command'],
command => [
'/usr/lib/nagios/plugins/dsa-nagios-checks_checks_dsa-check-soas.txt',
'internal.softwareheritage.org',
],
target => $checks_file,
}
# NOTE(review): $prometheus_host is looked up here but is not referenced
# anywhere else in this class as shown -- confirm whether it is still needed.
$prometheus_host = lookup('prometheus::server::fqdn')
# Prometheus-backed check attached to belvedere. Unlike the services above it
# does not import 'generic-service'.
::icinga2::object::service {'Postgresql replication lag (belvedere -> somerset)':
check_command => 'check_prometheus_metric',
target => $checks_file,
host_name => 'belvedere.internal.softwareheritage.org',
vars => {
check_prometheus_metric_name => 'pg replication_lag belvedere somerset',
# This diff renames the query var from 'check_prometheus_query' to
# 'check_prometheus_metric_query', matching the 'check_prometheus_metric_'
# prefix of the other vars in this hash. The value is wrapped by
# profile::icinga2::literal_var (helper not visible here; presumably it keeps
# the query from being interpreted by icinga2 -- TODO confirm).
- check_prometheus_query => profile::icinga2::literal_var(
+ check_prometheus_metric_query => profile::icinga2::literal_var(
'sum(sql_pg_stat_replication{instance="belvedere.internal.softwareheritage.org", host=":5433", application_name="softwareheritage_replica"})'
),
check_prometheus_metric_warning => '1073741824', # 1GiB 1*1024*1024*1024
check_prometheus_metric_critical => '2147483648', # 2GiB 2*1024*1024*1024
},
}
# 'http' check of '/' on the staging GraphQL vhost, with http_ssl and
# http_sni enabled and 'GraphQL Playground' as the http_string.
::icinga2::object::service {'Software Heritage Staging Graphql Instance':
import => ['generic-service'],
host_name => 'graphql.staging.swh.network',
check_command => 'http',
target => $checks_file,
vars => {
http_vhost => 'graphql.staging.swh.network',
http_uri => '/',
http_ssl => true,
http_sni => true,
http_string => 'GraphQL Playground',
},
}
}
diff --git a/site-modules/profile/manifests/thanos/prometheus_sidecar.pp b/site-modules/profile/manifests/thanos/prometheus_sidecar.pp
index e759cda4..68251a8c 100644
--- a/site-modules/profile/manifests/thanos/prometheus_sidecar.pp
+++ b/site-modules/profile/manifests/thanos/prometheus_sidecar.pp
@@ -1,109 +1,109 @@
# Thanos prometheus sidecar
#
# Writes the object store configuration, installs and runs the
# 'thanos-sidecar' systemd service, and exports a query endpoint, a
# prometheus scrape config and an icinga2 service for it.
class profile::thanos::prometheus_sidecar {
include profile::thanos::base
include profile::thanos::tls_certificate
$service_name = 'thanos-sidecar'
$unit_name = "${service_name}.service"
# Object store settings come from hiera and are written to objstore.yml
# inside the base profile's config directory.
$objstore_config = lookup('thanos::objstore::config')
$objstore_config_file = "${::profile::thanos::base::config_dir}/objstore.yml"
$port_http = lookup('thanos::sidecar::port_http')
$port_grpc = lookup('thanos::sidecar::port_grpc')
$internal_ip = ip_for_network(lookup('internal_network'))
# $grpc_address (internal IP) is passed to the sidecar as 'grpc-address';
# $grpc_target (internal FQDN) is what gets exported to the query endpoint.
$grpc_address = "${internal_ip}:${port_grpc}"
$grpc_target = "${swh_hostname['internal_fqdn']}:${port_grpc}"
$cert_paths = $::profile::thanos::tls_certificate::cert_paths
# Arguments consumed by the service unit template below (see the
# "Template uses" comment).
$sidecar_arguments = {
tsdb => {
path => '/var/lib/prometheus/metrics2',
},
prometheus => {
# use the listen address for the prometheus server
# NOTE(review): profile::prometheus::server is not included in this class
# as shown; presumably it is declared elsewhere in the catalog -- confirm.
url => "http://${::profile::prometheus::server::target}/",
},
objstore => {
'config-file' => $objstore_config_file,
},
shipper => {
'upload-compacted' => true,
},
'grpc-server-tls-cert' => $cert_paths['fullchain'],
'grpc-server-tls-key' => $cert_paths['privkey'],
'http-address' => "${internal_ip}:${port_http}",
'grpc-address' => $grpc_address,
}
# Object store configuration rendered as YAML; owned by root, group
# 'prometheus', mode 0640.
file {$objstore_config_file:
ensure => present,
owner => 'root',
group => 'prometheus',
mode => '0640',
content => inline_yaml($objstore_config),
require => File[$::profile::thanos::base::config_dir],
}
# Template uses:
# $sidecar_arguments
# Changes to the unit file notify (and so restart) the service.
systemd::unit_file {$unit_name:
ensure => present,
content => template('profile/thanos/thanos-sidecar.service.erb'),
require => Class['profile::thanos::base'],
notify => Service[$service_name]
}
# The service requires the prometheus service and both TLS certificate files.
service {$service_name:
ensure => 'running',
enable => true,
require => [
Service['prometheus'],
File[$cert_paths['fullchain']],
File[$cert_paths['privkey']],
],
tag => 'thanos',
}
# Ensure prometheus is configured properly before starting the sidecar
Exec['restart-prometheus'] -> Service[$service_name]
# Ensure service is restarted when the certs are renewed
File[$cert_paths['fullchain']] ~> Service[$service_name]
File[$cert_paths['privkey']] ~> Service[$service_name]
# Publish the sidecar's gRPC target through the export_query_endpoint define.
::profile::thanos::export_query_endpoint {"thanos-sidecar-${::fqdn}":
grpc_address => $grpc_target
}
# Publish the sidecar's HTTP target as a scrape config under job
# 'thanos_sidecar'.
$http_target = "${swh_hostname['internal_fqdn']}:${port_http}"
::profile::prometheus::export_scrape_config {"thanos-sidecar-${::fqdn}":
target => $http_target,
job => 'thanos_sidecar',
}
# Exported ('@@') icinga2 service, tagged 'icinga2::exported' and written to
# the file named by the 'icinga2::exported_checks::filename' lookup.
$icinga_checks_file = lookup('icinga2::exported_checks::filename')
@@::icinga2::object::service {"thanos sidecar on ${::fqdn}":
service_name => 'thanos sidecar',
import => ['generic-service'],
host_name => $::fqdn,
check_command => 'check_prometheus_metric',
vars => {
# This diff renames the query var from 'check_prometheus_query' to
# 'check_prometheus_metric_query', matching the 'check_prometheus_metric_'
# prefix of the other vars in this hash. The query is the time elapsed since
# the last successful object store upload for this host's sidecar instance.
- 'check_prometheus_query' => profile::icinga2::literal_var(
+ 'check_prometheus_metric_query' => profile::icinga2::literal_var(
join([
'time() - thanos_objstore_bucket_last_successful_upload_time{job="thanos_sidecar", instance="',
$swh_hostname['internal_fqdn'],
'"}',
])
),
'check_prometheus_metric_name' => 'thanos_sidecar_upload_lag',
# We expect an upload every 2 hours
# Thresholds are in seconds: warning at 3 hours, critical at 24 hours.
'check_prometheus_metric_warning' => 3 * 3600,
'check_prometheus_metric_critical' => 24 * 3600,
},
target => $icinga_checks_file,
tag => 'icinga2::exported',
}
}