diff --git a/data/common/common.yaml b/data/common/common.yaml --- a/data/common/common.yaml +++ b/data/common/common.yaml @@ -3251,7 +3251,7 @@ port: "%{hiera('swh::deploy::db::pgbouncer::port')}" user: guest -elastic::elk_version: '7.8.0' +elastic::elk_version: '7.15.1' elasticsearch::hosts: - http://esnode1.internal.softwareheritage.org:9200 diff --git a/data/deployments/staging/common.yaml b/data/deployments/staging/common.yaml --- a/data/deployments/staging/common.yaml +++ b/data/deployments/staging/common.yaml @@ -289,8 +289,6 @@ # Elasticsearch -elastic::elk_version: '7.9.3' - elasticsearch::config::cluster::name: swh-search elasticsearch::config::discovery::seed_hosts: diff --git a/data/hostname/esnode1.internal.softwareheritage.org.yaml b/data/hostname/esnode1.internal.softwareheritage.org.yaml --- a/data/hostname/esnode1.internal.softwareheritage.org.yaml +++ b/data/hostname/esnode1.internal.softwareheritage.org.yaml @@ -11,6 +11,5 @@ - zfsutils-linux - zfs-zed -elastic::elk_version: '7.15.1' elasticsearch::config::extras: xpack.security.enabled: false diff --git a/data/hostname/esnode2.internal.softwareheritage.org.yaml b/data/hostname/esnode2.internal.softwareheritage.org.yaml --- a/data/hostname/esnode2.internal.softwareheritage.org.yaml +++ b/data/hostname/esnode2.internal.softwareheritage.org.yaml @@ -11,6 +11,5 @@ - zfsutils-linux - zfs-zed -elastic::elk_version: '7.15.1' elasticsearch::config::extras: xpack.security.enabled: false diff --git a/data/hostname/esnode3.internal.softwareheritage.org.yaml b/data/hostname/esnode3.internal.softwareheritage.org.yaml --- a/data/hostname/esnode3.internal.softwareheritage.org.yaml +++ b/data/hostname/esnode3.internal.softwareheritage.org.yaml @@ -11,6 +11,5 @@ - zfsutils-linux - zfs-zed -elastic::elk_version: '7.15.1' elasticsearch::config::extras: xpack.security.enabled: false diff --git a/data/hostname/kibana0.internal.softwareheritage.org.yaml b/data/hostname/kibana0.internal.softwareheritage.org.yaml 
deleted file mode 100644 --- a/data/hostname/kibana0.internal.softwareheritage.org.yaml +++ /dev/null @@ -1 +0,0 @@ -elastic::elk_version: '7.15.1' diff --git a/data/hostname/search-esnode0.internal.staging.swh.network.yaml b/data/hostname/search-esnode0.internal.staging.swh.network.yaml --- a/data/hostname/search-esnode0.internal.staging.swh.network.yaml +++ b/data/hostname/search-esnode0.internal.staging.swh.network.yaml @@ -4,6 +4,9 @@ netmask: 255.255.255.0 gateway: 192.168.130.1 +elastic::elk_version: '7.9.3' +elastic::beat_version: '7.15.1' + swh::apt_config::enable_non_free: true swh::apt_config::backported_packages: buster: diff --git a/data/hostname/search-esnode4.internal.softwareheritage.org.yaml b/data/hostname/search-esnode4.internal.softwareheritage.org.yaml --- a/data/hostname/search-esnode4.internal.softwareheritage.org.yaml +++ b/data/hostname/search-esnode4.internal.softwareheritage.org.yaml @@ -12,6 +12,7 @@ - zfs-zed elastic::elk_version: '7.9.3' +elastic::beat_version: '7.15.1' # Elasticsearch elasticsearch::config::cluster::name: swh-search diff --git a/data/hostname/search-esnode5.internal.softwareheritage.org.yaml b/data/hostname/search-esnode5.internal.softwareheritage.org.yaml --- a/data/hostname/search-esnode5.internal.softwareheritage.org.yaml +++ b/data/hostname/search-esnode5.internal.softwareheritage.org.yaml @@ -12,6 +12,7 @@ - zfs-zed elastic::elk_version: '7.9.3' +elastic::beat_version: '7.15.1' # Elasticsearch elasticsearch::config::cluster::name: swh-search diff --git a/data/hostname/search-esnode6.internal.softwareheritage.org.yaml b/data/hostname/search-esnode6.internal.softwareheritage.org.yaml --- a/data/hostname/search-esnode6.internal.softwareheritage.org.yaml +++ b/data/hostname/search-esnode6.internal.softwareheritage.org.yaml @@ -12,6 +12,7 @@ - zfs-zed elastic::elk_version: '7.9.3' +elastic::beat_version: '7.15.1' # Elasticsearch elasticsearch::config::cluster::name: swh-search diff --git 
a/site-modules/profile/files/icinga2/plugins/check_journal b/site-modules/profile/files/icinga2/plugins/check_journal --- a/site-modules/profile/files/icinga2/plugins/check_journal +++ b/site-modules/profile/files/icinga2/plugins/check_journal @@ -25,6 +25,7 @@ import argparse import logging +import yaml import nagiosplugin from nagiosplugin import ScalarContext @@ -61,8 +62,10 @@ class JournalLag(nagiosplugin.Resource): """Check journal lag""" - def __init__(self, cursorfile): + def __init__(self, cursorfile, registryfile, registryentry): self.cursorfile = cursorfile + self.registryfile = registryfile + self.registryentry = registryentry def parse_cursor(self, cursor): """Parse a journald cursor entry""" @@ -79,17 +82,42 @@ return ret - def get_file_journal_cursor(self): + def get_legacy_journal_cursor(self): _log.info("querying the journal cursor cache file %s" % self.cursorfile) try: with open(self.cursorfile, 'r') as f: ret = f.read().strip() except OSError as e: - raise nagiosplugin.CheckError("failed to read journal cursor file: %s" % e) + _log.warning(f"failed to read the legacy journal cursor file {self.cursorfile}: {e}") else: _log.debug("current journal cursor: %s" % ret) return ret + def get_journal_cursor_from_registry(self): + _log.info(f"querying the journal cursor {self.registryentry} from registry file {self.registryfile}") + try: + with open(self.registryfile, 'r') as f: + registrycontent = yaml.safe_load(f) + except (OSError, yaml.YAMLError) as e: + _log.warning(f"failed to read the journal registry file {self.registryfile}: {e}") + else: + entry = [item for item in ((registrycontent or {}).get('journal_entries') or []) if item.get('path') == self.registryentry] + cursor = entry[0].get("cursor") if entry else None # None makes get_file_journal_cursor fall back to the legacy file + _log.debug("current journal cursor: %s" % cursor) + return cursor + + def get_file_journal_cursor(self): + cursor = self.get_journal_cursor_from_registry() + + if not cursor: + _log.debug("Unable to read cursor position from the registry, fallback on the legacy file") + cursor = self.get_legacy_journal_cursor() + + if
not cursor: + raise nagiosplugin.CheckError("failed to get cursor from registry or legacy file") + + return cursor + def get_system_journal_cursor(self): _log.info("querying the system journal for the current cursor") reader = systemd.journal.Reader() @@ -136,6 +164,10 @@ help='increase output verbosity (use up to 3 times)') argp.add_argument('-f', '--file', metavar='FILE', default='/var/lib/journalbeat/cursor-state', help='read journald cursor state from this file') + argp.add_argument('-r', '--registry', metavar='FILE', default='/var/lib/journalbeat/registry', + help='read journald cursor state from this journalbeat registry file') + argp.add_argument('-e', '--entry', default='LOCAL_SYSTEM_JOURNAL', + help='Check this registry entry') argp.add_argument('-w', '--warning', metavar='RANGE', default='1200', help='return warning if temporal lag is outside RANGE') argp.add_argument('-c', '--critical', metavar='RANGE', default='3600', @@ -148,7 +180,7 @@ args = argp.parse_args() check = nagiosplugin.Check( - JournalLag(args.file), + JournalLag(args.file, args.registry, args.entry), BooleanContext('sameboot'), ScalarContext('lag_time', args.warning, args.critical), ScalarContext('lag_entries', args.warning_entries, args.critical_entries), diff --git a/site-modules/profile/manifests/filebeat.pp b/site-modules/profile/manifests/filebeat.pp --- a/site-modules/profile/manifests/filebeat.pp +++ b/site-modules/profile/manifests/filebeat.pp @@ -7,7 +7,8 @@ include ::profile::elastic::apt_config - $version = lookup('elastic::elk_version') + $default_elk_version = lookup('elastic::elk_version') + $version = lookup('elastic::beat_version', { default_value => $default_elk_version }) package { 'filebeat': ensure => $version, diff --git a/site-modules/profile/manifests/systemd_journal/journalbeat.pp b/site-modules/profile/manifests/systemd_journal/journalbeat.pp --- a/site-modules/profile/manifests/systemd_journal/journalbeat.pp +++ 
b/site-modules/profile/manifests/systemd_journal/journalbeat.pp @@ -1,51 +1,61 @@ # Journalbeat: a systemd journal collection beater for the ELK stack class profile::systemd_journal::journalbeat { $package = 'journalbeat' - $user = 'journalbeat' - $group = 'nogroup' - $homedir = '/var/lib/journalbeat' $configdir = '/etc/journalbeat' $configfile = "${configdir}/journalbeat.yml" $service = 'journalbeat' + $default_elk_version = lookup('elastic::elk_version') + $version = lookup('elastic::beat_version', { default_value => $default_elk_version }) $logstash_hosts = lookup('systemd_journal::logstash_hosts') - package {$package: - ensure => present + include ::profile::elastic::apt_config + + # cleanup + ::apt::pin {'swh-journalbeat': + ensure => absent, + } + -> ::apt::pin {'journalbeat': + explanation => 'Use the elk stack version', + packages => ['journalbeat'], + version => $version, + priority => 1001, + } + -> package {$package: + ensure => $version, } - user {$user: - ensure => present, - gid => $group, - groups => 'systemd-journal', - home => $homedir, - managehome => true, - system => true, + # To remove after complete migration to 7.15 + -> user {'journalbeat': # journalbeat needs to be stopped before trying to remove the user + ensure => absent, + managehome => false, } - # Uses variables - # - $user - # - $homedir - # - $configfile - # - ::systemd::unit_file {"${service}.service": + # cleanup pre 7.15 version + file {"/etc/systemd/system/${service}.service": + ensure => absent, + } + file {'/var/lib/journalbeat/cursor-state': # legacy cursor file, named by the removed cursor_state_file setting + ensure => absent, + } + ::systemd::dropin_file { "${service}.conf": ensure => present, - content => template('profile/systemd_journal/journalbeat/journalbeat.service.erb'), + unit => "${service}.service", + content => template('profile/systemd_journal/journalbeat/journalbeat.conf.erb'), } ~> service {$service: - ensure => running, - enable => true, - require => [ + ensure => running, + enable => true, + require => [ Package[$package],
File[$configfile], + ::Systemd::Dropin_file["${service}.conf"], + ], + subscribe => [ + Package[$package], + File[$configfile], + ::Systemd::Dropin_file["${service}.conf"], ], - } - - file {$configdir: - ensure => directory, - owner => 'root', - group => 'root', - mode => '0644', } # Uses variables @@ -57,16 +67,8 @@ group => 'root', mode => '0644', content => template('profile/systemd_journal/journalbeat/journalbeat.yml.erb'), - notify => [ - Service[$service], - ], - } - - ::apt::pin {'swh-journalbeat': - explanation => 'Use journalbeat packages from Software Heritage', - packages => ['journalbeat'], - originator => 'softwareheritage', - priority => 990, + require => [Package[$package]], + notify => [Service[$service]], } profile::cron::d {'logrotate-journal': diff --git a/site-modules/profile/templates/logstash/filter.conf.erb b/site-modules/profile/templates/logstash/filter.conf.erb --- a/site-modules/profile/templates/logstash/filter.conf.erb +++ b/site-modules/profile/templates/logstash/filter.conf.erb @@ -17,15 +17,31 @@ } } } else if "swh-worker@" in [systemd_unit] { + # Temporary rule to delete after complete migration to 7.15.1 mutate { add_field => { "[@metadata][target_index]" => "swh_workers-%{+YYYY.MM.dd}" } } - } else { + } else if "swh-worker@" in [systemd][unit] { mutate { add_field => { - "[@metadata][target_index]" => "systemlogs-%{+YYYY.MM.dd}" + "[@metadata][target_index]" => "swh_workers-%{[@metadata][version]}-%{+YYYY.MM.dd}" + } + } + } else { + if [@metadata][version] { + mutate { + add_field => { + "[@metadata][target_index]" => "systemlogs-%{[@metadata][version]}-%{+YYYY.MM.dd}" + } + } + } else { + # Temporary rule to delete after complete migration to 7.15.1 + mutate { + add_field => { + "[@metadata][target_index]" => "systemlogs-%{+YYYY.MM.dd}" + } } } } diff --git a/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.conf.erb b/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.conf.erb new file 
mode 100644 --- /dev/null +++ b/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.conf.erb @@ -0,0 +1,6 @@ +# Managed by puppet (class profile::systemd_journal::journalbeat), changes will be lost + +[Service] +ReadOnlyDirectories=/ +ReadWriteDirectories=-/var/lib/journalbeat +WorkingDirectory=/var/lib/journalbeat diff --git a/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.service.erb b/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.service.erb deleted file mode 100644 --- a/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.service.erb +++ /dev/null @@ -1,19 +0,0 @@ -# Managed by puppet (class profile::systemd_journal::journalbeat), changes will be lost - -[Unit] -Description=Send systemd journal messages to logstash -After=nss-lookup.target - -[Service] -Type=simple -Restart=always -RestartSec=20s -ExecStart=/usr/bin/journalbeat -e -c <%= @configfile %> -User=<%= @user %> -Group=systemd-journal -ReadOnlyDirectories=/ -ReadWriteDirectories=-<%= @homedir %> -WorkingDirectory=<%= @homedir %> - -[Install] -WantedBy=multi-user.target diff --git a/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.yml.erb b/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.yml.erb --- a/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.yml.erb +++ b/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.yml.erb @@ -1,6 +1,12 @@ #======================== Journalbeat Configuration ============================ journalbeat: + inputs: + # Paths that should be crawled and fetched. Possible values files and directories. + # When setting a directory, all journals under it are merged. + # When empty starts to read from local journal. 
+ - paths: [] + # What position in journald to seek to at start up # options: cursor, tail, head (defaults to tail) seek_position: cursor @@ -10,12 +16,6 @@ # options: tail, head, none (defaults to tail) cursor_seek_fallback: head - # Store the cursor of the successfully published events - write_cursor_state: true - - # Path to the file to store the cursor (defaults to ".journalbeat-cursor-state") - cursor_state_file: cursor-state - # How frequently should we save the cursor to disk (defaults to 5s) #cursor_flush_period: 5s @@ -241,7 +241,7 @@ # distribution (for example, the sample dashboards). # If not set by a CLI flag or in the configuration file, the default for the # home path is the location of the binary. -path.home: <%= @homedir %> +# path.home: /var/lib/journalbeat # The configuration path for the beatname installation. This is the default # base path for configuration files, including the main YAML configuration file