diff --git a/data/common/common.yaml b/data/common/common.yaml --- a/data/common/common.yaml +++ b/data/common/common.yaml @@ -3251,7 +3251,7 @@ port: "%{hiera('swh::deploy::db::pgbouncer::port')}" user: guest -elastic::elk_version: '7.8.0' +elastic::elk_version: '7.15.2' elasticsearch::hosts: - http://esnode1.internal.softwareheritage.org:9200 diff --git a/data/deployments/staging/common.yaml b/data/deployments/staging/common.yaml --- a/data/deployments/staging/common.yaml +++ b/data/deployments/staging/common.yaml @@ -289,8 +289,6 @@ # Elasticsearch -elastic::elk_version: '7.9.3' - elasticsearch::config::cluster::name: swh-search elasticsearch::config::discovery::seed_hosts: diff --git a/data/hostname/esnode1.internal.softwareheritage.org.yaml b/data/hostname/esnode1.internal.softwareheritage.org.yaml --- a/data/hostname/esnode1.internal.softwareheritage.org.yaml +++ b/data/hostname/esnode1.internal.softwareheritage.org.yaml @@ -11,6 +11,5 @@ - zfsutils-linux - zfs-zed -elastic::elk_version: '7.15.2' elasticsearch::config::extras: xpack.security.enabled: false diff --git a/data/hostname/esnode2.internal.softwareheritage.org.yaml b/data/hostname/esnode2.internal.softwareheritage.org.yaml --- a/data/hostname/esnode2.internal.softwareheritage.org.yaml +++ b/data/hostname/esnode2.internal.softwareheritage.org.yaml @@ -11,6 +11,5 @@ - zfsutils-linux - zfs-zed -elastic::elk_version: '7.15.2' elasticsearch::config::extras: xpack.security.enabled: false diff --git a/data/hostname/esnode3.internal.softwareheritage.org.yaml b/data/hostname/esnode3.internal.softwareheritage.org.yaml --- a/data/hostname/esnode3.internal.softwareheritage.org.yaml +++ b/data/hostname/esnode3.internal.softwareheritage.org.yaml @@ -11,6 +11,5 @@ - zfsutils-linux - zfs-zed -elastic::elk_version: '7.15.2' elasticsearch::config::extras: xpack.security.enabled: false diff --git a/data/hostname/search-esnode0.internal.staging.swh.network.yaml 
b/data/hostname/search-esnode0.internal.staging.swh.network.yaml --- a/data/hostname/search-esnode0.internal.staging.swh.network.yaml +++ b/data/hostname/search-esnode0.internal.staging.swh.network.yaml @@ -4,6 +4,9 @@ netmask: 255.255.255.0 gateway: 192.168.130.1 +elastic::elk_version: '7.9.3' +elastic::beat_version: '7.15.2' + swh::apt_config::enable_non_free: true swh::apt_config::backported_packages: buster: diff --git a/data/hostname/search-esnode4.internal.softwareheritage.org.yaml b/data/hostname/search-esnode4.internal.softwareheritage.org.yaml --- a/data/hostname/search-esnode4.internal.softwareheritage.org.yaml +++ b/data/hostname/search-esnode4.internal.softwareheritage.org.yaml @@ -12,6 +12,7 @@ - zfs-zed elastic::elk_version: '7.9.3' +elastic::beat_version: '7.15.2' # Elasticsearch elasticsearch::config::cluster::name: swh-search diff --git a/data/hostname/search-esnode5.internal.softwareheritage.org.yaml b/data/hostname/search-esnode5.internal.softwareheritage.org.yaml --- a/data/hostname/search-esnode5.internal.softwareheritage.org.yaml +++ b/data/hostname/search-esnode5.internal.softwareheritage.org.yaml @@ -12,6 +12,7 @@ - zfs-zed elastic::elk_version: '7.9.3' +elastic::beat_version: '7.15.2' # Elasticsearch elasticsearch::config::cluster::name: swh-search diff --git a/data/hostname/search-esnode6.internal.softwareheritage.org.yaml b/data/hostname/search-esnode6.internal.softwareheritage.org.yaml --- a/data/hostname/search-esnode6.internal.softwareheritage.org.yaml +++ b/data/hostname/search-esnode6.internal.softwareheritage.org.yaml @@ -12,6 +12,7 @@ - zfs-zed elastic::elk_version: '7.9.3' +elastic::beat_version: '7.15.2' # Elasticsearch elasticsearch::config::cluster::name: swh-search diff --git a/site-modules/profile/files/icinga2/plugins/check_journal b/site-modules/profile/files/icinga2/plugins/check_journal --- a/site-modules/profile/files/icinga2/plugins/check_journal +++ b/site-modules/profile/files/icinga2/plugins/check_journal @@ -25,6 
+25,7 @@
 
 import argparse
 import logging
+import yaml
 
 import nagiosplugin
 from nagiosplugin import ScalarContext
@@ -61,8 +62,10 @@
 class JournalLag(nagiosplugin.Resource):
     """Check journal lag"""
 
-    def __init__(self, cursorfile):
+    def __init__(self, cursorfile, registryfile, registryentry):
         self.cursorfile = cursorfile
+        self.registryfile = registryfile
+        self.registryentry = registryentry
 
     def parse_cursor(self, cursor):
         """Parse a journald cursor entry"""
@@ -79,17 +82,52 @@
 
         return ret
 
-    def get_file_journal_cursor(self):
-        _log.info("querying the journal cursor cache file %s" % self.cursorfile)
+    def get_legacy_journal_cursor(self):
+        """Return the cursor stored in the legacy cursor file, or None if it cannot be read"""
+        _log.info("querying the journal cursor cache file %s", self.cursorfile)
         try:
             with open(self.cursorfile, 'r') as f:
                 ret = f.read().strip()
         except OSError as e:
-            raise nagiosplugin.CheckError("failed to read journal cursor file: %s" % e)
+            _log.warning("failed to read the legacy journal cursor file %s: %s", self.cursorfile, e)
         else:
             _log.debug("current journal cursor: %s" % ret)
             return ret
 
+    def get_journal_cursor_from_registry(self):
+        """Return the cursor of the configured registry entry, or None if it is unavailable"""
+        _log.info("querying the journal cursor %s from registry file %s", self.registryentry, self.registryfile)
+        try:
+            with open(self.registryfile, 'r') as f:
+                registrycontent = yaml.safe_load(f)
+        except (OSError, yaml.YAMLError) as e:
+            _log.warning("failed to read the journal registry file %s: %s", self.registryfile, e)
+            return None
+
+        # An empty or unexpected registry must lead to the legacy fallback, not to a traceback
+        entries = registrycontent.get('journal_entries') if isinstance(registrycontent, dict) else None
+        for entry in entries or []:
+            if isinstance(entry, dict) and entry.get('path') == self.registryentry:
+                cursor = entry.get('cursor')
+                _log.debug("current journal cursor: %s", cursor)
+                return cursor
+
+        _log.warning("no entry %s found in the journal registry file %s", self.registryentry, self.registryfile)
+        return None
+
+    def get_file_journal_cursor(self):
+        """Return the journalbeat cursor, trying the registry first and the legacy file second"""
+        cursor = self.get_journal_cursor_from_registry()
+
+        if not cursor:
+            _log.debug("Unable to read cursor position from the registry, fallback on the legacy file")
+            cursor = self.get_legacy_journal_cursor()
+
+        if not cursor:
+            raise nagiosplugin.CheckError("failed to get cursor from registry or legacy file")
+
+        return cursor
+
     def get_system_journal_cursor(self):
_log.info("querying the system journal for the current cursor") reader = systemd.journal.Reader() @@ -136,6 +164,10 @@ help='increase output verbosity (use up to 3 times)') argp.add_argument('-f', '--file', metavar='FILE', default='/var/lib/journalbeat/cursor-state', help='read journald cursor state from this file') + argp.add_argument('-r', '--registry', metavar='FILE', default='/var/lib/journalbeat/registry', + help='read journald cursor state from this journalbeat registry file') + argp.add_argument('-e', '--entry', default='LOCAL_SYSTEM_JOURNAL', + help='Check this registry entry') argp.add_argument('-w', '--warning', metavar='RANGE', default='1200', help='return warning if temporal lag is outside RANGE') argp.add_argument('-c', '--critical', metavar='RANGE', default='3600', @@ -148,7 +180,7 @@ args = argp.parse_args() check = nagiosplugin.Check( - JournalLag(args.file), + JournalLag(args.file, args.registry, args.entry), BooleanContext('sameboot'), ScalarContext('lag_time', args.warning, args.critical), ScalarContext('lag_entries', args.warning_entries, args.critical_entries), diff --git a/site-modules/profile/files/journalbeat/manage_index_template.sh b/site-modules/profile/files/journalbeat/manage_index_template.sh new file mode 100644 --- /dev/null +++ b/site-modules/profile/files/journalbeat/manage_index_template.sh @@ -0,0 +1,40 @@ +#!/bin/bash -x +# +# File managed by puppet (class ::profile::journalbeat::index_template_manager), changes will be lost. + +# Generate the journalbeat index template and create it in elasticsearch +# Save the json in the /var/lib/journalbeat directory +# Params: +# - ES HOST +# - template name +# - index template +# Output: +# - /var/lib/journalbeat/