diff --git a/sysadmin/grid5000/cassandra/Vagrantfile b/sysadmin/grid5000/cassandra/Vagrantfile index 9be1e32..ce21d56 100644 --- a/sysadmin/grid5000/cassandra/Vagrantfile +++ b/sysadmin/grid5000/cassandra/Vagrantfile @@ -1,88 +1,95 @@ # -*- mode: ruby -*- # vi: set ft=ruby : vms = { "cassandra1" => { :ip => "10.168.180.11", :memory => 2048, :cpus => 2, :type => 'cassandra', }, "cassandra2" => { :ip => "10.168.180.12", :memory => 2048, :cpus => 2, :type => 'cassandra', }, "cassandra3" => { :ip => "10.168.180.13", :memory => 2048, :cpus => 2, :type => 'cassandra', }, + "cassandra4" => { + :ip => "10.168.180.24", + :memory => 2048, + :cpus => 2, + :type => 'cassandra', + }, "swh-storage1" => { :ip => "10.168.180.14", :memory => 1024, :cpus => 2, :type => 'swh-storage', }, "monitoring1" => { :ip => "10.168.180.15", :memory => 1024, :cpus => 2, :type => 'monitoring', }, } # Images/remote configuration -$global_debian10_box = "debian10-20210517-1348" +$global_debian10_box = "debian10-20210820-1622" $global_debian10_box_url = "https://annex.softwareheritage.org/public/isos/libvirt/debian/swh-debian-10.9-amd64-20210517-1348.qcow2" +$global_debian10_box_url = "file:///home/vsellier/src/swh/puppet-environment/packer/builds/swh-debian-10.10-amd64-20210820-1622.qcow2" vms.each { | vm_name, vm_props | Vagrant.configure("2") do |global_config| unless Vagrant.has_plugin?("libvirt") $stderr.puts <<-MSG vagrant-libvirt plugin is required for this. 
To install: `$ sudo apt install vagrant-libvirt MSG exit 1 end global_config.vm.define vm_name do |config| config.vm.box = $global_debian10_box config.vm.box_url = $global_debian10_box_url config.vm.box_check_update = false config.vm.hostname = vm_name config.vm.network :private_network, ip: vm_props[:ip], netmask: "255.255.0.0" config.vm.synced_folder ".", "/vagrant", type: 'nfs', nfs_version: 4 config.vm.provision :ansible do |ansible| ansible.verbose = true ansible.become = true ansible.playbook = "ansible/playbook.yml" ansible.inventory_path = "ansible/hosts.yml" ansible.raw_arguments = [ "-v", "--connection=paramiko", "--private-key=/home/.../.vagrant/machines/.../private_key", "--extra-vars=@.credentials", "--vault-password-file=.vault_password" ] end config.vm.provider :libvirt do |provider| provider.memory = vm_props[:memory] provider.cpus = vm_props[:cpus] provider.driver = 'kvm' if vm_props[:type] == "cassandra" provider.storage :file, :size => '1G' provider.storage :file, :size => '1G' provider.storage :file, :size => '1G' end end end end } diff --git a/sysadmin/grid5000/cassandra/ansible/cassandra.yml b/sysadmin/grid5000/cassandra/ansible/cassandra.yml index 4abdf37..d856dc8 100644 --- a/sysadmin/grid5000/cassandra/ansible/cassandra.yml +++ b/sysadmin/grid5000/cassandra/ansible/cassandra.yml @@ -1,92 +1,92 @@ --- # - name: "Get public ipv4 address" # set_fact: # cassandra_seed_ips: "{{ansible_facts[item]['ipv4']['address']}}" # with_items: # - "{{cassandra_listen_interface }}" - name: Install cassandra signing key apt_key: url: https://downloads.apache.org/cassandra/KEYS state: present - name: Install cassandra apt repository apt_repository: repo: deb http://downloads.apache.org/cassandra/debian 40x main state: present filename: cassandra.sources - name: Install cassandra packages apt: update_cache: true # force an apt update before name: ## TODO: check other version than jdk11 - cassandra - dstat - facter - openjdk-11-jdk - smartmontools - tcpdump - 
name: install prometheus node exporter include: _install_prometheus_exporter.yml - name: Create datadirs file: state: directory path: "{{ item }}" owner: "cassandra" group: "cassandra" mode: "0755" recurse: true with_items: - "{{ cassandra_data_dir_base }}" - "{{ cassandra_data_dir_system }}" - "{{ cassandra_data_dir }}" - "{{ cassandra_commitlogs_dir }}" - name: Download prometheus jmx exporter get_url: url: https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.15.0/jmx_prometheus_javaagent-0.15.0.jar dest: /opt/jmx_prometheus_javaagent-0.15.0.jar - name: Download prometheus exporter configuration get_url: url: https://raw.githubusercontent.com/prometheus/jmx_exporter/master/example_configs/cassandra.yml dest: /opt/jmx_exporter.yml - name: Configure cassandra template: src: "templates/{{item}}" dest: "{{cassandra_config_dir}}/{{item}}" - with_items: [cassandra.yaml, jvm11-server.options] + with_items: [cassandra.yaml, cassandra-rackdc.properties, jvm11-server.options] register: cassandra_configuration_files - name: Restart cassandra service service: name: cassandra state: restarted when: cassandra_configuration_files.changed - name: cassandra replication configuration script template: src: templates/swhstorage/change-cassandra-replication.sh dest: /usr/local/bin/change-cassandra-replication.sh mode: 0755 # TODO test different read ahead - name: Install object count exporter copy: src: "files/cassandra/prometheus-object-count.sh" dest: "/usr/local/bin/prometheus-object-count.sh" owner: root group: root mode: "0755" - name: Send object count to prometheus cron: name: "object count" minute: "*" hour: "*" job: "/usr/local/bin/prometheus-object-count.sh > /dev/null" diff --git a/sysadmin/grid5000/cassandra/ansible/hosts.yml b/sysadmin/grid5000/cassandra/ansible/hosts.yml index 9c1fa7b..5e88884 100644 --- a/sysadmin/grid5000/cassandra/ansible/hosts.yml +++ b/sysadmin/grid5000/cassandra/ansible/hosts.yml @@ -1,219 +1,251 @@ # Global configuration 
monitoring: hosts: monitoring1: paranoia-3.rennes.grid5000.fr: parasilo-[18:19].rennes.grid5000.fr: + federated_prometheus: + - gros-124.nancy.grid5000.fr:9090 paravance-[1:9].rennes.grid5000.fr: troll-1.grenoble.grid5000.fr: + gros-[100:124].nancy.grid5000.fr: vars: ansible_connection: local install_docker_install_script: false swh-storage: hosts: parasilo-[20:28].rennes.grid5000.fr: paravance-[10:78].rennes.grid5000.fr: troll-[2:4].grenoble.grid5000.fr: # local vagrant hosts swh-storage1: vars: ansible_connection: local journal: brokers: - broker1.journal.softwareheritage.org:9093 - broker2.journal.softwareheritage.org:9093 - broker3.journal.softwareheritage.org:9093 - broker4.journal.softwareheritage.org:9093 consumer: user: swh-vse group: swh-vse-grid5000-1 replayer_count: - content: 60 + content: 10 skipped_content: 1 - directory: 60 - extid: 5 + directory: 10 + extid: 1 origin: 1 origin_visit: 1 origin_visit_status: 1 release: 1 revision: 1 snapshot: 1 # Initial backfill # content: 20 # skipped_content: 2 # directory: 20 # origin: 20 # origin_visit: 20 # origin_visit_status: 20 # release: 20 # revision: 20 # snapshot: 20 cassandra: hosts: dahu-[1:32].grenoble.grid5000.fr: parasilo-[1:17].rennes.grid5000.fr: + cassandra_datacenter: datacenter1 # local vagrant hosts cassandra[1:9]: + gros-[1:99].nancy.grid5000.fr: + cassandra_datacenter: datacenter2 + cassandra_disk_optimization: ssd vars: ansible_connection: local cassandra_config_dir: /etc/cassandra cassandra_data_dir_base: /srv/cassandra cassandra_data_dir_system: "{{cassandra_data_dir_base}}/system" cassandra_data_dir: "{{ cassandra_data_dir_base }}/data" cassandra_commitlogs_dir: "{{ cassandra_data_dir_base }}/commitlogs" - + cassandra_rack: rack1 + cassandra_memory: 16G + cassandra_disk_optimization: spinning # Per cluster specificities dahu_cluster_hosts: hosts: dahu-[1:32].grenoble.grid5000.fr: vars: cassandra_listen_interface: enp24s0f0 zfs_pools: commitlogs: disks: - sdb datasets: commitlogs: 
/srv/cassandra/commitlogs data: disks: - sdc datasets: data: /srv/cassandra/data +gros_cluster_hosts: + hosts: + gros-[1:124].nancy.grid5000.fr: + vars: + cassandra_listen_interface: eno1 + zfs_pools: + data: + disks: + - sdb + datasets: + data: /srv/cassandra + + parasilo_cluster_hosts: hosts: parasilo-[1:28].rennes.grid5000.fr: vars: cassandra_listen_interface: eno1 ## run 1/2/3 CL on SSD + data on HDDS zfs_pools: commitlogs: disks: - sdf datasets: commitlogs: /srv/cassandra/commitlogs data: disks: - sdb - sdc - sdd - sde datasets: data: /srv/cassandra/data ## run 4 CL and data on same partition # zfs_pools: # data: # disks: # - sdb # - sdc # - sdd # - sde # datasets: # data: /srv/cassandra # foo: # workaround # disks: # - sdf # datasets: # foo: /srv/foo ## run 5: CL on SSD + data on 2 HDD # zfs_pools: # commitlogs: # disks: # - sdf # datasets: # commitlogs: /srv/cassandra/commitlogs # data: # disks: # - sdc # - sdd # datasets: # data: /srv/cassandra/data ## run 6: CL on HDD + data on 2 HDD # zfs_pools: # commitlogs: # disks: # - sdb # datasets: # commitlogs: /srv/cassandra/commitlogs # data: # disks: # - sdc # - sdd # datasets: # data: /srv/cassandra/data paravance_cluster_hosts: hosts: paravance-[1:78].rennes.grid5000.fr: vars: cassandra_listen_interface: eno1 zfs_pools: - data: disks: - sdb datasets: data: /srv/cassandra # Vagrant configuration vagrant_nodes: hosts: cassandra1: ansible_host: 10.168.180.11 ansible_user: vagrant ansible_ssh_private_key_file: .vagrant/machines/cassandra1/libvirt/private_key cassandra2: ansible_host: 10.168.180.12 ansible_user: vagrant ansible_ssh_private_key_file: .vagrant/machines/cassandra2/libvirt/private_key cassandra3: ansible_host: 10.168.180.13 ansible_user: vagrant ansible_ssh_private_key_file: .vagrant/machines/cassandra3/libvirt/private_key + cassandra4: + ansible_host: 10.168.180.24 + ansible_user: vagrant + ansible_ssh_private_key_file: .vagrant/machines/cassandra4/libvirt/private_key + cassandra_datacenter: 
datacenter2 swh-storage1: ansible_host: 10.168.180.14 ansible_user: vagrant ansible_ssh_private_key_file: .vagrant/machines/swh-storage/libvirt/private_key monitoring1: ansible_host: 10.168.180.15 ansible_user: vagrant ansible_ssh_private_key_file: .vagrant/machines/monitoring/libvirt/private_key + federated_prometheus: + - monitoring2 vars: ansible_connection: ssh install_docker_install_script: true + cassandra_memory: 1G + cassandra_datacenter: datacenter1 + cassandra_rack: rack1 journal: brokers: # staging - broker0.journal.staging.swh.network:9093 consumer: user: swh-vse group: swh-vse-grid5000-1 replayer_count: content: 0 skipped_content: 0 directory: 5 origin: 0 origin_visit: 0 origin_visit_status: 0 release: 0 revision: 0 snapshot: 0 cassandra_listen_interface: eth1 # passed through --extra-vars on grid5000 cassandra_seed_ips: 10.168.180.11,10.168.180.12,10.168.180.13 nodes: 10.168.180.11,10.168.180.12,10.168.180.13,10.168.180.14,10.168.180.15,test zfs_pools: commitlogs: disks: - vdb datasets: commitlogs: /srv/cassandra/commitlogs data: disks: - vdc - vdd datasets: data: /srv/cassandra/data diff --git a/sysadmin/grid5000/cassandra/ansible/templates/cassandra-rackdc.properties b/sysadmin/grid5000/cassandra/ansible/templates/cassandra-rackdc.properties new file mode 100644 index 0000000..0c585c4 --- /dev/null +++ b/sysadmin/grid5000/cassandra/ansible/templates/cassandra-rackdc.properties @@ -0,0 +1,2 @@ +dc={{ cassandra_datacenter | default('datacenter1') }} +rack={{ cassandra_rack | default('rack1') }} \ No newline at end of file diff --git a/sysadmin/grid5000/cassandra/ansible/templates/cassandra.yaml b/sysadmin/grid5000/cassandra/ansible/templates/cassandra.yaml index 9056310..2947f3a 100644 --- a/sysadmin/grid5000/cassandra/ansible/templates/cassandra.yaml +++ b/sysadmin/grid5000/cassandra/ansible/templates/cassandra.yaml @@ -1,74 +1,74 @@ cluster_name: swh-storage # default 'Test Cluster' num_tokens: 256 # default 256 allocate_tokens_for_local_replication_factor: 3 data_file_directories: - {{ 
cassandra_data_dir }} # TODO use several disks # local_system_data_file_directory: {{ cassandra_data_dir_system }} commitlog_directory: {{ cassandra_commitlogs_dir }} -disk_optimization_strategy: spinning # spinning | ssd +disk_optimization_strategy: {{ cassandra_disk_optimization }} # spinning | ssd # listen_address: 0.0.0.0 # always wrong according to the documentation listen_interface: {{ cassandra_listen_interface }} # always wrong according to the documentation concurrent_compactors: 4 # should be min(nb core, nb disks) internode_compression: dc # default dc possible all|dc|none concurrent_reads: 64 # 16 x number of drives concurrent_writes: 128 # 8 x number of cores concurrent_counter_writes: 48 commitlog_sync: periodic # default periodic commitlog_sync_period_in_ms: 10000 # default 10000 commitlog_total_space_in_mb: 16384 # default 8192 commitlog_segment_size_in_mb: 256 # default 32 (due to oversize mutation on revision table) partitioner: org.apache.cassandra.dht.Murmur3Partitioner -endpoint_snitch: SimpleSnitch +endpoint_snitch: GossipingPropertyFileSnitch seed_provider: - class_name: org.apache.cassandra.locator.SimpleSeedProvider parameters: # seeds is actually a comma-delimited list of addresses. # Ex: ",," - seeds: "{{ cassandra_seed_ips }}" # needed by swh-storage enable_user_defined_functions: true # TODO Test this options effects # disk_failure_policy: # cdc_enabled #end # Trying to reduce cassandra_compaction_pendingtasks compaction_throughput_mb_per_sec: 160 # https://forge.softwareheritage.org/source/cassandra-replayer-deployment/browse/master/playbooks/templates/cassandra.yaml$854 # How long the coordinator should wait for read operations to complete. # Lowest acceptable value is 10 ms. read_request_timeout_in_ms: 5000 # How long the coordinator should wait for seq or index scans to complete. # Lowest acceptable value is 10 ms. range_request_timeout_in_ms: 10000 # How long the coordinator should wait for writes to complete. 
# Lowest acceptable value is 10 ms. write_request_timeout_in_ms: 2000 # How long the coordinator should wait for counter writes to complete. # Lowest acceptable value is 10 ms. counter_write_request_timeout_in_ms: 5000 # How long a coordinator should continue to retry a CAS operation # that contends with other proposals for the same row. # Lowest acceptable value is 10 ms. cas_contention_timeout_in_ms: 1000 # How long the coordinator should wait for truncates to complete # (This can be much longer, because unless auto_snapshot is disabled # we need to flush first so we can snapshot before removing the data.) # Lowest acceptable value is 10 ms. truncate_request_timeout_in_ms: 60000 # The default timeout for other, miscellaneous operations. # Lowest acceptable value is 10 ms. request_timeout_in_ms: 10000 slow_query_log_timeout_in_ms: 1000 diff --git a/sysadmin/grid5000/cassandra/ansible/templates/jvm11-server.options b/sysadmin/grid5000/cassandra/ansible/templates/jvm11-server.options index 16aa304..ed95da5 100644 --- a/sysadmin/grid5000/cassandra/ansible/templates/jvm11-server.options +++ b/sysadmin/grid5000/cassandra/ansible/templates/jvm11-server.options @@ -1,106 +1,106 @@ ########################################################################### # jvm11-server.options # # # # See jvm-server.options. This file is specific for Java 11 and newer. 
# ########################################################################### ################# # GC SETTINGS # ################# --Xmx16G --Xms16G +-Xmx{{ cassandra_memory }} +-Xms{{ cassandra_memory }} ### CMS Settings -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=1 -XX:CMSInitiatingOccupancyFraction=75 -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSWaitDuration=10000 -XX:+CMSParallelInitialMarkEnabled -XX:+CMSEdenChunksRecordAlways ## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 -XX:+CMSClassUnloadingEnabled ### G1 Settings ## Use the Hotspot garbage-first collector. #-XX:+UseG1GC #-XX:+ParallelRefProcEnabled # ## Have the JVM do less remembered set work during STW, instead ## preferring concurrent GC. Reduces p99.9 latency. #-XX:G1RSetUpdatingPauseTimePercent=5 # ## Main G1GC tunable: lowering the pause target will lower throughput and vise versa. ## 200ms is the JVM default and lowest viable setting ## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. #-XX:MaxGCPauseMillis=500 ## Optional G1 Settings # Save CPU time on large (>= 16GB) heaps by delaying region scanning # until the heap is 70% full. The default in Hotspot 8u40 is 40%. #-XX:InitiatingHeapOccupancyPercent=70 # For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. # Otherwise equal to the number of cores when 8 or less. # Machines with > 10 cores should try setting these to <= full cores. #-XX:ParallelGCThreads=16 # By default, ConcGCThreads is 1/4 of ParallelGCThreads. # Setting both to the same value can reduce STW durations. 
#-XX:ConcGCThreads=16 ### JPMS -Djdk.attach.allowAttachSelf=true --add-exports java.base/jdk.internal.misc=ALL-UNNAMED --add-exports java.base/jdk.internal.ref=ALL-UNNAMED --add-exports java.base/sun.nio.ch=ALL-UNNAMED --add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED --add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED --add-exports java.rmi/sun.rmi.server=ALL-UNNAMED --add-exports java.sql/java.sql=ALL-UNNAMED --add-opens java.base/java.lang.module=ALL-UNNAMED --add-opens java.base/jdk.internal.loader=ALL-UNNAMED --add-opens java.base/jdk.internal.ref=ALL-UNNAMED --add-opens java.base/jdk.internal.reflect=ALL-UNNAMED --add-opens java.base/jdk.internal.math=ALL-UNNAMED --add-opens java.base/jdk.internal.module=ALL-UNNAMED --add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED --add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED ### GC logging options -- uncomment to enable # Java 11 (and newer) GC logging options: # See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax # The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M #-Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760 # Notes for Java 8 migration: # # -XX:+PrintGCDetails maps to -Xlog:gc*:... - i.e. 
add a '*' after "gc" # -XX:+PrintGCDateStamps maps to decorator 'time' # # -XX:+PrintHeapAtGC maps to 'heap' with level 'trace' # -XX:+PrintTenuringDistribution maps to 'age' with level 'debug' # -XX:+PrintGCApplicationStoppedTime maps to 'safepoint' with level 'info' # -XX:+PrintPromotionFailure maps to 'promotion' with level 'trace' # -XX:PrintFLSStatistics=1 maps to 'freelist' with level 'trace' ### Netty Options # On Java >= 9 Netty requires the io.netty.tryReflectionSetAccessible system property to be set to true to enable # creation of direct buffers using Unsafe. Without it, this falls back to ByteBuffer.allocateDirect which has # inferior performance and risks exceeding MaxDirectMemory -Dio.netty.tryReflectionSetAccessible=true -javaagent:/opt/jmx_prometheus_javaagent-0.15.0.jar=7070:/opt/jmx_exporter.yml # The newline in the end of file is intentional diff --git a/sysadmin/grid5000/cassandra/ansible/templates/prometheus/prometheus.yml b/sysadmin/grid5000/cassandra/ansible/templates/prometheus/prometheus.yml index d63724b..9825b95 100644 --- a/sysadmin/grid5000/cassandra/ansible/templates/prometheus/prometheus.yml +++ b/sysadmin/grid5000/cassandra/ansible/templates/prometheus/prometheus.yml @@ -1,54 +1,73 @@ # my global config global: scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. # scrape_timeout is set to the global default (10s). # Alertmanager configuration alerting: alertmanagers: - static_configs: - targets: # - alertmanager:9093 # Load rules once and periodically evaluate them according to the global 'evaluation_interval'. rule_files: # - "first_rules.yml" # - "second_rules.yml" # A scrape configuration containing exactly one endpoint to scrape: # Here it's Prometheus itself. scrape_configs: # The job name is added as a label `job=` to any timeseries scraped from this config. 
- job_name: 'prometheus' # metrics_path defaults to '/metrics' # scheme defaults to 'http'. static_configs: - targets: ['localhost:9090'] - job_name: 'cassandra exporter' static_configs: - targets: [ {% for cassandra in cassandra_seed_ips.split(',') %} '{{cassandra}}:7070', {% endfor %} ] - job_name: 'Node exporter' static_configs: - targets: [ {% for node in nodes.split(',') %} '{{node}}:9100', {% endfor %} ] - job_name: 'Statsd exporter' static_configs: - targets: [ {% for node in nodes.split(',') %} '{{node}}:9102', {% endfor %} ] + +{% if federated_prometheus is defined %} + - job_name: 'federate' + scrape_interval: 15s + + honor_labels: true + metrics_path: '/federate' + + params: + 'match[]': + - '{__name__=~".*"}' + + static_configs: + - targets: [ +{% for node in federated_prometheus %} + '{{node}}', +{% endfor %} + ] +{% endif %} diff --git a/sysadmin/grid5000/cassandra/ansible/zfs.yml b/sysadmin/grid5000/cassandra/ansible/zfs.yml index 8861957..fe73581 100644 --- a/sysadmin/grid5000/cassandra/ansible/zfs.yml +++ b/sysadmin/grid5000/cassandra/ansible/zfs.yml @@ -1,73 +1,77 @@ --- - name: Install contrib and non-free repositories apt_repository: repo: deb http://deb.debian.org/debian/ buster-backports main contrib non-free filename: backports.sources +- name: Install contrib + apt_repository: + repo: deb http://deb.debian.org/debian/ buster contrib non-free + filename: buster-contrib.sources - name: Install zfs packages apt: update_cache: true # force an apt update before name: - linux-image-amd64 - linux-headers-amd64 - libnvpair1linux - libuutil1linux - libzfs2linux - libzpool2linux - zfs-dkms - zfs-zed - zfsutils-linux ignore_errors: True - name: Ensure zfs initialized shell: /usr/sbin/modprobe zfs - name: Finalize zfs packages installation apt: update_cache: true # force an apt update before name: - linux-image-amd64 - linux-headers-amd64 - libnvpair1linux - libuutil1linux - libzfs2linux - libzpool2linux - zfs-dkms - zfs-zed - zfsutils-linux - name: 
Install zfs packages after modprobe apt: update_cache: true # force an apt update before name: - zfsutils-linux - zfs-zed - name: Remove possible old lvm volumes - shell: vgscan | awk '{print $4}'| grep -v volume | xargs -r -n1 vgremove -y + shell: vgscan | awk '{print $4}'| grep -v -e volume -e swh-debian | xargs -r -n1 vgremove -y - name: test if zfs pools are configured command: zfs list -o name register: pool_list - name: zfs pool import shell: "zpool import {{ item.key }}" loop: "{{ lookup('dict', zfs_pools) }}" when: item.key not in pool_list.stdout_lines ignore_errors: True - name: test if zfs pools are configured command: zfs list -o name register: pool_list - name: create zfs pools shell: "zpool create -f {{ item.key }} {{ item.value.disks | join(' ') }}" loop: "{{ lookup('dict', zfs_pools, wantlist=True) }}" when: item.key not in pool_list.stdout_lines - name: call dataset creation include: _zfs_create_dataset.yml obj={{ outside_item }} loop: "{{ lookup('dict', zfs_pools, wantlist=True) }}" loop_control: loop_var: outside_item when: outside_item.key not in pool_list.stdout_lines diff --git a/sysadmin/grid5000/cassandra/environment-gros.cfg b/sysadmin/grid5000/cassandra/environment-gros.cfg new file mode 100644 index 0000000..c7bf3bb --- /dev/null +++ b/sysadmin/grid5000/cassandra/environment-gros.cfg @@ -0,0 +1,20 @@ +export G5K_SITE='nancy.grid5000.fr' + +export BEST_EFFORT_CLUSTER='gros' + +export CASSANDRA_HOSTS="gros-50 gros-51 gros-52 gros-53 gros-54 gros-55 gros-56 gros-57 gros-58 gros-59" +export CASSANDRA_DISKS_COUNT=1 +export STORAGE_HOSTS="" +export JOURNAL_CLIENT_HOSTS="" +export MONITORING_HOSTS="gros-124" + +export CASSANDRA_NB_DISK_PER_NODE=1 +export RESERVATION_DATE="2021-08-25 19:01" +export DISK_RESERVATION_DURATION=$((7 * 24)) # 1day during tests +export NODE_RESERVATION_DURATION=12:50 # night + +export CASSANDRA_REPLICATION_FACTOR=3 + +export SSH_USER=root +# export SSH_OPTIONS="-o UserKnownHostsFile=/dev/null -o 
StrictHostKeyChecking=no" +export SSH_OPTIONS="" diff --git a/sysadmin/grid5000/cassandra/monitoring/grafana/dashboards/cassandra_dashboard.json b/sysadmin/grid5000/cassandra/monitoring/grafana/dashboards/cassandra_dashboard.json index 4eeef2f..55f9c93 100644 --- a/sysadmin/grid5000/cassandra/monitoring/grafana/dashboards/cassandra_dashboard.json +++ b/sysadmin/grid5000/cassandra/monitoring/grafana/dashboards/cassandra_dashboard.json @@ -1,1212 +1,1064 @@ { "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "limit": 100, "matchAny": true, "name": "Annotations & Alerts", "tags": [ "configuration", "admin", "run" ], "type": "tags" } ] }, "editable": true, "gnetId": null, "graphTooltip": 1, "id": 6, + "iteration": 1629968819671, "links": [], "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 0 }, "hiddenSeries": false, "id": 6, "legend": { "alignAsTable": true, "avg": false, "current": false, "max": true, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "cassandra_client_connectednativeclients", + "expr": "cassandra_client_connectednativeclients{instance=~\"${cluster}.*\"}", "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Client connections", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] 
}, "yaxes": [ { "$$hashKey": "object:131", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:132", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 9 }, "hiddenSeries": false, "id": 12, "legend": { "alignAsTable": true, "avg": false, "current": false, "max": true, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "increase(cassandra_clientrequest_timeouts_count[$__rate_interval]) >0", + "expr": "increase(cassandra_clientrequest_timeouts_count{instance=~\"${cluster}.*\"}[$__rate_interval]) >0", "interval": "", "legendFormat": "{{instance}} - {{clientrequest}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Client request timeout", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:131", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:132", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 0 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ 
"A", "5m", "now" ] }, "reducer": { "params": [], "type": "last" }, "type": "query" } ], "executionErrorState": "alerting", "for": "5m", "frequency": "1m", "handler": 1, "name": "Oversized mutations alert", "noDataState": "no_data", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 18 }, "hiddenSeries": false, "id": 11, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "cassandra_commitlog_oversizedmutations_count", + "expr": "cassandra_commitlog_oversizedmutations_count{instance=~\"${cluster}.*\"}", "interval": "", "legendFormat": "{{instance}} - {{clientrequest}}", "refId": "A" } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0, "visible": true } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Oversized mutations", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:131", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:132", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 
27 }, "hiddenSeries": false, "id": 4, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(cassandra_keyspace_totaldiskspaceused) by (instance)", + "expr": "sum(cassandra_keyspace_totaldiskspaceused{instance=~\"${cluster}.*\"}) by (instance)", "interval": "", "legendFormat": "{{instance}} - {{clientrequest}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Data occupation per node", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:131", "format": "bytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:132", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 36 }, "hiddenSeries": false, "id": 5, "legend": { "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, 
"expr": "cassandra_commitlog_totalcommitlogsize", "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Commitlog size per node", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:131", "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:132", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 45 }, "hiddenSeries": false, "id": 8, "legend": { "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "cassandra_storage_totalhintsinprogress_count", + "expr": "cassandra_storage_totalhintsinprogress_count{instance=~\"${cluster}.*\"}", "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Pending Hints", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:131", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:132", 
"format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 45 }, "hiddenSeries": false, "id": 9, "legend": { "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "cassandra_storage_totalhints_count", + "expr": "cassandra_storage_totalhints_count{instance=~\"${cluster}.*\"}", "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Total hints", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:131", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:132", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 54 }, "hiddenSeries": false, "id": 7, "legend": { "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, 
"percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "cassandra_table_pendingcompactions{keyspace=''}", + "expr": "cassandra_table_pendingcompactions{keyspace='', instance=~\"${cluster}.*\"}", "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Pending Compaction", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:131", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:132", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 63 }, "hiddenSeries": false, "id": 2, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "cassandra_table_maxpartitionsize{keyspace=\"\"} > 0", + "expr": "cassandra_table_maxpartitionsize{keyspace=\"\",instance=~\"${cluster}.*\"} > 0", "interval": "", "legendFormat": "", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "partition size", "tooltip": { "shared": true, "sort": 0, 
"value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:423", "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:424", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 72 }, "hiddenSeries": false, "id": 10, "legend": { "alignAsTable": false, "avg": false, "current": true, "max": false, "min": false, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "cassandra_table_repairjobsstarted - cassandra_table_repairjobscompleted", + "expr": "cassandra_table_repairjobsstarted{instance=~\"${cluster}.*\"} - cassandra_table_repairjobscompleted{instance=~\"${cluster}.*\"}", "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Repair job", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:423", "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:424", "format": "none", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } - }, - { - "aliasColors": {}, - "bars": 
false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 81 - }, - "hiddenSeries": false, - "id": 13, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.6", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "cassandra_table_livesstablecount{keyspace=\"\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Number of ssttables per node", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:423", - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:424", - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 81 - }, - "hiddenSeries": false, - "id": 14, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - 
"lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.6", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sort(cassandra_table_livesstablecount{keyspace!=\"\"})", - "interval": "", - "legendFormat": "{{instance}} - {{keyspacE}} - {{table}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "SST table per host and table", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:423", - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:424", - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } } ], "refresh": "10s", "schemaVersion": 30, "style": "dark", "tags": [], "templating": { - "list": [] + "list": [ + { + "allValue": null, + "current": { + "selected": false, + "text": [ + "parasilo" + ], + "value": [ + "parasilo" + ] + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "cluster", + "options": [ + { + "selected": false, + "text": "All", + "value": "$__all" + }, + { + "selected": true, + "text": "parasilo", + "value": "parasilo" + }, + { + "selected": false, + "text": "gros", + "value": "gros" + } + ], + "query": "parasilo,gros", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] }, "time": { - "from": "now-7d", + "from": "now-24h", "to": "now" }, "timepicker": {}, "timezone": "", "title": 
"Cassandra", "uid": "ta3ruAgnk", - "version": 32 -} + "version": 34 +} \ No newline at end of file diff --git a/sysadmin/grid5000/cassandra/monitoring/grafana/dashboards/system_dashboard.json b/sysadmin/grid5000/cassandra/monitoring/grafana/dashboards/system_dashboard.json index 5ff9fc2..d8439d8 100644 --- a/sysadmin/grid5000/cassandra/monitoring/grafana/dashboards/system_dashboard.json +++ b/sysadmin/grid5000/cassandra/monitoring/grafana/dashboards/system_dashboard.json @@ -1,956 +1,957 @@ { "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "limit": 100, "matchAny": true, "name": "Annotations & Alerts", "showIn": 0, "tags": [ "configuration", "admin", "run" ], "type": "tags" } ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "id": 2, - "iteration": 1628630431997, + "iteration": 1629414615884, "links": [], "panels": [ { "collapsed": false, "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 4, "panels": [], "repeat": "nodes", "title": "$nodes", "type": "row" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 9, "x": 0, "y": 1 }, "hiddenSeries": false, "id": 2, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, "expr": "node_load1{instance=~\"$nodes\"}", "interval": "", "legendFormat": "Load1", "refId": "A" }, { "exemplar": true, "expr": "node_load5{instance=~\"$nodes\"}", "hide": false, "interval": "", "legendFormat": "Load5", "refId": "B" }, { "exemplar": true, "expr": 
"node_load15{instance=~\"$nodes\"}", "hide": false, "interval": "", "legendFormat": "Load15", "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Load1", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:245", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:246", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 9, "x": 9, "y": 1 }, "hiddenSeries": false, "id": 39, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, "expr": "avg without (cpu)(irate(node_cpu_seconds_total{instance=~\"$nodes\",mode!=\"idle\"}[5m]))", "interval": "", "legendFormat": "{{mode}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "CPU", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:101", "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:102", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], 
"yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 9, "x": 0, "y": 10 }, "hiddenSeries": false, "id": 56, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "exemplar": true, "expr": "node_memory_MemTotal_bytes{instance=~\"$nodes\"} - node_memory_MemFree_bytes{instance=~\"$nodes\"} - node_memory_Buffers_bytes{instance=~\"$nodes\"} - node_memory_Cached_bytes{instance=~\"$nodes\"}", "interval": "", "intervalFactor": 1, "legendFormat": "Used", "refId": "A" }, { "exemplar": true, "expr": "node_memory_Buffers_bytes{instance=~'$nodes'}", "hide": false, "interval": "", "legendFormat": "Buffers", "refId": "B" }, { "exemplar": true, "expr": "node_memory_Cached_bytes{instance=~'$nodes'}", "hide": false, "interval": "", "legendFormat": "Cached", "refId": "C" }, { "exemplar": true, "expr": "node_memory_MemFree_bytes{instance=~'$nodes'}", "hide": false, "interval": "", "legendFormat": "Free", "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Memory", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:101", "format": "bytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:102", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": 
false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 9, "x": 9, "y": 10 }, "hiddenSeries": false, "id": 107, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [ { "$$hashKey": "object:131", "alias": "/.*out.*/", "transform": "negative-Y" } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, "expr": "irate(node_network_receive_bytes_total{instance=~\"$nodes\"}[5m]) * 8", "interval": "", "legendFormat": "bytes per second in - {{device}}", "refId": "A" }, { "exemplar": true, "expr": "irate(node_network_transmit_bytes_total{instance=~\"$nodes\"}[5m]) * 8", "hide": false, "interval": "", "legendFormat": "bytes per second out - {{device}}", "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Network", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:101", "format": "bits", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:102", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 9, "x": 0, "y": 19 }, "hiddenSeries": false, "id": 26, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": 
false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, "expr": "irate(node_disk_io_time_seconds_total{instance=~\"$nodes\"}[5m])", "interval": "", "legendFormat": "{{device}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Disk IO", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:101", "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:102", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 9, "x": 9, "y": 19 }, "hiddenSeries": false, "id": 168, "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [ { "$$hashKey": "object:709", "alias": "/Write.*/", "transform": "negative-Y" } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, "expr": "irate(node_disk_read_bytes_total{instance=~\"$nodes\"}[1m])", "interval": "", "legendFormat": "Read - {{device}}", "refId": "A" }, { "exemplar": true, "expr": 
"irate(node_disk_written_bytes_total{instance=~\"$nodes\"}[1m])", "hide": false, "interval": "", "legendFormat": "Write - {{device}}", "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Disk Read/Write", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:101", "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:102", "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "datasource": null, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], + "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "bytes" }, "overrides": [ { "matcher": { - "id": "byName", - "options": "node_filesystem_size_bytes{device=\"data/data\", fstype=\"zfs\", instance=\"parasilo-2.rennes.grid5000.fr:9100\", job=\"Node exporter\", mountpoint=\"/srv/cassandra/data\"}" + "id": "byRegexp", + "options": "/node_filesystem_size_bytes/" }, "properties": [ - { - "id": "custom.fillOpacity", - "value": 30 - }, { "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } + }, + { + "id": "custom.fillOpacity", + "value": 43 } ] } ] }, "gridPos": { "h": 7, "w": 9, "x": 
0, "y": 28 }, "id": 198, "options": { "legend": { "calcs": [], "displayMode": "hidden", "placement": "bottom" }, "tooltip": { "mode": "single" } }, "targets": [ { "exemplar": true, "expr": "node_filesystem_size_bytes{instance=~\"${nodes}\", mountpoint=\"/srv/cassandra/data\"} - node_filesystem_free_bytes{instance=~\"${nodes}\", mountpoint=\"/srv/cassandra/data\"}", "interval": "", "legendFormat": "", "refId": "A" }, { "exemplar": true, "expr": "node_filesystem_size_bytes{instance=~\"${nodes}\", mountpoint=\"/srv/cassandra/data\"}", "hide": false, "interval": "", "legendFormat": "", "refId": "B" } ], "title": "Cassandra data", "type": "timeseries" }, { "datasource": null, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "bytes" }, "overrides": [ { "matcher": { - "id": "byName", - "options": "node_filesystem_size_bytes{device=\"commitlogs/commitlogs\", fstype=\"zfs\", instance=\"parasilo-2.rennes.grid5000.fr:9100\", job=\"Node exporter\", mountpoint=\"/srv/cassandra/commitlogs\"}" + "id": "byRegexp", + "options": "/node_filesystem_size_bytes/" }, "properties": [ - { - "id": "custom.fillOpacity", - "value": 30 - }, { "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } + }, + { + "id": "custom.fillOpacity", + "value": 42 } ] } ] }, "gridPos": { "h": 7, "w": 9, "x": 9, "y": 28 }, "id": 231, "options": { "legend": { "calcs": [], 
"displayMode": "hidden", "placement": "bottom" }, "tooltip": { "mode": "single" } }, "targets": [ { "exemplar": true, "expr": "node_filesystem_size_bytes{instance=~\"${nodes}\", mountpoint=\"/srv/cassandra/commitlogs\"} - node_filesystem_free_bytes{instance=~\"${nodes}\", mountpoint=\"/srv/cassandra/commitlogs\"}", "interval": "", "legendFormat": "", "refId": "A" }, { "exemplar": true, "expr": "node_filesystem_size_bytes{instance=~\"${nodes}\", mountpoint=\"/srv/cassandra/commitlogs\"}", "hide": false, "interval": "", "legendFormat": "", "refId": "B" } ], "title": "Cassandra commitlogs", "type": "timeseries" } ], "refresh": "10s", "schemaVersion": 30, "style": "dark", "tags": [], "templating": { "list": [ { "allValue": null, "current": { - "selected": false, + "selected": true, "text": [ "parasilo-2.rennes.grid5000.fr:9100", "parasilo-3.rennes.grid5000.fr:9100", "parasilo-4.rennes.grid5000.fr:9100", "parasilo-5.rennes.grid5000.fr:9100", "parasilo-6.rennes.grid5000.fr:9100" ], "value": [ "parasilo-2.rennes.grid5000.fr:9100", "parasilo-3.rennes.grid5000.fr:9100", "parasilo-4.rennes.grid5000.fr:9100", "parasilo-5.rennes.grid5000.fr:9100", "parasilo-6.rennes.grid5000.fr:9100" ] }, "datasource": null, "definition": "label_values(node_load1, instance)", "description": null, "error": null, "hide": 0, "includeAll": true, "label": "Node", "multi": true, "name": "nodes", "options": [], "query": { "query": "label_values(node_load1, instance)", "refId": "StandardVariableQuery" }, "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { - "from": "now-1h", + "from": "now-3h", "to": "now" }, "timepicker": {}, "timezone": "", "title": "System", "uid": "i-x02ognk", - "version": 22 -} + "version": 26 +} \ No newline at end of file