diff --git a/sysadmin/grid5000/cassandra/ansible/templates/cassandra.yaml b/sysadmin/grid5000/cassandra/ansible/templates/cassandra.yaml index 9353ea7..0c3d5b3 100644 --- a/sysadmin/grid5000/cassandra/ansible/templates/cassandra.yaml +++ b/sysadmin/grid5000/cassandra/ansible/templates/cassandra.yaml @@ -1,73 +1,74 @@ cluster_name: swh-storage # default 'Test Cluster' num_tokens: 256 # default 256 allocate_tokens_for_local_replication_factor: 3 data_file_directories: - {{ cassandra_data_dir }} # TODO use several disks # local_system_data_file_directory: {{ cassandra_data_dir_system }} commitlog_directory: {{ cassandra_commitlogs_dir }} disk_optimization_strategy: spinning # spinning | ssd # listen_address: 0.0.0.0 # always wrong according to the documentation listen_interface: {{ cassandra_listen_interface }} # always wrong according to the documentation concurrent_compactors: 4 # should be min(nb core, nb disks) internode_compression: dc # default dc possible all|dc|none concurrent_reads: 64 # 16 x number of drives concurrent_writes: 128 # 8 x number of cores concurrent_counter_writes: 48 commitlog_sync: periodic # default periodic commitlog_sync_period_in_ms: 10000 # default 10000 commitlog_total_space_in_mb: 16384 # default 8192 +commitlog_segment_size_in_mb: 64 # default 32; raised to 64 because of oversized mutations on the revision table partitioner: org.apache.cassandra.dht.Murmur3Partitioner endpoint_snitch: SimpleSnitch seed_provider: - class_name: org.apache.cassandra.locator.SimpleSeedProvider parameters: # seeds is actually a comma-delimited list of addresses.
# Ex: "<ip1>,<ip2>,<ip3>" - seeds: "{{ cassandra_seed_ips }}" # needed by swh-storage enable_user_defined_functions: true # TODO test the effects of these options # disk_failure_policy: # cdc_enabled #end # Trying to reduce cassandra_compaction_pendingtasks compaction_throughput_mb_per_sec: 160 # https://forge.softwareheritage.org/source/cassandra-replayer-deployment/browse/master/playbooks/templates/cassandra.yaml$854 # How long the coordinator should wait for read operations to complete. # Lowest acceptable value is 10 ms. read_request_timeout_in_ms: 5000 # How long the coordinator should wait for seq or index scans to complete. # Lowest acceptable value is 10 ms. range_request_timeout_in_ms: 10000 # How long the coordinator should wait for writes to complete. # Lowest acceptable value is 10 ms. write_request_timeout_in_ms: 2000 # How long the coordinator should wait for counter writes to complete. # Lowest acceptable value is 10 ms. counter_write_request_timeout_in_ms: 5000 # How long a coordinator should continue to retry a CAS operation # that contends with other proposals for the same row. # Lowest acceptable value is 10 ms. cas_contention_timeout_in_ms: 1000 # How long the coordinator should wait for truncates to complete # (This can be much longer, because unless auto_snapshot is disabled # we need to flush first so we can snapshot before removing the data.) # Lowest acceptable value is 10 ms. truncate_request_timeout_in_ms: 60000 # The default timeout for other, miscellaneous operations. # Lowest acceptable value is 10 ms.
request_timeout_in_ms: 10000 slow_query_log_timeout_in_ms: 1000 diff --git a/sysadmin/grid5000/cassandra/monitoring/grafana/dashboards/cassandra_dashboard.json b/sysadmin/grid5000/cassandra/monitoring/grafana/dashboards/cassandra_dashboard.json index 8e07dab..af606d6 100644 --- a/sysadmin/grid5000/cassandra/monitoring/grafana/dashboards/cassandra_dashboard.json +++ b/sysadmin/grid5000/cassandra/monitoring/grafana/dashboards/cassandra_dashboard.json @@ -1,790 +1,928 @@ { "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "limit": 100, "matchAny": true, "name": "Annotations & Alerts", "tags": [ "configuration", "admin", "run" ], "type": "tags" } ] }, "editable": true, "gnetId": null, "graphTooltip": 1, "id": 6, "links": [], "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 0 }, "hiddenSeries": false, "id": 6, "legend": { "alignAsTable": true, "avg": false, "current": false, "max": true, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, "expr": "increase(cassandra_clientrequest_timeouts_count[$__rate_interval]) >0", "interval": "", "legendFormat": "{{instance}} - {{clientrequest}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Client request timeout", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:131", 
"format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:132", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Oversized mutations alert", + "noDataState": "no_data", + "notifications": [] + }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 9 }, "hiddenSeries": false, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "cassandra_commitlog_oversizedmutations_count", + "interval": "", + "legendFormat": "{{instance}} - {{clientrequest}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Oversized mutations", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": 
"time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:131", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:132", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 18 + }, + "hiddenSeries": false, "id": 4, "legend": { "alignAsTable": true, "avg": false, - "current": false, + "current": true, "max": true, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "exemplar": true, "expr": "sum(cassandra_keyspace_totaldiskspaceused) by (instance)", "interval": "", "legendFormat": "{{instance}} - {{clientrequest}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Data occupation per node", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:131", "format": "bytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:132", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, 
"gridPos": { "h": 9, "w": 24, "x": 0, - "y": 18 + "y": 27 }, "hiddenSeries": false, "id": 5, "legend": { "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, "expr": "cassandra_commitlog_totalcommitlogsize", "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Commitlog size per node", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:131", "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:132", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, - "y": 27 + "y": 36 }, "hiddenSeries": false, "id": 8, "legend": { "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, "expr": "cassandra_storage_totalhintsinprogress_count", "interval": "", "legendFormat": 
"{{instance}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Pending Hints", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:131", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:132", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, - "y": 27 + "y": 36 }, "hiddenSeries": false, "id": 9, "legend": { "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, "expr": "cassandra_storage_totalhints_count", "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Total hints", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:131", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:132", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, 
"dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, - "y": 36 + "y": 45 }, "hiddenSeries": false, "id": 7, "legend": { "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, "expr": "cassandra_table_pendingcompactions{keyspace=''}", "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Pending Compaction", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:131", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:132", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, - "y": 45 + "y": 54 }, "hiddenSeries": false, "id": 2, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, "expr": 
"cassandra_table_maxpartitionsize{keyspace=\"\"} > 0", "interval": "", "legendFormat": "", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "partition size", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:423", "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:424", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, - "y": 54 + "y": 63 }, "hiddenSeries": false, "id": 10, "legend": { "alignAsTable": false, "avg": false, "current": true, "max": false, "min": false, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.0.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, "expr": "cassandra_table_repairjobsstarted - cassandra_table_repairjobscompleted", "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Repair job", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:423", "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:424", "format": "none", "label": null, "logBase": 1, "max": null, 
"min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } } ], "refresh": "10s", "schemaVersion": 30, "style": "dark", "tags": [], "templating": { "list": [] }, "time": { "from": "now-3h", "to": "now" }, "timepicker": {}, "timezone": "", "title": "Cassandra", "uid": "ta3ruAgnk", - "version": 17 + "version": 22 }