[stable/2023.1] Enhance `MySQLDown` alert (#2290)
This is an automated cherry-pick of #2186
/assign mnaser
diff --git a/roles/kube_prometheus_stack/files/jsonnet/mixins.libsonnet b/roles/kube_prometheus_stack/files/jsonnet/mixins.libsonnet
index 2b876b7..3a31daa 100644
--- a/roles/kube_prometheus_stack/files/jsonnet/mixins.libsonnet
+++ b/roles/kube_prometheus_stack/files/jsonnet/mixins.libsonnet
@@ -16,6 +16,10 @@
// * Dropped `CephPGImbalance`
// the balancer module takes care of this
'CephPGImbalance',
+
+ // * Dropped `MySQLDown` because it fires noisily even when the
+ // number of healthy replicas is still above the minimum
+ 'MySQLDown',
];
// NOTE(mnaser): This is the default mapping for severities:
@@ -141,6 +145,36 @@
severity: 'warning',
},
},
+ {  // Per-replica rule: one alert for each `mysql_up` series whose value is 0.
+ alert: 'MysqlClusterDown',  // name shared with the two cluster-wide rules below; they differ by severity
+ 'for': '5m',  // the condition must hold for 5 minutes before the alert fires
+ expr: 'mysql_up == 0',  // the comparison keeps the matching series and its labels
+ labels: { severity: 'info' },
+ annotations: {
+ summary: 'Percona XtraDB Cluster replica is down',
+ description: "{{ $labels.instance }} replica is down.",  // rendered from the series' `instance` label
+ },
+ },
+ {  // Cluster-wide rule: fires when at most half of the `mysql_up` series report 1.
+ alert: 'MysqlClusterDown',
+ 'for': '5m',  // NOTE(review): the expression is empty when no replica is up, so the all-down case relies on the critical rule
+ expr: 'round(count(mysql_up==1) / count(mysql_up) * 100) <= 50',  // rounded percentage of series with value 1
+ labels: { severity: 'warning' },
+ annotations: {
+ summary: 'Percona XtraDB Cluster replicas are down',
+ description: "{{ $value }}% of replicas are online.",  // $value is the percentage computed by `expr`
+ },
+ },
+ {  // Cluster-wide rule: fires when every `mysql_up` series reports 0.
+ alert: 'MysqlClusterDown',
+ 'for': '1m',  // shorter hold time than the 5m used by the info and warning rules
+ expr: 'count(mysql_up==0) == count(mysql_up)',  // number of down series equals the total number of series
+ labels: { severity: 'critical' },
+ annotations: {
+ summary: 'Percona XtraDB Cluster is down',
+ description: "All replicas are down.",
+ },
+ },
],
},
],
diff --git a/roles/kube_prometheus_stack/files/jsonnet/tests.yml b/roles/kube_prometheus_stack/files/jsonnet/tests.yml
index 4775bb2..6ed198d 100644
--- a/roles/kube_prometheus_stack/files/jsonnet/tests.yml
+++ b/roles/kube_prometheus_stack/files/jsonnet/tests.yml
@@ -105,3 +105,86 @@
- eval_time: 5m
alertname: NodeTimeSkewDetected
exp_alerts: []
+
+ - interval: 1m  # healthy cluster: all three replicas report up, so no alert may fire
+ input_series:
+ - series: 'mysql_up{instance="percona-xtradb-pxc-0", job="pxc"}'
+ values: '1+0x5'  # one sample per minute from 0m through 5m
+ - series: 'mysql_up{instance="percona-xtradb-pxc-1", job="pxc"}'
+ values: '1+0x5'
+ - series: 'mysql_up{instance="percona-xtradb-pxc-2", job="pxc"}'
+ values: '1+0x5'
+ alert_rule_test:
+ - eval_time: 5m  # longest `for` of the MysqlClusterDown rules; at 1m the 5m rules could only be pending
+ alertname: MysqlClusterDown
+ exp_alerts: []
+
+ - interval: 1m  # one of three replicas down: only the per-replica info alert is expected
+ input_series:
+ - series: 'mysql_up{instance="percona-xtradb-pxc-0", job="pxc"}'
+ values: '1+0x5'  # explicit samples 0m..5m; a lone t=0 sample sits on the lookback boundary at 5m
+ - series: 'mysql_up{instance="percona-xtradb-pxc-1", job="pxc"}'
+ values: '1+0x5'
+ - series: 'mysql_up{instance="percona-xtradb-pxc-2", job="pxc"}'
+ values: '0+0x5'
+ alert_rule_test:
+ - eval_time: 5m  # matches the `for: 5m` of the info rule
+ alertname: MysqlClusterDown
+ exp_alerts:
+ - exp_labels:
+ severity: P5
+ instance: percona-xtradb-pxc-2
+ job: pxc
+ exp_annotations:
+ summary: Percona XtraDB Cluster replica is down
+ description: percona-xtradb-pxc-2 replica is down.
+
+ - interval: 1m  # two of three replicas down: the warning alert plus one info alert per down replica
+ input_series:
+ - series: 'mysql_up{instance="percona-xtradb-pxc-0", job="pxc"}'
+ values: '1+0x5'  # explicit samples 0m..5m; a lone t=0 sample sits on the lookback boundary at 5m
+ - series: 'mysql_up{instance="percona-xtradb-pxc-1", job="pxc"}'
+ values: '0+0x5'
+ - series: 'mysql_up{instance="percona-xtradb-pxc-2", job="pxc"}'
+ values: '0+0x5'
+ alert_rule_test:
+ - eval_time: 5m  # matches the `for: 5m` of the info and warning rules
+ alertname: MysqlClusterDown
+ exp_alerts:
+ - exp_labels:
+ severity: P3
+ exp_annotations:
+ summary: Percona XtraDB Cluster replicas are down
+ description: 33% of replicas are online.
+ - exp_labels:
+ severity: P5
+ instance: percona-xtradb-pxc-1
+ job: pxc
+ exp_annotations:
+ summary: Percona XtraDB Cluster replica is down
+ description: percona-xtradb-pxc-1 replica is down.
+ - exp_labels:
+ severity: P5
+ instance: percona-xtradb-pxc-2
+ job: pxc
+ exp_annotations:
+ summary: Percona XtraDB Cluster replica is down
+ description: percona-xtradb-pxc-2 replica is down.
+
+ - interval: 1m  # every replica down: only the critical cluster-wide alert is expected
+ input_series:
+ - series: 'mysql_up{instance="percona-xtradb-pxc-0", job="pxc"}'
+ values: '0'
+ - series: 'mysql_up{instance="percona-xtradb-pxc-1", job="pxc"}'
+ values: '0'
+ - series: 'mysql_up{instance="percona-xtradb-pxc-2", job="pxc"}'
+ values: '0'
+ alert_rule_test:
+ - eval_time: 1m  # matches the critical rule's `for: 1m`; the `for: 5m` rules cannot be firing yet
+ alertname: MysqlClusterDown
+ exp_alerts:
+ - exp_labels:
+ severity: P1
+ exp_annotations:
+ summary: Percona XtraDB Cluster is down
+ description: All replicas are down.