Add alerts for node softnet
Add alerts NodeSoftNetTimesSqueezed and NodeSoftNetBacklogLength
Partially revert "fix: tune net.core.netdev_budget"
This partially reverts commit 6b7accacd089c740ae2ba92405d32e0e9f64b4dc,
re-introducing the NodeSoftNetTimesSqueezed alert.
It also adds a new alert, NodeSoftNetBacklogLength.
diff --git a/roles/kube_prometheus_stack/files/jsonnet/legacy.libsonnet b/roles/kube_prometheus_stack/files/jsonnet/legacy.libsonnet
index 3489639..3300e66 100644
--- a/roles/kube_prometheus_stack/files/jsonnet/legacy.libsonnet
+++ b/roles/kube_prometheus_stack/files/jsonnet/legacy.libsonnet
@@ -172,6 +172,14 @@
name: 'softnet',
rules: [
{
+ alert: 'NodeSoftNetBacklogLength',
+ expr: 'sum(node_softnet_backlog_len) by (instance) > 5000',
+ 'for': '1m',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ {
alert: 'NodeSoftNetDrops',
expr: 'sum(rate(node_softnet_dropped_total[1m])) by (instance) != 0',
'for': '1m',
@@ -179,6 +187,14 @@
severity: 'critical',
},
},
+ {
+ alert: 'NodeSoftNetTimesSqueezed',
+ expr: 'sum(rate(node_softnet_times_squeezed_total[1m])) by (instance) > 10',
+ 'for': '10m',
+ labels: {
+ severity: 'warning',
+ },
+ },
],
},
],