[ATMOSPHERE-440] chore: Add loki rule to detect Nova cell down (#1966)
fix #490
Reviewed-by: Oleksandr K.
diff --git a/roles/loki/vars/main.yml b/roles/loki/vars/main.yml
index d46ec76..82479df 100644
--- a/roles/loki/vars/main.yml
+++ b/roles/loki/vars/main.yml
@@ -73,6 +73,13 @@
openstack-control-plane: enabled
persistence:
size: 256Gi
+ extraVolumeMounts:
+ - name: rules
+ mountPath: /var/loki/rulestorage/fake
+ extraVolumes:
+ - name: rules
+ configMap:
+ name: loki-alerting-rules
write:
replicas: 0
read:
@@ -88,3 +95,21 @@
openstack-control-plane: enabled
lokiCanary:
enabled: false
+ extraObjects:
+ - apiVersion: v1
+ kind: ConfigMap
+ metadata:
+ name: loki-alerting-rules
+ labels:
+ loki_rule: "atmosphere"
+ data:
+ loki-alerting-rules.yaml: |-
+ groups:
+ - name: additional-loki-rules
+ rules:
+ - alert: NovaCellNotResponding
+ expr: 'count_over_time({pod_label_component="compute"} |= "not responding and hence is being omitted from the results" [1m]) > 0'
+ labels:
+ severity: critical
+ annotations:
+ summary: Nova Cell is not responding. It can cause port deletion in CAPI.