rabbitmq: monitoring and reliability improvements
Sem-Ver: bugfix
Change-Id: I655a6e5237ee0dc98547b5e8b4fa146a020f5606
diff --git a/releasenotes/notes/rabbitmq-improvements-875277bea9dfc9bb.yaml b/releasenotes/notes/rabbitmq-improvements-875277bea9dfc9bb.yaml
new file mode 100644
index 0000000..b38d4f2
--- /dev/null
+++ b/releasenotes/notes/rabbitmq-improvements-875277bea9dfc9bb.yaml
@@ -0,0 +1,7 @@
+---
+features:
+ - Added RabbitMQ alerts that fire when the broker raises a resource alarm
+ (free disk space, memory watermark or file descriptor limit).
+fixes:
+ - Replaced the fixed-threshold RabbitmqConnections alert with the ratio-based
+ RabbitmqTcpSocketsUsage alert to avoid false alerts on larger clouds.
diff --git a/roles/rabbitmq/tasks/main.yml b/roles/rabbitmq/tasks/main.yml
index 8e51bfe..b650123 100644
--- a/roles/rabbitmq/tasks/main.yml
+++ b/roles/rabbitmq/tasks/main.yml
@@ -31,6 +31,9 @@
operator: In
values:
- enabled
+ rabbitmq:
+ additionalConfig: |
+ vm_memory_high_watermark.relative = 0.9
resources:
requests:
cpu: 500m
diff --git a/roles/rabbitmq_operator/tasks/main.yml b/roles/rabbitmq_operator/tasks/main.yml
index 0a8ce52..5124b24 100644
--- a/roles/rabbitmq_operator/tasks/main.yml
+++ b/roles/rabbitmq_operator/tasks/main.yml
@@ -129,6 +129,20 @@
"(.*)"
)
)
+ - name: alarms
+ rules:
+ - alert: RabbitmqAlarmFreeDiskSpace
+ expr: rabbitmq_alarms_free_disk_space_watermark == 1
+ labels:
+ severity: critical
+ - alert: RabbitmqAlarmMemoryUsedWatermark
+ expr: rabbitmq_alarms_memory_used_watermark == 1
+ labels:
+ severity: critical
+ - alert: RabbitmqAlarmFileDescriptorLimit
+ expr: rabbitmq_alarms_file_descriptor_limit == 1
+ labels:
+ severity: critical
- name: limits
rules:
- alert: RabbitmqMemoryHigh
@@ -147,10 +161,14 @@
expr: rabbitmq_process_open_fds / rabbitmq_process_max_fds > 0.95
labels:
severity: critical
- - alert: RabbitmqConnections
- expr: rabbitmq_connections > 1000
+ - alert: RabbitmqTcpSocketsUsage
+ expr: rabbitmq_process_open_tcp_sockets / rabbitmq_process_max_tcp_sockets > 0.80
labels:
severity: warning
+ - alert: RabbitmqTcpSocketsUsage
+ expr: rabbitmq_process_open_tcp_sockets / rabbitmq_process_max_tcp_sockets > 0.95
+ labels:
+ severity: critical
- name: msgs
rules:
- alert: RabbitmqUnackedMessages