rabbitmq: monitoring and reliability improvements

Sem-Ver: bugfix
Change-Id: I655a6e5237ee0dc98547b5e8b4fa146a020f5606
diff --git a/releasenotes/notes/rabbitmq-improvements-875277bea9dfc9bb.yaml b/releasenotes/notes/rabbitmq-improvements-875277bea9dfc9bb.yaml
new file mode 100644
index 0000000..b38d4f2
--- /dev/null
+++ b/releasenotes/notes/rabbitmq-improvements-875277bea9dfc9bb.yaml
@@ -0,0 +1,7 @@
+---
+features:
+  - Added monitoring for RabbitMQ resource alarms, alerting when the memory,
+    free disk space or file descriptor alarm is raised.
+fixes:
+  - Replaced the fixed-threshold RabbitmqConnections alert with the ratio-based
+    RabbitmqTcpSocketsUsage alert to avoid false alerts on larger scale clouds.
diff --git a/roles/rabbitmq/tasks/main.yml b/roles/rabbitmq/tasks/main.yml
index 8e51bfe..b650123 100644
--- a/roles/rabbitmq/tasks/main.yml
+++ b/roles/rabbitmq/tasks/main.yml
@@ -31,6 +31,9 @@
                       operator: In
                       values:
                         - enabled
+        rabbitmq:
+          additionalConfig: |
+            vm_memory_high_watermark.relative = 0.9
         resources:
           requests:
             cpu: 500m
diff --git a/roles/rabbitmq_operator/tasks/main.yml b/roles/rabbitmq_operator/tasks/main.yml
index 0a8ce52..5124b24 100644
--- a/roles/rabbitmq_operator/tasks/main.yml
+++ b/roles/rabbitmq_operator/tasks/main.yml
@@ -129,6 +129,20 @@
                         "(.*)"
                       )
                     )
+            - name: alarms
+              rules:
+                - alert: RabbitmqAlarmFreeDiskSpace
+                  expr: rabbitmq_alarms_free_disk_space_watermark == 1
+                  labels:
+                    severity: critical
+                - alert: RabbitmqAlarmMemoryUsedWatermark
+                  expr: rabbitmq_alarms_memory_used_watermark == 1
+                  labels:
+                    severity: critical
+                - alert: RabbitmqAlarmFileDescriptorLimit
+                  expr: rabbitmq_alarms_file_descriptor_limit == 1
+                  labels:
+                    severity: critical
             - name: limits
               rules:
                 - alert: RabbitmqMemoryHigh
@@ -147,10 +161,14 @@
                   expr: rabbitmq_process_open_fds / rabbitmq_process_max_fds > 0.95
                   labels:
                     severity: critical
-                - alert: RabbitmqConnections
-                  expr: rabbitmq_connections > 1000
+                - alert: RabbitmqTcpSocketsUsage
+                  expr: rabbitmq_process_open_tcp_sockets / rabbitmq_process_max_tcp_sockets > 0.80
                   labels:
                     severity: warning
+                - alert: RabbitmqTcpSocketsUsage
+                  expr: rabbitmq_process_open_tcp_sockets / rabbitmq_process_max_tcp_sockets > 0.95
+                  labels:
+                    severity: critical
             - name: msgs
               rules:
                 - alert: RabbitmqUnackedMessages