# Copyright (c) 2022 VEXXHOST, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

# Applies two Flux resources in the `openstack` namespace:
#   1. a HelmRepository named `bitnami` pointing at the Bitnami chart index;
#   2. a HelmRelease that installs the `rabbitmq-cluster-operator` chart from
#      that repository, pinned to chart version 2.5.2.
- name: Deploy Helm chart
  kubernetes.core.k8s:
    state: present
    definition:
      # Chart source referenced by the HelmRelease below.
      - apiVersion: source.toolkit.fluxcd.io/v1beta2
        kind: HelmRepository
        metadata:
          name: bitnami
          namespace: openstack
        spec:
          interval: 60s
          url: https://charts.bitnami.com/bitnami

      - apiVersion: helm.toolkit.fluxcd.io/v2beta1
        kind: HelmRelease
        metadata:
          name: rabbitmq-cluster-operator
          namespace: openstack
        spec:
          interval: 60s
          chart:
            spec:
              chart: rabbitmq-cluster-operator
              # Quoted so the version can never be re-typed by a YAML loader.
              version: "2.5.2"
              sourceRef:
                kind: HelmRepository
                name: bitnami
          # Same CRD policy on first install and on every upgrade.
          install:
            crds: CreateReplace
          upgrade:
            crds: CreateReplace
          values:
            # Image overrides: every image is pinned to an explicit tag.
            rabbitmqImage:
              repository: library/rabbitmq
              tag: "3.10.2-management"
            credentialUpdaterImage:
              repository: rabbitmqoperator/default-user-credential-updater
              tag: "1.0.2"
            clusterOperator:
              image:
                repository: rabbitmqoperator/cluster-operator
                tag: "1.13.1"
              fullnameOverride: rabbitmq-cluster-operator
              # Schedule only on nodes labelled openstack-control-plane=enabled.
              nodeSelector:
                openstack-control-plane: enabled
            msgTopologyOperator:
              image:
                repository: rabbitmqoperator/messaging-topology-operator
                tag: "1.6.0"
              fullnameOverride: rabbitmq-messaging-topology-operator
              # Schedule only on nodes labelled openstack-control-plane=enabled.
              nodeSelector:
                openstack-control-plane: enabled
            useCertManager: true

# Applies the Prometheus Operator resources for RabbitMQ in the `monitoring`
# namespace: a PodMonitor that scrapes the RabbitMQ pods and a PrometheusRule
# holding one recording rule plus the alerting rules. The apply is retried
# until it succeeds (see the note on `retries` at the end of the task).
- name: Deploy monitoring for RabbitMQ
  kubernetes.core.k8s:
    state: present
    definition:
      - apiVersion: monitoring.coreos.com/v1
        kind: PodMonitor
        metadata:
          name: rabbitmq
          namespace: monitoring
          labels:
            # NOTE(review): presumably the label the kube-prometheus-stack
            # Prometheus uses to discover monitors -- confirm.
            release: kube-prometheus-stack
        spec:
          jobLabel: app.kubernetes.io/component
          podMetricsEndpoints:
            - port: prometheus
              path: /metrics
              relabelings:
                # Use the pod name as the `instance` label ...
                - sourceLabels: ["__meta_kubernetes_pod_name"]
                  targetLabel: "instance"
                # ... and drop the labels listed in the regex from the target.
                - action: "labeldrop"
                  regex: "^(container|endpoint|namespace|pod|service)$"
          # Only pods in `openstack` carrying the rabbitmq component label.
          namespaceSelector:
            matchNames:
              - openstack
          selector:
            matchLabels:
              app.kubernetes.io/component: rabbitmq

      - apiVersion: monitoring.coreos.com/v1
        kind: PrometheusRule
        metadata:
          name: rabbitmq
          namespace: monitoring
          labels:
            release: kube-prometheus-stack
        spec:
          groups:
            - name: recording
              rules:
                # Resident memory of each RabbitMQ process divided by the
                # container resource-limit series for the same pod.
                # label_replace copies `pod` into `instance` so both sides of
                # the division share an `instance` label (the PodMonitor above
                # sets `instance` to the pod name).
                - record: rabbitmq:usage:memory
                  labels:
                    job: rabbitmq
                  expr: |
                    sum without (job) (
                      rabbitmq_process_resident_memory_bytes
                    ) / sum without (
                      container,
                      pod,
                      job,
                      namespace,
                      node,
                      resource,
                      uid,
                      unit
                    ) (
                      label_replace(
                        cluster:namespace:pod_memory:active:kube_pod_container_resource_limits,
                        "instance",
                        "$1",
                        "pod",
                        "(.*)"
                      )
                    )
            # Fire as soon as one of the rabbitmq_alarms_* gauges equals 1.
            - name: alarms
              rules:
                - alert: RabbitmqAlarmFreeDiskSpace
                  expr: rabbitmq_alarms_free_disk_space_watermark == 1
                  labels:
                    severity: critical
                - alert: RabbitmqAlarmMemoryUsedWatermark
                  expr: rabbitmq_alarms_memory_used_watermark == 1
                  labels:
                    severity: critical
                - alert: RabbitmqAlarmFileDescriptorLimit
                  expr: rabbitmq_alarms_file_descriptor_limit == 1
                  labels:
                    severity: critical
            # Usage ratios: each alert exists twice, warning above 80% and
            # critical above 95%.
            - name: limits
              rules:
                - alert: RabbitmqMemoryHigh
                  expr: rabbitmq:usage:memory > 0.80
                  labels:
                    severity: warning
                - alert: RabbitmqMemoryHigh
                  expr: rabbitmq:usage:memory > 0.95
                  labels:
                    severity: critical
                - alert: RabbitmqFileDescriptorsUsage
                  expr: rabbitmq_process_open_fds / rabbitmq_process_max_fds > 0.80
                  labels:
                    severity: warning
                - alert: RabbitmqFileDescriptorsUsage
                  expr: rabbitmq_process_open_fds / rabbitmq_process_max_fds > 0.95
                  labels:
                    severity: critical
                - alert: RabbitmqTcpSocketsUsage
                  expr: rabbitmq_process_open_tcp_sockets / rabbitmq_process_max_tcp_sockets > 0.80
                  labels:
                    severity: warning
                - alert: RabbitmqTcpSocketsUsage
                  expr: rabbitmq_process_open_tcp_sockets / rabbitmq_process_max_tcp_sockets > 0.95
                  labels:
                    severity: critical
            # Same threshold for both alerts; severity escalates with how long
            # the condition has held (5m -> warning, 1h -> critical).
            - name: msgs
              rules:
                - alert: RabbitmqUnackedMessages
                  expr: sum(rabbitmq_queue_messages_unacked) BY (queue) > 1000
                  for: 5m
                  labels:
                    severity: warning
                - alert: RabbitmqUnackedMessages
                  expr: sum(rabbitmq_queue_messages_unacked) BY (queue) > 1000
                  for: 1h
                  labels:
                    severity: critical
  # NOTE(mnaser): Since we haven't moved to the operator pattern yet, we need
  #               to keep retrying a few times as the CRDs might not be
  #               installed yet. Up to 60 attempts, 5 seconds apart.
  retries: 60
  delay: 5
  register: _result
  until: _result is not failed