monitoring: add ipmi-exporter

Sem-Ver: feature
Change-Id: I0ee9362f3a60c65eada81feee5f4de1af2f4dd12
diff --git a/doc/source/roles/ipmi_exporter/index.rst b/doc/source/roles/ipmi_exporter/index.rst
new file mode 100644
index 0000000..28dbfba
--- /dev/null
+++ b/doc/source/roles/ipmi_exporter/index.rst
@@ -0,0 +1,10 @@
+.. Copyright (C) 2022 VEXXHOST, Inc.
+.. SPDX-License-Identifier: Apache-2.0
+
+``ipmi_exporter``
+=================
+
+.. toctree::
+   :maxdepth: 2
+
+   defaults/main
diff --git a/playbooks/openstack.yml b/playbooks/openstack.yml
index d771604..aff12a8 100644
--- a/playbooks/openstack.yml
+++ b/playbooks/openstack.yml
@@ -32,6 +32,10 @@
       tags:
         - node-feature-discovery
 
+    - role: ipmi_exporter
+      tags:
+        - ipmi-exporter
+
     - role: prometheus_pushgateway
       tags:
         - prometheus-pushgateway
diff --git a/releasenotes/notes/add-ipmi-exporter-37a8c16fe24597dc.yaml b/releasenotes/notes/add-ipmi-exporter-37a8c16fe24597dc.yaml
new file mode 100644
index 0000000..c12ee7e
--- /dev/null
+++ b/releasenotes/notes/add-ipmi-exporter-37a8c16fe24597dc.yaml
@@ -0,0 +1,3 @@
+---
+features:
+  - Added ``ipmi-exporter`` along with Prometheus alerting rules.
diff --git a/roles/ipmi_exporter/defaults/main.yml b/roles/ipmi_exporter/defaults/main.yml
new file mode 100644
index 0000000..451f12c
--- /dev/null
+++ b/roles/ipmi_exporter/defaults/main.yml
@@ -0,0 +1,35 @@
+---
+# .. vim: foldmarker=[[[,]]]:foldmethod=marker
+
+# .. Copyright (C) 2022 VEXXHOST, Inc.
+# .. SPDX-License-Identifier: Apache-2.0
+
+# Default variables
+# =================
+
+# .. contents:: Sections
+#    :local:
+
+
+# .. envvar:: ipmi_exporter_config [[[
+#
+# Configuration for the IPMI exporter
+ipmi_exporter_config:
+  modules:
+    default:
+      collectors: ["bmc", "ipmi", "chassis", "sel"]
+      exclude_sensor_ids: [50, 52, 54, 82]
+
+                                                                   # ]]]
+# .. envvar:: ipmi_exporter_image_repository [[[
+#
+# IPMI exporter container image repository location
+ipmi_exporter_image_repository: "{{ atmosphere_image_repository | default('us-docker.pkg.dev/vexxhost-infra/openstack') }}"
+
+                                                                   # ]]]
+# .. envvar:: ipmi_exporter_image_tag [[[
+#
+# IPMI exporter container image tag
+ipmi_exporter_image_tag: "1.4.0"
+
+                                                                   # ]]]
diff --git a/roles/ipmi_exporter/meta/main.yml b/roles/ipmi_exporter/meta/main.yml
new file mode 100644
index 0000000..c44b55c
--- /dev/null
+++ b/roles/ipmi_exporter/meta/main.yml
@@ -0,0 +1,23 @@
+# Copyright (c) 2022 VEXXHOST, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+galaxy_info:
+  author: VEXXHOST, Inc.
+  description: Ansible role for IPMI exporter
+  license: Apache-2.0
+  min_ansible_version: 5.5.0
+  platforms:
+    - name: Ubuntu
+      versions:
+        - focal
diff --git a/roles/ipmi_exporter/tasks/main.yml b/roles/ipmi_exporter/tasks/main.yml
new file mode 100644
index 0000000..b356139
--- /dev/null
+++ b/roles/ipmi_exporter/tasks/main.yml
@@ -0,0 +1,188 @@
+# Copyright (c) 2022 VEXXHOST, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# Publish ``ipmi_exporter_config`` as the ``config.yml`` key of a ConfigMap,
+# which the DaemonSet below mounts into the exporter container.
+- name: Create ConfigMap
+  kubernetes.core.k8s:
+    state: present
+    definition:
+      apiVersion: v1
+      kind: ConfigMap
+      metadata:
+        name: ipmi-exporter
+        namespace: monitoring
+        labels:
+          application: ipmi-exporter
+      data:
+        config.yml: "{{ ipmi_exporter_config | to_yaml }}"
+
+# Run one exporter pod on every node matched by the ``nodeSelector`` at the
+# end of the pod spec.
+- name: Create DaemonSet
+  kubernetes.core.k8s:
+    state: present
+    definition:
+      apiVersion: apps/v1
+      kind: DaemonSet
+      metadata:
+        name: ipmi-exporter
+        namespace: monitoring
+        labels:
+          application: ipmi-exporter
+      spec:
+        selector:
+          matchLabels:
+            application: ipmi-exporter
+        template:
+          metadata:
+            annotations:
+              # Hash of the rendered configuration: it changes whenever the
+              # configuration does, which alters the pod template.
+              config-hash: "{{ ipmi_exporter_config | to_yaml | hash('md5') }}"
+            labels:
+              application: ipmi-exporter
+              job: ipmi
+          spec:
+            containers:
+              - name: exporter
+                image: "{{ ipmi_exporter_image_repository }}/ipmi-exporter:{{ ipmi_exporter_image_tag }}"
+                ports:
+                  - name: metrics
+                    containerPort: 9290
+                securityContext:
+                  # Presumably needed to open the host IPMI device mounted below.
+                  privileged: true
+                volumeMounts:
+                  - name: dev-ipmi0
+                    mountPath: /dev/ipmi0
+                  - name: ipmi-exporter
+                    mountPath: /config.yml
+                    subPath: config.yml
+            volumes:
+              - name: dev-ipmi0
+                hostPath:
+                  path: /dev/ipmi0
+              - name: ipmi-exporter
+                configMap:
+                  name: ipmi-exporter
+            # Restrict to nodes labelled as having IPMI (label presumably set
+            # by node-feature-discovery).
+            nodeSelector:
+              feature.node.kubernetes.io/ipmi: "true"
+
+# Have Prometheus scrape ``/metrics`` on the ``metrics`` port of the exporter
+# pods every 60 seconds.
+- name: Create PodMonitor
+  kubernetes.core.k8s:
+    state: present
+    definition:
+      apiVersion: monitoring.coreos.com/v1
+      kind: PodMonitor
+      metadata:
+        name: ipmi-exporter
+        namespace: monitoring
+        labels:
+          application: ipmi-exporter
+          release: kube-prometheus-stack
+      spec:
+        jobLabel: job
+        podMetricsEndpoints:
+          - port: metrics
+            path: /metrics
+            interval: 60s
+            relabelings:
+              # Report the node name as ``instance`` and drop per-pod labels.
+              - sourceLabels: ["__meta_kubernetes_pod_node_name"]
+                targetLabel: instance
+              - action: labeldrop
+                regex: ^(container|endpoint|namespace|pod)$
+        selector:
+          matchLabels:
+            application: ipmi-exporter
+
+# Alert when ``ipmi_up`` is 0, and when a sensor state metric equals 1
+# (``warning`` severity) or 2 (``critical`` severity).
+- name: Create PrometheusRule
+  kubernetes.core.k8s:
+    state: present
+    definition:
+      apiVersion: monitoring.coreos.com/v1
+      kind: PrometheusRule
+      metadata:
+        name: ipmi-exporter
+        namespace: monitoring
+        labels:
+          application: ipmi-exporter
+          release: kube-prometheus-stack
+      spec:
+        groups:
+          - name: rules
+            rules:
+              - alert: IpmiCollectorDown
+                expr: ipmi_up == 0
+                labels:
+                  severity: warning
+          - name: collectors-state-warning
+            rules:
+              - alert: IpmiCurrent
+                expr: ipmi_current_state == 1
+                labels:
+                  severity: warning
+              - alert: IpmiFanSpeed
+                expr: ipmi_fan_speed_state == 1
+                labels:
+                  severity: warning
+              - alert: IpmiPower
+                expr: ipmi_power_state == 1
+                labels:
+                  severity: warning
+              - alert: IpmiSensor
+                expr: ipmi_sensor_state == 1
+                labels:
+                  severity: warning
+              - alert: IpmiTemperature
+                expr: ipmi_temperature_state == 1
+                labels:
+                  severity: warning
+              - alert: IpmiVoltage
+                expr: ipmi_voltage_state == 1
+                labels:
+                  severity: warning
+          - name: collectors-state-critical
+            rules:
+              - alert: IpmiCurrent
+                expr: ipmi_current_state == 2
+                labels:
+                  severity: critical
+              - alert: IpmiFanSpeed
+                expr: ipmi_fan_speed_state == 2
+                labels:
+                  severity: critical
+              - alert: IpmiPower
+                expr: ipmi_power_state == 2
+                labels:
+                  severity: critical
+              - alert: IpmiSensor
+                expr: ipmi_sensor_state == 2
+                labels:
+                  severity: critical
+              - alert: IpmiTemperature
+                expr: ipmi_temperature_state == 2
+                labels:
+                  severity: critical
+              - alert: IpmiVoltage
+                expr: ipmi_voltage_state == 2
+                labels:
+                  severity: critical
diff --git a/roles/keepalived/defaults/main.yml b/roles/keepalived/defaults/main.yml
index 9ee7cff..5fddd22 100644
--- a/roles/keepalived/defaults/main.yml
+++ b/roles/keepalived/defaults/main.yml
@@ -10,6 +10,7 @@
 # .. contents:: Sections
 #    :local:
 
+
 # .. envvar:: keepalived_enabled [[[
 #
 # Enable Keepalived for the Atmosphere cluster.  You can disable this if you
diff --git a/roles/kubernetes/defaults/main.yml b/roles/kubernetes/defaults/main.yml
index c73d385..2bba2ce 100644
--- a/roles/kubernetes/defaults/main.yml
+++ b/roles/kubernetes/defaults/main.yml
@@ -54,7 +54,7 @@
 kubernetes_control_plane_group: controllers
 
                                                                    # ]]]
-# .. envvar:: kubernetes_control_plane_group [[[
+# .. envvar:: kubernetes_control_plane_labels [[[
 #
 # Labels to apply for all control plane nodes
 kubernetes_control_plane_labels: