monitoring: add ipmi-exporter
Sem-Ver: feature
Change-Id: I0ee9362f3a60c65eada81feee5f4de1af2f4dd12
diff --git a/doc/source/roles/ipmi_exporter/index.rst b/doc/source/roles/ipmi_exporter/index.rst
new file mode 100644
index 0000000..28dbfba
--- /dev/null
+++ b/doc/source/roles/ipmi_exporter/index.rst
@@ -0,0 +1,10 @@
+.. Copyright (C) 2022 VEXXHOST, Inc.
+.. SPDX-License-Identifier: Apache-2.0
+
+``ipmi_exporter``
+=================
+
+.. toctree::
+ :maxdepth: 2
+
+ defaults/main
\ No newline at end of file
diff --git a/playbooks/openstack.yml b/playbooks/openstack.yml
index d771604..aff12a8 100644
--- a/playbooks/openstack.yml
+++ b/playbooks/openstack.yml
@@ -32,6 +32,10 @@
tags:
- node-feature-discovery
+ - role: ipmi_exporter
+ tags:
+ - ipmi-exporter
+
- role: prometheus_pushgateway
tags:
- prometheus-pushgateway
diff --git a/releasenotes/notes/add-ipmi-exporter-37a8c16fe24597dc.yaml b/releasenotes/notes/add-ipmi-exporter-37a8c16fe24597dc.yaml
new file mode 100644
index 0000000..c12ee7e
--- /dev/null
+++ b/releasenotes/notes/add-ipmi-exporter-37a8c16fe24597dc.yaml
@@ -0,0 +1,3 @@
+---
+features:
+ - Added ``ipmi-exporter`` with alerting rules.
diff --git a/roles/ipmi_exporter/defaults/main.yml b/roles/ipmi_exporter/defaults/main.yml
new file mode 100644
index 0000000..451f12c
--- /dev/null
+++ b/roles/ipmi_exporter/defaults/main.yml
@@ -0,0 +1,35 @@
+---
+# .. vim: foldmarker=[[[,]]]:foldmethod=marker
+
+# .. Copyright (C) 2022 VEXXHOST, Inc.
+# .. SPDX-License-Identifier: Apache-2.0
+
+# Default variables
+# =================
+
+# .. contents:: Sections
+# :local:
+
+
+# .. envvar:: ipmi_exporter_config [[[
+#
+# Configuration for the IPMI exporter
+ipmi_exporter_config:
+ modules:
+ default:
+ collectors: ["bmc", "ipmi", "chassis", "sel"]
+ exclude_sensor_ids: [50, 52, 54, 82]
+
+ # ]]]
+# .. envvar:: ipmi_exporter_image_repository [[[
+#
+# IPMI exporter container image repository location
+ipmi_exporter_image_repository: "{{ atmosphere_image_repository | default('us-docker.pkg.dev/vexxhost-infra/openstack') }}"
+
+ # ]]]
+# .. envvar:: ipmi_exporter_image_tag [[[
+#
+# IPMI exporter container image tag
+ipmi_exporter_image_tag: 1.4.0
+
+ # ]]]
diff --git a/roles/ipmi_exporter/meta/main.yml b/roles/ipmi_exporter/meta/main.yml
new file mode 100644
index 0000000..c44b55c
--- /dev/null
+++ b/roles/ipmi_exporter/meta/main.yml
@@ -0,0 +1,23 @@
+# Copyright (c) 2022 VEXXHOST, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+galaxy_info:
+ author: VEXXHOST, Inc.
+ description: Ansible role for IPMI exporter
+ license: Apache-2.0
+ min_ansible_version: 5.5.0
+ platforms:
+ - name: Ubuntu
+ versions:
+ - focal
diff --git a/roles/ipmi_exporter/tasks/main.yml b/roles/ipmi_exporter/tasks/main.yml
new file mode 100644
index 0000000..b356139
--- /dev/null
+++ b/roles/ipmi_exporter/tasks/main.yml
@@ -0,0 +1,172 @@
+# Copyright (c) 2022 VEXXHOST, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+- name: Create ConfigMap
+ kubernetes.core.k8s:
+ state: present
+ definition:
+ apiVersion: v1
+ kind: ConfigMap
+ metadata:
+ name: ipmi-exporter
+ namespace: monitoring
+ labels:
+ application: ipmi-exporter
+ data:
+ config.yml: "{{ ipmi_exporter_config | to_yaml }}"
+
+- name: Create DaemonSet
+ kubernetes.core.k8s:
+ state: present
+ definition:
+ apiVersion: apps/v1
+ kind: DaemonSet
+ metadata:
+ name: ipmi-exporter
+ namespace: monitoring
+ labels:
+ application: ipmi-exporter
+ spec:
+ selector:
+ matchLabels:
+ application: ipmi-exporter
+ template:
+ metadata:
+ annotations:
+ config-hash: "{{ ipmi_exporter_config | to_yaml | hash('md5') }}"
+ labels:
+ application: ipmi-exporter
+ job: ipmi
+ spec:
+ containers:
+ - name: exporter
+ image: "{{ ipmi_exporter_image_repository }}/ipmi-exporter:{{ ipmi_exporter_image_tag }}"
+ ports:
+ - name: metrics
+ containerPort: 9290
+ securityContext:
+ privileged: true
+ volumeMounts:
+ - name: dev-ipmi0
+ mountPath: /dev/ipmi0
+ - name: ipmi-exporter
+ mountPath: /config.yml
+ subPath: config.yml
+ volumes:
+ - name: dev-ipmi0
+ hostPath:
+ path: /dev/ipmi0
+ - name: ipmi-exporter
+ configMap:
+ name: ipmi-exporter
+ nodeSelector:
+ feature.node.kubernetes.io/ipmi: "true"
+
+- name: Create PodMonitor
+ kubernetes.core.k8s:
+ state: present
+ definition:
+ apiVersion: monitoring.coreos.com/v1
+ kind: PodMonitor
+ metadata:
+ name: ipmi-exporter
+ namespace: monitoring
+ labels:
+ application: ipmi-exporter
+ release: kube-prometheus-stack
+ spec:
+ jobLabel: job
+ podMetricsEndpoints:
+ - port: metrics
+ path: /metrics
+ interval: 60s
+ relabelings:
+ - sourceLabels: ["__meta_kubernetes_pod_node_name"]
+ targetLabel: instance
+ - action: labeldrop
+ regex: ^(container|endpoint|namespace|pod)$
+ selector:
+ matchLabels:
+ application: ipmi-exporter
+
+- name: Create PrometheusRule
+ kubernetes.core.k8s:
+ state: present
+ definition:
+ apiVersion: monitoring.coreos.com/v1
+ kind: PrometheusRule
+ metadata:
+ name: ipmi-exporter
+ namespace: monitoring
+ labels:
+ application: ipmi-exporter
+ release: kube-prometheus-stack
+ spec:
+ groups:
+ - name: rules
+ rules:
+ - alert: IpmiCollectorDown
+ expr: ipmi_up == 0
+ - name: collectors-state-warning
+ rules:
+ - alert: IpmiCurrent
+ expr: ipmi_current_state == 1
+ labels:
+ severity: warning
+ - alert: IpmiFanSpeed
+ expr: ipmi_fan_speed_state == 1
+ labels:
+ severity: warning
+ - alert: IpmiPower
+ expr: ipmi_power_state == 1
+ labels:
+ severity: warning
+ - alert: IpmiSensor
+ expr: ipmi_sensor_state == 1
+ labels:
+ severity: warning
+ - alert: IpmiTemperature
+ expr: ipmi_temperature_state == 1
+ labels:
+ severity: warning
+ - alert: IpmiVoltage
+ expr: ipmi_voltage_state == 1
+ labels:
+ severity: warning
+ - name: collectors-state-critical
+ rules:
+ - alert: IpmiCurrent
+ expr: ipmi_current_state == 2
+ labels:
+ severity: critical
+ - alert: IpmiFanSpeed
+ expr: ipmi_fan_speed_state == 2
+ labels:
+ severity: critical
+ - alert: IpmiPower
+ expr: ipmi_power_state == 2
+ labels:
+ severity: critical
+ - alert: IpmiSensor
+ expr: ipmi_sensor_state == 2
+ labels:
+ severity: critical
+ - alert: IpmiTemperature
+ expr: ipmi_temperature_state == 2
+ labels:
+ severity: critical
+ - alert: IpmiVoltage
+ expr: ipmi_voltage_state == 2
+ labels:
+ severity: critical
diff --git a/roles/keepalived/defaults/main.yml b/roles/keepalived/defaults/main.yml
index 9ee7cff..5fddd22 100644
--- a/roles/keepalived/defaults/main.yml
+++ b/roles/keepalived/defaults/main.yml
@@ -10,6 +10,7 @@
# .. contents:: Sections
# :local:
+
# .. envvar:: keepalived_enabled [[[
#
# Enable Keepalived for the Atmosphere cluster. You can disable this if you
diff --git a/roles/kubernetes/defaults/main.yml b/roles/kubernetes/defaults/main.yml
index c73d385..2bba2ce 100644
--- a/roles/kubernetes/defaults/main.yml
+++ b/roles/kubernetes/defaults/main.yml
@@ -54,7 +54,7 @@
kubernetes_control_plane_group: controllers
# ]]]
-# .. envvar:: kubernetes_control_plane_group [[[
+# .. envvar:: kubernetes_control_plane_labels [[[
#
# Labels to apply for all control plane nodes
kubernetes_control_plane_labels: