# Copyright (c) 2022 VEXXHOST, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

# Publish the exporter configuration as a ConfigMap in the "monitoring"
# namespace. The `ipmi_exporter_config` variable is serialised with the
# `to_yaml` filter and stored under the `config.yml` key, which is the key the
# DaemonSet task in this file mounts into the exporter container.
- name: Create ConfigMap
  kubernetes.core.k8s:
    state: present
    definition:
      apiVersion: v1
      kind: ConfigMap
      metadata:
        name: ipmi-exporter
        namespace: monitoring
        labels:
          application: ipmi-exporter
      data:
        # Quoted so YAML does not parse the leading "{{" as a flow mapping.
        config.yml: "{{ ipmi_exporter_config | to_yaml }}"

# Run one exporter pod on every node matched by the nodeSelector below.
# The pod mounts the host's /dev/ipmi0 device and the `config.yml` key of the
# "ipmi-exporter" ConfigMap, and exposes a port named "metrics" on 9290.
- name: Create DaemonSet
  kubernetes.core.k8s:
    state: present
    definition:
      apiVersion: apps/v1
      kind: DaemonSet
      metadata:
        name: ipmi-exporter
        namespace: monitoring
        labels:
          application: ipmi-exporter
      spec:
        selector:
          matchLabels:
            application: ipmi-exporter
        template:
          metadata:
            annotations:
              # Hash of the same rendered YAML that the ConfigMap task stores,
              # so the pod template differs whenever the configuration does
              # (presumably to roll the pods on a config change).
              config-hash: "{{ ipmi_exporter_config | to_yaml | hash('md5') }}"
            labels:
              application: ipmi-exporter
              # The PodMonitor task in this file uses `jobLabel: job`, i.e.
              # this label's value.
              job: ipmi
          spec:
            containers:
              - name: exporter
                image: "{{ ipmi_exporter_image_repository }}/ipmi-exporter:{{ ipmi_exporter_image_tag }}"
                ports:
                  - name: metrics
                    containerPort: 9290
                securityContext:
                  # NOTE(review): presumably required to open the host IPMI
                  # character device; confirm before tightening.
                  privileged: true
                volumeMounts:
                  - name: dev-ipmi0
                    mountPath: /dev/ipmi0
                  # Mount only the `config.yml` key as a single file.
                  - name: ipmi-exporter
                    mountPath: /config.yml
                    subPath: config.yml
            volumes:
              - name: dev-ipmi0
                hostPath:
                  path: /dev/ipmi0
              - name: ipmi-exporter
                configMap:
                  name: ipmi-exporter
            nodeSelector:
              # Label values are strings, hence the quoted "true".
              # NOTE(review): the label is assumed to be set by a node feature
              # discovery mechanism outside this file.
              feature.node.kubernetes.io/ipmi: "true"

# Register the exporter pods as scrape targets through a PodMonitor.
# Pods are selected by the `application: ipmi-exporter` label and scraped on
# their "metrics" port at /metrics every 60 seconds.
- name: Create PodMonitor
  kubernetes.core.k8s:
    state: present
    definition:
      apiVersion: monitoring.coreos.com/v1
      kind: PodMonitor
      metadata:
        name: ipmi-exporter
        namespace: monitoring
        labels:
          application: ipmi-exporter
          release: kube-prometheus-stack
      spec:
        # Take the Prometheus job name from the pod's `job` label.
        jobLabel: job
        selector:
          matchLabels:
            application: ipmi-exporter
        podMetricsEndpoints:
          - port: metrics
            path: /metrics
            interval: 60s
            relabelings:
              # Use the node name as the `instance` label.
              - sourceLabels:
                  - "__meta_kubernetes_pod_node_name"
                targetLabel: instance
              # Drop the listed target labels from every target.
              - action: labeldrop
                regex: '^(container|endpoint|namespace|pod)$'

# Install the alerting rules for the IPMI exporter.
#
# Three rule groups are defined:
#   * rules                     - the exporter's own `ipmi_up` metric is 0.
#   * collectors-state-warning  - a per-collector `*_state` metric equals 1.
#   * collectors-state-critical - a per-collector `*_state` metric equals 2.
#
# Every alert carries a `severity` label. The same alert name is deliberately
# used in both the warning and critical groups; the two are told apart by that
# label.
- name: Create PrometheusRule
  kubernetes.core.k8s:
    state: present
    definition:
      apiVersion: monitoring.coreos.com/v1
      kind: PrometheusRule
      metadata:
        name: ipmi-exporter
        namespace: monitoring
        labels:
          application: ipmi-exporter
          release: kube-prometheus-stack
      spec:
        groups:
          - name: rules
            rules:
              - alert: IpmiCollectorDown
                expr: ipmi_up == 0
                # This alert previously had no severity label, unlike every
                # other alert in this rule, so severity-based routing treated
                # it differently. "warning" is the conservative choice.
                # NOTE(review): raise to "critical" if loss of IPMI visibility
                # should page.
                labels:
                  severity: warning
          - name: collectors-state-warning
            rules:
              - alert: IpmiCurrent
                expr: ipmi_current_state == 1
                labels:
                  severity: warning
              - alert: IpmiFanSpeed
                expr: ipmi_fan_speed_state == 1
                labels:
                  severity: warning
              - alert: IpmiPower
                expr: ipmi_power_state == 1
                labels:
                  severity: warning
              - alert: IpmiSensor
                expr: ipmi_sensor_state == 1
                labels:
                  severity: warning
              - alert: IpmiTemperature
                expr: ipmi_temperature_state == 1
                labels:
                  severity: warning
              - alert: IpmiVoltage
                expr: ipmi_voltage_state == 1
                labels:
                  severity: warning
          - name: collectors-state-critical
            rules:
              - alert: IpmiCurrent
                expr: ipmi_current_state == 2
                labels:
                  severity: critical
              - alert: IpmiFanSpeed
                expr: ipmi_fan_speed_state == 2
                labels:
                  severity: critical
              - alert: IpmiPower
                expr: ipmi_power_state == 2
                labels:
                  severity: critical
              - alert: IpmiSensor
                expr: ipmi_sensor_state == 2
                labels:
                  severity: critical
              - alert: IpmiTemperature
                expr: ipmi_temperature_state == 2
                labels:
                  severity: critical
              - alert: IpmiVoltage
                expr: ipmi_voltage_state == 2
                labels:
                  severity: critical
172 severity: critical