blob: 6d93c10fdd0d39952594b4d8ccc5815bb99dd952 [file] [log] [blame]
# Copyright (c) 2022 VEXXHOST, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
14
# Store the exporter configuration as a ConfigMap named "ipmi-exporter" in
# the "monitoring" namespace.
- name: Create ConfigMap
  kubernetes.core.k8s:
    definition:
      kind: ConfigMap
      apiVersion: v1
      metadata:
        namespace: monitoring
        name: ipmi-exporter
        labels:
          application: ipmi-exporter
      data:
        # The ipmi_exporter_config variable is serialized to YAML text and
        # stored under the "config.yml" key.
        config.yml: "{{ ipmi_exporter_config | to_yaml }}"
    state: present
28
# Deploy the exporter as a DaemonSet named "ipmi-exporter" in the
# "monitoring" namespace.
- name: Create DaemonSet
  kubernetes.core.k8s:
    definition:
      kind: DaemonSet
      apiVersion: apps/v1
      metadata:
        namespace: monitoring
        name: ipmi-exporter
        labels:
          application: ipmi-exporter
      spec:
        selector:
          matchLabels:
            application: ipmi-exporter
        template:
          metadata:
            labels:
              application: ipmi-exporter
              job: ipmi
            annotations:
              # MD5 digest of the rendered configuration, so the annotation
              # value changes whenever the configuration changes.
              # NOTE(review): presumably used to roll the pods on a config
              # change - confirm.
              config-hash: "{{ ipmi_exporter_config | to_yaml | hash('md5') }}"
          spec:
            # Scheduling: only nodes labelled with
            # feature.node.kubernetes.io/ipmi=true, and not nodes whose
            # cpu-cpuid.HYPERVISOR feature label is "true".
            nodeSelector:
              feature.node.kubernetes.io/ipmi: "true"
            affinity:
              nodeAffinity:
                requiredDuringSchedulingIgnoredDuringExecution:
                  nodeSelectorTerms:
                    - matchExpressions:
                        - key: feature.node.kubernetes.io/cpu-cpuid.HYPERVISOR
                          operator: NotIn
                          values:
                            - "true"
            containers:
              - name: exporter
                image: "{{ ipmi_exporter_image_repository }}/ipmi-exporter:{{ ipmi_exporter_image_tag }}"
                # The container runs privileged and gets the host's
                # /dev/ipmi0 device path mounted (see volumes below).
                securityContext:
                  privileged: true
                ports:
                  - name: metrics
                    containerPort: 9290
                volumeMounts:
                  - name: dev-ipmi0
                    mountPath: /dev/ipmi0
                  # Only the "config.yml" key of the ConfigMap volume is
                  # mounted, as the single file /config.yml.
                  - name: ipmi-exporter
                    mountPath: /config.yml
                    subPath: config.yml
            volumes:
              - name: dev-ipmi0
                hostPath:
                  path: /dev/ipmi0
              - name: ipmi-exporter
                configMap:
                  name: ipmi-exporter
    state: present
83
# Register a PodMonitor that scrapes the pods labelled
# application=ipmi-exporter.
- name: Create PodMonitor
  kubernetes.core.k8s:
    definition:
      kind: PodMonitor
      apiVersion: monitoring.coreos.com/v1
      metadata:
        namespace: monitoring
        name: ipmi-exporter
        labels:
          application: ipmi-exporter
          release: kube-prometheus-stack
      spec:
        selector:
          matchLabels:
            application: ipmi-exporter
        # The job name is taken from the pod label called "job".
        jobLabel: job
        podMetricsEndpoints:
          - port: metrics
            path: /metrics
            interval: 60s
            relabelings:
              # Copy the pod's node name into the "instance" label.
              - targetLabel: instance
                sourceLabels:
                  - "__meta_kubernetes_pod_node_name"
              # Remove the container, endpoint, namespace and pod labels.
              - action: labeldrop
                regex: ^(container|endpoint|namespace|pod)$
    state: present
110
# Install the alerting rules for the exporter as a PrometheusRule named
# "ipmi-exporter" in the "monitoring" namespace.
#
# Every alert carries a "severity" label: the collector-down alert and the
# "state == 1" alerts use "warning", the "state == 2" alerts use "critical".
- name: Create PrometheusRule
  kubernetes.core.k8s:
    state: present
    definition:
      apiVersion: monitoring.coreos.com/v1
      kind: PrometheusRule
      metadata:
        name: ipmi-exporter
        namespace: monitoring
        labels:
          application: ipmi-exporter
          release: kube-prometheus-stack
      spec:
        groups:
          - name: rules
            rules:
              - alert: IpmiCollectorDown
                expr: ipmi_up == 0
                # Labelled like every other rule in this file so that
                # severity-based handling also applies to this alert.
                labels:
                  severity: warning
          # One alert per *_state metric, firing when the state equals 1.
          - name: collectors-state-warning
            rules:
              - alert: IpmiCurrent
                expr: ipmi_current_state == 1
                labels:
                  severity: warning
              - alert: IpmiFanSpeed
                expr: ipmi_fan_speed_state == 1
                labels:
                  severity: warning
              - alert: IpmiPower
                expr: ipmi_power_state == 1
                labels:
                  severity: warning
              - alert: IpmiSensor
                expr: ipmi_sensor_state == 1
                labels:
                  severity: warning
              - alert: IpmiTemperature
                expr: ipmi_temperature_state == 1
                labels:
                  severity: warning
              - alert: IpmiVoltage
                expr: ipmi_voltage_state == 1
                labels:
                  severity: warning
          # Same alert names as above, firing when the state equals 2.
          - name: collectors-state-critical
            rules:
              - alert: IpmiCurrent
                expr: ipmi_current_state == 2
                labels:
                  severity: critical
              - alert: IpmiFanSpeed
                expr: ipmi_fan_speed_state == 2
                labels:
                  severity: critical
              - alert: IpmiPower
                expr: ipmi_power_state == 2
                labels:
                  severity: critical
              - alert: IpmiSensor
                expr: ipmi_sensor_state == 2
                labels:
                  severity: critical
              - alert: IpmiTemperature
                expr: ipmi_temperature_state == 2
                labels:
                  severity: critical
              - alert: IpmiVoltage
                expr: ipmi_voltage_state == 2
                labels:
                  severity: critical
180 severity: critical