blob: 58056ef7c9ef6fa334955c45efd29a24f24d5264 [file] [log] [blame]
# Copyright (c) 2022 VEXXHOST, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
14
# Publish the exporter configuration as the `config.yml` key of the
# `ipmi-exporter` ConfigMap in the `monitoring` namespace. The value is the
# `ipmi_exporter_config` variable serialized with the `to_yaml` filter.
- name: Create ConfigMap
  kubernetes.core.k8s:
    state: present
    definition:
      kind: ConfigMap
      apiVersion: v1
      metadata:
        namespace: monitoring
        name: ipmi-exporter
        labels:
          application: ipmi-exporter
      data:
        # Keep the Jinja expression quoted: a value that starts with `{{`
        # would otherwise be parsed by YAML as a flow mapping.
        config.yml: "{{ ipmi_exporter_config | to_yaml }}"
28
# Run one `ipmi-exporter` pod per eligible node. Each pod exposes port 9290
# under the name `metrics`, mounts the host's /dev/ipmi0 and receives its
# configuration file from the `ipmi-exporter` ConfigMap.
- name: Create DaemonSet
  kubernetes.core.k8s:
    state: present
    definition:
      kind: DaemonSet
      apiVersion: apps/v1
      metadata:
        namespace: monitoring
        name: ipmi-exporter
        labels:
          application: ipmi-exporter
      spec:
        selector:
          matchLabels:
            application: ipmi-exporter
        template:
          metadata:
            labels:
              application: ipmi-exporter
              job: ipmi
            annotations:
              # Hash of the rendered configuration: whenever the config
              # changes, the pod template changes with it.
              config-hash: "{{ ipmi_exporter_config | to_yaml | hash('md5') }}"
          spec:
            # Only schedule onto nodes whose HYPERVISOR feature label is not
            # "true" (nodes without the label also match `NotIn`).
            # NOTE(review): presumably this label comes from Node Feature
            # Discovery and marks virtual machines -- confirm.
            affinity:
              nodeAffinity:
                requiredDuringSchedulingIgnoredDuringExecution:
                  nodeSelectorTerms:
                    - matchExpressions:
                        - key: feature.node.kubernetes.io/cpu-cpuid.HYPERVISOR
                          operator: NotIn
                          values:
                            - "true"
            containers:
              - name: exporter
                image: "{{ ipmi_exporter_image_repository }}/ipmi-exporter:{{ ipmi_exporter_image_tag }}"
                # NOTE(review): presumably privileged so the container can
                # open the host IPMI device -- confirm.
                securityContext:
                  privileged: true
                ports:
                  - name: metrics
                    containerPort: 9290
                volumeMounts:
                  - name: dev-ipmi0
                    mountPath: /dev/ipmi0
                  # `subPath` mounts just the one ConfigMap key as a file.
                  - name: ipmi-exporter
                    mountPath: /config.yml
                    subPath: config.yml
            volumes:
              - name: dev-ipmi0
                hostPath:
                  path: /dev/ipmi0
              - name: ipmi-exporter
                configMap:
                  name: ipmi-exporter
Mohammed Naser2144b342022-05-23 10:25:31 -040081
# Scrape the exporter pods: select pods labelled
# `application: ipmi-exporter` and scrape `/metrics` on their `metrics` port
# every 60 seconds.
- name: Create PodMonitor
  kubernetes.core.k8s:
    state: present
    definition:
      kind: PodMonitor
      apiVersion: monitoring.coreos.com/v1
      metadata:
        namespace: monitoring
        name: ipmi-exporter
        labels:
          application: ipmi-exporter
          release: kube-prometheus-stack
      spec:
        selector:
          matchLabels:
            application: ipmi-exporter
        # Take the Prometheus job name from the pods' `job` label.
        jobLabel: job
        podMetricsEndpoints:
          - port: metrics
            path: /metrics
            interval: 60s
            # Relabeling rules are applied in order; keep this sequence.
            relabelings:
              # Use the node name as the `instance` label.
              - sourceLabels:
                  - __meta_kubernetes_pod_node_name
                targetLabel: instance
              # Drop the listed target labels.
              - action: labeldrop
                regex: '^(container|endpoint|namespace|pod)$'
108
# Alerting rules for the exporter, in three groups:
#   * rules                     - `ipmi_up == 0`
#   * collectors-state-warning  - `*_state == 1`, severity `warning`
#   * collectors-state-critical - `*_state == 2`, severity `critical`
# The two state groups reuse the same alert names on purpose; the alerts
# differ by their `severity` label.
- name: Create PrometheusRule
  kubernetes.core.k8s:
    state: present
    definition:
      kind: PrometheusRule
      apiVersion: monitoring.coreos.com/v1
      metadata:
        namespace: monitoring
        name: ipmi-exporter
        labels:
          application: ipmi-exporter
          release: kube-prometheus-stack
      spec:
        groups:
          - name: rules
            rules:
              # NOTE(review): unlike every other alert in this file, this
              # one carries no `severity` label -- confirm that is intended.
              - alert: IpmiCollectorDown
                expr: ipmi_up == 0

          - name: collectors-state-warning
            rules:
              - alert: IpmiCurrent
                expr: ipmi_current_state == 1
                labels:
                  severity: warning
              - alert: IpmiFanSpeed
                expr: ipmi_fan_speed_state == 1
                labels:
                  severity: warning
              - alert: IpmiPower
                expr: ipmi_power_state == 1
                labels:
                  severity: warning
              - alert: IpmiSensor
                expr: ipmi_sensor_state == 1
                labels:
                  severity: warning
              - alert: IpmiTemperature
                expr: ipmi_temperature_state == 1
                labels:
                  severity: warning
              - alert: IpmiVoltage
                expr: ipmi_voltage_state == 1
                labels:
                  severity: warning

          - name: collectors-state-critical
            rules:
              - alert: IpmiCurrent
                expr: ipmi_current_state == 2
                labels:
                  severity: critical
              - alert: IpmiFanSpeed
                expr: ipmi_fan_speed_state == 2
                labels:
                  severity: critical
              - alert: IpmiPower
                expr: ipmi_power_state == 2
                labels:
                  severity: critical
              - alert: IpmiSensor
                expr: ipmi_sensor_state == 2
                labels:
                  severity: critical
              - alert: IpmiTemperature
                expr: ipmi_temperature_state == 2
                labels:
                  severity: critical
              - alert: IpmiVoltage
                expr: ipmi_voltage_state == 2
                labels:
                  severity: critical