# Copyright (c) 2022 VEXXHOST, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

# Applies every Kubernetes object for ipmi-exporter in one kubernetes.core.k8s
# call: the exporter configuration (ConfigMap), the exporter pods (DaemonSet),
# the scrape definition (PodMonitor) and the alerting rules (PrometheusRule).
# All objects live in the "monitoring" namespace.
- name: Deploy service
  kubernetes.core.k8s:
    state: present
    definition:
      # Exporter configuration, rendered from the ipmi_exporter_config
      # variable and exposed as a single "config.yml" key.
      - apiVersion: v1
        kind: ConfigMap
        metadata:
          name: ipmi-exporter
          namespace: monitoring
          labels:
            application: ipmi-exporter
        data:
          config.yml: "{{ ipmi_exporter_config | to_yaml }}"

      # Exporter pods.
      - apiVersion: apps/v1
        kind: DaemonSet
        metadata:
          name: ipmi-exporter
          namespace: monitoring
          labels:
            application: ipmi-exporter
        spec:
          selector:
            matchLabels:
              application: ipmi-exporter
          template:
            metadata:
              annotations:
                # Hash of the rendered configuration. It changes whenever
                # the configuration changes, which alters the pod template
                # (the config file itself is mounted through subPath below).
                config-hash: "{{ ipmi_exporter_config | to_yaml | hash('md5') }}"
              labels:
                application: ipmi-exporter
                # Read by the PodMonitor below through "jobLabel: job".
                job: ipmi
            spec:
              containers:
                - name: exporter
                  image: "{{ ipmi_exporter_image_repository }}/ipmi-exporter:{{ ipmi_exporter_image_tag }}"
                  ports:
                    - name: metrics
                      containerPort: 9290
                  # NOTE(review): presumably required to access the host IPMI
                  # device mounted below -- confirm before narrowing.
                  securityContext:
                    privileged: true
                  volumeMounts:
                    - name: dev-ipmi0
                      mountPath: /dev/ipmi0
                    - name: ipmi-exporter
                      mountPath: /config.yml
                      subPath: config.yml
              volumes:
                # IPMI device taken from the host.
                - name: dev-ipmi0
                  hostPath:
                    path: /dev/ipmi0
                - name: ipmi-exporter
                  configMap:
                    name: ipmi-exporter
              affinity:
                nodeAffinity:
                  requiredDuringSchedulingIgnoredDuringExecution:
                    nodeSelectorTerms:
                      # Keep pods off nodes whose HYPERVISOR CPUID label is
                      # "true"; nodes without the label are still eligible.
                      - matchExpressions:
                          - key: feature.node.kubernetes.io/cpu-cpuid.HYPERVISOR
                            operator: NotIn
                            values: ["true"]

      # Scrape definition for the exporter pods.
      - apiVersion: monitoring.coreos.com/v1
        kind: PodMonitor
        metadata:
          name: ipmi-exporter
          namespace: monitoring
          labels:
            application: ipmi-exporter
            # NOTE(review): presumably the label the kube-prometheus-stack
            # Prometheus uses to discover this object -- confirm.
            release: kube-prometheus-stack
        spec:
          jobLabel: job
          podMetricsEndpoints:
            - port: metrics
              path: /metrics
              interval: 60s
              relabelings:
                # Use the node name as the "instance" label.
                - sourceLabels: ["__meta_kubernetes_pod_node_name"]
                  targetLabel: instance
                # Drop the pod-level labels listed in the regex.
                - action: labeldrop
                  regex: ^(container|endpoint|namespace|pod)$
          selector:
            matchLabels:
              application: ipmi-exporter

      # Alerting rules: one group for collector availability, then one
      # group per sensor state value (1 => warning, 2 => critical).
      - apiVersion: monitoring.coreos.com/v1
        kind: PrometheusRule
        metadata:
          name: ipmi-exporter
          namespace: monitoring
          labels:
            application: ipmi-exporter
            release: kube-prometheus-stack
        spec:
          groups:
            - name: rules
              rules:
                # NOTE(review): unlike every other alert in this file, this
                # one carries no "severity" label -- confirm that is intended.
                - alert: IpmiCollectorDown
                  expr: ipmi_up == 0
            - name: collectors-state-warning
              rules:
                - alert: IpmiCurrent
                  expr: ipmi_current_state == 1
                  labels:
                    severity: warning
                - alert: IpmiFanSpeed
                  expr: ipmi_fan_speed_state == 1
                  labels:
                    severity: warning
                - alert: IpmiPower
                  expr: ipmi_power_state == 1
                  labels:
                    severity: warning
                - alert: IpmiSensor
                  expr: ipmi_sensor_state == 1
                  labels:
                    severity: warning
                - alert: IpmiTemperature
                  expr: ipmi_temperature_state == 1
                  labels:
                    severity: warning
                - alert: IpmiVoltage
                  expr: ipmi_voltage_state == 1
                  labels:
                    severity: warning
            - name: collectors-state-critical
              rules:
                - alert: IpmiCurrent
                  expr: ipmi_current_state == 2
                  labels:
                    severity: critical
                - alert: IpmiFanSpeed
                  expr: ipmi_fan_speed_state == 2
                  labels:
                    severity: critical
                - alert: IpmiPower
                  expr: ipmi_power_state == 2
                  labels:
                    severity: critical
                - alert: IpmiSensor
                  expr: ipmi_sensor_state == 2
                  labels:
                    severity: critical
                - alert: IpmiTemperature
                  expr: ipmi_temperature_state == 2
                  labels:
                    severity: critical
                - alert: IpmiVoltage
                  expr: ipmi_voltage_state == 2
                  labels:
                    severity: critical
  # NOTE(mnaser): Since we haven't moved to the operator pattern yet, we need to
  #               keep retrying a few times as the CRDs might not be installed
  #               yet.
  # Up to 60 attempts, 5 seconds apart, until the module call stops failing.
  retries: 60
  delay: 5
  register: _result
  until: _result is not failed