# Source blob: b1e96877628df18a1eb6944727c6e48f5f72b015
# Last change (from blame view): Mohammed Naser, 8a2c8fb, 2023-02-19 17:23:55 +0000
groups:
# Alerts on Loki request errors, panics, request latency and compactor count.
- name: loki_alerts
  rules:
  # Fires when, for 15m, more than 10% of requests for a given
  # (namespace, job, route) were answered with a 5xx status code.
  - alert: LokiRequestErrors
    annotations:
      message: |
        {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
    expr: |
      100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route)
      /
      sum(rate(loki_request_duration_seconds_count[2m])) by (namespace, job, route)
      > 10
    for: 15m
    labels:
      severity: critical
  # Fires as soon as loki_panic_total increased within the last 10m for a
  # (namespace, job); there is no `for:` hold on this rule.
  # $value is the (extrapolated) number of panics, not a percentage, so the
  # message reports it as a count.
  - alert: LokiRequestPanics
    annotations:
      message: |
        {{ $labels.job }} is experiencing {{ printf "%.2f" $value }} panics over the last 10m.
    expr: |
      sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
    labels:
      severity: critical
  # Fires when the 99th-percentile series stays above 1 for 15m (the message
  # reports the value in seconds). Routes whose name contains "tail"
  # (case-insensitive) are excluded.
  # NOTE(review): the recording rule
  # namespace_job_route:loki_request_duration_seconds:99quantile is not
  # defined in this section - confirm it is loaded elsewhere.
  - alert: LokiRequestLatency
    annotations:
      message: |
        {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
    expr: |
      namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*"} > 1
    for: 15m
    labels:
      severity: critical
  # Fires when the summed loki_boltdb_shipper_compactor_running value for a
  # (namespace, cluster) stays above 1 for 5m.
  - alert: LokiTooManyCompactorsRunning
    annotations:
      message: |
        {{ $labels.cluster }} {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time.
    expr: |
      sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1
    for: 5m
    labels:
      severity: warning
# Alerts based on the loki_canary_* metrics.
- name: loki_canaries_alerts
  rules:
  # Fires when the 99th percentile of loki_canary_response_latency_seconds,
  # computed from the 5m bucket rate per (namespace, job), stays above 5
  # for 15m (the message reports the value in seconds).
  - alert: LokiCanaryLatency
    annotations:
      message: |
        {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
    expr: |
      histogram_quantile(0.99, sum(rate(loki_canary_response_latency_seconds_bucket[5m])) by (le, namespace, job)) > 5
    for: 15m
    labels:
      severity: warning
52 severity: 'warning'