-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathalert.rules
119 lines (107 loc) · 3.99 KB
/
alert.rules
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Prometheus-style alerting rules for host-level metrics.
# One rule group ("node/server"); every rule except InstanceDown carries the
# label `service: node_exporter`.
# Severity values used in this file: warning, major, critical.
groups:
  - name: node/server
    rules:
      # Scrape target reported `up == 0` continuously for 3 minutes.
      - alert: InstanceDown
        expr: up == 0
        for: 3m
        labels:
          severity: warning
        annotations:
          title: 'Instance {{ $labels.instance }} down'
          description: '{{ $labels.job }} on {{ $labels.instance }} has been down for more than 3 minutes'

      # Filesystem flagged read-only; rootfs and nfs4 filesystem types are excluded.
      - alert: NodeFilesystemReadonly
        expr: node_filesystem_readonly{fstype!~"rootfs|nfs4"} > 0
        for: 5m
        labels:
          severity: critical
          service: node_exporter
        annotations:
          description: 'Filesystem "{{ $labels.mountpoint }}" on "{{ $labels.instance }}" is read-only.'

      # Linear extrapolation of the last 4h of free bytes, projected 48h ahead,
      # drops below zero. rootfs, nfs4 and tmpfs are excluded.
      - alert: NodeDiskFull48H
        expr: predict_linear(node_filesystem_free_bytes{fstype!~"rootfs|nfs4|tmpfs"}[4h], 48 * 3600) < 0
        for: 5m
        labels:
          severity: major
          service: node_exporter
        annotations:
          description: 'Filesystem "{{ $labels.mountpoint }}" on "{{ $labels.instance }}" will be out of diskspace within 48 hours.'

      # Same extrapolation as NodeDiskFull48H, projected 1h (3600 s) ahead.
      - alert: NodeDiskFull1H
        expr: predict_linear(node_filesystem_free_bytes{fstype!~"rootfs|nfs4|tmpfs"}[4h], 3600) < 0
        for: 5m
        labels:
          severity: critical
          service: node_exporter
        annotations:
          description: 'Filesystem "{{ $labels.mountpoint }}" on "{{ $labels.instance }}" will be out of diskspace within 1 hour.'

      # Available/size ratio below 1%.
      # NOTE(review): unlike the predictive disk rules, no fstype filter is
      # applied here - confirm that is intended.
      - alert: NodeDiskFull
        expr: node_filesystem_avail_bytes/node_filesystem_size_bytes < 0.01
        for: 5m
        labels:
          severity: critical
          service: node_exporter
        annotations:
          description: 'Filesystem "{{ $labels.mountpoint }}" on "{{ $labels.instance }}" is out of diskspace (< 1% free).'

      # Linear extrapolation of the last 4h of free inodes, projected 48h ahead,
      # drops below zero. rootfs, nfs4 and tmpfs are excluded.
      - alert: NodeInodeFull48H
        expr: predict_linear(node_filesystem_files_free{fstype!~"rootfs|nfs4|tmpfs"}[4h], 48 * 3600) < 0
        for: 5m
        labels:
          severity: major
          service: node_exporter
        annotations:
          description: 'Filesystem "{{ $labels.mountpoint }}" on "{{ $labels.instance }}" will be out of inode numbers within 48 hours.'

      # Same extrapolation as NodeInodeFull48H, projected 1h (3600 s) ahead.
      - alert: NodeInodeFull1H
        expr: predict_linear(node_filesystem_files_free{fstype!~"rootfs|nfs4|tmpfs"}[4h], 3600) < 0
        for: 5m
        labels:
          severity: critical
          service: node_exporter
        annotations:
          description: 'Filesystem "{{ $labels.mountpoint }}" on "{{ $labels.instance }}" will be out of inode numbers within 1 hour.'

      # Free/total inode ratio below 1%.
      # NOTE(review): no fstype filter here either - confirm that is intended.
      - alert: NodeInodeFull
        expr: node_filesystem_files_free/node_filesystem_files < 0.01
        for: 5m
        labels:
          severity: critical
          service: node_exporter
        annotations:
          description: 'Filesystem "{{ $labels.mountpoint }}" on "{{ $labels.instance }}" out of inodes (< 1% free).'

      # MemAvailable is under 10% of MemTotal for 2 minutes.
      - alert: NodeOutOfMemory
        expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
        for: 2m
        labels:
          severity: warning
          service: node_exporter
        annotations:
          # Double-quoted so that \n is a real line break; inside single quotes
          # YAML keeps the backslash and the letter n literally.
          description: "Node memory is filling up < 10% left\n VALUE = {{ $value }}\n LABELS: {{ $labels.instance }}"

      # 100 minus the per-instance average idle-CPU rate (2m window, as a
      # percentage) exceeds 90.
      # NOTE(review): `for: 0m` means there is no hold period, so a single
      # evaluation above the threshold fires the alert - confirm that is intended.
      - alert: NodeHighCPULoad
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 90
        for: 0m
        labels:
          severity: warning
          service: node_exporter
        annotations:
          # Double-quoted so that \n is a real line break (see NodeOutOfMemory).
          description: "CPU load is > 90%\n VALUE = {{ $value }}\n LABELS: {{ $labels.instance }}"

      # node_timex_sync_status is anything other than 1 for 5 minutes.
      - alert: NodeTimeOutOfSync
        expr: node_timex_sync_status{} != 1
        for: 5m
        labels:
          severity: warning
          service: node_exporter
        annotations:
          description: 'Time on instance "{{ $labels.instance }}" not in sync with NTP.'

      # The textfile's mtime is more than 3000 s (50 min) in the past.
      # NOTE(review): the description says "5 minutes", which matches `for`
      # rather than the 3000 s staleness threshold - confirm the wording.
      - alert: NodeTextfileCollectorDown
        expr: time() - node_textfile_mtime_seconds{} > 3000
        for: 5m
        labels:
          severity: warning
          service: node_exporter
        annotations:
          description: 'Node-exporter textfile collector for file "{{ $labels.file }}" on "{{ $labels.instance }}" has been down for 5 minutes.'

      # node_textfile_scrape_error is non-zero for 5 minutes.
      - alert: NodeTextfileScrapingError
        expr: node_textfile_scrape_error != 0
        for: 5m
        labels:
          severity: warning
          service: node_exporter
        annotations:
          description: 'Node-exporter textfile collector scraping error on "{{ $labels.instance }}".'