forked from ncabatoff/dbms_exporter
-
Notifications
You must be signed in to change notification settings - Fork 0
/
postgres.yaml
297 lines (286 loc) · 13.1 KB
/
postgres.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
pg_stat_bgwriter:
query: "SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time / 1000 as checkpoint_write_time_seconds,
checkpoint_sync_time / 1000 as checkpoint_sync_time_seconds, buffers_checkpoint, buffers_clean,
maxwritten_clean, buffers_backend, buffers_backend_fsync, buffers_alloc, stats_reset
FROM pg_stat_bgwriter"
metrics:
- checkpoints_timed:
usage: "COUNTER"
description: "Number of scheduled checkpoints that have been performed"
- checkpoints_req:
usage: "COUNTER"
description: "Number of requested checkpoints that have been performed"
- checkpoint_write_time_seconds:
usage: "COUNTER"
description: "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk"
- checkpoint_sync_time_seconds:
usage: "COUNTER"
description: "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk"
- buffers_checkpoint:
usage: "COUNTER"
description: "Number of buffers written during checkpoints"
- buffers_clean:
usage: "COUNTER"
description: "Number of buffers written by the background writer"
- maxwritten_clean:
usage: "COUNTER"
description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers"
- buffers_backend:
usage: "COUNTER"
description: "Number of buffers written directly by a backend"
- buffers_backend_fsync:
usage: "COUNTER"
description: "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)"
- buffers_alloc:
usage: "COUNTER"
description: "Number of buffers allocated"
- stats_reset:
usage: "GAUGE"
description: "Time at which these statistics were last reset"
pg_stat_database:
query: "SELECT datid, datname, numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned,
tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes,
deadlocks, blk_read_time, blk_write_time, stats_reset
FROM pg_stat_database"
metrics:
- datid:
usage: "LABEL"
description: "OID of this database"
- datname:
usage: "LABEL"
description: "Name of this database"
- numbackends:
usage: "GAUGE"
description: "Number of backends currently connected to this database. This is the only column in this view that returns a value reflecting current state; all other columns return the accumulated values since the last reset."
- xact_commit:
usage: "COUNTER"
description: "Number of transactions in this database that have been committed"
- xact_rollback:
usage: "COUNTER"
description: "Number of transactions in this database that have been rolled back"
- blks_read:
usage: "COUNTER"
description: "Number of disk blocks read in this database"
- blks_hit:
usage: "COUNTER"
description: "Number of times disk blocks were found already in the buffer cache, so that a read was not necessary (this only includes hits in the PostgreSQL buffer cache, not the operating system's file system cache)"
- tup_returned:
usage: "COUNTER"
description: "Number of rows returned by queries in this database"
- tup_fetched:
usage: "COUNTER"
description: "Number of rows fetched by queries in this database"
- tup_inserted:
usage: "COUNTER"
description: "Number of rows inserted by queries in this database"
- tup_updated:
usage: "COUNTER"
description: "Number of rows updated by queries in this database"
- tup_deleted:
usage: "COUNTER"
description: "Number of rows deleted by queries in this database"
- conflicts:
usage: "COUNTER"
description: "Number of queries canceled due to conflicts with recovery in this database. (Conflicts occur only on standby servers; see pg_stat_database_conflicts for details.)"
- temp_files:
usage: "COUNTER"
description: "Number of temporary files created by queries in this database. All temporary files are counted, regardless of why the temporary file was created (e.g., sorting or hashing), and regardless of the log_temp_files setting."
- temp_bytes:
usage: "COUNTER"
description: "Total amount of data written to temporary files by queries in this database. All temporary files are counted, regardless of why the temporary file was created, and regardless of the log_temp_files setting."
- deadlocks:
usage: "COUNTER"
description: "Number of deadlocks detected in this database"
- blk_read_time:
usage: "COUNTER"
description: "Time spent reading data file blocks by backends in this database, in milliseconds"
- blk_write_time:
usage: "COUNTER"
description: "Time spent writing data file blocks by backends in this database, in milliseconds"
- stats_reset:
usage: "GAUGE"
description: "Time at which these statistics were last reset"
pg_stat_database_conflicts:
query: "SELECT datid, datname, confl_tablespace, confl_lock, confl_snapshot, confl_bufferpin, confl_deadlock FROM pg_stat_database_conflicts"
metrics:
- datid:
usage: "LABEL"
description: "OID of a database"
- datname:
usage: "LABEL"
description: "Name of this database"
- confl_tablespace:
usage: "COUNTER"
description: "Number of queries in this database that have been canceled due to dropped tablespaces"
- confl_lock:
usage: "COUNTER"
description: "Number of queries in this database that have been canceled due to lock timeouts"
- confl_snapshot:
usage: "COUNTER"
description: "Number of queries in this database that have been canceled due to old snapshots"
- confl_bufferpin:
usage: "COUNTER"
description: "Number of queries in this database that have been canceled due to pinned buffers"
- confl_deadlock:
usage: "COUNTER"
description: "Number of queries in this database that have been canceled due to deadlocks"
pg_locks:
query: "SELECT pg_database.datname,tmp.mode,COALESCE(count,0) as count
FROM (VALUES ('accesssharelock'),('rowsharelock'),('rowexclusivelock'),('shareupdateexclusivelock'),
('sharelock'),('sharerowexclusivelock'),('exclusivelock'),('accessexclusivelock'))
AS tmp(mode)
CROSS JOIN pg_database
LEFT JOIN (SELECT database, lower(mode) AS mode,count(*) AS count
FROM pg_locks
WHERE database IS NOT NULL
GROUP BY database, lower(mode)) AS tmp2
ON tmp.mode = tmp2.mode AND pg_database.oid = tmp2.database
ORDER BY 1"
metrics:
- datname:
usage: "LABEL"
description: "Name of this database"
- mode:
usage: "LABEL"
description: "Type of Lock"
- count:
usage: "GAUGE"
description: "Number of locks"
# pg_xlog_location_diff is gone in recent PG versions, need to figure out the
# right modern equiv, and if there isn't a common way to do it, how to manage
# multiple versions.
#pg_stat_replication:
# query: "SELECT client_addr, state,
# pg_xlog_location_diff(pg_current_xlog_location(), replay_location)::float
# FROM pg_stat_replication"
# metrics:
# - client_addr:
# usage: "LABEL"
# description: "IP address of the client connected to this WAL sender. If this field is null, it indicates that the client is connected via a Unix socket on the server machine."
# - state:
# usage: "LABEL"
# description: "Current WAL sender state"
# - pg_xlog_location_diff:
# usage: "GAUGE"
# description: "Lag in bytes between master and slave"
pg_stat_activity:
query: "SELECT pg_database.datname,
tmp.state,
COALESCE(count,0) as count,
COALESCE(max_tx_duration,0) as max_tx_duration
FROM
(VALUES ('active'),('idle'),('idle in transaction'),('idle in transaction (aborted)'),('fastpath function call'),('disabled')) as tmp(state)
CROSS JOIN pg_database
LEFT JOIN (SELECT datname, state, count(*) AS count, MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration
FROM pg_stat_activity GROUP BY datname,state) as tmp2
ON tmp.state = tmp2.state AND pg_database.datname = tmp2.datname"
metrics:
- datname:
usage: "LABEL"
description: "Name of this database"
- state:
usage: "LABEL"
description: "connection state"
- count:
usage: "GAUGE"
description: "number of connections in this state"
- max_tx_duration:
usage: "GAUGE"
description: "max duration in seconds any active transaction has been running"
pg_replication:
query: "SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))::INT as lag"
metrics:
- lag:
usage: "GAUGE"
description: "Replication lag behind master in seconds"
pg_postmaster:
query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"
metrics:
- start_time_seconds:
usage: "GAUGE"
description: "Time at which postmaster started"
pg_settings_shared_buffers:
query: "SELECT 8192*setting::int as bytes from pg_settings where name = 'shared_buffers'"
metrics:
- bytes:
usage: "GAUGE"
description: "Size of shared_buffers"
pg_settings_checkpoint:
query: "select (select setting::int from pg_settings where name = 'checkpoint_segments') as segments,
(select setting::int from pg_settings where name = 'checkpoint_timeout') as timeout_seconds,
(select setting::float from pg_settings where name = 'checkpoint_completion_target') as completion_target"
metrics:
- segments:
usage: "GAUGE"
description: "Number of checkpoint segments"
- timeout_seconds:
usage: "GAUGE"
description: "Checkpoint timeout in seconds"
- completion_target:
usage: "GAUGE"
description: "Checkpoint completion target, ranging from 0 to 1"
pg_stat_user_tables:
metrics:
- schemaname:
usage: "LABEL"
description: "Name of the schema that this table is in"
- relname:
usage: "LABEL"
description: "Name of this table"
- seq_scan:
usage: "COUNTER"
description: "Number of sequential scans initiated on this table"
- seq_tup_read:
usage: "COUNTER"
description: "Number of live rows fetched by sequential scans"
- idx_scan:
usage: "COUNTER"
description: "Number of index scans initiated on this table"
- idx_tup_fetch:
usage: "COUNTER"
description: "Number of live rows fetched by index scans"
- n_tup_ins:
usage: "COUNTER"
description: "Number of rows inserted"
- n_tup_upd:
usage: "COUNTER"
description: "Number of rows updated"
- n_tup_del:
usage: "COUNTER"
description: "Number of rows deleted"
- n_tup_hot_upd:
usage: "COUNTER"
description: "Number of rows HOT updated (i.e., with no separate index update required)"
- n_live_tup:
usage: "GAUGE"
description: "Estimated number of live rows"
- n_dead_tup:
usage: "GAUGE"
description: "Estimated number of dead rows"
- n_mod_since_analyze:
usage: "GAUGE"
description: "Estimated number of rows changed since last analyze"
- last_vacuum:
usage: "GAUGE"
description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)"
- last_autovacuum:
usage: "GAUGE"
description: "Last time at which this table was vacuumed by the autovacuum daemon"
- last_analyze:
usage: "GAUGE"
description: "Last time at which this table was manually analyzed"
- last_autoanalyze:
usage: "GAUGE"
description: "Last time at which this table was analyzed by the autovacuum daemon"
- vacuum_count:
usage: "COUNTER"
description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)"
- autovacuum_count:
usage: "COUNTER"
description: "Number of times this table has been vacuumed by the autovacuum daemon"
- analyze_count:
usage: "COUNTER"
description: "Number of times this table has been manually analyzed"
- autoanalyze_count:
usage: "COUNTER"
description: "Number of times this table has been analyzed by the autovacuum daemon"