Skip to content

Commit

Permalink
etcdserver: add OS level FD metrics
Browse files Browse the repository at this point in the history
Similar counts are exposed via Prometheus.
This adds the one that are perceived by etcd server.

e.g.

os_fd_limit 120000
os_fd_used 14
process_cpu_seconds_total 0.31
process_max_fds 120000
process_open_fds 17

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
  • Loading branch information
gyuho committed Aug 18, 2020
1 parent b7243f0 commit 55db5b4
Showing 1 changed file with 23 additions and 1 deletion.
24 changes: 23 additions & 1 deletion etcdserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,19 @@ var (
Help: "Server or member ID in hexadecimal format. 1 for 'server_id' label with current ID.",
},
[]string{"server_id"})

fdUsed = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "os",
Subsystem: "fd",
Name: "used",
Help: "The number of used file descriptors.",
})
fdLimit = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "os",
Subsystem: "fd",
Name: "limit",
Help: "The file descriptor limit.",
})
)

func init() {
Expand All @@ -142,6 +155,8 @@ func init() {
prometheus.MustRegister(currentVersion)
prometheus.MustRegister(currentGoVersion)
prometheus.MustRegister(serverID)
prometheus.MustRegister(fdUsed)
prometheus.MustRegister(fdLimit)

currentVersion.With(prometheus.Labels{
"server_version": version.Version,
Expand All @@ -152,19 +167,26 @@ func init() {
}

func monitorFileDescriptor(done <-chan struct{}) {
ticker := time.NewTicker(5 * time.Second)
// This ticker will check File Descriptor Requirements ,and count all fds in used.
// And recorded some logs when in used >= limit/5*4. Just recorded message.
// If fds was more than 10K,It's low performance due to FDUsage() works.
// So need to increase it.
// See https://github.com/etcd-io/etcd/issues/11969 for more detail.
ticker := time.NewTicker(10 * time.Minute)
defer ticker.Stop()
for {
used, err := runtime.FDUsage()
if err != nil {
plog.Errorf("cannot monitor file descriptor usage (%v)", err)
return
}
fdUsed.Set(float64(used))
limit, err := runtime.FDLimit()
if err != nil {
plog.Errorf("cannot monitor file descriptor usage (%v)", err)
return
}
fdLimit.Set(float64(limit))
if used >= limit/5*4 {
plog.Warningf("80%% of the file descriptor limit is used [used = %d, limit = %d]", used, limit)
}
Expand Down

0 comments on commit 55db5b4

Please sign in to comment.