Skip to content

Commit

Permalink
Merge pull request #6349 from hashicorp/b-host-stats
Browse files: browse the repository at this point in the history
client: Return empty values when host stats fail
  • Loading branch information
preetapan committed Nov 20, 2019
2 parents ac239a3 + 4ba87cc commit d4f801d
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 15 deletions.
4 changes: 4 additions & 0 deletions client/allocrunner/taskrunner/task_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -1335,10 +1335,14 @@ func (tr *TaskRunner) emitStats(ru *cstructs.TaskResourceUsage) {

if ru.ResourceUsage.MemoryStats != nil {
tr.setGaugeForMemory(ru)
} else {
tr.logger.Debug("Skipping memory stats for allocation", "reason", "MemoryStats is nil")
}

if ru.ResourceUsage.CpuStats != nil {
tr.setGaugeForCPU(ru)
} else {
tr.logger.Debug("Skipping cpu stats for allocation", "reason", "CpuStats is nil")
}
}

Expand Down
11 changes: 5 additions & 6 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -2592,12 +2592,11 @@ func (c *Client) emitStats() {
next.Reset(c.config.StatsCollectionInterval)
if err != nil {
c.logger.Warn("error fetching host resource usage stats", "error", err)
continue
}

// Publish Node metrics if operator has opted in
if c.config.PublishNodeMetrics {
c.emitHostStats()
} else {
// Publish Node metrics if operator has opted in
if c.config.PublishNodeMetrics {
c.emitHostStats()
}
}

c.emitClientMetrics()
Expand Down
21 changes: 13 additions & 8 deletions client/stats/host.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package stats

import (
"fmt"
"math"
"runtime"
"sync"
Expand Down Expand Up @@ -117,39 +116,45 @@ func (h *HostStatsCollector) collectLocked() error {
// Determine up-time
uptime, err := host.Uptime()
if err != nil {
return err
h.logger.Error("failed to collect upstime stats", "error", err)
uptime = 0
}
hs.Uptime = uptime

// Collect memory stats
mstats, err := h.collectMemoryStats()
if err != nil {
return err
h.logger.Error("failed to collect memory stats", "error", err)
mstats = &MemoryStats{}
}
hs.Memory = mstats

// Collect cpu stats
cpus, ticks, err := h.collectCPUStats()
if err != nil {
return err
h.logger.Error("failed to collect cpu stats", "error", err)
cpus = []*CPUStats{}
ticks = 0
}
hs.CPU = cpus
hs.CPUTicksConsumed = ticks

// Collect disk stats
diskStats, err := h.collectDiskStats()
if err != nil {
return err
h.logger.Error("failed to collect disk stats", "error", err)
hs.DiskStats = []*DiskStats{}
}
hs.DiskStats = diskStats

// Getting the disk stats for the allocation directory
usage, err := disk.Usage(h.allocDir)
if err != nil {
return fmt.Errorf("failed to find disk usage of alloc_dir %q: %v", h.allocDir, err)
h.logger.Error("failed to find disk usage of alloc", "alloc_dir", h.allocDir, "error", err)
hs.AllocDirStats = &DiskStats{}
} else {
hs.AllocDirStats = h.toDiskStats(usage, nil)
}
hs.AllocDirStats = h.toDiskStats(usage, nil)

// Collect devices stats
deviceStats := h.collectDeviceGroupStats()
hs.DeviceStats = deviceStats
Expand Down
2 changes: 1 addition & 1 deletion command/agent/metrics_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ func TestHTTP_FreshClientAllocMetrics(t *testing.T) {
terminal == float32(numTasks), nil
}, func(err error) {
require.Fail("timed out waiting for metrics to converge",
"pending: %v, running: %v, terminal: %v", pending, running, terminal)
"expected: (pending: 0, running: 0, terminal: %v), got: (pending: %v, running: %v, terminal: %v)", numTasks, pending, running, terminal)
})
})
}

0 comments on commit d4f801d

Please sign in to comment.