Skip to content

Commit

Permalink
client/metrics: modified metrics to use (updated) client copy of allo…
Browse files Browse the repository at this point in the history
…cation instead of (unupdated) server copy
  • Loading branch information
Chris Baker committed Apr 19, 2019
1 parent 8a0df40 commit d0021cd
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 2 deletions.
2 changes: 1 addition & 1 deletion client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -2638,7 +2638,7 @@ func (c *Client) emitClientMetrics() {
// Emit allocation metrics
blocked, migrating, pending, running, terminal := 0, 0, 0, 0, 0
for _, ar := range c.getAllocRunners() {
switch ar.Alloc().ClientStatus {
switch ar.AllocState().ClientStatus {
case structs.AllocClientStatusPending:
switch {
case ar.IsWaiting():
Expand Down
84 changes: 83 additions & 1 deletion command/agent/metrics_endpoint_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
package agent

import (
"fmt"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"

metrics "github.com/armon/go-metrics"
"github.com/hashicorp/nomad/nomad/structs"

"github.com/hashicorp/nomad/nomad/mock"

"github.com/stretchr/testify/require"

"github.com/armon/go-metrics"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/assert"
)
Expand Down Expand Up @@ -56,3 +65,76 @@ func TestHTTP_Metrics(t *testing.T) {
})
})
}

// When emitting metrics, the client should use the local copy of the allocs with
// updated task states (not the copy submitted by the server).
func TestHTTP_FreshClientAllocMetrics(t *testing.T) {
t.Parallel()
require := require.New(t)

numTasks := 10

httpTest(t, func(c *Config) {
c.Telemetry.PublishAllocationMetrics = true
c.Telemetry.PublishNodeMetrics = true
c.Telemetry.BackwardsCompatibleMetrics = false
c.Telemetry.DisableTaggedMetrics = false
}, func(s *TestAgent) {
// make a separate HTTP request first, to ensure Nomad has written metrics
// and prevent a race condition
//req, err := http.NewRequest("GET", "/v1/agent/self", nil)
//require.NoError(err)
//respW := httptest.NewRecorder()
//s.Server.AgentSelfRequest(respW, req)

// Create the job, wait for it to finish
job := mock.BatchJob()
job.TaskGroups[0].Count = numTasks
testutil.RegisterJob(t, s.RPC, job)
testutil.WaitForResult(func() (bool, error) {
args := &structs.JobSpecificRequest{}
args.JobID = job.ID
args.QueryOptions.Region = "global"
var resp structs.SingleJobResponse
err := s.RPC("Job.GetJob", args, &resp)
require.NoError(err)
return resp.Job.Status != "dead", nil
}, func(err error) {
require.Fail("timed-out waiting for job to complete")
})

// wait for metrics to converge
var pending, running, terminal float32 = -1.0, -1.0, -1.0
testutil.WaitForResultRetries(100, func() (bool, error) {
time.Sleep(100 * time.Millisecond)
// client alloc metrics should reflect that there is one running alloc and zero pending allocs
req, err := http.NewRequest("GET", "/v1/metrics", nil)
require.NoError(err)
respW := httptest.NewRecorder()

obj, err := s.Server.MetricsRequest(respW, req)
require.NoError(err)

metrics := obj.(metrics.MetricsSummary)
for _, g := range metrics.Gauges {
if strings.Contains(g.Name, "allocations") {
fmt.Println(fmt.Sprintf("%v: %v", g.Name, g.Value))
}
if strings.HasSuffix(g.Name, "client.allocations.pending") {
pending = g.Value
}
if strings.HasSuffix(g.Name, "client.allocations.running") {
running = g.Value
}
if strings.HasSuffix(g.Name, "client.allocations.terminal") {
terminal = g.Value
}
}
return pending == float32(0) && running == float32(0) &&
terminal == float32(numTasks), nil
}, func(err error) {
require.Fail("timed out waiting for metrics to converge",
"pending: %v, running: %v, terminal: %v", pending, running, terminal)
})
})
}

0 comments on commit d0021cd

Please sign in to comment.