diff --git a/client/client.go b/client/client.go index 953b58bc4f35..d2c12b05dafe 100644 --- a/client/client.go +++ b/client/client.go @@ -2631,7 +2631,7 @@ func (c *Client) emitClientMetrics() { // Emit allocation metrics blocked, migrating, pending, running, terminal := 0, 0, 0, 0, 0 for _, ar := range c.getAllocRunners() { - switch ar.AllocState().ClientStatus { + switch ar.Alloc().ClientStatus { case structs.AllocClientStatusPending: switch { case ar.IsWaiting(): diff --git a/command/agent/metrics_endpoint_test.go b/command/agent/metrics_endpoint_test.go index 3d8d3a1db1f8..c42fe615a699 100644 --- a/command/agent/metrics_endpoint_test.go +++ b/command/agent/metrics_endpoint_test.go @@ -1,11 +1,16 @@ package agent import ( + "fmt" "net/http" "net/http/httptest" "testing" - metrics "github.com/armon/go-metrics" + "github.com/hashicorp/nomad/nomad/mock" + + "github.com/stretchr/testify/require" + + "github.com/armon/go-metrics" "github.com/hashicorp/nomad/testutil" "github.com/stretchr/testify/assert" ) @@ -56,3 +61,52 @@ func TestHTTP_Metrics(t *testing.T) { }) }) } + +// When emitting metrics, the client should use the local copy of the allocs with +// updated task states (not the copy submitted by the server). 
+//
+// The test hands a mock batch job to testutil.WaitForRunning and then polls
+// /v1/metrics until the client allocation gauges report zero pending and one
+// running allocation.
+func TestHTTP_FreshClientAllocMetrics(t *testing.T) {
+	t.Parallel()
+	require := require.New(t)
+
+	httpTest(t, nil, func(s *TestAgent) {
+		// Make a separate HTTP request first, to ensure Nomad has written
+		// metrics and prevent a race condition. Only the side effect of the
+		// call matters here, so its results are intentionally unused.
+		req, err := http.NewRequest("GET", "/v1/agent/self", nil)
+		require.NoError(err)
+		respW := httptest.NewRecorder()
+		s.Server.AgentSelfRequest(respW, req)
+
+		// Create the job and wait for it to be running. Assert on the result
+		// instead of indexing into it, so an empty result fails the test with
+		// a message rather than panicking.
+		job := mock.BatchJob()
+		started := testutil.WaitForRunning(t, s.RPC, job)
+		require.NotEmpty(started, "WaitForRunning returned no entries")
+
+		// Wait for the metrics to match. Every unsuccessful poll returns an
+		// error describing what was observed, so the failure callback below
+		// can report the last state instead of a bare timeout.
+		testutil.WaitForResult(func() (bool, error) {
+			// Client alloc metrics should reflect that there is one running
+			// alloc and zero pending allocs.
+			req, err := http.NewRequest("GET", "/v1/metrics", nil)
+			if err != nil {
+				return false, err
+			}
+			respW := httptest.NewRecorder()
+
+			obj, err := s.Server.MetricsRequest(respW, req)
+			if err != nil {
+				return false, err
+			}
+
+			// Checked assertion: an unexpected response type becomes a
+			// reportable error rather than a panic. The local name also avoids
+			// shadowing the imported metrics package.
+			summary, ok := obj.(metrics.MetricsSummary)
+			if !ok {
+				return false, fmt.Errorf("unexpected metrics response type %T", obj)
+			}
+
+			// -1 marks a gauge that was not present in the summary at all.
+			var pending, running float32 = -1.0, -1.0
+			for _, g := range summary.Gauges {
+				switch g.Name {
+				case "nomad.client.allocations.pending":
+					pending = g.Value
+				case "nomad.client.allocations.running":
+					running = g.Value
+				}
+			}
+
+			if pending != 0 || running != 1 {
+				return false, fmt.Errorf("pending: %v, running: %v", pending, running)
+			}
+			return true, nil
+		}, func(err error) {
+			require.Fail("timed out waiting for metrics to converge", "last poll result: %v", err)
+		})
+	})
+}