From 627523e96c49d1c2758c8bb1cf5676b58d16b9cc Mon Sep 17 00:00:00 2001 From: Ray Allan Date: Wed, 20 Mar 2019 18:05:08 +0000 Subject: [PATCH] add noiseDelta to telemetry test --- .../tests/functionaltests_test.go | 35 ++++++++++++------- agent/functional_tests/util/utils.go | 13 +++---- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/agent/functional_tests/tests/functionaltests_test.go b/agent/functional_tests/tests/functionaltests_test.go index 7cc31b272e4..47b8a93e785 100644 --- a/agent/functional_tests/tests/functionaltests_test.go +++ b/agent/functional_tests/tests/functionaltests_test.go @@ -474,6 +474,9 @@ func telemetryTest(t *testing.T, taskDefinition string) { // Try to let the container use 25% cpu, but bound it within valid range cpuShare, expectedCPUPercentage := calculateCpuLimits(0.25) + // account for docker stats / CloudWatch noise + statsNoiseDelta := 5.0 + // Try to use a new cluster for this test, ensure no other task metrics for this cluster newClusterName := "ecstest-telemetry-" + uuid.New() _, err := ECS.CreateCluster(&ecsapi.CreateClusterInput{ @@ -511,11 +514,12 @@ func telemetryTest(t *testing.T, taskDefinition string) { time.Sleep(waitMetricsInCloudwatchDuration) cwclient := cloudwatch.New(session.New(), aws.NewConfig().WithRegion(*ECS.Config.Region)) - _, err = VerifyMetrics(cwclient, params, true) + + _, err = VerifyMetrics(cwclient, params, true, statsNoiseDelta) assert.NoError(t, err, "Before task running, verify metrics for CPU utilization failed") params.MetricName = aws.String("MemoryUtilization") - _, err = VerifyMetrics(cwclient, params, true) + _, err = VerifyMetrics(cwclient, params, true, statsNoiseDelta) assert.NoError(t, err, "Before task running, verify metrics for memory utilization failed") tdOverrides := make(map[string]string) @@ -531,19 +535,21 @@ func telemetryTest(t *testing.T, taskDefinition string) { params.EndTime = aws.Time(RoundTimeUp(time.Now(), time.Minute).UTC()) params.StartTime = aws.Time((*params.EndTime).Add(-waitMetricsInCloudwatchDuration).UTC()) params.MetricName = aws.String("CPUUtilization") - metrics, err := VerifyMetrics(cwclient, params, false) + metrics, err := VerifyMetrics(cwclient, params, false, 0.0) assert.NoError(t, err, "Task is running, verify metrics for CPU utilization failed") - // Also verify the cpu usage is around expectedCPUPercentage +/- 5% - assert.InDelta(t, expectedCPUPercentage*100.0, *metrics.Average, 5) + // Also verify the cpu usage is around expectedCPUPercentage + // +/- StatsNoiseDelta percentage + assert.InDelta(t, expectedCPUPercentage*100.0, *metrics.Average, statsNoiseDelta) params.MetricName = aws.String("MemoryUtilization") - metrics, err = VerifyMetrics(cwclient, params, false) + metrics, err = VerifyMetrics(cwclient, params, false, 0.0) assert.NoError(t, err, "Task is running, verify metrics for memory utilization failed") memInfo, err := system.ReadMemInfo() require.NoError(t, err, "Acquiring system info failed") totalMemory := memInfo.MemTotal / bytePerMegabyte - // Verify the memory usage is around 1024/totalMemory +/- 5% - assert.InDelta(t, float32(1024*100)/float32(totalMemory), *metrics.Average, 5) + // Verify the memory usage is around 1024/totalMemory + // +/- StatsNoiseDelta percentage + assert.InDelta(t, float32(1024*100)/float32(totalMemory), *metrics.Average, statsNoiseDelta) err = testTask.Stop() require.NoError(t, err, "Failed to stop the telemetry task") @@ -555,11 +561,11 @@ func telemetryTest(t *testing.T, taskDefinition string) { params.EndTime = aws.Time(RoundTimeUp(time.Now(), time.Minute).UTC()) params.StartTime = aws.Time((*params.EndTime).Add(-waitMetricsInCloudwatchDuration).UTC()) params.MetricName = aws.String("CPUUtilization") - _, err = VerifyMetrics(cwclient, params, true) + _, err = VerifyMetrics(cwclient, params, true, statsNoiseDelta) assert.NoError(t, err, "Task stopped: verify metrics for CPU utilization failed") params.MetricName = aws.String("MemoryUtilization") - _, err = VerifyMetrics(cwclient, params, true) + _, err = VerifyMetrics(cwclient, params, true, statsNoiseDelta) assert.NoError(t, err, "Task stopped, verify metrics for memory utilization failed") } @@ -570,6 +576,9 @@ func telemetryTestWithStatsPolling(t *testing.T, taskDefinition string) { // Try to let the container use 25% cpu, but bound it within valid range cpuShare, expectedCPUPercentage := calculateCpuLimits(0.25) + // account for docker stats / CloudWatch noise + statsNoiseDelta := 5.0 + // Try to use a new cluster for this test, ensure no other task metrics for this cluster newClusterName := "ecstest-telemetry-polling-" + uuid.New() _, err := ECS.CreateCluster(&ecsapi.CreateClusterInput{ @@ -621,13 +630,13 @@ func telemetryTestWithStatsPolling(t *testing.T, taskDefinition string) { params.EndTime = aws.Time(RoundTimeUp(time.Now(), time.Minute).UTC()) params.StartTime = aws.Time((*params.EndTime).Add(-waitMetricsInCloudwatchDuration).UTC()) params.MetricName = aws.String("CPUUtilization") - metrics, err := VerifyMetrics(cwclient, params, false) + metrics, err := VerifyMetrics(cwclient, params, false, 0.0) assert.NoError(t, err, "Task is running, verify metrics for CPU utilization failed") // Also verify the cpu usage is around expectedCPUPercentage +/- 5% - assert.InDelta(t, expectedCPUPercentage*100.0, *metrics.Average, 5) + assert.InDelta(t, expectedCPUPercentage*100.0, *metrics.Average, statsNoiseDelta) params.MetricName = aws.String("MemoryUtilization") - metrics, err = VerifyMetrics(cwclient, params, false) + metrics, err = VerifyMetrics(cwclient, params, false, 0.0) assert.NoError(t, err, "Task is running, verify metrics for memory utilization failed") memInfo, err := system.ReadMemInfo() require.NoError(t, err, "Acquiring system info failed") diff --git a/agent/functional_tests/util/utils.go b/agent/functional_tests/util/utils.go index 4671d3175be..a0e992edddd 100644 --- a/agent/functional_tests/util/utils.go +++ b/agent/functional_tests/util/utils.go @@ -334,7 +334,9 @@ func DeleteCluster(t *testing.T, clusterName string) { // VerifyMetrics whether the response is as expected // the expected value can be 0 or positive -func VerifyMetrics(cwclient *cloudwatch.CloudWatch, params *cloudwatch.GetMetricStatisticsInput, idleCluster bool) (*cloudwatch.Datapoint, error) { +// noiseDelta should be significantly less than the percentage of cpu/memory we +// use for non-idle workload. +func VerifyMetrics(cwclient *cloudwatch.CloudWatch, params *cloudwatch.GetMetricStatisticsInput, idleCluster bool, noiseDelta float64) (*cloudwatch.Datapoint, error) { resp, err := cwclient.GetMetricStatistics(params) if err != nil { return nil, fmt.Errorf("Error getting metrics of cluster: %v", err) @@ -353,14 +355,13 @@ func VerifyMetrics(cwclient *cloudwatch.CloudWatch, params *cloudwatch.GetMetric if *datapoint.SampleCount != 1.0 { return nil, fmt.Errorf("Incorrect SampleCount %f, expected 1", *datapoint.SampleCount) } - if idleCluster { - if *datapoint.Average != 0.0 { - return nil, fmt.Errorf("non-zero utilization for idle cluster") + if *datapoint.Average >= noiseDelta { + return nil, fmt.Errorf("utilization is >= expected noise delta for idle cluster") } } else { - if *datapoint.Average == 0.0 { - return nil, fmt.Errorf("utilization is zero for non-idle cluster") + if *datapoint.Average < noiseDelta { + return nil, fmt.Errorf("utilization is < expected noise delta for non-idle cluster") } } return datapoint, nil