From 81db54a529f97d843f449cfe945fd1f0b1b752af Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Mon, 6 Nov 2023 16:28:23 -0500 Subject: [PATCH] chore: Fix flaky metrics tests (#1865) (#1993) This fixes a few flaky unit tests that relied on `Thread.sleep` to ensure that all metrics processing was done. Rather than using `Thread.sleep`, we can instead use an inline event queue in the OpenCensus stats component to execute all work inline, removing the need to wait for anything to finish. Co-authored-by: Steven Niemitz --- .../metrics/BigtableTracerCallableTest.java | 11 +++---- .../metrics/BuiltinMetricsTracerTest.java | 7 ++++- .../v2/stub/metrics/MetricsTracerTest.java | 31 +++++-------------- .../v2/stub/metrics/SimpleStatsComponent.java | 27 ++++++++++++++++ 4 files changed, 46 insertions(+), 30 deletions(-) create mode 100644 google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/SimpleStatsComponent.java diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/BigtableTracerCallableTest.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/BigtableTracerCallableTest.java index 1b833f5c06..e783352bf0 100644 --- a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/BigtableTracerCallableTest.java +++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/BigtableTracerCallableTest.java @@ -19,6 +19,7 @@ import static org.junit.Assert.fail; import com.google.api.gax.rpc.ClientContext; +import com.google.api.gax.rpc.ServerStream; import com.google.api.gax.rpc.UnavailableException; import com.google.bigtable.v2.BigtableGrpc.BigtableImplBase; import com.google.bigtable.v2.CheckAndMutateRowRequest; @@ -54,7 +55,6 @@ import io.grpc.Status; import io.grpc.StatusRuntimeException; import io.grpc.stub.StreamObserver; -import io.opencensus.impl.stats.StatsComponentImpl; import io.opencensus.stats.StatsComponent; import io.opencensus.tags.TagKey; import io.opencensus.tags.TagValue; @@ -74,7 +74,7 @@ public class BigtableTracerCallableTest { private FakeService fakeService = new FakeService(); - private final StatsComponent localStats = new StatsComponentImpl(); + private final StatsComponent localStats = new SimpleStatsComponent(); private EnhancedBigtableStub stub; private EnhancedBigtableStub noHeaderStub; private int attempts; @@ -157,10 +157,9 @@ public void tearDown() { } @Test - public void testGFELatencyMetricReadRows() throws InterruptedException { - stub.readRowsCallable().call(Query.create(TABLE_ID)); - - Thread.sleep(WAIT_FOR_METRICS_TIME_MS); + public void testGFELatencyMetricReadRows() { + ServerStream call = stub.readRowsCallable().call(Query.create(TABLE_ID)); + call.forEach(r -> {}); long latency = StatsTestUtils.getAggregationValueAsLong( diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracerTest.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracerTest.java index 3bc283a7f7..443b87a26d 100644 --- a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracerTest.java +++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracerTest.java @@ -109,6 +109,7 @@ public class BuiltinMetricsTracerTest { private static final long FAKE_SERVER_TIMING = 50; private static final long SERVER_LATENCY = 100; private static final long APPLICATION_LATENCY = 200; + private static final long SLEEP_VARIABILITY = 15; private static final long CHANNEL_BLOCKING_LATENCY = 75; @@ -353,7 +354,11 @@ public void onComplete() { .recordOperation(status.capture(), tableId.capture(), zone.capture(), cluster.capture()); assertThat(counter.get()).isEqualTo(fakeService.getResponseCounter().get()); - assertThat(applicationLatency.getValue()).isAtLeast(APPLICATION_LATENCY * counter.get()); + // Thread.sleep might not sleep for the requested amount depending on the interrupt period + // defined by the OS. + // On linux this is ~1ms but on windows may be as high as 15-20ms. + assertThat(applicationLatency.getValue()) + .isAtLeast((APPLICATION_LATENCY - SLEEP_VARIABILITY) * counter.get()); assertThat(applicationLatency.getValue()) .isAtMost(operationLatency.getValue() - SERVER_LATENCY); } diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/MetricsTracerTest.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/MetricsTracerTest.java index b1b966ee9d..5460d4feca 100644 --- a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/MetricsTracerTest.java +++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/MetricsTracerTest.java @@ -54,7 +54,7 @@ import io.grpc.Status; import io.grpc.StatusRuntimeException; import io.grpc.stub.StreamObserver; -import io.opencensus.impl.stats.StatsComponentImpl; +import io.opencensus.stats.StatsComponent; import io.opencensus.tags.TagKey; import io.opencensus.tags.TagValue; import io.opencensus.tags.Tags; @@ -84,6 +84,7 @@ public class MetricsTracerTest { private static final String INSTANCE_ID = "fake-instance"; private static final String APP_PROFILE_ID = "default"; private static final String TABLE_ID = "fake-table"; + private static final long SLEEP_VARIABILITY = 15; private static final ReadRowsResponse DEFAULT_READ_ROWS_RESPONSES = ReadRowsResponse.newBuilder() @@ -104,7 +105,7 @@ public class MetricsTracerTest { @Mock(answer = Answers.CALLS_REAL_METHODS) private BigtableGrpc.BigtableImplBase mockService; - private final StatsComponentImpl localStats = new StatsComponentImpl(); + private final StatsComponent localStats = new SimpleStatsComponent(); private EnhancedBigtableStub stub; private BigtableDataSettings settings; @@ -156,9 +157,6 @@ public Object answer(InvocationOnMock invocation) throws Throwable { Lists.newArrayList(stub.readRowsCallable().call(Query.create(TABLE_ID))); long elapsed = stopwatch.elapsed(TimeUnit.MILLISECONDS); - // Give OpenCensus a chance to update the views asynchronously. - Thread.sleep(100); - long opLatency = StatsTestUtils.getAggregationValueAsLong( localStats, @@ -192,9 +190,6 @@ public Object answer(InvocationOnMock invocation) { Lists.newArrayList(stub.readRowsCallable().call(Query.create(TABLE_ID))); Lists.newArrayList(stub.readRowsCallable().call(Query.create(TABLE_ID))); - // Give OpenCensus a chance to update the views asynchronously. - Thread.sleep(100); - long opLatency = StatsTestUtils.getAggregationValueAsLong( localStats, @@ -246,8 +241,6 @@ public void testReadRowsFirstRow() throws InterruptedException { } long elapsed = stopwatch.elapsed(TimeUnit.MILLISECONDS); - // Give OpenCensus a chance to update the views asynchronously. - Thread.sleep(100); executor.shutdown(); long firstRowLatency = @@ -259,7 +252,10 @@ public void testReadRowsFirstRow() throws InterruptedException { INSTANCE_ID, APP_PROFILE_ID); - assertThat(firstRowLatency).isIn(Range.closed(beforeSleep, elapsed - afterSleep)); + assertThat(firstRowLatency) + .isIn( + Range.closed( + beforeSleep - SLEEP_VARIABILITY, elapsed - afterSleep + SLEEP_VARIABILITY)); } @Test @@ -291,9 +287,6 @@ public Object answer(InvocationOnMock invocation) { Lists.newArrayList(stub.readRowsCallable().call(Query.create(TABLE_ID))); - // Give OpenCensus a chance to update the views asynchronously. - Thread.sleep(100); - long opLatency = StatsTestUtils.getAggregationValueAsLong( localStats, @@ -340,9 +333,6 @@ public Object answer(InvocationOnMock invocation) throws Throwable { Lists.newArrayList(stub.readRowsCallable().call(Query.create(TABLE_ID))); long elapsed = stopwatch.elapsed(TimeUnit.MILLISECONDS); - // Give OpenCensus a chance to update the views asynchronously. - Thread.sleep(100); - long attemptLatency = StatsTestUtils.getAggregationValueAsLong( localStats, @@ -359,12 +349,11 @@ public Object answer(InvocationOnMock invocation) throws Throwable { } @Test - public void testInvalidRequest() throws InterruptedException { + public void testInvalidRequest() { try { stub.bulkMutateRowsCallable().call(BulkMutation.create(TABLE_ID)); Assert.fail("Invalid request should throw exception"); } catch (IllegalStateException e) { - Thread.sleep(100); // Verify that the latency is recorded with an error code (in this case UNKNOWN) long attemptLatency = StatsTestUtils.getAggregationValueAsLong( @@ -402,9 +391,6 @@ public Object answer(InvocationOnMock invocation) { batcher.add(ByteString.copyFromUtf8("row1")); batcher.sendOutstanding(); - // Give OpenCensus a chance to update the views asynchronously. - Thread.sleep(100); - long throttledTimeMetric = StatsTestUtils.getAggregationValueAsLong( localStats, @@ -469,7 +455,6 @@ public Object answer(InvocationOnMock invocation) { batcher.add(RowMutationEntry.create("key")); batcher.sendOutstanding(); - Thread.sleep(100); long throttledTimeMetric = StatsTestUtils.getAggregationValueAsLong( localStats, diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/SimpleStatsComponent.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/SimpleStatsComponent.java new file mode 100644 index 0000000000..99aed9c3b4 --- /dev/null +++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/metrics/SimpleStatsComponent.java @@ -0,0 +1,27 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigtable.data.v2.stub.metrics; + +import io.opencensus.implcore.common.MillisClock; +import io.opencensus.implcore.internal.SimpleEventQueue; +import io.opencensus.implcore.stats.StatsComponentImplBase; + +/** A StatsComponent implementation for testing that executes all events inline. */ +public class SimpleStatsComponent extends StatsComponentImplBase { + public SimpleStatsComponent() { + super(new SimpleEventQueue(), MillisClock.getInstance()); + } +}