Skip to content

Commit

Permalink
chore: Fix flaky metrics tests (#1865) (#1993)
Browse files Browse the repository at this point in the history
This fixes a few flaky unit tests that relied on `Thread.sleep` to ensure that all metrics processing was done.  Rather than using `Thread.sleep`, we can instead use an inline event queue in the OpenCensus stats component to execute all work inline, removing the need to wait for anything to finish.

Co-authored-by: Steven Niemitz <steve@niemi.tz>
  • Loading branch information
mutianf and steveniemitz authored Nov 6, 2023
1 parent 0c43056 commit 81db54a
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import static org.junit.Assert.fail;

import com.google.api.gax.rpc.ClientContext;
import com.google.api.gax.rpc.ServerStream;
import com.google.api.gax.rpc.UnavailableException;
import com.google.bigtable.v2.BigtableGrpc.BigtableImplBase;
import com.google.bigtable.v2.CheckAndMutateRowRequest;
Expand Down Expand Up @@ -54,7 +55,6 @@
import io.grpc.Status;
import io.grpc.StatusRuntimeException;
import io.grpc.stub.StreamObserver;
import io.opencensus.impl.stats.StatsComponentImpl;
import io.opencensus.stats.StatsComponent;
import io.opencensus.tags.TagKey;
import io.opencensus.tags.TagValue;
Expand All @@ -74,7 +74,7 @@ public class BigtableTracerCallableTest {

private FakeService fakeService = new FakeService();

private final StatsComponent localStats = new StatsComponentImpl();
private final StatsComponent localStats = new SimpleStatsComponent();
private EnhancedBigtableStub stub;
private EnhancedBigtableStub noHeaderStub;
private int attempts;
Expand Down Expand Up @@ -157,10 +157,9 @@ public void tearDown() {
}

@Test
public void testGFELatencyMetricReadRows() throws InterruptedException {
stub.readRowsCallable().call(Query.create(TABLE_ID));

Thread.sleep(WAIT_FOR_METRICS_TIME_MS);
public void testGFELatencyMetricReadRows() {
ServerStream<?> call = stub.readRowsCallable().call(Query.create(TABLE_ID));
call.forEach(r -> {});

long latency =
StatsTestUtils.getAggregationValueAsLong(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ public class BuiltinMetricsTracerTest {
private static final long FAKE_SERVER_TIMING = 50;
private static final long SERVER_LATENCY = 100;
private static final long APPLICATION_LATENCY = 200;
private static final long SLEEP_VARIABILITY = 15;

private static final long CHANNEL_BLOCKING_LATENCY = 75;

Expand Down Expand Up @@ -353,7 +354,11 @@ public void onComplete() {
.recordOperation(status.capture(), tableId.capture(), zone.capture(), cluster.capture());

assertThat(counter.get()).isEqualTo(fakeService.getResponseCounter().get());
assertThat(applicationLatency.getValue()).isAtLeast(APPLICATION_LATENCY * counter.get());
// Thread.sleep might not sleep for the requested amount depending on the interrupt period
// defined by the OS.
// On linux this is ~1ms but on windows may be as high as 15-20ms.
assertThat(applicationLatency.getValue())
.isAtLeast((APPLICATION_LATENCY - SLEEP_VARIABILITY) * counter.get());
assertThat(applicationLatency.getValue())
.isAtMost(operationLatency.getValue() - SERVER_LATENCY);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
import io.grpc.Status;
import io.grpc.StatusRuntimeException;
import io.grpc.stub.StreamObserver;
import io.opencensus.impl.stats.StatsComponentImpl;
import io.opencensus.stats.StatsComponent;
import io.opencensus.tags.TagKey;
import io.opencensus.tags.TagValue;
import io.opencensus.tags.Tags;
Expand Down Expand Up @@ -84,6 +84,7 @@ public class MetricsTracerTest {
private static final String INSTANCE_ID = "fake-instance";
private static final String APP_PROFILE_ID = "default";
private static final String TABLE_ID = "fake-table";
private static final long SLEEP_VARIABILITY = 15;

private static final ReadRowsResponse DEFAULT_READ_ROWS_RESPONSES =
ReadRowsResponse.newBuilder()
Expand All @@ -104,7 +105,7 @@ public class MetricsTracerTest {
@Mock(answer = Answers.CALLS_REAL_METHODS)
private BigtableGrpc.BigtableImplBase mockService;

private final StatsComponentImpl localStats = new StatsComponentImpl();
private final StatsComponent localStats = new SimpleStatsComponent();
private EnhancedBigtableStub stub;
private BigtableDataSettings settings;

Expand Down Expand Up @@ -156,9 +157,6 @@ public Object answer(InvocationOnMock invocation) throws Throwable {
Lists.newArrayList(stub.readRowsCallable().call(Query.create(TABLE_ID)));
long elapsed = stopwatch.elapsed(TimeUnit.MILLISECONDS);

// Give OpenCensus a chance to update the views asynchronously.
Thread.sleep(100);

long opLatency =
StatsTestUtils.getAggregationValueAsLong(
localStats,
Expand Down Expand Up @@ -192,9 +190,6 @@ public Object answer(InvocationOnMock invocation) {
Lists.newArrayList(stub.readRowsCallable().call(Query.create(TABLE_ID)));
Lists.newArrayList(stub.readRowsCallable().call(Query.create(TABLE_ID)));

// Give OpenCensus a chance to update the views asynchronously.
Thread.sleep(100);

long opLatency =
StatsTestUtils.getAggregationValueAsLong(
localStats,
Expand Down Expand Up @@ -246,8 +241,6 @@ public void testReadRowsFirstRow() throws InterruptedException {
}
long elapsed = stopwatch.elapsed(TimeUnit.MILLISECONDS);

// Give OpenCensus a chance to update the views asynchronously.
Thread.sleep(100);
executor.shutdown();

long firstRowLatency =
Expand All @@ -259,7 +252,10 @@ public void testReadRowsFirstRow() throws InterruptedException {
INSTANCE_ID,
APP_PROFILE_ID);

assertThat(firstRowLatency).isIn(Range.closed(beforeSleep, elapsed - afterSleep));
assertThat(firstRowLatency)
.isIn(
Range.closed(
beforeSleep - SLEEP_VARIABILITY, elapsed - afterSleep + SLEEP_VARIABILITY));
}

@Test
Expand Down Expand Up @@ -291,9 +287,6 @@ public Object answer(InvocationOnMock invocation) {

Lists.newArrayList(stub.readRowsCallable().call(Query.create(TABLE_ID)));

// Give OpenCensus a chance to update the views asynchronously.
Thread.sleep(100);

long opLatency =
StatsTestUtils.getAggregationValueAsLong(
localStats,
Expand Down Expand Up @@ -340,9 +333,6 @@ public Object answer(InvocationOnMock invocation) throws Throwable {
Lists.newArrayList(stub.readRowsCallable().call(Query.create(TABLE_ID)));
long elapsed = stopwatch.elapsed(TimeUnit.MILLISECONDS);

// Give OpenCensus a chance to update the views asynchronously.
Thread.sleep(100);

long attemptLatency =
StatsTestUtils.getAggregationValueAsLong(
localStats,
Expand All @@ -359,12 +349,11 @@ public Object answer(InvocationOnMock invocation) throws Throwable {
}

@Test
public void testInvalidRequest() throws InterruptedException {
public void testInvalidRequest() {
try {
stub.bulkMutateRowsCallable().call(BulkMutation.create(TABLE_ID));
Assert.fail("Invalid request should throw exception");
} catch (IllegalStateException e) {
Thread.sleep(100);
// Verify that the latency is recorded with an error code (in this case UNKNOWN)
long attemptLatency =
StatsTestUtils.getAggregationValueAsLong(
Expand Down Expand Up @@ -402,9 +391,6 @@ public Object answer(InvocationOnMock invocation) {
batcher.add(ByteString.copyFromUtf8("row1"));
batcher.sendOutstanding();

// Give OpenCensus a chance to update the views asynchronously.
Thread.sleep(100);

long throttledTimeMetric =
StatsTestUtils.getAggregationValueAsLong(
localStats,
Expand Down Expand Up @@ -469,7 +455,6 @@ public Object answer(InvocationOnMock invocation) {
batcher.add(RowMutationEntry.create("key"));
batcher.sendOutstanding();

Thread.sleep(100);
long throttledTimeMetric =
StatsTestUtils.getAggregationValueAsLong(
localStats,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.bigtable.data.v2.stub.metrics;

import io.opencensus.implcore.common.MillisClock;
import io.opencensus.implcore.internal.SimpleEventQueue;
import io.opencensus.implcore.stats.StatsComponentImplBase;

/** A StatsComponent implementation for testing that executes all events inline. */
public class SimpleStatsComponent extends StatsComponentImplBase {
public SimpleStatsComponent() {
super(new SimpleEventQueue(), MillisClock.getInstance());
}
}

0 comments on commit 81db54a

Please sign in to comment.