Skip to content

Commit

Permalink
batcheval: delete flaky integration test
Browse files Browse the repository at this point in the history
TestLeaseCommandLearnerReplica is fundamentally flaky as commit can't
retry all underlying replication failures and acceptable failure causes
are not exposed on the higher levels where test operates.

Release note: None
  • Loading branch information
aliher1911 committed Jul 10, 2023
1 parent 7496dba commit a298b71
Showing 1 changed file with 0 additions and 92 deletions.
92 changes: 0 additions & 92 deletions pkg/kv/kvserver/batcheval/cmd_lease_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,112 +15,20 @@ import (
"testing"
"time"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/readsummary"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/readsummary/rspb"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/stretchr/testify/require"
)

// TestLeaseTransferWithPipelinedWrite verifies that pipelined writes
// do not cause retry errors to be leaked to clients when the error
// can be handled internally. Pipelining dissociates a write from its
// caller, so the retries of internally-generated errors (specifically
// out-of-order lease indexes) must be retried below that level.
//
// This issue was observed in practice to affect the first insert
// after table creation with high probability.
func TestLeaseTransferWithPipelinedWrite(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

ctx := context.Background()

tc := serverutils.StartNewTestCluster(t, 3, base.TestClusterArgs{})
defer tc.Stopper().Stop(ctx)

db := tc.ServerConn(0)

for iter := 0; iter < 100; iter++ {
log.Infof(ctx, "iter %d", iter)
if _, err := db.ExecContext(ctx, "drop table if exists test"); err != nil {
t.Fatal(err)
}
if _, err := db.ExecContext(ctx, "create table test (a int, b int, primary key (a, b))"); err != nil {
t.Fatal(err)
}

workerErrCh := make(chan error, 1)
go func() {
workerErrCh <- func() error {
for i := 0; i < 1; i++ {
tx, err := db.BeginTx(ctx, nil)
if err != nil {
return err
}
defer func() {
if tx != nil {
if err := tx.Rollback(); err != nil {
log.Warningf(ctx, "error rolling back: %+v", err)
}
}
}()
// Run two inserts in a transaction to ensure that we have
// pipelined writes that cannot be retried at the SQL layer
// due to the first-statement rule.
if _, err := tx.ExecContext(ctx, "INSERT INTO test (a, b) VALUES ($1, $2)", i, 1); err != nil {
return err
}
if _, err := tx.ExecContext(ctx, "INSERT INTO test (a, b) VALUES ($1, $2)", i, 2); err != nil {
return err
}
if err := tx.Commit(); err != nil {
return err
}
tx = nil
}
return nil
}()
}()

// TODO(bdarnell): This test reliably reproduced the issue when
// introduced, because table creation causes splits and repeated
// table creation leads to lease transfers due to rebalancing.
// This is a subtle thing to rely on and the test might become
// more reliable if we ran more iterations in the worker goroutine
// and added a second goroutine to explicitly transfer leases back
// and forth.

select {
case <-time.After(15 * time.Second):
// TODO(bdarnell): The test seems flaky under stress with a 5s
// timeout. Why? I'm giving it a high timeout since hanging
// isn't a failure mode we're particularly concerned about here,
// but it shouldn't be taking this long even with stress.
t.Fatal("timed out")
case err := <-workerErrCh:
if err != nil {
// We allow the transaction to run into an aborted error due to a lease
// transfer when it attempts to create its transaction record. This it
// outside of the focus of this test.
okErr := testutils.IsError(err, kvpb.ABORT_REASON_NEW_LEASE_PREVENTS_TXN.String())
if !okErr {
t.Fatalf("worker failed: %+v", err)
}
}
}
}
}

func TestLeaseCommandLearnerReplica(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand Down

0 comments on commit a298b71

Please sign in to comment.