Skip to content

Commit

Permalink
Deflake TestPlannedReparentShardPromoteReplicaFail (#13548)
Browse files Browse the repository at this point in the history
Signed-off-by: Manan Gupta <manan@planetscale.com>
  • Loading branch information
GuptaManan100 authored Jul 19, 2023
1 parent a851bb7 commit f2600e8
Showing 1 changed file with 30 additions and 0 deletions.
30 changes: 30 additions & 0 deletions go/vt/wrangler/testlib/planned_reparent_shard_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,9 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) {
oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0]
oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet))
oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
"FAKE SET MASTER",
"START SLAVE",
// We call SetReplicationSource explicitly
"FAKE SET MASTER",
"START SLAVE",
// extra SetReplicationSource call due to retry
Expand Down Expand Up @@ -855,6 +858,13 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) {
assert.True(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly")
assert.True(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly")

// After the first call to PRS has failed, we don't know whether `SetReplicationSource` RPC has succeeded on the oldPrimary or not.
// This causes the test to become non-deterministic. To prevent this, we call `SetReplicationSource` on the oldPrimary again, and make sure it has succeeded.
// We also wait until the oldPrimary has demoted itself to a replica type.
err = wr.TabletManagerClient().SetReplicationSource(context.Background(), oldPrimary.Tablet, newPrimary.Tablet.Alias, 0, "", false, false)
require.NoError(t, err)
waitForTabletType(t, wr, oldPrimary.Tablet.Alias, topodatapb.TabletType_REPLICA)

// retrying should work
newPrimary.FakeMysqlDaemon.PromoteError = nil
newPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0]
Expand All @@ -868,6 +878,26 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) {
assert.True(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly")
}

// waitForTabletType blocks until the tablet identified by tabletAlias is
// recorded with the given tabletType, or fails the test.
//
// The tablet record is re-read through wr.TopoServer().GetTablet every 100ms.
// Any error from that lookup fails the test immediately. If the expected type
// has not been observed within 15 seconds, the test is failed with a message
// naming the tablet alias and the type that was never reached.
func waitForTabletType(t *testing.T, wr *wrangler.Wrangler, tabletAlias *topodatapb.TabletAlias, tabletType topodatapb.TabletType) {
	// Attribute failures to the calling test line rather than to this helper.
	t.Helper()

	const (
		waitTimeout  = 15 * time.Second
		pollInterval = 100 * time.Millisecond
	)

	deadline := time.Now().Add(waitTimeout)
	for {
		tablet, err := wr.TopoServer().GetTablet(context.Background(), tabletAlias)
		require.NoError(t, err)
		if tablet.Type == tabletType {
			return
		}

		// The deadline is only checked after a failed poll, so a tablet that
		// already has the expected type never trips the timeout.
		if time.Now().After(deadline) {
			t.Fatalf("%s didn't reach the tablet type %v", topoproto.TabletAliasString(tabletAlias), tabletType.String())
		}
		time.Sleep(pollInterval)
	}
}

// TestPlannedReparentShardSamePrimary tests that PRS with oldPrimary works correctly.
// It simulates the failure of a previous PRS, with oldPrimary being ReadOnly,
// and verifies that the primary correctly gets set to ReadWrite.
Expand Down

0 comments on commit f2600e8

Please sign in to comment.