Skip to content

Commit

Permalink
[release-17.0] Fixing backup_pitr flaky tests via wait-for loop on …
Browse files Browse the repository at this point in the history
…topo reads (#13781) (#13790)

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>
Co-authored-by: vitess-bot[bot] <108069721+vitess-bot[bot]@users.noreply.github.com>
Co-authored-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>
  • Loading branch information
vitess-bot[bot] and shlomi-noach authored Aug 16, 2023
1 parent 0820d3b commit 325cf0e
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 6 deletions.
12 changes: 10 additions & 2 deletions go/test/endtoend/backup/pitr/backup_mysqlctld_pitr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,14 @@ func TestIncrementalBackup(t *testing.T) {
{
name: "first incremental backup",
},
{
name: "fail1",
expectError: "no binary logs to backup",
},
{
name: "fail2",
expectError: "no binary logs to backup",
},
{
name: "make writes, succeed",
writeBeforeBackup: true,
Expand Down Expand Up @@ -159,10 +167,10 @@ func TestIncrementalBackup(t *testing.T) {
if tc.writeBeforeBackup {
backup.InsertRowOnPrimary(t, "")
}
// we wait for 1 second because backups are written to a directory named after the current timestamp,
// we wait for >1 second because backups are written to a directory named after the current timestamp,
// in 1 second resolution. We want to avoid two backups that have the same pathname. Realistically this
// is only ever a problem in this end-to-end test, not in production.
// Also, we gie the replica a chance to catch up.
// Also, we give the replica a chance to catch up.
time.Sleep(1100 * time.Millisecond)
waitForReplica(t)
recordRowsPerPosition(t)
Expand Down
43 changes: 39 additions & 4 deletions go/test/endtoend/backup/vtctlbackup/backup_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package vtctlbackup

import (
"bufio"
"context"
"encoding/json"
"fmt"
"os"
Expand Down Expand Up @@ -51,7 +52,8 @@ const (
XtraBackup = iota
BuiltinBackup
Mysqlctld
timeout = time.Duration(60 * time.Second)
timeout = time.Duration(60 * time.Second)
topoConsistencyTimeout = 20 * time.Second
)

var (
Expand Down Expand Up @@ -1068,13 +1070,15 @@ func terminateRestore(t *testing.T) {
}

func vtctlBackupReplicaNoDestroyNoWrites(t *testing.T, tabletType string) (backups []string, destroy func(t *testing.T)) {
numBackups := len(waitForNumBackups(t, -1))
restoreWaitForBackup(t, tabletType, nil, true)
verifyInitialReplication(t)

err := localCluster.VtctlclientProcess.ExecuteCommand("Backup", replica1.Alias)
require.Nil(t, err)

backups = localCluster.VerifyBackupCount(t, shardKsName, 1)
backups = waitForNumBackups(t, numBackups+1)
require.NotEmpty(t, backups)

verifyTabletBackupStats(t, replica1.VttabletProcess.GetVars())

Expand Down Expand Up @@ -1156,7 +1160,38 @@ func TestReplicaFullBackup(t *testing.T) (manifest *mysqlctl.BackupManifest, des
return readManifestFile(t, backupLocation), destroy
}

// waitForNumBackups polls the cluster's backup listing until it reports
// exactly expectNumBackups entries, then returns that listing.
// A negative expectNumBackups means "accept whatever is listed right now".
// Polling gives up after topoConsistencyTimeout, failing the test and
// returning nil.
func waitForNumBackups(t *testing.T, expectNumBackups int) []string {
	ctx, cancel := context.WithTimeout(context.Background(), topoConsistencyTimeout)
	defer cancel()

	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()

	for {
		backups, err := localCluster.ListBackups(shardKsName)
		require.NoError(t, err)
		switch {
		case expectNumBackups < 0:
			// Caller does not care about the count; hand back the current listing.
			return backups
		case len(backups) == expectNumBackups:
			// The listing has converged to the expected count.
			return backups
		}
		// The count should only ever grow towards the expectation;
		// overshooting it indicates a test bug, so flag it (non-fatal)
		// and keep waiting until the deadline fires.
		assert.Less(t, len(backups), expectNumBackups)
		select {
		case <-ctx.Done():
			assert.Failf(t, ctx.Err().Error(), "expected %d backups, got %d", expectNumBackups, len(backups))
			return nil
		case <-ticker.C:
			// next polling round
		}
	}
}

func TestReplicaIncrementalBackup(t *testing.T, incrementalFromPos mysql.Position, expectError string) (manifest *mysqlctl.BackupManifest, backupName string) {
numBackups := len(waitForNumBackups(t, -1))
incrementalFromPosArg := "auto"
if !incrementalFromPos.IsZero() {
incrementalFromPosArg = mysql.EncodePosition(incrementalFromPos)
Expand All @@ -1169,8 +1204,8 @@ func TestReplicaIncrementalBackup(t *testing.T, incrementalFromPos mysql.Positio
}
require.NoErrorf(t, err, "output: %v", output)

backups, err := localCluster.ListBackups(shardKsName)
require.NoError(t, err)
backups := waitForNumBackups(t, numBackups+1)
require.NotEmptyf(t, backups, "output: %v", output)
verifyTabletBackupStats(t, replica1.VttabletProcess.GetVars())
backupName = backups[len(backups)-1]
backupLocation := localCluster.CurrentVTDATAROOT + "/backups/" + shardKsName + "/" + backupName
Expand Down
1 change: 1 addition & 0 deletions go/vt/mysqlctl/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ func Backup(ctx context.Context, params BackupParams) error {
if err != nil {
return vterrors.Wrap(err, "StartBackup failed")
}
params.Logger.Infof("Starting backup %v", bh.Name())

// Scope stats to selected backup engine.
beParams := params.Copy()
Expand Down

0 comments on commit 325cf0e

Please sign in to comment.