From 418e4ec9ba781e7be8839eef0c0a73e065ad2bba Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 4 Oct 2023 16:42:57 +0300 Subject: [PATCH 1/2] Online DDL: timeouts for all gRPC calls Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index d95d4afc41f..9680b111880 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -124,6 +124,7 @@ const ( readyToCompleteHint = "ready_to_complete" databasePoolSize = 3 qrBufferExtraTimeout = 5 * time.Second + grpcTimeout = 5 * time.Second vreplicationTestSuiteWaitSeconds = 5 ) @@ -735,9 +736,6 @@ func (e *Executor) primaryPosition(ctx context.Context) (pos replication.Positio // terminateVReplMigration stops vreplication, then removes the _vt.vreplication entry for the given migration func (e *Executor) terminateVReplMigration(ctx context.Context, uuid string) error { - tmClient := e.tabletManagerClient() - defer tmClient.Close() - tablet, err := e.ts.GetTablet(ctx, e.tabletAlias) if err != nil { return err @@ -916,11 +914,13 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream) er e.toggleBufferTableFunc(bufferingCtx, onlineDDL.Table, timeout, bufferQueries) if !bufferQueries { + grpcCtx, cancel := context.WithTimeout(ctx, grpcTimeout) + defer cancel() // called after new table is in place. // unbuffer existing queries: bufferingContextCancel() // force re-read of tables - if err := tmClient.RefreshState(ctx, tablet.Tablet); err != nil { + if err := tmClient.RefreshState(grpcCtx, tablet.Tablet); err != nil { return err } } @@ -3684,7 +3684,10 @@ func (e *Executor) vreplicationExec(ctx context.Context, tablet *topodatapb.Tabl tmClient := e.tabletManagerClient() defer tmClient.Close() - return tmClient.VReplicationExec(ctx, tablet, query) + grpcCtx, cancel := context.WithTimeout(ctx, grpcTimeout) + defer cancel() + + return tmClient.VReplicationExec(grpcCtx, tablet, query) } // reloadSchema issues a ReloadSchema on this tablet @@ -3696,7 +3699,11 @@ func (e *Executor) reloadSchema(ctx context.Context) error { if err != nil { return err } - return tmClient.ReloadSchema(ctx, tablet.Tablet, "") + + grpcCtx, cancel := context.WithTimeout(ctx, grpcTimeout) + defer cancel() + + return tmClient.ReloadSchema(grpcCtx, tablet.Tablet, "") } // deleteVReplicationEntry cleans up a _vt.vreplication entry; this function is called as part of From 24d69d584dea692d4fbef551811b71e78b0b2a94 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 5 Oct 2023 08:52:28 +0300 Subject: [PATCH 2/2] more generous 30s timeout Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 9680b111880..66e81aef949 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -124,7 +124,7 @@ const ( readyToCompleteHint = "ready_to_complete" databasePoolSize = 3 qrBufferExtraTimeout = 5 * time.Second - grpcTimeout = 5 * time.Second + grpcTimeout = 30 * time.Second vreplicationTestSuiteWaitSeconds = 5 )