From 3cfaa0194298d5bb5599958e1da7cc74f473bc4f Mon Sep 17 00:00:00 2001 From: Arthur Schreiber Date: Thu, 11 Jul 2024 19:13:00 +0000 Subject: [PATCH 1/8] Add a test that shows that `RemoveTablet` breaks when called while two tablets are marked as primary. Signed-off-by: Arthur Schreiber --- go/vt/discovery/healthcheck_test.go | 111 ++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/go/vt/discovery/healthcheck_test.go b/go/vt/discovery/healthcheck_test.go index c87ba699234..07e294ed256 100644 --- a/go/vt/discovery/healthcheck_test.go +++ b/go/vt/discovery/healthcheck_test.go @@ -784,6 +784,117 @@ func TestRemoveTablet(t *testing.T) { assert.Empty(t, a, "wrong result, expected empty list") } +func TestTabletRemoveDuringExternalReparenting(t *testing.T) { + ctx := utils.LeakCheckContext(t) + + // reset error counters + hcErrorCounters.ResetAll() + ts := memorytopo.NewServer(ctx, "cell") + defer ts.Close() + hc := createTestHc(ctx, ts) + // close healthcheck + defer hc.Close() + + firstTablet := createTestTablet(0, "cell", "a") + firstTablet.Type = topodatapb.TabletType_PRIMARY + + secondTablet := createTestTablet(1, "cell", "b") + secondTablet.Type = topodatapb.TabletType_REPLICA + + thirdTablet := createTestTablet(2, "cell", "c") + thirdTablet.Type = topodatapb.TabletType_REPLICA + + firstTabletHealthStream := make(chan *querypb.StreamHealthResponse) + firstTabletConn := createFakeConn(firstTablet, firstTabletHealthStream) + firstTabletConn.errCh = make(chan error) + + secondTabletHealthStream := make(chan *querypb.StreamHealthResponse) + secondTabletConn := createFakeConn(secondTablet, secondTabletHealthStream) + secondTabletConn.errCh = make(chan error) + + thirdTabletHealthStream := make(chan *querypb.StreamHealthResponse) + thirdTabletConn := createFakeConn(thirdTablet, thirdTabletHealthStream) + thirdTabletConn.errCh = make(chan error) + + resultChan := hc.Subscribe() + + hc.AddTablet(firstTablet) + hc.AddTablet(secondTablet) + hc.AddTablet(thirdTablet) + + <-resultChan + <-resultChan + + firstTabletPromotionTime := time.Now().Unix() - 10 + + firstTabletHealthStream <- &querypb.StreamHealthResponse{ + TabletAlias: firstTablet.Alias, + Target: &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_PRIMARY}, + Serving: true, + + PrimaryTermStartTimestamp: firstTabletPromotionTime, + RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 1, CpuUsage: 0.5}, + } + + secondTabletHealthStream <- &querypb.StreamHealthResponse{ + TabletAlias: secondTablet.Alias, + Target: &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_REPLICA}, + Serving: true, + + PrimaryTermStartTimestamp: 0, + RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 1, CpuUsage: 0.5}, + } + + thirdTabletHealthStream <- &querypb.StreamHealthResponse{ + TabletAlias: thirdTablet.Alias, + Target: &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_REPLICA}, + Serving: true, + + PrimaryTermStartTimestamp: 0, + RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 1, CpuUsage: 0.5}, + } + + <-resultChan + <-resultChan + <-resultChan + + // Simulate a failover + firstTabletHealthStream <- &querypb.StreamHealthResponse{ + TabletAlias: firstTablet.Alias, + Target: &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_PRIMARY}, + Serving: true, + + PrimaryTermStartTimestamp: 0, + RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 1, CpuUsage: 0.5}, + } + + secondTabletHealthStream <- &querypb.StreamHealthResponse{ + TabletAlias: secondTablet.Alias, + Target: &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_PRIMARY}, + Serving: true, + + PrimaryTermStartTimestamp: time.Now().Unix(), + RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 1, CpuUsage: 0.5}, + } + + <-resultChan + <-resultChan + + hc.RemoveTablet(thirdTablet) + + // tablet 1 is the primary now + expectedTabletStats := []*TabletHealth{{ + Tablet: secondTablet, + Target: &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_PRIMARY}, + Serving: true, + Stats: &querypb.RealtimeStats{ReplicationLagSeconds: 0, CpuUsage: 0.5}, + PrimaryTermStartTime: 10, + }} + + actualTabletStats := hc.GetHealthyTabletStats(&querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_PRIMARY}) + mustMatch(t, expectedTabletStats, actualTabletStats, "unexpected result") +} + // TestGetHealthyTablets tests the functionality of GetHealthyTabletStats. func TestGetHealthyTablets(t *testing.T) { ctx := utils.LeakCheckContext(t) From 9fce905bf504e220208d87ab8ec1f38604696d7a Mon Sep 17 00:00:00 2001 From: Arthur Schreiber Date: Thu, 11 Jul 2024 21:40:37 +0000 Subject: [PATCH 2/8] Add a specialized function to recompute the health of primary tablets. This ensures that we can never end up in a situation where there is more than one healthy primary tablet for a given shard. Signed-off-by: Arthur Schreiber --- go/vt/discovery/healthcheck.go | 23 +++++++++++++++++++++-- go/vt/discovery/healthcheck_test.go | 26 ++++++++++++++++++-------- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/go/vt/discovery/healthcheck.go b/go/vt/discovery/healthcheck.go index 70799b0f6bc..890fc988693 100644 --- a/go/vt/discovery/healthcheck.go +++ b/go/vt/discovery/healthcheck.go @@ -37,6 +37,7 @@ import ( "encoding/json" "fmt" "hash/crc32" + "math" "net/http" "sort" "strings" @@ -467,10 +468,16 @@ func (hc *HealthCheckImpl) deleteTablet(tablet *topodata.Tablet) { continue } delete(ths, tabletAlias) - // delete from healthy list + healthy, ok := hc.healthy[key] if ok && len(healthy) > 0 { - hc.recomputeHealthy(key) + if tabletType == topodata.TabletType_PRIMARY { + // If tablet type is primary, we should only have one tablet in the healthy list. + hc.recomputeHealthyPrimary(key) + } else { + // Simply recompute the list of healthy tablets for all other tablet types. + hc.recomputeHealthy(key) + } } } }() @@ -598,6 +605,18 @@ func (hc *HealthCheckImpl) recomputeHealthy(key KeyspaceShardTabletType) { hc.healthy[key] = FilterStatsByReplicationLag(allArray) } +// Recompute the healthy primary tablet for the given key. +func (hc *HealthCheckImpl) recomputeHealthyPrimary(key KeyspaceShardTabletType) { + highestPrimaryTermStartTime := int64(math.MinInt64) + + for _, s := range hc.healthData[key] { + if s.PrimaryTermStartTime >= highestPrimaryTermStartTime { + highestPrimaryTermStartTime = s.PrimaryTermStartTime + hc.healthy[key][0] = s + } + } +} + // Subscribe adds a listener. Used by vtgate buffer to learn about primary changes. func (hc *HealthCheckImpl) Subscribe() chan *TabletHealth { hc.subMu.Lock() diff --git a/go/vt/discovery/healthcheck_test.go b/go/vt/discovery/healthcheck_test.go index 07e294ed256..a033bcb2946 100644 --- a/go/vt/discovery/healthcheck_test.go +++ b/go/vt/discovery/healthcheck_test.go @@ -784,6 +784,14 @@ func TestRemoveTablet(t *testing.T) { assert.Empty(t, a, "wrong result, expected empty list") } +// When an external primary failover is performed, +// the demoted primary will advertise itself as a `PRIMARY` +// tablet until it recognizes that it was demoted, +// and until all in-flight operations have either finished +// (successfully or unsuccessfully, see `--shutdown_grace_period` flag). +// +// During this time, operations like `RemoveTablet` should not lead +// to multiple tablets becoming valid targets for `PRIMARY`. func TestTabletRemoveDuringExternalReparenting(t *testing.T) { ctx := utils.LeakCheckContext(t) @@ -825,15 +833,15 @@ func TestTabletRemoveDuringExternalReparenting(t *testing.T) { <-resultChan <-resultChan - firstTabletPromotionTime := time.Now().Unix() - 10 + firstTabletPrimaryTermStartTimestamp := time.Now().Unix() - 10 firstTabletHealthStream <- &querypb.StreamHealthResponse{ TabletAlias: firstTablet.Alias, Target: &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_PRIMARY}, Serving: true, - PrimaryTermStartTimestamp: firstTabletPromotionTime, - RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 1, CpuUsage: 0.5}, + PrimaryTermStartTimestamp: firstTabletPrimaryTermStartTimestamp, + RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 0, CpuUsage: 0.5}, } secondTabletHealthStream <- &querypb.StreamHealthResponse{ @@ -858,14 +866,16 @@ func TestTabletRemoveDuringExternalReparenting(t *testing.T) { <-resultChan <-resultChan + secondTabletPrimaryTermStartTimestamp := time.Now().Unix() + // Simulate a failover firstTabletHealthStream <- &querypb.StreamHealthResponse{ TabletAlias: firstTablet.Alias, Target: &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_PRIMARY}, Serving: true, - PrimaryTermStartTimestamp: 0, - RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 1, CpuUsage: 0.5}, + PrimaryTermStartTimestamp: firstTabletPrimaryTermStartTimestamp, + RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 0, CpuUsage: 0.5}, } secondTabletHealthStream <- &querypb.StreamHealthResponse{ @@ -873,8 +883,8 @@ func TestTabletRemoveDuringExternalReparenting(t *testing.T) { Target: &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_PRIMARY}, Serving: true, - PrimaryTermStartTimestamp: time.Now().Unix(), - RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 1, CpuUsage: 0.5}, + PrimaryTermStartTimestamp: secondTabletPrimaryTermStartTimestamp, + RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 0, CpuUsage: 0.5}, } <-resultChan @@ -888,7 +898,7 @@ func TestTabletRemoveDuringExternalReparenting(t *testing.T) { Target: &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_PRIMARY}, Serving: true, Stats: &querypb.RealtimeStats{ReplicationLagSeconds: 0, CpuUsage: 0.5}, - PrimaryTermStartTime: 10, + PrimaryTermStartTime: secondTabletPrimaryTermStartTimestamp, }} actualTabletStats := hc.GetHealthyTabletStats(&querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_PRIMARY}) From 5cefbbb1b79bc5b54862919d919d87d89cdf12a3 Mon Sep 17 00:00:00 2001 From: Arthur Schreiber Date: Fri, 12 Jul 2024 08:25:02 +0000 Subject: [PATCH 3/8] Update recompute method and rename test case. Signed-off-by: Arthur Schreiber --- go/vt/discovery/healthcheck.go | 9 ++++++++- go/vt/discovery/healthcheck_test.go | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/go/vt/discovery/healthcheck.go b/go/vt/discovery/healthcheck.go index 890fc988693..0a636386dfe 100644 --- a/go/vt/discovery/healthcheck.go +++ b/go/vt/discovery/healthcheck.go @@ -608,13 +608,20 @@ func (hc *HealthCheckImpl) recomputeHealthy(key KeyspaceShardTabletType) { // Recompute the healthy primary tablet for the given key. func (hc *HealthCheckImpl) recomputeHealthyPrimary(key KeyspaceShardTabletType) { highestPrimaryTermStartTime := int64(math.MinInt64) + var newestPrimary *TabletHealth for _, s := range hc.healthData[key] { if s.PrimaryTermStartTime >= highestPrimaryTermStartTime { highestPrimaryTermStartTime = s.PrimaryTermStartTime - hc.healthy[key][0] = s + newestPrimary = s } } + + if newestPrimary != nil { + hc.healthy[key] = []*TabletHealth{newestPrimary} + } else { + hc.healthy[key] = []*TabletHealth{} + } } // Subscribe adds a listener. Used by vtgate buffer to learn about primary changes. diff --git a/go/vt/discovery/healthcheck_test.go b/go/vt/discovery/healthcheck_test.go index a033bcb2946..134941995cb 100644 --- a/go/vt/discovery/healthcheck_test.go +++ b/go/vt/discovery/healthcheck_test.go @@ -792,7 +792,7 @@ func TestRemoveTablet(t *testing.T) { // // During this time, operations like `RemoveTablet` should not lead // to multiple tablets becoming valid targets for `PRIMARY`. -func TestTabletRemoveDuringExternalReparenting(t *testing.T) { +func TestRemoveTabletDuringExternalReparenting(t *testing.T) { ctx := utils.LeakCheckContext(t) // reset error counters From 48caba50c00962a4bd481590565dcf6e367131fc Mon Sep 17 00:00:00 2001 From: Arthur Schreiber Date: Sun, 14 Jul 2024 16:52:00 +0000 Subject: [PATCH 4/8] Rename `recomputeHealthy` to `recomputeHealthyReplicas` to better communicate the intent. Signed-off-by: Arthur Schreiber --- go/vt/discovery/healthcheck.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/go/vt/discovery/healthcheck.go b/go/vt/discovery/healthcheck.go index 0a636386dfe..540bdab6185 100644 --- a/go/vt/discovery/healthcheck.go +++ b/go/vt/discovery/healthcheck.go @@ -476,7 +476,7 @@ func (hc *HealthCheckImpl) deleteTablet(tablet *topodata.Tablet) { hc.recomputeHealthyPrimary(key) } else { // Simply recompute the list of healthy tablets for all other tablet types. - hc.recomputeHealthy(key) + hc.recomputeHealthyReplicas(key) } } } @@ -575,11 +575,11 @@ func (hc *HealthCheckImpl) updateHealth(th *TabletHealth, prevTarget *query.Targ // Tablets from other cells for non-primary targets should not trigger a re-sort; // they should also be excluded from healthy list. if th.Target.TabletType != topodata.TabletType_PRIMARY && hc.isIncluded(th.Target.TabletType, th.Tablet.Alias) { - hc.recomputeHealthy(targetKey) + hc.recomputeHealthyReplicas(targetKey) } if targetChanged && prevTarget.TabletType != topodata.TabletType_PRIMARY && hc.isIncluded(th.Target.TabletType, th.Tablet.Alias) { // also recompute old target's healthy list oldTargetKey := KeyFromTarget(prevTarget) - hc.recomputeHealthy(oldTargetKey) + hc.recomputeHealthyReplicas(oldTargetKey) } } @@ -593,7 +593,12 @@ func (hc *HealthCheckImpl) updateHealth(th *TabletHealth, prevTarget *query.Targ hc.broadcast(th) } -func (hc *HealthCheckImpl) recomputeHealthy(key KeyspaceShardTabletType) { +// Recomputes the healthy replica tablets for the given key. +// +// This filters out replicas that might be healthy, but are not part of the current +// cell or cell alias. It also performs filtering of tablets based on replication lag, +// if configured to do so. +func (hc *HealthCheckImpl) recomputeHealthyReplicas(key KeyspaceShardTabletType) { all := hc.healthData[key] allArray := make([]*TabletHealth, 0, len(all)) for _, s := range all { From bbb6365fd924b25becc78ffa4de8c97f0071a3ce Mon Sep 17 00:00:00 2001 From: Arthur Schreiber Date: Fri, 2 Aug 2024 14:35:47 +0000 Subject: [PATCH 5/8] Simplify the logic for primary tablet deletes. Signed-off-by: Arthur Schreiber --- go/vt/discovery/healthcheck.go | 39 ++++++++++++---------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/go/vt/discovery/healthcheck.go b/go/vt/discovery/healthcheck.go index 540bdab6185..6e926ae711c 100644 --- a/go/vt/discovery/healthcheck.go +++ b/go/vt/discovery/healthcheck.go @@ -37,7 +37,6 @@ import ( "encoding/json" "fmt" "hash/crc32" - "math" "net/http" "sort" "strings" @@ -472,11 +471,18 @@ func (hc *HealthCheckImpl) deleteTablet(tablet *topodata.Tablet) { healthy, ok := hc.healthy[key] if ok && len(healthy) > 0 { if tabletType == topodata.TabletType_PRIMARY { - // If tablet type is primary, we should only have one tablet in the healthy list. - hc.recomputeHealthyPrimary(key) + // If the deleted tablet was a primary, + // and it matches what we think is the current active primary, + // clear the healthy list for the primary. + // + // See the logic in `updateHealth` for more details. + alias := tabletAliasString(topoproto.TabletAliasString(healthy[0].Tablet.Alias)) + if alias == tabletAlias { + hc.healthy[key] = []*TabletHealth{} + } } else { // Simply recompute the list of healthy tablets for all other tablet types. - hc.recomputeHealthyReplicas(key) + hc.recomputeHealthy(key) } } } @@ -575,11 +581,11 @@ func (hc *HealthCheckImpl) updateHealth(th *TabletHealth, prevTarget *query.Targ // Tablets from other cells for non-primary targets should not trigger a re-sort; // they should also be excluded from healthy list. if th.Target.TabletType != topodata.TabletType_PRIMARY && hc.isIncluded(th.Target.TabletType, th.Tablet.Alias) { - hc.recomputeHealthyReplicas(targetKey) + hc.recomputeHealthy(targetKey) } if targetChanged && prevTarget.TabletType != topodata.TabletType_PRIMARY && hc.isIncluded(th.Target.TabletType, th.Tablet.Alias) { // also recompute old target's healthy list oldTargetKey := KeyFromTarget(prevTarget) - hc.recomputeHealthyReplicas(oldTargetKey) + hc.recomputeHealthy(oldTargetKey) } } @@ -598,7 +604,7 @@ func (hc *HealthCheckImpl) updateHealth(th *TabletHealth, prevTarget *query.Targ // This filters out replicas that might be healthy, but are not part of the current // cell or cell alias. It also performs filtering of tablets based on replication lag, // if configured to do so. -func (hc *HealthCheckImpl) recomputeHealthyReplicas(key KeyspaceShardTabletType) { +func (hc *HealthCheckImpl) recomputeHealthy(key KeyspaceShardTabletType) { all := hc.healthData[key] allArray := make([]*TabletHealth, 0, len(all)) for _, s := range all { @@ -610,25 +616,6 @@ func (hc *HealthCheckImpl) recomputeHealthyReplicas(key KeyspaceShardTabletType) hc.healthy[key] = FilterStatsByReplicationLag(allArray) } -// Recompute the healthy primary tablet for the given key. -func (hc *HealthCheckImpl) recomputeHealthyPrimary(key KeyspaceShardTabletType) { - highestPrimaryTermStartTime := int64(math.MinInt64) - var newestPrimary *TabletHealth - - for _, s := range hc.healthData[key] { - if s.PrimaryTermStartTime >= highestPrimaryTermStartTime { - highestPrimaryTermStartTime = s.PrimaryTermStartTime - newestPrimary = s - } - } - - if newestPrimary != nil { - hc.healthy[key] = []*TabletHealth{newestPrimary} - } else { - hc.healthy[key] = []*TabletHealth{} - } -} - // Subscribe adds a listener. Used by vtgate buffer to learn about primary changes. func (hc *HealthCheckImpl) Subscribe() chan *TabletHealth { hc.subMu.Lock() From 6941c1c016c1672d5e032b036313156089a8dbe9 Mon Sep 17 00:00:00 2001 From: Arthur Schreiber Date: Fri, 2 Aug 2024 14:39:40 +0000 Subject: [PATCH 6/8] Small cleanups. Signed-off-by: Arthur Schreiber --- go/vt/discovery/healthcheck.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/go/vt/discovery/healthcheck.go b/go/vt/discovery/healthcheck.go index 6e926ae711c..a044a97785e 100644 --- a/go/vt/discovery/healthcheck.go +++ b/go/vt/discovery/healthcheck.go @@ -467,7 +467,7 @@ func (hc *HealthCheckImpl) deleteTablet(tablet *topodata.Tablet) { continue } delete(ths, tabletAlias) - + // delete from healthy list healthy, ok := hc.healthy[key] if ok && len(healthy) > 0 { if tabletType == topodata.TabletType_PRIMARY { @@ -599,11 +599,13 @@ func (hc *HealthCheckImpl) updateHealth(th *TabletHealth, prevTarget *query.Targ hc.broadcast(th) } -// Recomputes the healthy replica tablets for the given key. +// Recomputes the healthy tablets for the given key. // -// This filters out replicas that might be healthy, but are not part of the current +// This filters out tablets that might be healthy, but are not part of the current // cell or cell alias. It also performs filtering of tablets based on replication lag, // if configured to do so. +// +// This should not be called for primary tablets. func (hc *HealthCheckImpl) recomputeHealthy(key KeyspaceShardTabletType) { all := hc.healthData[key] allArray := make([]*TabletHealth, 0, len(all)) From 6ded5e466f6416b26fa5fc8a3c5c0e2890182379 Mon Sep 17 00:00:00 2001 From: Arthur Schreiber Date: Fri, 2 Aug 2024 14:43:11 +0000 Subject: [PATCH 7/8] Small test case fixups. Signed-off-by: Arthur Schreiber --- go/vt/discovery/healthcheck_test.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/go/vt/discovery/healthcheck_test.go b/go/vt/discovery/healthcheck_test.go index 134941995cb..35c55354fb7 100644 --- a/go/vt/discovery/healthcheck_test.go +++ b/go/vt/discovery/healthcheck_test.go @@ -827,10 +827,12 @@ func TestRemoveTabletDuringExternalReparenting(t *testing.T) { resultChan := hc.Subscribe() hc.AddTablet(firstTablet) - hc.AddTablet(secondTablet) - hc.AddTablet(thirdTablet) + <-resultChan + hc.AddTablet(secondTablet) <-resultChan + + hc.AddTablet(thirdTablet) <-resultChan firstTabletPrimaryTermStartTimestamp := time.Now().Unix() - 10 @@ -843,6 +845,7 @@ func TestRemoveTabletDuringExternalReparenting(t *testing.T) { PrimaryTermStartTimestamp: firstTabletPrimaryTermStartTimestamp, RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 0, CpuUsage: 0.5}, } + <-resultChan secondTabletHealthStream <- &querypb.StreamHealthResponse{ TabletAlias: secondTablet.Alias, @@ -852,6 +855,7 @@ func TestRemoveTabletDuringExternalReparenting(t *testing.T) { PrimaryTermStartTimestamp: 0, RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 1, CpuUsage: 0.5}, } + <-resultChan thirdTabletHealthStream <- &querypb.StreamHealthResponse{ TabletAlias: thirdTablet.Alias, @@ -861,9 +865,6 @@ func TestRemoveTabletDuringExternalReparenting(t *testing.T) { PrimaryTermStartTimestamp: 0, RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 1, CpuUsage: 0.5}, } - - <-resultChan - <-resultChan <-resultChan secondTabletPrimaryTermStartTimestamp := time.Now().Unix() @@ -877,6 +878,7 @@ func TestRemoveTabletDuringExternalReparenting(t *testing.T) { PrimaryTermStartTimestamp: firstTabletPrimaryTermStartTimestamp, RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 0, CpuUsage: 0.5}, } + <-resultChan secondTabletHealthStream <- &querypb.StreamHealthResponse{ TabletAlias: secondTablet.Alias, @@ -886,13 +888,11 @@ func TestRemoveTabletDuringExternalReparenting(t *testing.T) { PrimaryTermStartTimestamp: secondTabletPrimaryTermStartTimestamp, RealtimeStats: &querypb.RealtimeStats{ReplicationLagSeconds: 0, CpuUsage: 0.5}, } - - <-resultChan <-resultChan hc.RemoveTablet(thirdTablet) - // tablet 1 is the primary now + // `secondTablet` should be the primary now expectedTabletStats := []*TabletHealth{{ Tablet: secondTablet, Target: &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_PRIMARY}, From 4052306102676fecc0fdb4965cb846eb0d3cc0ef Mon Sep 17 00:00:00 2001 From: Arthur Schreiber Date: Thu, 8 Aug 2024 18:57:34 +0000 Subject: [PATCH 8/8] Make docs follow best practices. Signed-off-by: Arthur Schreiber --- go/vt/discovery/healthcheck.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/discovery/healthcheck.go b/go/vt/discovery/healthcheck.go index a044a97785e..2a467301eaf 100644 --- a/go/vt/discovery/healthcheck.go +++ b/go/vt/discovery/healthcheck.go @@ -599,7 +599,7 @@ func (hc *HealthCheckImpl) updateHealth(th *TabletHealth, prevTarget *query.Targ hc.broadcast(th) } -// Recomputes the healthy tablets for the given key. +// recomputeHealthy recomputes the healthy tablets for the given key. // // This filters out tablets that might be healthy, but are not part of the current // cell or cell alias. It also performs filtering of tablets based on replication lag,