Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport 13856 - resharding error fix #432

Merged
merged 3 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/local/scripts/vtgate-up.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ vtgate \
--tablet_types_to_wait PRIMARY,REPLICA \
--service_map 'grpc-vtgateservice' \
--pid_file $VTDATAROOT/tmp/vtgate.pid \
--enable_buffer \
--mysql_auth_server_impl none \
> $VTDATAROOT/tmp/vtgate.out 2>&1 &

Expand Down
37 changes: 25 additions & 12 deletions go/vt/discovery/keyspace_events.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ import (

"google.golang.org/protobuf/proto"

"vitess.io/vitess/go/vt/key"
"vitess.io/vitess/go/vt/log"
"vitess.io/vitess/go/vt/proto/query"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
"vitess.io/vitess/go/vt/srvtopo"
"vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/topo/topoproto"

querypb "vitess.io/vitess/go/vt/proto/query"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
)

// KeyspaceEventWatcher is an auxiliary watcher that watches all availability incidents
Expand Down Expand Up @@ -65,7 +67,7 @@ type KeyspaceEvent struct {

type ShardEvent struct {
Tablet *topodatapb.TabletAlias
Target *query.Target
Target *querypb.Target
Serving bool
}

Expand Down Expand Up @@ -124,17 +126,27 @@ func (kss *keyspaceState) beingResharded(currentShard string) bool {
kss.mu.Lock()
defer kss.mu.Unlock()

// if the keyspace is gone, or if it has no known availability events, the keyspace
// cannot be in the middle of a resharding operation
// If the keyspace is gone, has no known availability events, or is in the middle of a
// MoveTables then the keyspace cannot be in the middle of a resharding operation.
if kss.deleted || kss.consistent {
return false
}

// for all the known shards, try to find a primary shard besides the one we're trying to access
// and which is currently healthy. if there are other healthy primaries in the keyspace, it means
// we're in the middle of a resharding operation
// If there are unequal and overlapping shards in the keyspace and any of them are
// currently serving then we assume that we are in the middle of a Reshard.
_, ckr, err := topo.ValidateShardName(currentShard)
if err != nil || ckr == nil { // Assume not and avoid potential panic
return false
}
for shard, sstate := range kss.shards {
if shard != currentShard && sstate.serving {
if !sstate.serving || shard == currentShard {
continue
}
_, skr, err := topo.ValidateShardName(shard)
if err != nil || skr == nil { // Assume not and avoid potential panic
return false
}
if key.KeyRangesIntersect(ckr, skr) {
return true
}
}
Expand All @@ -143,7 +155,7 @@ func (kss *keyspaceState) beingResharded(currentShard string) bool {
}

type shardState struct {
target *query.Target
target *querypb.Target
serving bool
externallyReparented int64
currentPrimary *topodatapb.TabletAlias
Expand Down Expand Up @@ -426,7 +438,7 @@ func (kew *KeyspaceEventWatcher) getKeyspaceStatus(keyspace string) *keyspaceSta
// This is not a fully accurate heuristic, but it's good enough that we'd want to buffer the
// request for the given target under the assumption that the reason why it cannot be completed
// right now is transitory.
func (kew *KeyspaceEventWatcher) TargetIsBeingResharded(target *query.Target) bool {
func (kew *KeyspaceEventWatcher) TargetIsBeingResharded(target *querypb.Target) bool {
if target.TabletType != topodatapb.TabletType_PRIMARY {
return false
}
Expand All @@ -446,7 +458,8 @@ func (kew *KeyspaceEventWatcher) TargetIsBeingResharded(target *query.Target) bo
// The shard state keeps track of the current primary and the last externally reparented time, which we can use
// to determine that there was a serving primary which now became non serving. This is only possible in a DemotePrimary
// RPC which are only called from ERS and PRS. So buffering will stop when these operations succeed.
func (kew *KeyspaceEventWatcher) PrimaryIsNotServing(target *query.Target) bool {
// We return the tablet alias of the primary if it is serving.
func (kew *KeyspaceEventWatcher) PrimaryIsNotServing(target *querypb.Target) bool {
if target.TabletType != topodatapb.TabletType_PRIMARY {
return false
}
Expand Down
Loading
Loading