storage: regression test applying split while non-member
Without the preceding commit, this fatals as seen in cockroachdb#39796:

> F190821 15:14:28.241623 312191 storage/store.go:2172
> [n2,s2,r21/3:/{Table/54-Max}] replica descriptor of local store not
> found in right hand side of split

Note that cockroachdb#40459 will break these tests since it will not allow the
replicaID to change without an intervening GC cycle, whereas here we rely on
catching up via the raft log across a removal, split, and then re-add.
At that point, the only way in which a split trigger could execute while
in non-member status is if a replica got removed but would still receive
a later split trigger. This could happen as follows:

The log is `[..., remove, split]`

a) the follower receives `split`
b) it learns that `split` is committed so that
c) it will apply both `remove` and then `split`.

a) seems possible; if the removed follower is trailing a bit, the leader
may append both entries at the same time before it (the leader) applies
the replication change that removes the follower from the leader's
config.
b) seems similarly possible, at least once we do async command
application, if said append carries a commit index encompassing the
append itself.
c) will then naturally happen in today's code.
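
To make the a)-c) ordering concrete, here is a minimal, standalone Go sketch
(illustrative only, not CockroachDB code; the entry names are made up) of a
follower that receives both entries in one append whose commit index already
covers them, and then applies them strictly in log order, so the split trigger
executes after the removal:

```go
package main

import "fmt"

func main() {
	// The log tail described above: the entry removing this follower,
	// followed by the split proposed while the follower was still a member.
	log := []string{"remove(self)", "split"}

	// (a) + (b): both entries arrive in a single append whose commit index
	// encompasses the append itself.
	commitIndex := len(log)

	// (c): committed entries are applied strictly in log order, so the split
	// trigger is applied by a replica that is no longer in the descriptor.
	for i := 0; i < commitIndex; i++ {
		fmt.Println("applying", log[i])
	}
}
```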

There's no way we can reasonably enact this scenario in our testing at
this point, though. The suggestion for cockroachdb#40459 is to essentially revert
this commit, so that we at least retain the coverage related to the
RHS of this test.

Release note (bug fix): Avoid crashes with message "replica descriptor
of local store not found".
tbg committed Sep 6, 2019
1 parent 40bb577 commit 4cdf0f9
Showing 5 changed files with 94 additions and 13 deletions.
87 changes: 78 additions & 9 deletions pkg/storage/client_split_test.go
@@ -3195,22 +3195,71 @@ func TestStoreSplitDisappearingReplicas(t *testing.T) {
// Regression test for #21146. This verifies the behavior when the
// application of some split command (part of the lhs's log) is delayed on some
// store and meanwhile the rhs has rebalanced away and back, ending up with a
// larger ReplicaID than the split thinks it will have.
// larger ReplicaID than the split thinks it will have. Additionally we remove
// the LHS replica on the same store before the split and re-add it after, so
// that when the connectivity restores the LHS will apply a split trigger while
// it is not a part of the descriptor.
//
// Or, in pictures (s3 looks like s1 throughout and is omitted):
//
// s1: [----r1@all-------------]
// s2: [----r1@all-------------]
// Remove s2:
// s1: [----r1@s1s3------------]
// s2: [----r1@all-------------] (outdated)
// Split r1:
// s1: [-r1@s1s3-|--r2@s1s3----]
// s2: [----r1@all-------------] (outdated)
// Add s2:
// s1: [-r1@all-|--r2@s1s3-----]
// s2: [----r1@all-------------] (outdated)
// Add learner to s2 on r2 (remains uninitialized due to LHS state blocking it):
// s1: [-r1@all-|--r2@all------]
// s2: [----r1@all-------------] (outdated), uninitialized replica r2/3
// Remove and re-add learner multiple times: r2/3 becomes r2/100
// (diagram looks the same except for replacing r2/3)
//
// When connectivity is restored, r1@s2 will start to catch up on the raft log
// after it learns of its new replicaID. It first processes the replication
// change that removes it and switches to a desc that doesn't contain itself as
// a replica. Next it sees the split trigger that once caused a crash because
// the store tried to look up itself and failed. With this handled correctly,
// the split trigger next has to look up the right hand side, which surprisingly
// has a higher replicaID than the one seen in the split trigger. This too needs
// to be tolerated.
func TestSplitTriggerMeetsUnexpectedReplicaID(t *testing.T) {
defer leaktest.AfterTest(t)()
ctx := context.Background()

blockPromoteCh := make(chan struct{})
var skipLearnerSnaps int32
withoutLearnerSnap := func(fn func()) {
atomic.StoreInt32(&skipLearnerSnaps, 1)
fn()
atomic.StoreInt32(&skipLearnerSnaps, 0)
}
knobs := base.TestingKnobs{Store: &storage.StoreTestingKnobs{
ReplicaAddStopAfterLearnerSnapshot: func() bool {
<-blockPromoteCh
ReplicaSkipLearnerSnapshot: func() bool {
return atomic.LoadInt32(&skipLearnerSnaps) != 0
},
ReplicaAddStopAfterLearnerSnapshot: func(targets []roachpb.ReplicationTarget) bool {
if atomic.LoadInt32(&skipLearnerSnaps) != 0 {
return false
}
if len(targets) > 0 && targets[0].StoreID == 2 {
<-blockPromoteCh
}
return false
},
ReplicaAddSkipLearnerRollback: func() bool {
return true
},
// We rely on replicas remaining where they are even when they are removed
// from the range as this lets us set up a split trigger that will apply
// on a replica that is (at the time of the split trigger) not a member.
DisableReplicaGCQueue: true,
}}
tc := testcluster.StartTestCluster(t, 2, base.TestClusterArgs{
tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{
ServerArgs: base.TestServerArgs{Knobs: knobs},
ReplicationMode: base.ReplicationManual,
})
@@ -3219,6 +3268,9 @@ func TestSplitTriggerMeetsUnexpectedReplicaID(t *testing.T) {
k := tc.ScratchRange(t)
desc := tc.LookupRangeOrFatal(t, k)

// Add a replica on n3 which we'll need to achieve quorum while we cut off n2 below.
tc.AddReplicasOrFatal(t, k, tc.Target(2))

// First construct a range with a learner replica on the second node (index 1)
// and split it, ending up with an orphaned learner on each side of the split.
// After the learner is created, but before the split, block all incoming raft
@@ -3237,23 +3289,40 @@ func TestSplitTriggerMeetsUnexpectedReplicaID(t *testing.T) {
})

_, kRHS := k, k.Next()
// Remove the LHS on the isolated store, split the range, and re-add it.
tc.RemoveReplicasOrFatal(t, k, tc.Target(1))
descLHS, descRHS := tc.SplitRangeOrFatal(t, kRHS)
withoutLearnerSnap(func() {
// NB: can't use AddReplicas since that waits for the target to be up
// to date, which it won't in this case.
//
// We avoid sending a snapshot because that snapshot would include the
// split trigger and we want that to be processed via the log.
d, err := tc.Servers[0].DB().AdminChangeReplicas(
ctx, descLHS.StartKey.AsRawKey(), descLHS, roachpb.MakeReplicationChanges(roachpb.ADD_REPLICA, tc.Target(1)),
)
require.NoError(t, err)
descLHS = *d
})

close(blockPromoteCh)
if err := g.Wait(); !testutils.IsError(err, `descriptor changed`) {
t.Fatalf(`expected "descriptor changed" error got: %+v`, err)
}

// Now repeatedly remove and re-add the learner on the rhs, so it has a
// Now repeatedly re-add the learner on the rhs, so it has a
// different replicaID than the split trigger expects.
for i := 0; i < 5; i++ {
_, err := tc.RemoveReplicas(kRHS, tc.Target(1))
require.NoError(t, err)
_, err = tc.AddReplicas(kRHS, tc.Target(1))
add := func() {
_, err := tc.AddReplicas(kRHS, tc.Target(1))
if !testutils.IsError(err, `snapshot intersects existing range`) {
t.Fatalf(`expected "snapshot intersects existing range" error got: %+v`, err)
}
}
for i := 0; i < 5; i++ {
add()
tc.RemoveReplicasOrFatal(t, kRHS, tc.Target(1))
}
add()

// Normally AddReplicas will return the latest version of the RangeDescriptor,
// but because we're getting snapshot errors and using the
3 changes: 3 additions & 0 deletions pkg/storage/raft_snapshot_queue.go
@@ -113,6 +113,9 @@ func (rq *raftSnapshotQueue) processRaftSnapshot(
// the replicate queue. Either way, no point in also sending it a snapshot of
// type RAFT.
if repDesc.GetType() == roachpb.LEARNER {
if fn := repl.store.cfg.TestingKnobs.ReplicaSkipLearnerSnapshot; fn != nil && fn() {
return nil
}
snapType = SnapshotRequest_LEARNER
if index := repl.getAndGCSnapshotLogTruncationConstraints(timeutil.Now(), repDesc.StoreID); index > 0 {
// There is a snapshot being transferred. It's probably a LEARNER snap, so
8 changes: 6 additions & 2 deletions pkg/storage/replica_command.go
@@ -1211,6 +1211,10 @@ func (r *Replica) atomicReplicationChange(
return nil, errors.Errorf("programming error: cannot promote replica of type %s", rDesc.Type)
}

if fn := r.store.cfg.TestingKnobs.ReplicaSkipLearnerSnapshot; fn != nil && fn() {
continue
}

// Note that raft snapshot queue will refuse to send a snapshot to a learner
// replica if its store is already sending a snapshot to that replica. That
// would race with this snapshot, except that we've put a (best effort) lock
@@ -1231,8 +1235,8 @@
}
}

if len(chgs.Additions()) > 0 {
if fn := r.store.cfg.TestingKnobs.ReplicaAddStopAfterLearnerSnapshot; fn != nil && fn() {
if adds := chgs.Additions(); len(adds) > 0 {
if fn := r.store.cfg.TestingKnobs.ReplicaAddStopAfterLearnerSnapshot; fn != nil && fn(adds) {
return desc, nil
}
}
2 changes: 1 addition & 1 deletion pkg/storage/replica_learner_test.go
@@ -68,7 +68,7 @@ func (rtl *replicationTestKnobs) withStopAfterJointConfig(f func()) {

func makeReplicationTestKnobs() (base.TestingKnobs, *replicationTestKnobs) {
var k replicationTestKnobs
k.storeKnobs.ReplicaAddStopAfterLearnerSnapshot = func() bool {
k.storeKnobs.ReplicaAddStopAfterLearnerSnapshot = func(_ []roachpb.ReplicationTarget) bool {
return atomic.LoadInt64(&k.replicaAddStopAfterLearnerAtomic) > 0
}
k.storeKnobs.ReplicaAddStopAfterJointConfig = func() bool {
7 changes: 6 additions & 1 deletion pkg/storage/testing_knobs.go
@@ -195,7 +195,12 @@ type StoreTestingKnobs struct {
// and after the LEARNER type snapshot, but before promoting it to a voter.
// This ensures the `*Replica` will be materialized on the Store when it
// returns.
ReplicaAddStopAfterLearnerSnapshot func() bool
ReplicaAddStopAfterLearnerSnapshot func([]roachpb.ReplicationTarget) bool
// ReplicaSkipLearnerSnapshot causes snapshots to never be sent to learners
// if the func returns true. Adding replicas proceeds as usual, though if
// the added replica has no prior state which can be caught up from the raft
// log, the result will be a voter that is unable to participate in quorum.
ReplicaSkipLearnerSnapshot func() bool
// ReplicaAddStopAfterJointConfig causes replica addition to return early if
// the func returns true. This happens before transitioning out of a joint
// configuration, after the joint configuration has been entered by means
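
For illustration, a hypothetical, self-contained usage sketch of the knobs above
(the test name, the store-2 condition, and the import paths are assumptions;
the knob signatures and cluster setup are modeled on the test diff in this commit):

```go
package storage_test

import (
	"context"
	"sync/atomic"
	"testing"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
)

func TestLearnerSnapshotKnobsSketch(t *testing.T) {
	var skipLearnerSnaps int32
	knobs := base.TestingKnobs{Store: &storage.StoreTestingKnobs{
		// While the flag is set, learner snapshots are suppressed and an added
		// replica must catch up from the raft log (or stay uninitialized).
		ReplicaSkipLearnerSnapshot: func() bool {
			return atomic.LoadInt32(&skipLearnerSnaps) != 0
		},
		// Stop replica addition right after the learner snapshot, but only for
		// additions that target store 2.
		ReplicaAddStopAfterLearnerSnapshot: func(targets []roachpb.ReplicationTarget) bool {
			return len(targets) > 0 && targets[0].StoreID == 2
		},
	}}
	tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{
		ServerArgs:      base.TestServerArgs{Knobs: knobs},
		ReplicationMode: base.ReplicationManual,
	})
	defer tc.Stopper().Stop(context.Background())

	// Toggle the flag around operations that should not send learner snapshots.
	atomic.StoreInt32(&skipLearnerSnaps, 1)
	defer atomic.StoreInt32(&skipLearnerSnaps, 0)
}
```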
