Skip to content

Commit

Permalink
kvserver/loqrecovery: persist new replica ID in RaftReplicaID
Browse files Browse the repository at this point in the history
The recently introduced local `RaftReplicaIDKey` was not updated when
loss of quorum recovery changed the replica's ID. This could lead to
assertion failures.

Release note: None
  • Loading branch information
erikgrinaker committed Apr 25, 2022
1 parent cdc4623 commit 0c98606
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 0 deletions.
5 changes: 5 additions & 0 deletions pkg/kv/kvserver/loqrecovery/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,11 @@ func applyReplicaUpdate(
report.OldReplica, _ = report.RemovedReplicas.RemoveReplica(
update.NewReplica.NodeID, update.NewReplica.StoreID)

// Persist the new replica ID.
if err := sl.SetRaftReplicaID(ctx, readWriter, update.NewReplica.ReplicaID); err != nil {
return PrepareReplicaReport{}, errors.Wrap(err, "setting new replica ID")
}

// Refresh stats
if err := sl.SetMVCCStats(ctx, readWriter, &ms); err != nil {
return PrepareReplicaReport{}, errors.Wrap(err, "updating MVCCStats")
Expand Down
19 changes: 19 additions & 0 deletions pkg/kv/kvserver/loqrecovery/recovery_env_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ type storeView struct {
StoreID roachpb.StoreID `yaml:"StoreID"`

Descriptors []storeDescriptorView `yaml:"Descriptors"`
LocalData []localDataView `yaml:"LocalData"`
}

// storeDescriptorView contains important fields from the range
Expand Down Expand Up @@ -119,6 +120,12 @@ func (r replicaDescriptorView) asReplicaDescriptor() roachpb.ReplicaDescriptor {
}
}

// localDataView contains interesting local store data for each range.
// It is serialized to YAML as an element of a storeView's LocalData list;
// the dump-store handler appends one entry for every range descriptor it
// iterates on a store.
type localDataView struct {
// RangeID is the ID of the range descriptor this entry was built from.
RangeID roachpb.RangeID `yaml:"RangeID"`
// RaftReplicaID is the ReplicaID returned by the state loader's
// LoadRaftReplicaID for this range, converted to a plain int.
// NOTE(review): the testdata shows 0 for replicas whose ID recovery did
// not rewrite; presumably that means no value was persisted — confirm.
RaftReplicaID int `yaml:"RaftReplicaID"`
}

// Store with its owning NodeID for easier grouping by owning nodes.
type wrappedStore struct {
engine storage.Engine
Expand Down Expand Up @@ -520,10 +527,21 @@ func (e *quorumRecoveryEnv) handleDumpStore(t *testing.T, d datadriven.TestData)
var storesView []storeView
for _, storeID := range stores {
var descriptorViews []storeDescriptorView
var localDataViews []localDataView
store := e.stores[storeID]
err := kvserver.IterateRangeDescriptorsFromDisk(ctx, store.engine,
func(desc roachpb.RangeDescriptor) error {
descriptorViews = append(descriptorViews, descriptorView(desc))

sl := stateloader.Make(desc.RangeID)
raftReplicaID, _, err := sl.LoadRaftReplicaID(ctx, store.engine)
if err != nil {
t.Fatalf("failed to load Raft replica ID: %v", err)
}
localDataViews = append(localDataViews, localDataView{
RangeID: desc.RangeID,
RaftReplicaID: int(raftReplicaID.ReplicaID),
})
return nil
})
if err != nil {
Expand All @@ -533,6 +551,7 @@ func (e *quorumRecoveryEnv) handleDumpStore(t *testing.T, d datadriven.TestData)
NodeID: e.stores[storeID].nodeID,
StoreID: storeID,
Descriptors: descriptorViews,
LocalData: localDataViews,
})
}
out, err := yaml.Marshal(storesView)
Expand Down
6 changes: 6 additions & 0 deletions pkg/kv/kvserver/loqrecovery/testdata/learners_lose
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ dump-store stores=(1,2)
StartKey: /Min
Replicas:
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 16}
LocalData:
- RangeID: 1
RaftReplicaID: 16
- NodeID: 2
StoreID: 2
Descriptors:
Expand All @@ -72,6 +75,9 @@ dump-store stores=(1,2)
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
- Replica: {NodeID: 4, StoreID: 4, ReplicaID: 4}
- Replica: {NodeID: 5, StoreID: 5, ReplicaID: 5}
LocalData:
- RangeID: 1
RaftReplicaID: 0

# Second use case where we can't make a decision and fail keyspace coverage:
# only a single learner is left, so there is no way to recover.
Expand Down
24 changes: 24 additions & 0 deletions pkg/kv/kvserver/loqrecovery/testdata/max_applied_voter_wins
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ dump-store stores=(1,2)
StartKey: /Min
Replicas:
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 16}
LocalData:
- RangeID: 1
RaftReplicaID: 16
- NodeID: 2
StoreID: 2
Descriptors:
Expand All @@ -70,6 +73,9 @@ dump-store stores=(1,2)
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
- Replica: {NodeID: 4, StoreID: 4, ReplicaID: 4}
- Replica: {NodeID: 5, StoreID: 5, ReplicaID: 5}
LocalData:
- RangeID: 1
RaftReplicaID: 0

dump-events stores=(1,2)
----
Expand Down Expand Up @@ -184,6 +190,11 @@ dump-store stores=(1,2,5,6)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 5, StoreID: 5, ReplicaID: 6}
- Replica: {NodeID: 6, StoreID: 6, ReplicaID: 7}
LocalData:
- RangeID: 1
RaftReplicaID: 0
- RangeID: 2
RaftReplicaID: 0
- NodeID: 2
StoreID: 2
Descriptors:
Expand All @@ -193,6 +204,9 @@ dump-store stores=(1,2,5,6)
- Replica: {NodeID: 2, StoreID: 2, ReplicaID: 2}
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
- Replica: {NodeID: 4, StoreID: 4, ReplicaID: 4}
LocalData:
- RangeID: 1
RaftReplicaID: 0
- NodeID: 5
StoreID: 5
Descriptors:
Expand All @@ -208,6 +222,11 @@ dump-store stores=(1,2,5,6)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 5, StoreID: 5, ReplicaID: 6}
- Replica: {NodeID: 6, StoreID: 6, ReplicaID: 7}
LocalData:
- RangeID: 1
RaftReplicaID: 0
- RangeID: 2
RaftReplicaID: 0
- NodeID: 6
StoreID: 6
Descriptors:
Expand All @@ -223,3 +242,8 @@ dump-store stores=(1,2,5,6)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 5, StoreID: 5, ReplicaID: 6}
- Replica: {NodeID: 6, StoreID: 6, ReplicaID: 7}
LocalData:
- RangeID: 1
RaftReplicaID: 0
- RangeID: 2
RaftReplicaID: 0
6 changes: 6 additions & 0 deletions pkg/kv/kvserver/loqrecovery/testdata/max_store_voter_wins
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,16 @@ dump-store stores=(1,2)
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
- Replica: {NodeID: 4, StoreID: 4, ReplicaID: 4}
- Replica: {NodeID: 5, StoreID: 5, ReplicaID: 5}
LocalData:
- RangeID: 1
RaftReplicaID: 0
- NodeID: 2
StoreID: 2
Descriptors:
- RangeID: 1
StartKey: /Min
Replicas:
- Replica: {NodeID: 2, StoreID: 2, ReplicaID: 16}
LocalData:
- RangeID: 1
RaftReplicaID: 16
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ dump-store stores=(1,2,3)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 2, StoreID: 2, ReplicaID: 2}
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
LocalData:
- RangeID: 1
RaftReplicaID: 0
- NodeID: 2
StoreID: 2
Descriptors:
Expand All @@ -67,6 +70,9 @@ dump-store stores=(1,2,3)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 2, StoreID: 2, ReplicaID: 2}
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
LocalData:
- RangeID: 1
RaftReplicaID: 0
- NodeID: 3
StoreID: 3
Descriptors:
Expand All @@ -76,3 +82,6 @@ dump-store stores=(1,2,3)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 2, StoreID: 2, ReplicaID: 2}
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
LocalData:
- RangeID: 1
RaftReplicaID: 0
6 changes: 6 additions & 0 deletions pkg/kv/kvserver/loqrecovery/testdata/no_change_when_quorum
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ dump-store stores=(1,2)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 2, StoreID: 2, ReplicaID: 2}
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
LocalData:
- RangeID: 1
RaftReplicaID: 0
- NodeID: 2
StoreID: 2
Descriptors:
Expand All @@ -56,3 +59,6 @@ dump-store stores=(1,2)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 2, StoreID: 2, ReplicaID: 2}
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
LocalData:
- RangeID: 1
RaftReplicaID: 0

0 comments on commit 0c98606

Please sign in to comment.