From a59f2463cc89b8016e7a5ad4dc2e6243c2a89fc3 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Mon, 2 Sep 2019 21:03:36 +0200 Subject: [PATCH 1/2] storage: use nil instead of &VOTER_FULL 19.1 nodes will run into trouble with their range descriptor CPuts if they have to handle &VOTER_FULL which is assigned to a field that they're not aware of. Categorically use `nil` instead; we can remove this in the 20.1 cycle. Release note: None --- pkg/roachpb/metadata.go | 14 ++++++++++++-- pkg/storage/replica_command.go | 9 ++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/pkg/roachpb/metadata.go b/pkg/roachpb/metadata.go index 0c8a86e5985d..7929bb778711 100644 --- a/pkg/roachpb/metadata.go +++ b/pkg/roachpb/metadata.go @@ -145,7 +145,12 @@ func (r *RangeDescriptor) SetReplicaType( desc := &r.InternalReplicas[i] if desc.StoreID == storeID && desc.NodeID == nodeID { prevTyp := desc.GetType() - desc.Type = &typ + if typ != VOTER_FULL { + desc.Type = &typ + } else { + // For 19.1 compatibility. + desc.Type = nil + } return *desc, prevTyp, true } } @@ -157,11 +162,16 @@ func (r *RangeDescriptor) SetReplicaType( func (r *RangeDescriptor) AddReplica( nodeID NodeID, storeID StoreID, typ ReplicaType, ) ReplicaDescriptor { + var typPtr *ReplicaType + // For 19.1 compatibility, use nil instead of VOTER_FULL. + if typ != VOTER_FULL { + typPtr = &typ + } toAdd := ReplicaDescriptor{ NodeID: nodeID, StoreID: storeID, ReplicaID: r.NextReplicaID, - Type: &typ, + Type: typPtr, } rs := r.Replicas() rs.AddReplica(toAdd) diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go index 49a0bb1e0862..5a6b15bcad3d 100644 --- a/pkg/storage/replica_command.go +++ b/pkg/storage/replica_command.go @@ -1266,9 +1266,12 @@ func (r *Replica) addReplicaLegacyPreemptiveSnapshot( // complete. See #10409. { preemptiveRepDesc := roachpb.ReplicaDescriptor{ - NodeID: target.NodeID, - StoreID: target.StoreID, - Type: roachpb.ReplicaTypeVoterFull(), + NodeID: target.NodeID, + StoreID: target.StoreID, + // NB: if we're still sending preemptive snapshot, the recipient is + // very likely a 19.1 node and does not understand this field. It + // won't matter to set it here, but don't anyway. + Type: nil, ReplicaID: 0, // intentional } if err := r.sendSnapshot(ctx, preemptiveRepDesc, SnapshotRequest_PREEMPTIVE, priority); err != nil { From 5049bc3216ac2aed659a5fd3e6e43cbae10e8137 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 3 Sep 2019 10:35:59 +0200 Subject: [PATCH 2/2] storage: fix DeprecatedReplicas migration In 52333d19388206c77037370f57c91b58b24a3e9a we made the incorrect assumption that a 19.2 node running a ChangeReplicas invocation is also the node which would evaluate the corresponding EndTransaction request, but this is only often true, not generally true. If a 19.1 node ended up evaluating a trigger composed by a 19.2 node, it would see an empty UpdatedReplicas slice and a zero NextReplicaID and would go ahead and construct a broken descriptor from that which it would then go on and disseminate through Raft. This caused the affected range to be fatally corrupted. The solution is simple: populate the old field for now as well and remove that only in the next release. Release note: None --- pkg/storage/batcheval/cmd_end_transaction.go | 5 +++-- pkg/storage/replica_command.go | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pkg/storage/batcheval/cmd_end_transaction.go b/pkg/storage/batcheval/cmd_end_transaction.go index 7ba85f0ecab5..e4ac95b1f9e3 100644 --- a/pkg/storage/batcheval/cmd_end_transaction.go +++ b/pkg/storage/batcheval/cmd_end_transaction.go @@ -1109,15 +1109,16 @@ func changeReplicasTrigger( var desc roachpb.RangeDescriptor if change.Desc != nil { + // Trigger proposed by a 19.2+ node (and we're a 19.2+ node as well). desc = *change.Desc } else { + // Trigger proposed by a 19.1 node. Reconstruct descriptor from deprecated + // fields. desc = *rec.Desc() desc.SetReplicas(roachpb.MakeReplicaDescriptors(change.DeprecatedUpdatedReplicas)) desc.NextReplicaID = change.DeprecatedNextReplicaID } - // TODO(tschottdorf): duplication of Desc with the trigger below, should - // likely remove it from the trigger. pd.Replicated.State = &storagepb.ReplicaState{ Desc: &desc, } diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go index 5a6b15bcad3d..b0ff7e189032 100644 --- a/pkg/storage/replica_command.go +++ b/pkg/storage/replica_command.go @@ -1406,9 +1406,10 @@ func execChangeReplicasTxn( deprecatedRepDesc = removed[0] } crt = &roachpb.ChangeReplicasTrigger{ - DeprecatedChangeType: deprecatedChangeType, - DeprecatedReplica: deprecatedRepDesc, - Desc: &updatedDesc, + DeprecatedChangeType: deprecatedChangeType, + DeprecatedReplica: deprecatedRepDesc, + DeprecatedUpdatedReplicas: updatedDesc.Replicas().All(), + DeprecatedNextReplicaID: updatedDesc.NextReplicaID, } } else { crt = &roachpb.ChangeReplicasTrigger{