From 20600dc49a03258af8a63699d3c7608783c0e5b5 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Thu, 12 Dec 2019 15:32:12 +0100 Subject: [PATCH] storage: respect closed timestamp in tryReproposeWithNewLeaseIndex Prior to this commit, tryReproposeWithNewLeaseIndex would not consult with the closed timestamp tracker. This meant that a proposal may end up with a new lease index at a timestamp that consumers of closed timestamp updates had already considered immutable. In other words, this broke closed timestamp guarantees and would by extension also have the potential to break CDC. This path is fairly rare and it isn't one likely to cause problems in practice, though it certainly could have. See https://github.com/cockroachdb/cockroach/issues/42821 for details. Fixes #42821 Release note (bug fix): fix a bug which could lead to follower reads or CDC updates that did not reflect the full set of data at the timestamp. This bug was never observed in practice and is unlikely to cause issues, one of the necessary ingredients being an aggressive closed timestamp interval. --- pkg/storage/replica_application_result.go | 24 ++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/pkg/storage/replica_application_result.go b/pkg/storage/replica_application_result.go index 7c0371142315..46c77e344b7b 100644 --- a/pkg/storage/replica_application_result.go +++ b/pkg/storage/replica_application_result.go @@ -14,6 +14,7 @@ import ( "context" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/storage/closedts/ctpb" "github.com/cockroachdb/cockroach/pkg/storage/engine/enginepb" "github.com/cockroachdb/cockroach/pkg/storage/storagebase" "github.com/cockroachdb/cockroach/pkg/storage/storagepb" @@ -149,6 +150,7 @@ func (r *Replica) prepareLocalResult(ctx context.Context, cmd *replicatedCmd) { // the proposal would be a user-visible error. 
pErr = r.tryReproposeWithNewLeaseIndex(ctx, cmd) if pErr != nil { + log.Warningf(ctx, "failed to repropose with new lease index: %s", pErr) cmd.response.Err = pErr } else { // Unbind the entry's local proposal because we just succeeded @@ -207,13 +209,33 @@ func (r *Replica) tryReproposeWithNewLeaseIndex( // succeeding in the Raft log for a given command. return nil } + + minTS, untrack := r.store.cfg.ClosedTimestamp.Tracker.Track(ctx) + defer untrack(ctx, 0, 0, 0) // covers all error paths below + // NB: p.Request.Timestamp reflects the action of ba.SetActiveTimestamp. + if p.Request.Timestamp.Less(minTS) { + // The tracker wants us to forward the request timestamp, but we can't + // do that without re-evaluating, so give up. The error returned here + // will go back to DistSender, so send something it can digest. + lhErr := roachpb.NewError(newNotLeaseHolderError( + r.mu.state.Lease, + r.store.StoreID(), + r.mu.state.Desc, + )) + + return lhErr + } // Some tests check for this log message in the trace. log.VEventf(ctx, 2, "retry: proposalIllegalLeaseIndex") + maxLeaseIndex, pErr := r.propose(ctx, p) if pErr != nil { - log.Warningf(ctx, "failed to repropose with new lease index: %s", pErr) return pErr } + // NB: The caller already promises that the lease check succeeded, meaning + // the sequence numbers match, implying that the lease epoch hasn't changed + // from what it was under the proposal-time lease. + untrack(ctx, ctpb.Epoch(r.mu.state.Lease.Epoch), r.RangeID, ctpb.LAI(maxLeaseIndex)) log.VEventf(ctx, 2, "reproposed command %x at maxLeaseIndex=%d", cmd.idKey, maxLeaseIndex) return nil }