Skip to content

Commit

Permalink
kv: promote expiration-based lease to epoch without sequence number c…
Browse files Browse the repository at this point in the history
…hange

Fixes cockroachdb#117630.
Fixes cockroachdb#90656.
Fixes cockroachdb#98553.

Informs cockroachdb#61986.
Informs cockroachdb#115191.

This commit updates the post-lease transfer promotion of expiration-based leases
to epoch-based leases to not change the sequence number of the lease. This
avoids invalidating all requests proposed under the original expiration-based
lease, which can lead to `RETRY_ASYNC_WRITE_FAILURE` errors.

The change accomplishes this by updating the `Lease.Equivalent` method to
consider an expiration-based lease to be equivalent to an epoch-based lease that
is held by the same replica and has the same start time. Doing so requires some
care, because lease equivalency is checked below Raft and needs to remain
deterministic across binary versions.

This change requires a cluster version check, so it cannot be backported.

Release note (bug fix): Improved an interaction during range lease transfers
which could previously cause `RETRY_ASYNC_WRITE_FAILURE` errors to be returned
to clients.
  • Loading branch information
nvanbenschoten committed Jan 16, 2024
1 parent f46f8e9 commit 54ebbac
Show file tree
Hide file tree
Showing 10 changed files with 109 additions and 25 deletions.
4 changes: 3 additions & 1 deletion pkg/kv/kvserver/batcheval/cmd_lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"context"
"fmt"

"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
Expand Down Expand Up @@ -101,8 +102,9 @@ func evalNewLease(
Message: "sequence number should not be set",
}
}
isV24_1 := rec.ClusterSettings().Version.IsActive(ctx, clusterversion.V24_1Start)
var priorReadSum *rspb.ReadSummary
if prevLease.Equivalent(lease) {
if prevLease.Equivalent(lease, isV24_1 /* expToEpochEquiv */) {
// If the proposed lease is equivalent to the previous lease, it is
// given the same sequence number. This is subtle, but is important
// to ensure that leases which are meant to be considered the same
Expand Down
11 changes: 8 additions & 3 deletions pkg/kv/kvserver/client_lease_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1557,12 +1557,17 @@ func TestLeaseTransfersUseExpirationLeasesAndBumpToEpochBasedOnes(t *testing.T)
})

// Expect it to be upgraded to an epoch based lease.
tc.WaitForLeaseUpgrade(ctx, t, desc)
epochL := tc.WaitForLeaseUpgrade(ctx, t, desc)
require.Equal(t, roachpb.LeaseEpoch, epochL.Type())

// Expect it to have been upgraded from an expiration based lease.
mu.Lock()
defer mu.Unlock()
require.Equal(t, roachpb.LeaseExpiration, mu.lease.Type())
expirationL := mu.lease
mu.Unlock()
require.Equal(t, roachpb.LeaseExpiration, expirationL.Type())

// Expect the two leases to have the same sequence number.
require.Equal(t, expirationL.Sequence, epochL.Sequence)
}

// TestLeaseRequestBumpsEpoch tests that a non-cooperative lease acquisition of
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/client_replica_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1639,7 +1639,7 @@ func TestLeaseExpirationBasedRangeTransfer(t *testing.T) {
t.Fatal(err)
}
newLease, _ := l.replica0.GetLease()
if !origLease.Equivalent(newLease) {
if !origLease.Equivalent(newLease, true /* expToEpochEquiv */) {
t.Fatalf("original lease %v and new lease %v not equivalent", origLease, newLease)
}
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/kv/kvserver/kvserverbase/forced_error.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ func CheckForcedErr(
if replicaState.Lease.Sequence == requestedLease.Sequence {
// It is only possible for this to fail when expiration-based
// lease extensions are proposed concurrently.
leaseMismatch = !replicaState.Lease.Equivalent(requestedLease)
expToEpochEquiv := raftCmd.ReplicatedEvalResult.IsLeaseRequestWithExpirationToEpochEquivalent
leaseMismatch = !replicaState.Lease.Equivalent(requestedLease, expToEpochEquiv)
}

// This is a check to see if the lease we proposed this lease request
Expand Down
22 changes: 22 additions & 0 deletions pkg/kv/kvserver/kvserverpb/proposer_kv.proto
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,28 @@ message ReplicatedEvalResult {
Merge merge = 4;
ComputeChecksum compute_checksum = 21;
bool is_lease_request = 6;
// Set to true for lease requests in v24.1 clusters, where new behavior in
// Lease.Equivalent is supported which allows for expiration-based leases to
// be promoted to epoch-based leases under the same lease sequence.
//
// The field is used to avoid divergence between replicas of a range that
// apply lease requests while running different versions of the binary.
//
// Deprecation notes:
// - in v24.1, we introduced this field and set it to true for lease requests
// when the cluster version is detected to be v24.1 or higher. This ensures
// that all replicas who may apply the lease request will correctly handle
// the field.
// - in v24.2, we can set this field to true unconditionally for lease request
// proposals. It must still be consulted during lease application, because
// the raft proposal may have been performed by an older node.
// - in v24.2 or later, we run a Barrier migration to flush out old raft
// entries from all replica's raft logs. This ensures that no replica will
// ever apply a lease request with this field set to false.
// - in v25.1, we stop consulting this field below raft but keep setting it
// to true above raft for mixed-version compatibility with v24.2 nodes.
// - in v25.2, we delete the field.
bool is_lease_request_with_expiration_to_epoch_equivalent = 26;
bool is_probe = 23;
// The timestamp at which this command is writing. Used to verify the validity
// of the command against the GC threshold and to update the followers'
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/replica_application_result.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ func clearTrivialReplicatedEvalResultFields(r *kvserverpb.ReplicatedEvalResult)
// they don't trigger an assertion at the end of the application process
// (which checks that all fields were handled).
r.IsLeaseRequest = false
r.IsLeaseRequestWithExpirationToEpochEquivalent = false
r.WriteTimestamp = hlc.Timestamp{}
r.PrevLeaseProposal = nil
// The state fields cleared here were already applied to the in-memory view of
Expand Down
18 changes: 17 additions & 1 deletion pkg/kv/kvserver/replica_proposal.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"path/filepath"
"time"

"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result"
Expand Down Expand Up @@ -348,7 +349,13 @@ func (r *Replica) leasePostApplyLocked(
// the same lease. This can happen when callers are using
// leasePostApply for some of its side effects, like with
// splitPostApply. It can also happen during lease extensions.
if !prevLease.Equivalent(*newLease) {
//
// NOTE: we pass true for expToEpochEquiv because we may be in a cluster
// where some node has detected the version upgrade and is considering
// this lease type promotion to be valid, even if our local node has not
// yet detected the upgrade. Passing true broadens the definition of
// equivalence and weakens the assertion.
if !prevLease.Equivalent(*newLease, true /* expToEpochEquiv */) {
log.Fatalf(ctx, "sequence identical for different leases, prevLease=%s, newLease=%s",
redact.Safe(prevLease), redact.Safe(newLease))
}
Expand Down Expand Up @@ -977,6 +984,15 @@ func (r *Replica) evaluateProposal(
// Set the proposal's replicated result, which contains metadata and
// side-effects that are to be replicated to all replicas.
res.Replicated.IsLeaseRequest = ba.IsSingleRequestLeaseRequest()
if res.Replicated.IsLeaseRequest {
// NOTE: this cluster version check may return true even after the
// corresponding check in evalNewLease has returned false. That's ok, as
// this check is used to inform raft about whether an expiration-based
// lease **can** be promoted to an epoch-based lease without a sequence
// change, not that it **is** being promoted without a sequence change.
isV24_1 := r.ClusterSettings().Version.IsActive(ctx, clusterversion.V24_1Start)
res.Replicated.IsLeaseRequestWithExpirationToEpochEquivalent = isV24_1
}
if ba.AppliesTimestampCache() {
res.Replicated.WriteTimestamp = ba.WriteTimestamp()
}
Expand Down
45 changes: 35 additions & 10 deletions pkg/roachpb/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -1875,10 +1875,15 @@ func (l Lease) Speculative() bool {
// based leases, the start time of the lease is sufficient to
// avoid using an older lease with same epoch.
//
// expToEpochEquiv indicates whether an expiration-based lease
// can be considered equivalent to an epoch-based lease during
// a promotion from expiration-based to epoch-based. It is used
// for mixed-version compatibility.
//
// NB: Lease.Equivalent is NOT symmetric. For expiration-based
// leases, a lease is equivalent to another with an equal or
// later expiration, but not an earlier expiration.
func (l Lease) Equivalent(newL Lease) bool {
func (l Lease) Equivalent(newL Lease, expToEpochEquiv bool) bool {
// Ignore proposed timestamp & deprecated start stasis.
l.ProposedTS, newL.ProposedTS = nil, nil
l.DeprecatedStartStasis, newL.DeprecatedStartStasis = nil, nil
Expand Down Expand Up @@ -1907,15 +1912,35 @@ func (l Lease) Equivalent(newL Lease) bool {
l.Epoch, newL.Epoch = 0, 0
}
case LeaseExpiration:
// See the comment above, though this field's nullability wasn't
// changed. We nil it out for completeness only.
l.Epoch, newL.Epoch = 0, 0

// For expiration-based leases, extensions are considered equivalent.
// This is the one case where Equivalent is not commutative and, as
// such, requires special handling beneath Raft (see checkForcedErr).
if l.GetExpiration().LessEq(newL.GetExpiration()) {
l.Expiration, newL.Expiration = nil, nil
switch newL.Type() {
case LeaseEpoch:
// An expiration-based lease being promoted to an epoch-based lease. This
// transition occurs after a successful lease transfer if the setting
// kv.transfer_expiration_leases_first.enabled is enabled.
//
// Expiration-based leases carry a local expiration timestamp. Epoch-based
// leases store their expiration indirectly in NodeLiveness. We assume that
// this promotion is only proposed if the liveness expiration is later than
// previous expiration carried by the expiration-based lease.
//
// Ignore epoch and expiration. The remaining fields which are compared
// are Replica and Start.
if expToEpochEquiv {
l.Epoch, newL.Epoch = 0, 0
l.Expiration, newL.Expiration = nil, nil
}

case LeaseExpiration:
// See the comment above, though this field's nullability wasn't
// changed. We nil it out for completeness only.
l.Epoch, newL.Epoch = 0, 0

// For expiration-based leases, extensions are considered equivalent.
// This is one case where Equivalent is not commutative and, as such,
// requires special handling beneath Raft (see checkForcedErr).
if l.GetExpiration().LessEq(newL.GetExpiration()) {
l.Expiration, newL.Expiration = nil, nil
}
}
}
return l == newL
Expand Down
19 changes: 14 additions & 5 deletions pkg/roachpb/data_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1070,8 +1070,12 @@ func TestLeaseEquivalence(t *testing.T) {
{expire1, expire2R2TS2, false}, // different expiration leases
{expire1, expire2, true}, // same expiration lease, extended
{expire2, expire1, false}, // same expiration lease, extended but backwards
{epoch1, expire1, false}, // epoch and expiration leases
{expire1, epoch1, false}, // expiration and epoch leases
{epoch1, expire1, false}, // epoch and expiration leases, same replica and start time
{epoch1, expire1R2, false}, // epoch and expiration leases, different replica
{epoch1, expire1TS2, false}, // epoch and expiration leases, different start time
{expire1, epoch1, true}, // expiration and epoch leases, same replica and start time
{expire1, epoch1R2, false}, // expiration and epoch leases, different replica
{expire1, epoch1TS2, false}, // expiration and epoch leases, different start time
{proposed1, proposed1, true}, // exact leases with identical timestamps
{proposed1, proposed2, false}, // same proposed timestamps, but diff epochs
{proposed1, proposed3, true}, // different proposed timestamps, same lease
Expand All @@ -1082,8 +1086,13 @@ func TestLeaseEquivalence(t *testing.T) {
}

for i, tc := range testCases {
if ok := tc.l.Equivalent(tc.ol); tc.expSuccess != ok {
t.Errorf("%d: expected success? %t; got %t", i, tc.expSuccess, ok)
// Test expToEpochEquiv = true.
require.Equal(t, tc.expSuccess, tc.l.Equivalent(tc.ol, true /* expToEpochEquiv */), "%d", i)
if tc.l == expire1 && tc.ol == epoch1 {
// The one case where expToEpochEquiv = false makes a difference.
require.Equal(t, !tc.expSuccess, tc.l.Equivalent(tc.ol, false /* expToEpochEquiv */), "%d", i)
} else {
require.Equal(t, tc.expSuccess, tc.l.Equivalent(tc.ol, false /* expToEpochEquiv */), "%d", i)
}
}

Expand All @@ -1105,7 +1114,7 @@ func TestLeaseEquivalence(t *testing.T) {
postPRLease.DeprecatedStartStasis = nil
postPRLease.Expiration = nil

if !postPRLease.Equivalent(prePRLease) || !prePRLease.Equivalent(postPRLease) {
if !postPRLease.Equivalent(prePRLease, true) || !prePRLease.Equivalent(postPRLease, true) {
t.Fatalf("leases not equivalent but should be despite diff(pre,post) = %s", pretty.Diff(prePRLease, postPRLease))
}
}
Expand Down
9 changes: 6 additions & 3 deletions pkg/testutils/testcluster/testcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -1140,18 +1140,21 @@ func (tc *TestCluster) MaybeWaitForLeaseUpgrade(
// is upgraded to an epoch-based one.
func (tc *TestCluster) WaitForLeaseUpgrade(
ctx context.Context, t serverutils.TestFataler, desc roachpb.RangeDescriptor,
) {
) roachpb.Lease {
require.False(t, kvserver.ExpirationLeasesOnly.Get(&tc.Server(0).ClusterSettings().SV),
"cluster configured to only use expiration leases")
var l roachpb.Lease
testutils.SucceedsSoon(t, func() error {
li, _, err := tc.FindRangeLeaseEx(ctx, desc, nil)
require.NoError(t, err)
if li.Current().Type() != roachpb.LeaseEpoch {
l = li.Current()
if l.Type() != roachpb.LeaseEpoch {
return errors.Errorf("lease still an expiration based lease")
}
require.Equal(t, int64(1), li.Current().Epoch)
require.Equal(t, int64(1), l.Epoch)
return nil
})
return l
}

// RemoveLeaseHolderOrFatal is a convenience version of TransferRangeLease and RemoveVoter
Expand Down

0 comments on commit 54ebbac

Please sign in to comment.