Skip to content

Commit

Permalink
Merge #36942
Browse files Browse the repository at this point in the history
36942: storage: Don't swallow ErrEpochAlreadyIncremented r=tbg a=bdarnell

IncrementEpoch was failing to distinguish between the request that
caused the increment and another caller making the same increment.
This is incorrect since a successful increment tells you *when* the
node was confirmed dead, while relying on another node's increment
leaves this uncertain.

In rare cases involving a badly overloaded cluster, this could result
in inconsistencies (non-serializable transactions) due to incorrect
timestamp cache management.

Fixes #35986

Release note (bug fix): Fix a rare inconsistency that could occur on
badly overloaded clusters.

Co-authored-by: Ben Darnell <ben@bendarnell.com>
  • Loading branch information
craig[bot] and bdarnell committed Apr 19, 2019
2 parents 80c5b09 + 8de884a commit 4e60001
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 10 deletions.
12 changes: 6 additions & 6 deletions pkg/storage/node_liveness.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ var (
// the underlying liveness record has had its epoch incremented.
ErrEpochIncremented = errors.New("heartbeat failed on epoch increment")

// ErrEpochAlreadyIncremented is returned by IncrementEpoch when
// someone else has already incremented the epoch to the desired
// value.
ErrEpochAlreadyIncremented = errors.New("epoch already incremented")

errLiveClockNotLive = errors.New("not live")
)

Expand Down Expand Up @@ -685,8 +690,6 @@ func (nl *NodeLiveness) getLivenessLocked(nodeID roachpb.NodeID) (*storagepb.Liv
return nil, ErrNoLivenessRecord
}

var errEpochAlreadyIncremented = errors.New("epoch already incremented")

// IncrementEpoch is called to increment the current liveness epoch,
// thereby invalidating anything relying on the liveness of the
// previous epoch. This method does a conditional put on the node
Expand Down Expand Up @@ -714,15 +717,12 @@ func (nl *NodeLiveness) IncrementEpoch(ctx context.Context, liveness *storagepb.
if err := nl.updateLiveness(ctx, update, liveness, func(actual storagepb.Liveness) error {
defer nl.maybeUpdate(actual)
if actual.Epoch > liveness.Epoch {
return errEpochAlreadyIncremented
return ErrEpochAlreadyIncremented
} else if actual.Epoch < liveness.Epoch {
return errors.Errorf("unexpected liveness epoch %d; expected >= %d", actual.Epoch, liveness.Epoch)
}
return errors.Errorf("mismatch incrementing epoch for %+v; actual is %+v", *liveness, actual)
}); err != nil {
if err == errEpochAlreadyIncremented {
return nil
}
return err
}

Expand Down
6 changes: 3 additions & 3 deletions pkg/storage/node_liveness_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,8 @@ func TestNodeLivenessEpochIncrement(t *testing.T) {
t.Errorf("expected epoch increment == 1; got %d", c)
}

// Verify noop on incrementing an already-incremented epoch.
if err := mtc.nodeLivenesses[0].IncrementEpoch(context.Background(), oldLiveness); err != nil {
// Verify error on incrementing an already-incremented epoch.
if err := mtc.nodeLivenesses[0].IncrementEpoch(context.Background(), oldLiveness); err != storage.ErrEpochAlreadyIncremented {
t.Fatalf("unexpected error incrementing a non-live node: %s", err)
}

Expand Down Expand Up @@ -610,7 +610,7 @@ func TestNodeLivenessConcurrentIncrementEpochs(t *testing.T) {
}()
}
for i := 0; i < concurrency; i++ {
if err := <-errCh; err != nil {
if err := <-errCh; err != nil && err != storage.ErrEpochAlreadyIncremented {
t.Fatalf("concurrent increment epoch %d failed: %s", i, err)
}
}
Expand Down
33 changes: 32 additions & 1 deletion pkg/storage/replica_range_lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,38 @@ func (p *pendingLeaseRequest) requestLeaseAsync(
log.Info(ctx, err)
}
} else if err = p.repl.store.cfg.NodeLiveness.IncrementEpoch(ctx, status.Liveness); err != nil {
log.Error(ctx, err)
// If we get ErrEpochAlreadyIncremented, someone else beat
// us to it. This proves that the target node is truly
// dead *now*, but it doesn't prove that it was dead at
// status.Timestamp (which we've encoded into our lease
// request). It's possible that the node was temporarily
// considered dead but revived without having its epoch
// incremented, i.e. that it was in fact live at
// status.Timestamp.
//
// It would be incorrect to simply proceed to sending our
// lease request since our lease.Start may precede the
// effective end timestamp of the predecessor lease (the
// expiration of the last successful heartbeat before the
// epoch increment), and so under this lease this node's
// timestamp cache would not necessarily reflect all reads
// served by the prior leaseholder.
//
// It would be correct to bump the timestamp in the lease
// request and proceed, but that just sets up another race
// between this node and the one that already incremented
// the epoch. They're probably going to beat us this time
// too, so just return the NotLeaseHolderError here
// instead of trying to fix up the timestamps and submit
// the lease request.
//
// ErrEpochAlreadyIncremented is not an unusual situation,
// so we don't log it as an error.
//
// https://github.com/cockroachdb/cockroach/issues/35986
if err != ErrEpochAlreadyIncremented {
log.Error(ctx, err)
}
}
}
// Set error for propagation to all waiters below.
Expand Down

0 comments on commit 4e60001

Please sign in to comment.