storage: Don't swallow ErrEpochAlreadyIncremented
IncrementEpoch was failing to distinguish between the caller that
actually performed the increment and another caller that had already
made the same increment. The distinction matters because a successful
increment tells you *when* the node was confirmed dead, while relying
on another node's increment leaves that timestamp uncertain.

In rare cases involving a badly overloaded cluster, this could result
in inconsistencies (non-serializable transactions) due to incorrect
timestamp cache management.

Fixes #35986

Release note (bug fix): Fix a rare inconsistency that could occur on
badly overloaded clusters.
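To illustrate the distinction the fix restores at the call site, here is a minimal, hypothetical Go sketch: only a nil return from the increment proves the caller itself bumped the epoch and therefore knows when the node was confirmed dead. The names (incrementEpoch, errEpochAlreadyIncremented, readAt) are illustrative stand-ins, not the CockroachDB API.

package main

import (
	"errors"
	"fmt"
	"time"
)

var errEpochAlreadyIncremented = errors.New("epoch already incremented")

// incrementEpoch is a stand-in for a conditional-put style increment: it
// reports the sentinel when some other caller already bumped the epoch.
func incrementEpoch(currentEpoch *int64, expected int64) error {
	if *currentEpoch > expected {
		return errEpochAlreadyIncremented
	}
	*currentEpoch = expected + 1
	return nil
}

func main() {
	epoch := int64(3) // someone else already incremented past our expected epoch of 2
	readAt := time.Now()

	switch err := incrementEpoch(&epoch, 2); err {
	case nil:
		// We performed the increment: the node was confirmed dead as of the
		// liveness record we read at readAt.
		fmt.Println("confirmed dead at", readAt)
	case errEpochAlreadyIncremented:
		// Another caller incremented the epoch: the node is dead *now*, but
		// we cannot pin that to readAt; swallowing this sentinel (the old
		// behavior) loses exactly that information.
		fmt.Println("dead now, but time of confirmation is uncertain")
	default:
		fmt.Println("increment failed:", err)
	}
}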
bdarnell committed Apr 19, 2019
1 parent b47269e commit 8de884a
Showing 3 changed files with 41 additions and 10 deletions.
12 changes: 6 additions & 6 deletions pkg/storage/node_liveness.go
@@ -53,6 +53,11 @@ var (
 	// the underlying liveness record has had its epoch incremented.
 	ErrEpochIncremented = errors.New("heartbeat failed on epoch increment")

+	// ErrEpochAlreadyIncremented is returned by IncrementEpoch when
+	// someone else has already incremented the epoch to the desired
+	// value.
+	ErrEpochAlreadyIncremented = errors.New("epoch already incremented")
+
 	errLiveClockNotLive = errors.New("not live")
 )

@@ -685,8 +690,6 @@ func (nl *NodeLiveness) getLivenessLocked(nodeID roachpb.NodeID) (*storagepb.Liv
 	return nil, ErrNoLivenessRecord
 }

-var errEpochAlreadyIncremented = errors.New("epoch already incremented")
-
 // IncrementEpoch is called to increment the current liveness epoch,
 // thereby invalidating anything relying on the liveness of the
 // previous epoch. This method does a conditional put on the node
@@ -714,15 +717,12 @@ func (nl *NodeLiveness) IncrementEpoch(ctx context.Context, liveness *storagepb.
 	if err := nl.updateLiveness(ctx, update, liveness, func(actual storagepb.Liveness) error {
 		defer nl.maybeUpdate(actual)
 		if actual.Epoch > liveness.Epoch {
-			return errEpochAlreadyIncremented
+			return ErrEpochAlreadyIncremented
 		} else if actual.Epoch < liveness.Epoch {
 			return errors.Errorf("unexpected liveness epoch %d; expected >= %d", actual.Epoch, liveness.Epoch)
 		}
 		return errors.Errorf("mismatch incrementing epoch for %+v; actual is %+v", *liveness, actual)
 	}); err != nil {
-		if err == errEpochAlreadyIncremented {
-			return nil
-		}
 		return err
 	}

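For context, a standalone sketch (with hypothetical names) of the three outcomes the retry callback above distinguishes when the conditional put finds a record other than the expected one. With the fix, the first outcome is now surfaced to IncrementEpoch's caller as ErrEpochAlreadyIncremented instead of being converted to a nil error.

package main

import (
	"errors"
	"fmt"
)

var errEpochAlreadyIncremented = errors.New("epoch already incremented")

// classify mirrors the shape of the callback above: a higher epoch than
// expected means another caller already performed the increment, a lower one
// violates the invariant that epochs only move forward, and an equal epoch
// implies some other field of the liveness record differed.
func classify(actualEpoch, expectedEpoch int64) error {
	if actualEpoch > expectedEpoch {
		return errEpochAlreadyIncremented
	} else if actualEpoch < expectedEpoch {
		return fmt.Errorf("unexpected liveness epoch %d; expected >= %d", actualEpoch, expectedEpoch)
	}
	return errors.New("liveness record mismatch at equal epochs")
}

func main() {
	fmt.Println(classify(3, 2)) // epoch already incremented
	fmt.Println(classify(1, 2)) // unexpected liveness epoch 1; expected >= 2
	fmt.Println(classify(2, 2)) // liveness record mismatch at equal epochs
}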
6 changes: 3 additions & 3 deletions pkg/storage/node_liveness_test.go
@@ -316,8 +316,8 @@ func TestNodeLivenessEpochIncrement(t *testing.T) {
 		t.Errorf("expected epoch increment == 1; got %d", c)
 	}

-	// Verify noop on incrementing an already-incremented epoch.
-	if err := mtc.nodeLivenesses[0].IncrementEpoch(context.Background(), oldLiveness); err != nil {
+	// Verify error on incrementing an already-incremented epoch.
+	if err := mtc.nodeLivenesses[0].IncrementEpoch(context.Background(), oldLiveness); err != storage.ErrEpochAlreadyIncremented {
 		t.Fatalf("unexpected error incrementing a non-live node: %s", err)
 	}

@@ -610,7 +610,7 @@ func TestNodeLivenessConcurrentIncrementEpochs(t *testing.T) {
 		}()
 	}
 	for i := 0; i < concurrency; i++ {
-		if err := <-errCh; err != nil {
+		if err := <-errCh; err != nil && err != storage.ErrEpochAlreadyIncremented {
 			t.Fatalf("concurrent increment epoch %d failed: %s", i, err)
 		}
 	}
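A runnable sketch of the race TestNodeLivenessConcurrentIncrementEpochs now tolerates: several goroutines attempt the same increment, exactly one compare-and-swap wins, and the rest receive the "already incremented" sentinel rather than an opaque nil. All names below are hypothetical stand-ins for the production code.

package main

import (
	"errors"
	"fmt"
	"sync/atomic"
)

var errEpochAlreadyIncremented = errors.New("epoch already incremented")

// incrementEpoch stands in for the conditional put: only the caller that
// moves the epoch from expected to expected+1 succeeds.
func incrementEpoch(epoch *int64, expected int64) error {
	if atomic.CompareAndSwapInt64(epoch, expected, expected+1) {
		return nil
	}
	return errEpochAlreadyIncremented
}

func main() {
	const concurrency = 10
	epoch := int64(1)
	errCh := make(chan error, concurrency)

	for i := 0; i < concurrency; i++ {
		go func() { errCh <- incrementEpoch(&epoch, 1) }()
	}

	wins := 0
	for i := 0; i < concurrency; i++ {
		// Mirroring the updated test: the sentinel is tolerated; anything
		// else would be a genuine failure.
		switch err := <-errCh; err {
		case nil:
			wins++
		case errEpochAlreadyIncremented:
			// Lost the race; expected for all but one goroutine.
		default:
			fmt.Println("unexpected error:", err)
		}
	}
	fmt.Printf("winners: %d, final epoch: %d\n", wins, atomic.LoadInt64(&epoch))
}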
33 changes: 32 additions & 1 deletion pkg/storage/replica_range_lease.go
@@ -303,7 +303,38 @@ func (p *pendingLeaseRequest) requestLeaseAsync(
 					log.Info(ctx, err)
 				}
 			} else if err = p.repl.store.cfg.NodeLiveness.IncrementEpoch(ctx, status.Liveness); err != nil {
-				log.Error(ctx, err)
+				// If we get ErrEpochAlreadyIncremented, someone else beat
+				// us to it. This proves that the target node is truly
+				// dead *now*, but it doesn't prove that it was dead at
+				// status.Timestamp (which we've encoded into our lease
+				// request). It's possible that the node was temporarily
+				// considered dead but revived without having its epoch
+				// incremented, i.e. that it was in fact live at
+				// status.Timestamp.
+				//
+				// It would be incorrect to simply proceed to sending our
+				// lease request since our lease.Start may precede the
+				// effective end timestamp of the predecessor lease (the
+				// expiration of the last successful heartbeat before the
+				// epoch increment), and so under this lease this node's
+				// timestamp cache would not necessarily reflect all reads
+				// served by the prior leaseholder.
+				//
+				// It would be correct to bump the timestamp in the lease
+				// request and proceed, but that just sets up another race
+				// between this node and the one that already incremented
+				// the epoch. They're probably going to beat us this time
+				// too, so just return the NotLeaseHolderError here
+				// instead of trying to fix up the timestamps and submit
+				// the lease request.
+				//
+				// ErrEpochAlreadyIncremented is not an unusual situation,
+				// so we don't log it as an error.
+				//
+				// https://github.com/cockroachdb/cockroach/issues/35986
+				if err != ErrEpochAlreadyIncremented {
+					log.Error(ctx, err)
+				}
 			}
 		}
 		// Set error for propagation to all waiters below.
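To make the comment's safety argument concrete, a small sketch under hypothetical names: the lease request is only safe to submit when its start is not earlier than the effective end of the predecessor lease (the expiration of the last successful heartbeat before the epoch increment); otherwise this node's timestamp cache may not reflect all reads the prior leaseholder served. The variables below stand in for the HLC timestamps in the real code and are not the CockroachDB API.

package main

import (
	"fmt"
	"time"
)

func main() {
	// Hypothetical wall-clock values standing in for HLC timestamps.
	lastHeartbeatExpiration := time.Unix(0, 2000) // effective end of the predecessor lease
	newLeaseStart := time.Unix(0, 1500)           // status.Timestamp encoded into our lease request

	// If we lost the epoch-increment race, we only know the node is dead
	// now, not that it was dead at newLeaseStart. Proceeding with a start
	// that precedes the predecessor's effective end could miss reads the
	// old leaseholder already served.
	if newLeaseStart.Before(lastHeartbeatExpiration) {
		fmt.Println("unsafe: return NotLeaseHolderError rather than submitting the lease request")
	} else {
		fmt.Println("safe: the timestamp cache covers all reads under the prior lease")
	}
}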
