Skip to content

Commit

Permalink
storage/engine: return WriteIntentError for intents in uncertainty in…
Browse files Browse the repository at this point in the history
…tervals

This commit fixes the most common failure case in all of the following Jepsen
failures. I'm not closing them here though, because they also should be failing
due to #36431.
Would fix #37394.
Would fix #37545.
Would fix #37930.
Would fix #37932.
Would fix #37956.
Would fix #38126.
Would fix #38763.

Before this fix, we were not considering intents in a scan's uncertainty
interval to be uncertain. This had the potential to cause stale reads because
an unresolved intent doesn't indicate that its transaction hasn’t been committed
and isn’t a causal ancestor of the scan. This was causing the `jepsen/multi-register`
tests to fail, which I had previously incorrectly attributed entirely to #36431.

This commit fixes this by returning `WriteIntentError`s for intents when they
are above the read timestamp of a scan but below the max timestamp of a scan.
This could have also been fixed by returning `ReadWithinUncertaintyIntervalError`s
in this situation. Both would eventually have the same effect, but it seems
preferable to kick off concurrency control immediately in this situation and
only fall back to uncertainty handling for committed values. If the intent
ends up being aborted, this could allow the read to avoid moving its timestamp.

This commit will need to be backported all the way back to v2.0.

Release note (bug fix): Consider intents in a read's uncertainty
interval to be uncertain just as if they were committed values. This
removes the potential for stale reads when a causally dependent
transaction runs into the not-yet resolved intents from a causal
ancestor.
  • Loading branch information
nvanbenschoten committed Sep 9, 2019
1 parent 82de988 commit d20419d
Show file tree
Hide file tree
Showing 3 changed files with 233 additions and 41 deletions.
5 changes: 4 additions & 1 deletion c-deps/libroach/mvcc.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,10 @@ template <bool reverse> class mvccScanner {
// values); this timestamp is prev_timestamp.
const DBTimestamp prev_timestamp =
timestamp_ < meta_timestamp ? timestamp_ : PrevTimestamp(meta_timestamp);
if (timestamp_ < meta_timestamp && !own_intent) {
// Intents for other transactions are visible at or below:
// max(txn.max_timestamp, read_timestamp)
const DBTimestamp max_visible_timestamp = check_uncertainty_ ? txn_max_timestamp_ : timestamp_;
if (max_visible_timestamp < meta_timestamp && !own_intent) {
// 5. The key contains an intent, but we're reading before the
// intent. Seek to the desired version. Note that if we own the
// intent (i.e. we're reading transactionally) we want to read
Expand Down
29 changes: 20 additions & 9 deletions pkg/storage/engine/mvcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -867,18 +867,29 @@ func mvccGetInternal(
timestamp = metaTimestamp.Prev()
}

ownIntent := IsIntentOf(meta, txn) // false if txn == nil
if !timestamp.Less(metaTimestamp) && meta.Txn != nil && !ownIntent {
// Trying to read the last value, but it's another transaction's intent;
// the reader will have to act on this.
return nil, nil, safeValue, &roachpb.WriteIntentError{
Intents: []roachpb.Intent{{Span: roachpb.Span{Key: metaKey.Key}, Status: roachpb.PENDING, Txn: *meta.Txn}},
checkUncertainty := txn != nil && timestamp.Less(txn.MaxTimestamp)
isIntent := meta.Txn != nil
ownIntent := IsIntentOf(meta, txn) // false if !isIntent
if isIntent && !ownIntent {
// Trying to read the last value, but it's another transaction's intent.
// The reader will have to act on this if the intent has a low enough
// timestamp. Intents for other transactions are visible at or below:
// max(txn.MaxTimestamp, timestamp)
maxVisibleTimestamp := timestamp
if checkUncertainty {
maxVisibleTimestamp = txn.MaxTimestamp
}
if !maxVisibleTimestamp.Less(metaTimestamp) {
return nil, nil, safeValue, &roachpb.WriteIntentError{
Intents: []roachpb.Intent{{
Span: roachpb.Span{Key: metaKey.Key}, Status: roachpb.PENDING, Txn: *meta.Txn,
}},
}
}
}

var checkValueTimestamp bool
seekKey := metaKey

checkValueTimestamp := false
if !timestamp.Less(metaTimestamp) || ownIntent {
// We are reading the latest value, which is either an intent written
// by this transaction or not an intent at all (so there's no
Expand Down Expand Up @@ -907,7 +918,7 @@ func mvccGetInternal(
seekKey.Timestamp = metaTimestamp.Prev()
}
}
} else if txn != nil && timestamp.Less(txn.MaxTimestamp) {
} else if checkUncertainty {
// In this branch, the latest timestamp is ahead, and so the read of an
// "old" value in a transactional context at time (timestamp, MaxTimestamp]
// occurs, leading to a clock uncertainty error if a version exists in
Expand Down
240 changes: 209 additions & 31 deletions pkg/storage/engine/mvcc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -802,90 +802,268 @@ func TestMVCCGetUncertainty(t *testing.T) {
engine := createTestEngine()
defer engine.Close()

// Txn with read timestamp 7 and MaxTimestamp 10.
txn := &roachpb.Transaction{
TxnMeta: enginepb.TxnMeta{
ID: uuid.MakeV4(),
Timestamp: hlc.Timestamp{WallTime: 5},
Timestamp: hlc.Timestamp{WallTime: 7},
},
MaxTimestamp: hlc.Timestamp{WallTime: 10},
}
// Put a value from the past.
getOptsTxn := MVCCGetOptions{Txn: txn}
scanOptsTxn := MVCCScanOptions{Txn: txn}

// Same txn but with a MaxTimestamp reduced to 9.
txnMaxTS9 := txn.Clone()
txnMaxTS9.MaxTimestamp = hlc.Timestamp{WallTime: 9}
getOptsTxnMaxTS9 := MVCCGetOptions{Txn: txnMaxTS9}
scanOptsTxnMaxTS9 := MVCCScanOptions{Txn: txnMaxTS9}

// Same txn but with a MaxTimestamp reduced to 7.
txnMaxTS7 := txn.Clone()
txnMaxTS7.MaxTimestamp = hlc.Timestamp{WallTime: 7}
getOptsTxnMaxTS7 := MVCCGetOptions{Txn: txnMaxTS7}
scanOptsTxnMaxTS7 := MVCCScanOptions{Txn: txnMaxTS7}

// Case 1: One value in the past, one value in the future of read
// and ahead of MaxTimestamp of read. Neither should interfere.
//
// -----------------
// - 12: val2
// |
// - 10: max timestamp
// |
// - 7: read timestamp
// |
// - 1: val1
// -----------------
if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value1, nil); err != nil {
t.Fatal(err)
}
// Put a value that is ahead of MaxTimestamp, it should not interfere.
if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 12}, value2, nil); err != nil {
t.Fatal(err)
}
// Read with transaction, should get a value back.
val, _, err := mvccGet(ctx, engine, testKey1, hlc.Timestamp{WallTime: 7}, MVCCGetOptions{
Txn: txn,
})
if err != nil {
if val, _, err := mvccGet(ctx, engine, testKey1, hlc.Timestamp{WallTime: 7}, getOptsTxn); err != nil {
t.Fatal(err)
} else if val == nil || !bytes.Equal(val.RawBytes, value1.RawBytes) {
t.Fatalf("wanted %q, got %v", value1.RawBytes, val)
}
if val == nil || !bytes.Equal(val.RawBytes, value1.RawBytes) {
if kvs, _, _, err := MVCCScan(
ctx, engine, testKey1, testKey1.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxn,
); err != nil {
t.Fatal(err)
} else if len(kvs) != 1 {
t.Fatalf("wanted 1 kv, got %d", len(kvs))
} else if val := kvs[0].Value; !bytes.Equal(val.RawBytes, value1.RawBytes) {
t.Fatalf("wanted %q, got %v", value1.RawBytes, val)
}

// Now using testKey2.
// Put a value that conflicts with MaxTimestamp.
// Case 2a: One value in the future of read but below MaxTimestamp
// of read. Should result in a ReadWithinUncertaintyIntervalError
// when reading.
//
// -----------------
// - 10: max timestamp
// - 9: val2
// |
// - 7: read timestamp
// -----------------
if err := MVCCPut(ctx, engine, nil, testKey2, hlc.Timestamp{WallTime: 9}, value2, nil); err != nil {
t.Fatal(err)
}
// Read with transaction, should get error back.
if _, _, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 7}, MVCCGetOptions{
Txn: txn,
}); err == nil {
if _, _, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 7}, getOptsTxn); err == nil {
t.Fatal("wanted an error")
} else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok {
t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err)
}
if _, _, _, err := MVCCScan(
ctx, engine, testKey2, testKey2.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, MVCCScanOptions{Txn: txn},
ctx, engine, testKey2, testKey2.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxn,
); err == nil {
t.Fatal("wanted an error")
} else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok {
t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err)
}
// Adjust MaxTimestamp and retry.
txn.MaxTimestamp = hlc.Timestamp{WallTime: 7}
if _, _, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 7}, MVCCGetOptions{
Txn: txn,
}); err != nil {
// Case 2b: Reduce MaxTimestamp to exactly that of value in future.
// Should result in a ReadWithinUncertaintyIntervalError when
// reading.
//
// -----------------
// - 9: val2 & max timestamp
// |
// - 7: read timestamp
// -----------------
if _, _, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 7}, getOptsTxnMaxTS9); err == nil {
t.Fatal("wanted an error")
} else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok {
t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err)
}
if _, _, _, err := MVCCScan(
ctx, engine, testKey2, testKey2.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnMaxTS9,
); err == nil {
t.Fatal("wanted an error")
} else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok {
t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err)
}
// Case 2c: Reduce MaxTimestamp below value in future. Value should
// no longer interfere when reading.
//
// -----------------
// - 9: val2
// |
// - 7: read timestamp & max timestamp
// -----------------
if _, _, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 7}, getOptsTxnMaxTS7); err != nil {
t.Fatal(err)
}
if _, _, _, err := MVCCScan(
ctx, engine, testKey2, testKey2.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnMaxTS7,
); err != nil {
t.Fatal(err)
}

// Case 3a: One intent in the future of read but below MaxTimestamp
// of read. Should result in a WriteIntentError when reading.
//
// -----------------
// - 10: max timestamp
// - 9: val2 (intent)
// |
// - 7: read timestamp
// -----------------
intentTxn := &roachpb.Transaction{
TxnMeta: enginepb.TxnMeta{
ID: uuid.MakeV4(),
Timestamp: hlc.Timestamp{WallTime: 9},
},
OrigTimestamp: hlc.Timestamp{WallTime: 9},
}
if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{WallTime: 9}, value2, intentTxn); err != nil {
t.Fatal(err)
}
// Read with transaction, should get error back.
if _, _, err := mvccGet(ctx, engine, testKey3, hlc.Timestamp{WallTime: 7}, getOptsTxn); err == nil {
t.Fatal("wanted an error")
} else if _, ok := err.(*roachpb.WriteIntentError); !ok {
t.Fatalf("wanted a WriteIntentError, got %+v", err)
}
if _, _, _, err := MVCCScan(
ctx, engine, testKey3, testKey3.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxn,
); err == nil {
t.Fatal("wanted an error")
} else if _, ok := err.(*roachpb.WriteIntentError); !ok {
t.Fatalf("wanted a WriteIntentError, got %+v", err)
}
// Case 3b: Reduce MaxTimestamp to exactly that of intent in future.
// Should result in a WriteIntentError when reading.
//
// -----------------
// - 9: val2 (intent) & max timestamp
// |
// - 7: read timestamp
// -----------------
if _, _, err := mvccGet(ctx, engine, testKey3, hlc.Timestamp{WallTime: 7}, getOptsTxnMaxTS9); err == nil {
t.Fatal("wanted an error")
} else if _, ok := err.(*roachpb.WriteIntentError); !ok {
t.Fatalf("wanted a WriteIntentError, got %+v", err)
}
if _, _, _, err := MVCCScan(
ctx, engine, testKey3, testKey3.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnMaxTS9,
); err == nil {
t.Fatal("wanted an error")
} else if _, ok := err.(*roachpb.WriteIntentError); !ok {
t.Fatalf("wanted a WriteIntentError, got %+v", err)
}
// Case 3c: Reduce MaxTimestamp below intent in future. Intent should
// no longer interfere when reading.
//
// -----------------
// - 9: val2 (intent)
// |
// - 7: read timestamp & max timestamp
// -----------------
if _, _, err := mvccGet(ctx, engine, testKey3, hlc.Timestamp{WallTime: 7}, getOptsTxnMaxTS7); err != nil {
t.Fatal(err)
}
if _, _, _, err := MVCCScan(
ctx, engine, testKey2, testKey2.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, MVCCScanOptions{Txn: txn},
ctx, engine, testKey3, testKey3.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnMaxTS7,
); err != nil {
t.Fatal(err)
}

txn.MaxTimestamp = hlc.Timestamp{WallTime: 10}
// Now using testKey3.
// Put a value that conflicts with MaxTimestamp and another write further
// ahead and not conflicting any longer. The first write should still ruin
// it.
if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{WallTime: 9}, value2, nil); err != nil {
// Case 4a: Two values in future of read. One is ahead of
// MaxTimestamp of read and one is below MaxTimestamp of read. The
// value within the read's uncertainty interval should result in a
// ReadWithinUncertaintyIntervalError when reading.
//
// -----------------
// - 99: val3
// |
// - 10: max timestamp
// - 9: val2
// |
// - 7: read timestamp
// -----------------
if err := MVCCPut(ctx, engine, nil, testKey4, hlc.Timestamp{WallTime: 9}, value2, nil); err != nil {
t.Fatal(err)
}
if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{WallTime: 99}, value2, nil); err != nil {
if err := MVCCPut(ctx, engine, nil, testKey4, hlc.Timestamp{WallTime: 99}, value3, nil); err != nil {
t.Fatal(err)
}
if _, _, err := mvccGet(ctx, engine, testKey4, hlc.Timestamp{WallTime: 7}, getOptsTxn); err == nil {
t.Fatalf("wanted an error")
} else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok {
t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err)
}
if _, _, _, err := MVCCScan(
ctx, engine, testKey3, testKey3.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, MVCCScanOptions{Txn: txn},
ctx, engine, testKey4, testKey4.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxn,
); err == nil {
t.Fatal("wanted an error")
} else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok {
t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err)
}
if _, _, err := mvccGet(ctx, engine, testKey3, hlc.Timestamp{WallTime: 7}, MVCCGetOptions{
Txn: txn,
}); err == nil {
// Case 4b: Reduce MaxTimestamp to exactly that of second value in
// future. The value within the read's uncertainty interval should
// result in a ReadWithinUncertaintyIntervalError when reading.
//
// -----------------
// - 99: val3
// |
// - 9: val2 & max timestamp
// |
// - 7: read timestamp
// -----------------
if _, _, err := mvccGet(ctx, engine, testKey4, hlc.Timestamp{WallTime: 7}, getOptsTxnMaxTS9); err == nil {
t.Fatalf("wanted an error")
} else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok {
t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err)
}
if _, _, _, err := MVCCScan(
ctx, engine, testKey4, testKey4.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnMaxTS9,
); err == nil {
t.Fatal("wanted an error")
} else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok {
t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err)
}
// Case 4c: Reduce MaxTimestamp below second value in future. Value should
// no longer interfere when reading.
//
// -----------------
// - 99: val3
// |
// - 9: val2
// |
// - 7: read timestamp & max timestamp
// -----------------
if _, _, err := mvccGet(ctx, engine, testKey4, hlc.Timestamp{WallTime: 7}, getOptsTxnMaxTS7); err != nil {
t.Fatal(err)
}
if _, _, _, err := MVCCScan(
ctx, engine, testKey4, testKey4.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnMaxTS7,
); err != nil {
t.Fatal(err)
}
})
}
}
Expand Down

0 comments on commit d20419d

Please sign in to comment.