diff --git a/c-deps/libroach/mvcc.h b/c-deps/libroach/mvcc.h index 57043b0e7df4..46f5ea9b4f1d 100644 --- a/c-deps/libroach/mvcc.h +++ b/c-deps/libroach/mvcc.h @@ -261,7 +261,12 @@ template class mvccScanner { // values); this timestamp is prev_timestamp. const DBTimestamp prev_timestamp = timestamp_ < meta_timestamp ? timestamp_ : PrevTimestamp(meta_timestamp); - if (timestamp_ < meta_timestamp && !own_intent) { + // If we don't consider the txn's uncertainty interval then intents are only + // visible to it at or below the txn's read timestamp. However, if the txn's + // read timestamp is less than the max timestamp seen by the txn then intents + // are visible to the txn all the way up to its max timestamp. + const DBTimestamp max_visible_timestamp = check_uncertainty_ ? txn_max_timestamp_ : timestamp_; + if (max_visible_timestamp < meta_timestamp && !own_intent) { // 5. The key contains an intent, but we're reading before the // intent. Seek to the desired version. Note that if we own the // intent (i.e. we're reading transactionally) we want to read diff --git a/pkg/storage/engine/mvcc.go b/pkg/storage/engine/mvcc.go index 0b026228f025..d822cbd1b456 100644 --- a/pkg/storage/engine/mvcc.go +++ b/pkg/storage/engine/mvcc.go @@ -867,18 +867,34 @@ func mvccGetInternal( timestamp = metaTimestamp.Prev() } - ownIntent := IsIntentOf(meta, txn) // false if txn == nil - if !timestamp.Less(metaTimestamp) && meta.Txn != nil && !ownIntent { + checkUncertainty := txn != nil && timestamp.Less(txn.MaxTimestamp) + isIntent := meta.Txn != nil + ownIntent := IsIntentOf(meta, txn) // false if !isIntent + if isIntent && !ownIntent { // Trying to read the last value, but it's another transaction's intent; - // the reader will have to act on this. - return nil, nil, safeValue, &roachpb.WriteIntentError{ - Intents: []roachpb.Intent{{Span: roachpb.Span{Key: metaKey.Key}, Status: roachpb.PENDING, Txn: *meta.Txn}}, + // the reader will have to act on this if the intent has a low enough + // timestamp. + // + // If we don't consider the reading transaction's uncertainty interval + // then intents are only visible to it at or below the read timestamp. + // However, if the reader transaction's read timestamp is less than the + // max timestamp seen by the transaction then intents are visible to the + // transaction all the way up to its max timestamp. + maxVisibleTimestamp := timestamp + if checkUncertainty { + maxVisibleTimestamp.Forward(txn.MaxTimestamp) + } + if !maxVisibleTimestamp.Less(metaTimestamp) { + return nil, nil, safeValue, &roachpb.WriteIntentError{ + Intents: []roachpb.Intent{{ + Span: roachpb.Span{Key: metaKey.Key}, Status: roachpb.PENDING, Txn: *meta.Txn, + }}, + } } } - var checkValueTimestamp bool seekKey := metaKey - + checkValueTimestamp := false if !timestamp.Less(metaTimestamp) || ownIntent { // We are reading the latest value, which is either an intent written // by this transaction or not an intent at all (so there's no @@ -907,7 +923,7 @@ func mvccGetInternal( seekKey.Timestamp = metaTimestamp.Prev() } } - } else if txn != nil && timestamp.Less(txn.MaxTimestamp) { + } else if checkUncertainty { // In this branch, the latest timestamp is ahead, and so the read of an // "old" value in a transactional context at time (timestamp, MaxTimestamp] // occurs, leading to a clock uncertainty error if a version exists in diff --git a/pkg/storage/engine/mvcc_test.go b/pkg/storage/engine/mvcc_test.go index 12382da28628..a7b8e51f64da 100644 --- a/pkg/storage/engine/mvcc_test.go +++ b/pkg/storage/engine/mvcc_test.go @@ -802,90 +802,198 @@ func TestMVCCGetUncertainty(t *testing.T) { engine := createTestEngine() defer engine.Close() + // Txn with read timestamp 7 and MaxTimestamp 10. txn := &roachpb.Transaction{ TxnMeta: enginepb.TxnMeta{ ID: uuid.MakeV4(), - Timestamp: hlc.Timestamp{WallTime: 5}, + Timestamp: hlc.Timestamp{WallTime: 7}, }, MaxTimestamp: hlc.Timestamp{WallTime: 10}, } - // Put a value from the past. + getOptsTxn := MVCCGetOptions{Txn: txn} + scanOptsTxn := MVCCScanOptions{Txn: txn} + + // Same txn but with a lower MaxTimestamp at 7. + txnWithLowerMaxTS := txn.Clone() + txnWithLowerMaxTS.MaxTimestamp = hlc.Timestamp{WallTime: 7} + getOptsTxnWithLowerMaxTS := MVCCGetOptions{Txn: txnWithLowerMaxTS} + scanOptsTxnWithLowerMaxTS := MVCCScanOptions{Txn: txnWithLowerMaxTS} + + // Case 1: One value in the past, one value in the future of read + // and ahead of MaxTimestamp of read. Neither should interfere. + // + // ----------------- + // - 12: val2 + // | + // - 10: max timestamp + // | + // - 7: read timestamp + // | + // - 1: val1 + // ----------------- if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value1, nil); err != nil { t.Fatal(err) } - // Put a value that is ahead of MaxTimestamp, it should not interfere. if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 12}, value2, nil); err != nil { t.Fatal(err) } // Read with transaction, should get a value back. - val, _, err := mvccGet(ctx, engine, testKey1, hlc.Timestamp{WallTime: 7}, MVCCGetOptions{ - Txn: txn, - }) - if err != nil { + if val, _, err := mvccGet(ctx, engine, testKey1, hlc.Timestamp{WallTime: 7}, getOptsTxn); err != nil { t.Fatal(err) + } else if val == nil || !bytes.Equal(val.RawBytes, value1.RawBytes) { + t.Fatalf("wanted %q, got %v", value1.RawBytes, val) } - if val == nil || !bytes.Equal(val.RawBytes, value1.RawBytes) { + if kvs, _, _, err := MVCCScan( + ctx, engine, testKey1, testKey1.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxn, + ); err != nil { + t.Fatal(err) + } else if len(kvs) != 1 { + t.Fatalf("wanted 1 kv, got %d", len(kvs)) + } else if val := kvs[0].Value; !bytes.Equal(val.RawBytes, value1.RawBytes) { t.Fatalf("wanted %q, got %v", value1.RawBytes, val) } - // Now using testKey2. - // Put a value that conflicts with MaxTimestamp. + // Case 2a: One value in the future of read but below MaxTimestamp + // of read. Should result in a ReadWithinUncertaintyIntervalError + // when reading. + // + // ----------------- + // - 10: max timestamp + // - 9: val2 + // | + // - 7: read timestamp + // ----------------- if err := MVCCPut(ctx, engine, nil, testKey2, hlc.Timestamp{WallTime: 9}, value2, nil); err != nil { t.Fatal(err) } // Read with transaction, should get error back. - if _, _, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 7}, MVCCGetOptions{ - Txn: txn, - }); err == nil { + if _, _, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 7}, getOptsTxn); err == nil { t.Fatal("wanted an error") } else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok { t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err) } if _, _, _, err := MVCCScan( - ctx, engine, testKey2, testKey2.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, MVCCScanOptions{Txn: txn}, + ctx, engine, testKey2, testKey2.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxn, ); err == nil { t.Fatal("wanted an error") } else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok { t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err) } - // Adjust MaxTimestamp and retry. - txn.MaxTimestamp = hlc.Timestamp{WallTime: 7} - if _, _, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 7}, MVCCGetOptions{ - Txn: txn, - }); err != nil { + // Case 2b: Reduce MaxTimestamp below value in future. Value should + // no longer interfere when reading. + // + // ----------------- + // - 9: val2 + // | + // - 7: read/max timestamp + // ----------------- + if _, _, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 7}, getOptsTxnWithLowerMaxTS); err != nil { t.Fatal(err) } if _, _, _, err := MVCCScan( - ctx, engine, testKey2, testKey2.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, MVCCScanOptions{Txn: txn}, + ctx, engine, testKey2, testKey2.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnWithLowerMaxTS, ); err != nil { t.Fatal(err) } - txn.MaxTimestamp = hlc.Timestamp{WallTime: 10} - // Now using testKey3. - // Put a value that conflicts with MaxTimestamp and another write further - // ahead and not conflicting any longer. The first write should still ruin - // it. - if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{WallTime: 9}, value2, nil); err != nil { - t.Fatal(err) + // Case 3a: One intent in the future of read but below MaxTimestamp + // of read. Should result in a WriteIntentError when reading. + // + // ----------------- + // - 10: max timestamp + // - 9: val2 (intent) + // | + // - 7: read timestamp + // ----------------- + txn2 := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ + ID: uuid.MakeV4(), + Timestamp: hlc.Timestamp{WallTime: 9}, + }, + OrigTimestamp: hlc.Timestamp{WallTime: 9}, } - if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{WallTime: 99}, value2, nil); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{WallTime: 9}, value2, txn2); err != nil { t.Fatal(err) } + // Read with transaction, should get error back. + if _, _, err := mvccGet(ctx, engine, testKey3, hlc.Timestamp{WallTime: 7}, getOptsTxn); err == nil { + t.Fatal("wanted an error") + } else if _, ok := err.(*roachpb.WriteIntentError); !ok { + t.Fatalf("wanted a WriteIntentError, got %+v", err) + } if _, _, _, err := MVCCScan( - ctx, engine, testKey3, testKey3.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, MVCCScanOptions{Txn: txn}, + ctx, engine, testKey3, testKey3.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxn, ); err == nil { t.Fatal("wanted an error") + } else if _, ok := err.(*roachpb.WriteIntentError); !ok { + t.Fatalf("wanted a WriteIntentError, got %+v", err) + } + // Case 3b: Reduce MaxTimestamp below intent in future. Intent should + // no longer interfere when reading. + // + // ----------------- + // - 9: val2 (intent) + // | + // - 7: read/max timestamp + // ----------------- + if _, _, err := mvccGet(ctx, engine, testKey3, hlc.Timestamp{WallTime: 7}, getOptsTxnWithLowerMaxTS); err != nil { + t.Fatal(err) + } + if _, _, _, err := MVCCScan( + ctx, engine, testKey3, testKey3.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnWithLowerMaxTS, + ); err != nil { + t.Fatal(err) + } + + // Case 4a: Two values in future of read. One is ahead of + // MaxTimestamp of read and one is below MaxTimestamp of read. The + // value within the read's uncertainty interval should result in a + // ReadWithinUncertaintyIntervalError when reading. + // + // ----------------- + // - 99: val3 + // | + // - 10: max timestamp + // - 9: val2 + // | + // - 7: read timestamp + // ----------------- + if err := MVCCPut(ctx, engine, nil, testKey4, hlc.Timestamp{WallTime: 9}, value2, nil); err != nil { + t.Fatal(err) + } + if err := MVCCPut(ctx, engine, nil, testKey4, hlc.Timestamp{WallTime: 99}, value3, nil); err != nil { + t.Fatal(err) + } + if _, _, err := mvccGet(ctx, engine, testKey4, hlc.Timestamp{WallTime: 7}, getOptsTxn); err == nil { + t.Fatalf("wanted an error") } else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok { t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err) } - if _, _, err := mvccGet(ctx, engine, testKey3, hlc.Timestamp{WallTime: 7}, MVCCGetOptions{ - Txn: txn, - }); err == nil { - t.Fatalf("wanted an error") + if _, _, _, err := MVCCScan( + ctx, engine, testKey4, testKey4.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxn, + ); err == nil { + t.Fatal("wanted an error") } else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok { t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err) } + // Case 4b: Reduce MaxTimestamp below second value in future. Value should + // no longer interfere when reading. + // + // ----------------- + // - 99: val3 + // | + // - 9: val2 + // | + // - 7: read/max timestamp + // ----------------- + if _, _, err := mvccGet(ctx, engine, testKey4, hlc.Timestamp{WallTime: 7}, getOptsTxnWithLowerMaxTS); err != nil { + t.Fatal(err) + } + if _, _, _, err := MVCCScan( + ctx, engine, testKey4, testKey4.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnWithLowerMaxTS, + ); err != nil { + t.Fatal(err) + } }) } }