diff --git a/c-deps/libroach/mvcc.h b/c-deps/libroach/mvcc.h index 57043b0e7df4..77aa2be07892 100644 --- a/c-deps/libroach/mvcc.h +++ b/c-deps/libroach/mvcc.h @@ -261,7 +261,10 @@ template class mvccScanner { // values); this timestamp is prev_timestamp. const DBTimestamp prev_timestamp = timestamp_ < meta_timestamp ? timestamp_ : PrevTimestamp(meta_timestamp); - if (timestamp_ < meta_timestamp && !own_intent) { + // Intents for other transactions are visible at or below: + // max(txn.max_timestamp, read_timestamp) + const DBTimestamp max_visible_timestamp = check_uncertainty_ ? txn_max_timestamp_ : timestamp_; + if (max_visible_timestamp < meta_timestamp && !own_intent) { // 5. The key contains an intent, but we're reading before the // intent. Seek to the desired version. Note that if we own the // intent (i.e. we're reading transactionally) we want to read diff --git a/pkg/storage/engine/mvcc.go b/pkg/storage/engine/mvcc.go index 0b026228f025..b6b7f0b0f620 100644 --- a/pkg/storage/engine/mvcc.go +++ b/pkg/storage/engine/mvcc.go @@ -867,18 +867,29 @@ func mvccGetInternal( timestamp = metaTimestamp.Prev() } - ownIntent := IsIntentOf(meta, txn) // false if txn == nil - if !timestamp.Less(metaTimestamp) && meta.Txn != nil && !ownIntent { - // Trying to read the last value, but it's another transaction's intent; - // the reader will have to act on this. - return nil, nil, safeValue, &roachpb.WriteIntentError{ - Intents: []roachpb.Intent{{Span: roachpb.Span{Key: metaKey.Key}, Status: roachpb.PENDING, Txn: *meta.Txn}}, + checkUncertainty := txn != nil && timestamp.Less(txn.MaxTimestamp) + isIntent := meta.Txn != nil + ownIntent := IsIntentOf(meta, txn) // false if !isIntent + if isIntent && !ownIntent { + // Trying to read the last value, but it's another transaction's intent. + // The reader will have to act on this if the intent has a low enough + // timestamp. Intents for other transactions are visible at or below: + // max(txn.MaxTimestamp, timestamp) + maxVisibleTimestamp := timestamp + if checkUncertainty { + maxVisibleTimestamp = txn.MaxTimestamp + } + if !maxVisibleTimestamp.Less(metaTimestamp) { + return nil, nil, safeValue, &roachpb.WriteIntentError{ + Intents: []roachpb.Intent{{ + Span: roachpb.Span{Key: metaKey.Key}, Status: roachpb.PENDING, Txn: *meta.Txn, + }}, + } } } - var checkValueTimestamp bool seekKey := metaKey - + checkValueTimestamp := false if !timestamp.Less(metaTimestamp) || ownIntent { // We are reading the latest value, which is either an intent written // by this transaction or not an intent at all (so there's no @@ -907,7 +918,7 @@ func mvccGetInternal( seekKey.Timestamp = metaTimestamp.Prev() } } - } else if txn != nil && timestamp.Less(txn.MaxTimestamp) { + } else if checkUncertainty { // In this branch, the latest timestamp is ahead, and so the read of an // "old" value in a transactional context at time (timestamp, MaxTimestamp] // occurs, leading to a clock uncertainty error if a version exists in diff --git a/pkg/storage/engine/mvcc_test.go b/pkg/storage/engine/mvcc_test.go index 12382da28628..8dbb45be83e0 100644 --- a/pkg/storage/engine/mvcc_test.go +++ b/pkg/storage/engine/mvcc_test.go @@ -802,90 +802,268 @@ func TestMVCCGetUncertainty(t *testing.T) { engine := createTestEngine() defer engine.Close() + // Txn with read timestamp 7 and MaxTimestamp 10. txn := &roachpb.Transaction{ TxnMeta: enginepb.TxnMeta{ ID: uuid.MakeV4(), - Timestamp: hlc.Timestamp{WallTime: 5}, + Timestamp: hlc.Timestamp{WallTime: 7}, }, MaxTimestamp: hlc.Timestamp{WallTime: 10}, } - // Put a value from the past. + getOptsTxn := MVCCGetOptions{Txn: txn} + scanOptsTxn := MVCCScanOptions{Txn: txn} + + // Same txn but with a MaxTimestamp reduced to 9. + txnMaxTS9 := txn.Clone() + txnMaxTS9.MaxTimestamp = hlc.Timestamp{WallTime: 9} + getOptsTxnMaxTS9 := MVCCGetOptions{Txn: txnMaxTS9} + scanOptsTxnMaxTS9 := MVCCScanOptions{Txn: txnMaxTS9} + + // Same txn but with a MaxTimestamp reduced to 7. + txnMaxTS7 := txn.Clone() + txnMaxTS7.MaxTimestamp = hlc.Timestamp{WallTime: 7} + getOptsTxnMaxTS7 := MVCCGetOptions{Txn: txnMaxTS7} + scanOptsTxnMaxTS7 := MVCCScanOptions{Txn: txnMaxTS7} + + // Case 1: One value in the past, one value in the future of read + // and ahead of MaxTimestamp of read. Neither should interfere. + // + // ----------------- + // - 12: val2 + // | + // - 10: max timestamp + // | + // - 7: read timestamp + // | + // - 1: val1 + // ----------------- if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value1, nil); err != nil { t.Fatal(err) } - // Put a value that is ahead of MaxTimestamp, it should not interfere. if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 12}, value2, nil); err != nil { t.Fatal(err) } // Read with transaction, should get a value back. - val, _, err := mvccGet(ctx, engine, testKey1, hlc.Timestamp{WallTime: 7}, MVCCGetOptions{ - Txn: txn, - }) - if err != nil { + if val, _, err := mvccGet(ctx, engine, testKey1, hlc.Timestamp{WallTime: 7}, getOptsTxn); err != nil { t.Fatal(err) + } else if val == nil || !bytes.Equal(val.RawBytes, value1.RawBytes) { + t.Fatalf("wanted %q, got %v", value1.RawBytes, val) } - if val == nil || !bytes.Equal(val.RawBytes, value1.RawBytes) { + if kvs, _, _, err := MVCCScan( + ctx, engine, testKey1, testKey1.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxn, + ); err != nil { + t.Fatal(err) + } else if len(kvs) != 1 { + t.Fatalf("wanted 1 kv, got %d", len(kvs)) + } else if val := kvs[0].Value; !bytes.Equal(val.RawBytes, value1.RawBytes) { t.Fatalf("wanted %q, got %v", value1.RawBytes, val) } - // Now using testKey2. - // Put a value that conflicts with MaxTimestamp. + // Case 2a: One value in the future of read but below MaxTimestamp + // of read. Should result in a ReadWithinUncertaintyIntervalError + // when reading. + // + // ----------------- + // - 10: max timestamp + // - 9: val2 + // | + // - 7: read timestamp + // ----------------- if err := MVCCPut(ctx, engine, nil, testKey2, hlc.Timestamp{WallTime: 9}, value2, nil); err != nil { t.Fatal(err) } // Read with transaction, should get error back. - if _, _, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 7}, MVCCGetOptions{ - Txn: txn, - }); err == nil { + if _, _, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 7}, getOptsTxn); err == nil { t.Fatal("wanted an error") } else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok { t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err) } if _, _, _, err := MVCCScan( - ctx, engine, testKey2, testKey2.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, MVCCScanOptions{Txn: txn}, + ctx, engine, testKey2, testKey2.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxn, ); err == nil { t.Fatal("wanted an error") } else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok { t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err) } - // Adjust MaxTimestamp and retry. - txn.MaxTimestamp = hlc.Timestamp{WallTime: 7} - if _, _, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 7}, MVCCGetOptions{ - Txn: txn, - }); err != nil { + // Case 2b: Reduce MaxTimestamp to exactly that of value in future. + // Should result in a ReadWithinUncertaintyIntervalError when + // reading. + // + // ----------------- + // - 9: val2 & max timestamp + // | + // - 7: read timestamp + // ----------------- + if _, _, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 7}, getOptsTxnMaxTS9); err == nil { + t.Fatal("wanted an error") + } else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok { + t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err) + } + if _, _, _, err := MVCCScan( + ctx, engine, testKey2, testKey2.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnMaxTS9, + ); err == nil { + t.Fatal("wanted an error") + } else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok { + t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err) + } + // Case 2c: Reduce MaxTimestamp below value in future. Value should + // no longer interfere when reading. + // + // ----------------- + // - 9: val2 + // | + // - 7: read timestamp & max timestamp + // ----------------- + if _, _, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 7}, getOptsTxnMaxTS7); err != nil { + t.Fatal(err) + } + if _, _, _, err := MVCCScan( + ctx, engine, testKey2, testKey2.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnMaxTS7, + ); err != nil { + t.Fatal(err) + } + + // Case 3a: One intent in the future of read but below MaxTimestamp + // of read. Should result in a WriteIntentError when reading. + // + // ----------------- + // - 10: max timestamp + // - 9: val2 (intent) + // | + // - 7: read timestamp + // ----------------- + intentTxn := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ + ID: uuid.MakeV4(), + Timestamp: hlc.Timestamp{WallTime: 9}, + }, + OrigTimestamp: hlc.Timestamp{WallTime: 9}, + } + if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{WallTime: 9}, value2, intentTxn); err != nil { + t.Fatal(err) + } + // Read with transaction, should get error back. + if _, _, err := mvccGet(ctx, engine, testKey3, hlc.Timestamp{WallTime: 7}, getOptsTxn); err == nil { + t.Fatal("wanted an error") + } else if _, ok := err.(*roachpb.WriteIntentError); !ok { + t.Fatalf("wanted a WriteIntentError, got %+v", err) + } + if _, _, _, err := MVCCScan( + ctx, engine, testKey3, testKey3.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxn, + ); err == nil { + t.Fatal("wanted an error") + } else if _, ok := err.(*roachpb.WriteIntentError); !ok { + t.Fatalf("wanted a WriteIntentError, got %+v", err) + } + // Case 3b: Reduce MaxTimestamp to exactly that of intent in future. + // Should result in a WriteIntentError when reading. + // + // ----------------- + // - 9: val2 (intent) & max timestamp + // | + // - 7: read timestamp + // ----------------- + if _, _, err := mvccGet(ctx, engine, testKey3, hlc.Timestamp{WallTime: 7}, getOptsTxnMaxTS9); err == nil { + t.Fatal("wanted an error") + } else if _, ok := err.(*roachpb.WriteIntentError); !ok { + t.Fatalf("wanted a WriteIntentError, got %+v", err) + } + if _, _, _, err := MVCCScan( + ctx, engine, testKey3, testKey3.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnMaxTS9, + ); err == nil { + t.Fatal("wanted an error") + } else if _, ok := err.(*roachpb.WriteIntentError); !ok { + t.Fatalf("wanted a WriteIntentError, got %+v", err) + } + // Case 3c: Reduce MaxTimestamp below intent in future. Intent should + // no longer interfere when reading. + // + // ----------------- + // - 9: val2 (intent) + // | + // - 7: read timestamp & max timestamp + // ----------------- + if _, _, err := mvccGet(ctx, engine, testKey3, hlc.Timestamp{WallTime: 7}, getOptsTxnMaxTS7); err != nil { t.Fatal(err) } if _, _, _, err := MVCCScan( - ctx, engine, testKey2, testKey2.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, MVCCScanOptions{Txn: txn}, + ctx, engine, testKey3, testKey3.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnMaxTS7, ); err != nil { t.Fatal(err) } - txn.MaxTimestamp = hlc.Timestamp{WallTime: 10} - // Now using testKey3. - // Put a value that conflicts with MaxTimestamp and another write further - // ahead and not conflicting any longer. The first write should still ruin - // it. - if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{WallTime: 9}, value2, nil); err != nil { + // Case 4a: Two values in future of read. One is ahead of + // MaxTimestamp of read and one is below MaxTimestamp of read. The + // value within the read's uncertainty interval should result in a + // ReadWithinUncertaintyIntervalError when reading. + // + // ----------------- + // - 99: val3 + // | + // - 10: max timestamp + // - 9: val2 + // | + // - 7: read timestamp + // ----------------- + if err := MVCCPut(ctx, engine, nil, testKey4, hlc.Timestamp{WallTime: 9}, value2, nil); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{WallTime: 99}, value2, nil); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey4, hlc.Timestamp{WallTime: 99}, value3, nil); err != nil { t.Fatal(err) } + if _, _, err := mvccGet(ctx, engine, testKey4, hlc.Timestamp{WallTime: 7}, getOptsTxn); err == nil { + t.Fatalf("wanted an error") + } else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok { + t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err) + } if _, _, _, err := MVCCScan( - ctx, engine, testKey3, testKey3.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, MVCCScanOptions{Txn: txn}, + ctx, engine, testKey4, testKey4.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxn, ); err == nil { t.Fatal("wanted an error") } else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok { t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err) } - if _, _, err := mvccGet(ctx, engine, testKey3, hlc.Timestamp{WallTime: 7}, MVCCGetOptions{ - Txn: txn, - }); err == nil { + // Case 4b: Reduce MaxTimestamp to exactly that of second value in + // future. The value within the read's uncertainty interval should + // result in a ReadWithinUncertaintyIntervalError when reading. + // + // ----------------- + // - 99: val3 + // | + // - 9: val2 & max timestamp + // | + // - 7: read timestamp + // ----------------- + if _, _, err := mvccGet(ctx, engine, testKey4, hlc.Timestamp{WallTime: 7}, getOptsTxnMaxTS9); err == nil { t.Fatalf("wanted an error") } else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok { t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err) } + if _, _, _, err := MVCCScan( + ctx, engine, testKey4, testKey4.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnMaxTS9, + ); err == nil { + t.Fatal("wanted an error") + } else if _, ok := err.(*roachpb.ReadWithinUncertaintyIntervalError); !ok { + t.Fatalf("wanted a ReadWithinUncertaintyIntervalError, got %+v", err) + } + // Case 4c: Reduce MaxTimestamp below second value in future. Value should + // no longer interfere when reading. + // + // ----------------- + // - 99: val3 + // | + // - 9: val2 + // | + // - 7: read timestamp & max timestamp + // ----------------- + if _, _, err := mvccGet(ctx, engine, testKey4, hlc.Timestamp{WallTime: 7}, getOptsTxnMaxTS7); err != nil { + t.Fatal(err) + } + if _, _, _, err := MVCCScan( + ctx, engine, testKey4, testKey4.PrefixEnd(), 10, hlc.Timestamp{WallTime: 7}, scanOptsTxnMaxTS7, + ); err != nil { + t.Fatal(err) + } }) } }