Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
32421: engineccl: ignore intents beneath start in MVCCIncrementalIterator r=petermattis,nvanbenschoten a=benesch

As determined in cockroachdb#28358, using time-bound iterators is rife with
pitfalls. Specifically, the keys returned outside of the time bounds
might be wildly inconsistent. A iteration over time bounds [ts3, ts4]
might observe an intent at time ts2 or ts5 as still pending when in fact
it was resolved, just in an SST that was not considered by the iterator.
The only guarantee is that the snapshot of keys within the [ts3, ts4]
time bounds is consistent. (Currently this isn't quite true, thanks to
another bug, but this is the guarantee that time-bound iterators should
be providing.)

MVCCIncrementalIterator mostly handled these pitfalls correctly. It
properly ignored all non-metadata keys outside of the timestamp bounds,
as well as metadata keys (i.e., intents) above the upper timestamp
bound. It was not, however, ignoring intents beneath the lower timestamp
bound. Since these intents might be inconsistent (i.e., they might
already be resolved), the iterator must ignore them.

The most problematic symptom was that ExportRequests, which use an
MVCCIncrementalIterator, could get stuck trying to resolve an
already-resolved intent. The ExportRequest would return a
WriteIntentError because it would observe the intent and not its
resolution, but resolving the intent would be a no-op because the intent
was, in fact, already resolved, and so retrying the ExportRequest would
run into the same problem. The situation would eventually unstick when a
RocksDB compaction rearranged SSTs such that the ExportRequest observed
both the intent and its resolution.

Note that 1eb3b2a disabled the use of time-bound iterators in all
other code paths that would have had a similar problem.

Release note: None

Co-authored-by: Nikhil Benesch <nikhil.benesch@gmail.com>
  • Loading branch information
craig[bot] and benesch committed Nov 16, 2018
2 parents 5e0b6d9 + 765df40 commit 9f54fc1
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pkg/ccl/storageccl/engineccl/mvcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ func (i *MVCCIncrementalIterator) advance() {

metaTimestamp := hlc.Timestamp(i.meta.Timestamp)
if i.meta.Txn != nil {
if !i.endTime.Less(metaTimestamp) {
if i.startTime.Less(metaTimestamp) && !i.endTime.Less(metaTimestamp) {
i.err = &roachpb.WriteIntentError{
Intents: []roachpb.Intent{{
Span: roachpb.Span{Key: i.iter.Key().Key},
Expand Down
31 changes: 23 additions & 8 deletions pkg/ccl/storageccl/engineccl/mvcc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func assertEqualKVs(
expected []engine.MVCCKeyValue,
) func(*testing.T) {
return func(t *testing.T) {
t.Helper()
iter := NewMVCCIncrementalIterator(e, IterOptions{
StartTime: startTime,
EndTime: endTime,
Expand Down Expand Up @@ -180,11 +181,18 @@ func TestMVCCIterateIncremental(t *testing.T) {
if err := engine.MVCCPut(ctx, e, nil, txn2.TxnMeta.Key, txn2.TxnMeta.Timestamp, txn2Val, &txn2); err != nil {
t.Fatal(err)
}
t.Run("intents1",
t.Run("intents",
iterateExpectErr(e, fn, testKey1, testKey1.PrefixEnd(), tsMin, tsMax, "conflicting intents"))
t.Run("intents2",
t.Run("intents",
iterateExpectErr(e, fn, testKey2, testKey2.PrefixEnd(), tsMin, tsMax, "conflicting intents"))
t.Run("intents3", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, ts3, kvs(kv1_3Deleted, kv2_2_2)))
t.Run("intents",
iterateExpectErr(e, fn, keyMin, keyMax, tsMin, ts4, "conflicting intents"))
// Intents above the upper time bound or beneath the lower time bound must
// be ignored (#28358). Note that the lower time bound is exclusive while
// the upper time bound is inclusive.
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, ts3, kvs(kv1_3Deleted, kv2_2_2)))
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, ts4, tsMax, kvs()))
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, ts4.Next(), tsMax, kvs()))

intent1 := roachpb.Intent{Span: roachpb.Span{Key: testKey1}, Txn: txn1.TxnMeta, Status: roachpb.COMMITTED}
if err := engine.MVCCResolveWriteIntent(ctx, e, nil, intent1); err != nil {
Expand All @@ -194,7 +202,7 @@ func TestMVCCIterateIncremental(t *testing.T) {
if err := engine.MVCCResolveWriteIntent(ctx, e, nil, intent2); err != nil {
t.Fatal(err)
}
t.Run("intents4", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, tsMax, kvs(kv1_4_4, kv2_2_2)))
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, tsMax, kvs(kv1_4_4, kv2_2_2)))
})

t.Run("iterFn=Next", func(t *testing.T) {
Expand Down Expand Up @@ -252,11 +260,18 @@ func TestMVCCIterateIncremental(t *testing.T) {
if err := engine.MVCCPut(ctx, e, nil, txn2.TxnMeta.Key, txn2.TxnMeta.Timestamp, txn2Val, &txn2); err != nil {
t.Fatal(err)
}
t.Run("intents1",
t.Run("intents",
iterateExpectErr(e, fn, testKey1, testKey1.PrefixEnd(), tsMin, tsMax, "conflicting intents"))
t.Run("intents2",
t.Run("intents",
iterateExpectErr(e, fn, testKey2, testKey2.PrefixEnd(), tsMin, tsMax, "conflicting intents"))
t.Run("intents3", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, ts3, kvs(kv1_3Deleted, kv1_2_2, kv1_1_1, kv2_2_2)))
t.Run("intents",
iterateExpectErr(e, fn, keyMin, keyMax, tsMin, ts4, "conflicting intents"))
// Intents above the upper time bound or beneath the lower time bound must
// be ignored (#28358). Note that the lower time bound is exclusive while
// the upper time bound is inclusive.
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, ts3, kvs(kv1_3Deleted, kv1_2_2, kv1_1_1, kv2_2_2)))
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, ts4, tsMax, kvs()))
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, ts4.Next(), tsMax, kvs()))

intent1 := roachpb.Intent{Span: roachpb.Span{Key: testKey1}, Txn: txn1.TxnMeta, Status: roachpb.COMMITTED}
if err := engine.MVCCResolveWriteIntent(ctx, e, nil, intent1); err != nil {
Expand All @@ -266,7 +281,7 @@ func TestMVCCIterateIncremental(t *testing.T) {
if err := engine.MVCCResolveWriteIntent(ctx, e, nil, intent2); err != nil {
t.Fatal(err)
}
t.Run("intents4", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, tsMax, kvs(kv1_4_4, kv1_3Deleted, kv1_2_2, kv1_1_1, kv2_2_2)))
t.Run("intents", assertEqualKVs(e, fn, keyMin, keyMax, tsMin, tsMax, kvs(kv1_4_4, kv1_3Deleted, kv1_2_2, kv1_1_1, kv2_2_2)))
})
}

Expand Down

0 comments on commit 9f54fc1

Please sign in to comment.