Skip to content

Commit

Permalink
Merge pull request #32318 from bdarnell/backport-disable-timebound
Browse files Browse the repository at this point in the history
backport-2.1: batcheval: Disable time-bound iterators in KV commands
  • Loading branch information
bdarnell authored Nov 14, 2018
2 parents 2cd7749 + 02f35d1 commit 4523f98
Show file tree
Hide file tree
Showing 9 changed files with 229 additions and 50 deletions.
8 changes: 8 additions & 0 deletions c-deps/libroach/db.cc
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,14 @@ DBStatus DBSstFileWriterAdd(DBSstFileWriter* fw, DBKey key, DBSlice val) {
return kSuccess;
}

DBStatus DBSstFileWriterDelete(DBSstFileWriter* fw, DBKey key) {
rocksdb::Status status = fw->rep.Delete(EncodeKey(key));
if (!status.ok()) {
return ToDBStatus(status);
}
return kSuccess;
}

DBStatus DBSstFileWriterFinish(DBSstFileWriter* fw, DBString* data) {
rocksdb::Status status = fw->rep.Finish();
if (!status.ok()) {
Expand Down
3 changes: 3 additions & 0 deletions c-deps/libroach/include/libroach.h
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,9 @@ DBStatus DBSstFileWriterOpen(DBSstFileWriter* fw);
// cannot have been called.
DBStatus DBSstFileWriterAdd(DBSstFileWriter* fw, DBKey key, DBSlice val);

// Adds a deletion tombstone to the sstable being built. See DBSstFileWriterAdd for more.
DBStatus DBSstFileWriterDelete(DBSstFileWriter* fw, DBKey key);

// Finalizes the writer and stores the constructed file's contents in *data. At
// least one kv entry must have been added. May only be called once.
DBStatus DBSstFileWriterFinish(DBSstFileWriter* fw, DBString* data);
Expand Down
4 changes: 4 additions & 0 deletions c-deps/libroach/mvcc.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,10 @@ template <bool reverse> class mvccScanner {
return seekVersion(timestamp_, false);
}

if (cur_value_.size() == 0) {
return setStatus(FmtStatus("zero-length mvcc metadata"));
}

if (!meta_.ParseFromArray(cur_value_.data(), cur_value_.size())) {
return setStatus(FmtStatus("unable to decode MVCCMetadata"));
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/ccl/storageccl/engineccl/mvcc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ func TestMVCCIncrementalIteratorIntentStraddlesSStables(t *testing.T) {
ingest := func(it engine.Iterator, count int) {
sst, err := engine.MakeRocksDBSstFileWriter()
if err != nil {
t.Fatal(sst)
t.Fatal(err)
}
defer sst.Close()

Expand Down
5 changes: 1 addition & 4 deletions pkg/storage/batcheval/cmd_end_transaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,11 +438,8 @@ func resolveLocalIntents(
desc = &mergeTrigger.LeftDesc
}

min, max := txn.InclusiveTimeBounds()
iter := batch.NewIterator(engine.IterOptions{
MinTimestampHint: min,
MaxTimestampHint: max,
UpperBound: desc.EndKey.AsRawKey(),
UpperBound: desc.EndKey.AsRawKey(),
})
iterAndBuf := engine.GetBufUsingIter(iter)
defer iterAndBuf.Cleanup()
Expand Down
33 changes: 1 addition & 32 deletions pkg/storage/batcheval/cmd_refresh_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,8 @@ func RefreshRange(
return result.Result{}, errors.Errorf("no transaction specified to %s", args.Method())
}

// Use a time-bounded iterator to avoid unnecessarily iterating over
// older data.
iter := batch.NewIterator(engine.IterOptions{
MinTimestampHint: h.Txn.OrigTimestamp,
MaxTimestampHint: h.Txn.Timestamp,
UpperBound: args.EndKey,
UpperBound: args.EndKey,
})
defer iter.Close()
// Iterate over values until we discover any value written at or
Expand Down Expand Up @@ -87,33 +83,6 @@ func RefreshRange(
if i.Txn.ID == h.Txn.ID {
continue
}

if i.Span.EndKey != nil {
return result.Result{}, errors.Errorf("unexpected range intent from MVCC storage")
}

// HACK(bdarnell): Time-bound iterators can return intents that
// shouldn't be there
// (https://github.com/cockroachdb/cockroach/issues/28358), and
// this can result in stalled traffic when it occurs in this
// method (https://github.com/cockroachdb/cockroach/issues/31823).
// When we get an intent, check with a regular iterator to ensure
// that it's really there.
_, realIntents, err := engine.MVCCGetWithTombstone(
ctx,
batch,
i.Span.Key,
h.Txn.Timestamp,
false, /* consistent */
nil, /* txn */
)
if err != nil {
return result.Result{}, err
}
if len(realIntents) == 0 {
continue
}

// Return an error if an intent was written to the span.
return result.Result{}, errors.Errorf("encountered recently written intent %s @%s",
i.Span.Key, i.Txn.Timestamp)
Expand Down
200 changes: 200 additions & 0 deletions pkg/storage/batcheval/cmd_refresh_range_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
// Copyright 2018 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License. See the AUTHORS file
// for names of contributors.

package batcheval

import (
"context"
"fmt"
"testing"

"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage/engine"
"github.com/cockroachdb/cockroach/pkg/storage/engine/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
)

// TestRefreshRangeTimeBoundIterator is a regression test for
// https://github.com/cockroachdb/cockroach/issues/31823. RefreshRange
// uses a time-bound iterator, which has a bug that can cause old
// resolved intents to incorrectly appear to be pending. This test
// constructs the necessary arrangement of sstables to reproduce the
// bug and ensures that the workaround (and later, the permanent fix)
// are effective.
//
// The bug is that resolving an intent does not contribute to the
// sstable's timestamp bounds, so that if there is no other
// timestamped data expanding the bounds, time-bound iterators may
// open fewer sstables than necessary and only see the intent, not its
// resolution.
//
// This test creates two sstables. The first contains a pending intent
// at ts1 and another key at ts4, giving it timestamp bounds 1-4 (and
// putting it in scope for transactions at timestamps higher than
// ts1).
func TestRefreshRangeTimeBoundIterator(t *testing.T) {
defer leaktest.AfterTest(t)()

ctx := context.Background()
k := roachpb.Key("a")
v := roachpb.MakeValueFromString("hi")
ts1 := hlc.Timestamp{WallTime: 1}
ts2 := hlc.Timestamp{WallTime: 2}
ts3 := hlc.Timestamp{WallTime: 3}
ts4 := hlc.Timestamp{WallTime: 4}

// Create an sstable containing an unresolved intent. To reduce the
// amount of knowledge of MVCC internals we must embed here, we
// write to a temporary engine and extract the RocksDB KV data. The
// sstable also contains an unrelated key at a higher timestamp to
// widen its bounds.
intentSSTContents := func() []byte {
db := engine.NewInMem(roachpb.Attributes{}, 10<<20)
defer db.Close()

txn := &roachpb.Transaction{
TxnMeta: enginepb.TxnMeta{
Key: k,
ID: uuid.MakeV4(),
Epoch: 1,
Timestamp: ts1,
},
}
if err := engine.MVCCPut(ctx, db, nil, k, txn.Timestamp, v, txn); err != nil {
t.Fatal(err)
}

if err := engine.MVCCPut(ctx, db, nil, roachpb.Key("unused1"), ts4, v, nil); err != nil {
t.Fatal(err)
}

sstWriter, err := engine.MakeRocksDBSstFileWriter()
if err != nil {
t.Fatal(err)
}
it := db.NewIterator(engine.IterOptions{
UpperBound: keys.MaxKey,
})
defer it.Close()
it.Seek(engine.MVCCKey{Key: keys.MinKey})
for {
ok, err := it.Valid()
if err != nil {
t.Fatal(err)
}
if !ok {
break
}
if err := sstWriter.Add(engine.MVCCKeyValue{Key: it.Key(), Value: it.Value()}); err != nil {
t.Fatal(err)
}
it.Next()
}

sstContents, err := sstWriter.Finish()
if err != nil {
t.Fatal(err)
}

return sstContents
}()

// Create a second sstable containing the resolution of the intent
// (committed). This is a rocksdb tombstone and there's no good way
// to construct that as we did above, so we do it by hand. The
// sstable also has a second write at a different (older) timestamp,
// because if it were empty other than the deletion tombstone, it
// would not have any timestamp bounds and would be selected for
// every read.
resolveSSTContents := func() []byte {
sstWriter, err := engine.MakeRocksDBSstFileWriter()
if err != nil {
t.Fatal(err)
}
if err := sstWriter.Delete(engine.MakeMVCCMetadataKey(k)); err != nil {
t.Fatal(err)
}
if err := sstWriter.Add(engine.MVCCKeyValue{
Key: engine.MVCCKey{
Key: roachpb.Key("unused2"),
Timestamp: ts1,
},
Value: nil,
}); err != nil {
t.Fatal(err)
}
sstContents, err := sstWriter.Finish()
if err != nil {
t.Fatal(err)
}
return sstContents
}()

// Create a new DB and ingest our two sstables.
db := engine.NewInMem(roachpb.Attributes{}, 10<<20)
defer db.Close()

for i, contents := range [][]byte{intentSSTContents, resolveSSTContents} {
filename := fmt.Sprintf("intent-%d", i)
if err := db.WriteFile(filename, contents); err != nil {
t.Fatal(err)
}
if err := db.IngestExternalFiles(ctx, []string{filename}, true); err != nil {
t.Fatal(err)
}
}

// We should now have a committed value at k@ts1. Read it back to make
// sure our fake intent resolution did the right thing.
if val, intents, err := engine.MVCCGet(ctx, db, k, ts1, true, nil); err != nil {
t.Fatal(err)
} else if len(intents) > 0 {
t.Fatalf("got unexpected intents: %v", intents)
} else if !val.EqualData(v) {
t.Fatalf("expected %s, got %s", v, val)
}

// Now the real test: a transaction at ts2 has been pushed to ts3
// and must refresh. It overlaps with our committed intent on k@ts1,
// which is fine because our timestamp is higher (but if that intent
// were still pending, the new txn would be blocked). Prior to
// https://github.com/cockroachdb/cockroach/pull/32211, a bug in the
// time-bound iterator meant that we would see the first sstable but
// not the second and incorrectly report the intent as pending,
// resulting in an error from RefreshRange.
var resp roachpb.RefreshRangeResponse
_, err := RefreshRange(ctx, db, CommandArgs{
Args: &roachpb.RefreshRangeRequest{
RequestHeader: roachpb.RequestHeader{
Key: k,
EndKey: keys.MaxKey,
},
},
Header: roachpb.Header{
Txn: &roachpb.Transaction{
TxnMeta: enginepb.TxnMeta{
Timestamp: ts3,
},
OrigTimestamp: ts2,
},
},
}, &resp)
if err != nil {
t.Fatal(err)
}
}
14 changes: 1 addition & 13 deletions pkg/storage/batcheval/cmd_resolve_intent_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/storage/batcheval/result"
"github.com/cockroachdb/cockroach/pkg/storage/engine"
"github.com/cockroachdb/cockroach/pkg/storage/spanset"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
)

func init() {
Expand Down Expand Up @@ -53,18 +52,7 @@ func ResolveIntentRange(
Status: args.Status,
}

// Use a time-bounded iterator as an optimization if indicated.
var iterAndBuf engine.IterAndBuf
if args.MinTimestamp != (hlc.Timestamp{}) {
iter := batch.NewIterator(engine.IterOptions{
MinTimestampHint: args.MinTimestamp,
MaxTimestampHint: args.IntentTxn.Timestamp,
UpperBound: args.EndKey,
})
iterAndBuf = engine.GetBufUsingIter(iter)
} else {
iterAndBuf = engine.GetIterAndBuf(batch, engine.IterOptions{UpperBound: args.EndKey})
}
iterAndBuf := engine.GetIterAndBuf(batch, engine.IterOptions{UpperBound: args.EndKey})
defer iterAndBuf.Cleanup()

numKeys, resumeSpan, err := engine.MVCCResolveWriteIntentRangeUsingIter(
Expand Down
10 changes: 10 additions & 0 deletions pkg/storage/engine/rocksdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -2744,6 +2744,16 @@ func (fw *RocksDBSstFileWriter) Add(kv MVCCKeyValue) error {
return statusToError(C.DBSstFileWriterAdd(fw.fw, goToCKey(kv.Key), goToCSlice(kv.Value)))
}

// Delete puts a deletion tombstone into the sstable being built. See
// the Add method for more.
func (fw *RocksDBSstFileWriter) Delete(k MVCCKey) error {
if fw.fw == nil {
return errors.New("cannot call Delete on a closed writer")
}
fw.DataSize += int64(len(k.Key))
return statusToError(C.DBSstFileWriterDelete(fw.fw, goToCKey(k)))
}

// Finish finalizes the writer and returns the constructed file's contents. At
// least one kv entry must have been added.
func (fw *RocksDBSstFileWriter) Finish() ([]byte, error) {
Expand Down

0 comments on commit 4523f98

Please sign in to comment.