From d0da8ef83dfa7905cc5c13183cb6bf0588842dc1 Mon Sep 17 00:00:00 2001 From: Bilal Akhtar Date: Thu, 30 Apr 2020 13:21:04 -0400 Subject: [PATCH] storage: Don't advance keys in MVCCGet with Pebble Previously, when doing an MVCCGet with the Pebble MVCC scanner, we would advance keys after getting (or not getting) the value we're looking for. In some cases this could result in a seek, such as if there are too many revisions of that MVCC key. Seeks are costly and at best avoided when it's ultimatey unnecessary. Another side effect of doing this seek was that it would trip an assertion in spanSet.Iterator that checked if all reads were happening on allowed key spans. This caused TestDumpData to fail on Pebble (but not on RocksDB since the RocksDB MVCC scanner is specialized in C++ and doesn't flow through the same assertion for seeks). There's one additional change, stemming from a follow-up conversation in #48160, around unifying logic for terminating an MVCC scan. The getFromIntentHistory() case also calls into addAndAdvance() now. Fixes #48147. Release note: None. --- pkg/storage/pebble_mvcc_scanner.go | 35 +++++++++++------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/pkg/storage/pebble_mvcc_scanner.go b/pkg/storage/pebble_mvcc_scanner.go index 331109d4b5a5..b536dcaf3f33 100644 --- a/pkg/storage/pebble_mvcc_scanner.go +++ b/pkg/storage/pebble_mvcc_scanner.go @@ -117,6 +117,7 @@ type pebbleMVCCScanner struct { inconsistent, tombstones bool failOnMoreRecent bool checkUncertainty bool + isGet bool keyBuf []byte savedBuf []byte // cur* variables store the "current" record we're pointing to. Updated in @@ -155,6 +156,7 @@ func (p *pebbleMVCCScanner) init(txn *roachpb.Transaction) { // get iterates exactly once and adds one KV to the result set. func (p *pebbleMVCCScanner) get() { + p.isGet = true p.parent.SeekGE(MVCCKey{Key: p.start}) if !p.updateCurrent() { return @@ -165,6 +167,7 @@ func (p *pebbleMVCCScanner) get() { // scan iterates until maxKeys records are in results, or the underlying // iterator is exhausted, or an error is encountered. func (p *pebbleMVCCScanner) scan() (*roachpb.Span, error) { + p.isGet = false if p.reverse { if !p.iterSeekReverse(MVCCKey{Key: p.end}) { return nil, p.err @@ -214,9 +217,8 @@ func (p *pebbleMVCCScanner) decrementItersBeforeSeek() { } // Try to read from the current value's intent history. Assumes p.meta has been -// unmarshalled already. Returns true if a value was read and added to the -// result set. -func (p *pebbleMVCCScanner) getFromIntentHistory() bool { +// unmarshalled already. Returns found = true if a value was found and returned. +func (p *pebbleMVCCScanner) getFromIntentHistory() (value []byte, found bool) { intentHistory := p.meta.IntentHistory // upIdx is the index of the first intent in intentHistory with a sequence // number greater than our transaction's sequence number. Subtract 1 from it @@ -238,13 +240,10 @@ func (p *pebbleMVCCScanner) getFromIntentHistory() bool { // It is possible that no intent exists such that the sequence is less // than the read sequence, and is not ignored by this transaction. // In this case, we cannot read a value from the intent history. - return false - } - intent := p.meta.IntentHistory[upIdx-1] - if len(intent.Value) > 0 || p.tombstones { - p.results.put(p.curMVCCKey(), intent.Value) + return nil, false } - return true + intent := &p.meta.IntentHistory[upIdx-1] + return intent.Value, true } // Returns a write too old error with the specified timestamp. @@ -408,19 +407,8 @@ func (p *pebbleMVCCScanner) getAndAdvance() bool { // numbers) that we should read. If there exists a value in the intent // history that has a sequence number equal to or less than the read // sequence, read that value. - if p.getFromIntentHistory() { - if p.targetBytes > 0 && p.results.bytes >= p.targetBytes { - // When the target bytes are met or exceeded, stop producing more - // keys. We implement this by reducing maxKeys to the current - // number of keys. - // - // TODO(bilal): see if this can be implemented more transparently. - p.maxKeys = p.results.count - } - if p.maxKeys > 0 && p.results.count == p.maxKeys { - return false - } - return p.advanceKey() + if value, found := p.getFromIntentHistory(); found { + return p.addAndAdvance(value) } // 11. If no value in the intent history has a sequence number equal to // or less than the read, we must ignore the intents laid down by the @@ -521,6 +509,9 @@ func (p *pebbleMVCCScanner) prevKey(key []byte) bool { // advanceKey advances to the next key in the iterator's direction. func (p *pebbleMVCCScanner) advanceKey() bool { + if p.isGet { + return false + } if p.reverse { return p.prevKey(p.curKey) }