Skip to content

Commit

Permalink
internal/base: add SETWITHDEL key kind
Browse files Browse the repository at this point in the history
FIXME(travers): Add commit message.
  • Loading branch information
nicktrav committed Sep 13, 2021
1 parent b85224b commit dc04f9d
Show file tree
Hide file tree
Showing 12 changed files with 288 additions and 57 deletions.
73 changes: 61 additions & 12 deletions compaction_iter.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package pebble
import (
"io"
"sort"
"strconv"

"github.com/cockroachdb/errors"
"github.com/cockroachdb/pebble/internal/base"
Expand Down Expand Up @@ -140,6 +141,10 @@ type compactionIter struct {
// determine when iteration has advanced to a new user key and thus a new
// snapshot stripe.
keyBuf []byte
// Temporary buffer used for storing the previous value, which may be an
// unsafe, i.iter-owned slice that could be altered when the iterator is
// advanced.
valueBuf []byte
// Is the current entry valid?
valid bool
iterKey *InternalKey
Expand Down Expand Up @@ -303,12 +308,8 @@ func (i *compactionIter) Next() (*InternalKey, []byte) {
continue
}

case InternalKeyKindSet:
i.saveKey()
i.value = i.iterValue
i.valid = true
i.skip = true
i.maybeZeroSeqnum(i.curSnapshotIdx)
case InternalKeyKindSet, InternalKeyKindSetWithDelete:
i.setNext()
return &i.key, i.value

case InternalKeyKindMerge:
Expand Down Expand Up @@ -444,6 +445,52 @@ func (i *compactionIter) nextInStripe() stripeChangeType {
return newStripe
}

func (i *compactionIter) setNext() {
// Save the current key.
i.saveKey()
// iterValue is owned by i.iter and could change after the Next() call, so
// use valueBuf instead. Note that valueBuf is only used in this one
// instance; everywhere else, we just point i.value to the unsafe
// i.iter-owned value buffer.
i.valueBuf = append(i.valueBuf[:0], i.iterValue...)
i.value = i.valueBuf
i.valid = true
i.maybeZeroSeqnum(i.curSnapshotIdx)

// If this key is a SETWITHDEL, we can emit it immediately. Records with the
// same key are safe to skip.
if i.iterKey.Kind() == InternalKeyKindSetWithDelete {
i.skip = true
return
}

// Else, we continue to loop through records in the stripe.
for {
switch t := i.nextInStripe(); t {
case newStripe, sameStripeNonSkippable:
i.pos = iterPosNext
if t == sameStripeNonSkippable {
// This is a key we cannot skip. We can safely emit a SETWITHDEL
// here, though it is suboptimal due to ... FIXME
// TODO(travers): Optimize this after undertaking a larger
// refactoring to make this check simpler.
i.key.SetKind(InternalKeyKindSetWithDelete)
}
return
case sameStripeSkippable:
// We're still in the same stripe. If this is a DEL, we stop looking
// and emit a SETWITHDEL. Subsequent keys are eligible for skipping.
if i.iterKey.Kind() == InternalKeyKindDelete {
i.key.SetKind(InternalKeyKindSetWithDelete)
i.skip = true
return
}
default:
panic("pebble: unexpected stripeChangeType: " + strconv.Itoa(int(t)))
}
}
}

func (i *compactionIter) mergeNext(valueMerger ValueMerger) stripeChangeType {
// Save the current key.
i.saveKey()
Expand Down Expand Up @@ -479,7 +526,7 @@ func (i *compactionIter) mergeNext(valueMerger ValueMerger) stripeChangeType {
i.skip = true
return sameStripeSkippable

case InternalKeyKindSet:
case InternalKeyKindSet, InternalKeyKindSetWithDelete:
if i.rangeDelFrag.Deleted(*key, i.curSnapshotSeqNum) {
// We change the kind of the result key to a Set so that it shadows
// keys in lower levels. That is, MERGE+RANGEDEL -> SET. This isn't
Expand All @@ -490,9 +537,10 @@ func (i *compactionIter) mergeNext(valueMerger ValueMerger) stripeChangeType {
return sameStripeSkippable
}

// We've hit a Set value. Merge with the existing value and return. We
// change the kind of the resulting key to a Set so that it shadows keys
// in lower levels. That is, MERGE+SET -> SET.
// We've hit a Set or SetWithDel value. Merge with the existing
// value and return. We change the kind of the resulting key to a
// Set so that it shadows keys in lower levels. That is:
// MERGE + (SET*) -> SET.
i.err = valueMerger.MergeOlder(i.iterValue)
if i.err != nil {
i.valid = false
Expand Down Expand Up @@ -544,8 +592,9 @@ func (i *compactionIter) singleDeleteNext() bool {

key := i.iterKey
switch key.Kind() {
case InternalKeyKindDelete, InternalKeyKindMerge:
// We've hit a Delete or Merge, transform the SingleDelete into a full Delete.
case InternalKeyKindDelete, InternalKeyKindMerge, InternalKeyKindSetWithDelete:
// We've hit a Delete, Merge or SetWithDelete, transform the
// SingleDelete into a full Delete.
i.key.SetKind(InternalKeyKindDelete)
i.skip = true
return true
Expand Down
4 changes: 2 additions & 2 deletions error_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ func TestRequireReadError(t *testing.T) {
require.NoError(t, d.Flush())
expectLSM(`
0.0:
000007:[a1#4,SET-a2#72057594037927935,RANGEDEL]
000007:[a1#4,SETWITHDEL-a2#72057594037927935,RANGEDEL]
6:
000005:[a1#1,SET-a2#2,SET]
`, d, t)
Expand Down Expand Up @@ -281,7 +281,7 @@ func TestCorruptReadError(t *testing.T) {
require.NoError(t, d.Flush())
expectLSM(`
0.0:
000007:[a1#4,SET-a2#72057594037927935,RANGEDEL]
000007:[a1#4,SETWITHDEL-a2#72057594037927935,RANGEDEL]
6:
000005:[a1#1,SET-a2#2,SET]
`, d, t)
Expand Down
1 change: 1 addition & 0 deletions internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const (
InternalKeyKindSingleDelete = base.InternalKeyKindSingleDelete
InternalKeyKindRangeDelete = base.InternalKeyKindRangeDelete
InternalKeyKindMax = base.InternalKeyKindMax
InternalKeyKindSetWithDelete = base.InternalKeyKindSetWithDelete
InternalKeyKindInvalid = base.InternalKeyKindInvalid
InternalKeySeqNumBatch = base.InternalKeySeqNumBatch
InternalKeySeqNumMax = base.InternalKeySeqNumMax
Expand Down
37 changes: 22 additions & 15 deletions internal/base/internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ const (
// seqNum.
InternalKeyKindMax InternalKeyKind = 17

// InternalKeyKindSetWithDelete keys are SET keys that have met with a
// DELETE key in a prior compaction. This key kind is specific to Pebble.
// See https://github.com/cockroachdb/pebble/issues/1255.
InternalKeyKindSetWithDelete InternalKeyKind = 18

// A marker for an invalid key.
InternalKeyKindInvalid InternalKeyKind = 255

Expand All @@ -66,14 +71,15 @@ const (
)

var internalKeyKindNames = []string{
InternalKeyKindDelete: "DEL",
InternalKeyKindSet: "SET",
InternalKeyKindMerge: "MERGE",
InternalKeyKindLogData: "LOGDATA",
InternalKeyKindSingleDelete: "SINGLEDEL",
InternalKeyKindRangeDelete: "RANGEDEL",
InternalKeyKindMax: "MAX",
InternalKeyKindInvalid: "INVALID",
InternalKeyKindDelete: "DEL",
InternalKeyKindSet: "SET",
InternalKeyKindMerge: "MERGE",
InternalKeyKindLogData: "LOGDATA",
InternalKeyKindSingleDelete: "SINGLEDEL",
InternalKeyKindRangeDelete: "RANGEDEL",
InternalKeyKindMax: "MAX",
InternalKeyKindSetWithDelete: "SETWITHDEL",
InternalKeyKindInvalid: "INVALID",
}

func (k InternalKeyKind) String() string {
Expand Down Expand Up @@ -130,13 +136,14 @@ func MakeRangeDeleteSentinelKey(userKey []byte) InternalKey {
}

var kindsMap = map[string]InternalKeyKind{
"DEL": InternalKeyKindDelete,
"SINGLEDEL": InternalKeyKindSingleDelete,
"RANGEDEL": InternalKeyKindRangeDelete,
"SET": InternalKeyKindSet,
"MERGE": InternalKeyKindMerge,
"INVALID": InternalKeyKindInvalid,
"MAX": InternalKeyKindMax,
"DEL": InternalKeyKindDelete,
"SINGLEDEL": InternalKeyKindSingleDelete,
"RANGEDEL": InternalKeyKindRangeDelete,
"SET": InternalKeyKindSet,
"MERGE": InternalKeyKindMerge,
"INVALID": InternalKeyKindInvalid,
"MAX": InternalKeyKindMax,
"SETWITHDEL": InternalKeyKindSetWithDelete,
}

// ParseInternalKey parses the string representation of an internal key. The
Expand Down
6 changes: 3 additions & 3 deletions iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ func (i *Iterator) findNextEntry(limit []byte) {
i.nextUserKey()
continue

case InternalKeyKindSet:
case InternalKeyKindSet, InternalKeyKindSetWithDelete:
i.keyBuf = append(i.keyBuf[:0], key.UserKey...)
i.key = i.keyBuf
i.value = i.iterValue
Expand Down Expand Up @@ -459,7 +459,7 @@ func (i *Iterator) findPrevEntry(limit []byte) {
}
continue

case InternalKeyKindSet:
case InternalKeyKindSet, InternalKeyKindSetWithDelete:
i.keyBuf = append(i.keyBuf[:0], key.UserKey...)
i.key = i.keyBuf
// iterValue is owned by i.iter and could change after the Prev()
Expand Down Expand Up @@ -570,7 +570,7 @@ func (i *Iterator) mergeNext(key InternalKey, valueMerger ValueMerger) {
// point.
return

case InternalKeyKindSet:
case InternalKeyKindSet, InternalKeyKindSetWithDelete:
// We've hit a Set value. Merge with the existing value and return.
i.err = valueMerger.MergeOlder(i.iterValue)
return
Expand Down
2 changes: 1 addition & 1 deletion level_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ func (m *simpleMergingIter) step() bool {
m.err = closer.Close()
}
m.valueMerger = nil
case InternalKeyKindSet:
case InternalKeyKindSet, InternalKeyKindSetWithDelete:
m.err = m.valueMerger.MergeOlder(item.value)
if m.err == nil {
var closer io.Closer
Expand Down
4 changes: 2 additions & 2 deletions range_del_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,9 +264,9 @@ func TestRangeDelCompactionTruncation(t *testing.T) {
1:
000012:[a#3,RANGEDEL-b#72057594037927935,RANGEDEL]
3:
000017:[b#4,SET-b#4,SET]
000017:[b#4,SETWITHDEL-b#4,SETWITHDEL]
000018:[b#3,RANGEDEL-c#72057594037927935,RANGEDEL]
000019:[c#5,SET-d#72057594037927935,RANGEDEL]
000019:[c#5,SETWITHDEL-d#72057594037927935,RANGEDEL]
`)

// The L1 table still contains a tombstone from [a,d) which will improperly
Expand Down
Loading

0 comments on commit dc04f9d

Please sign in to comment.