From e70f26e11e3536849234e7f5f0fd80bf8bab9fb7 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Mon, 30 Nov 2020 15:21:29 -0500 Subject: [PATCH 1/4] kv/tscache: create new IntervalSkl encode/decode benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` name time/op IntervalSklDecodeValue/logical=false,synthetic=false,txnID=false-16 11.7ns ± 2% IntervalSklDecodeValue/logical=false,synthetic=false,txnID=true-16 11.7ns ± 1% IntervalSklDecodeValue/logical=false,synthetic=true,txnID=false-16 11.8ns ± 1% IntervalSklDecodeValue/logical=false,synthetic=true,txnID=true-16 11.7ns ± 0% IntervalSklDecodeValue/logical=true,synthetic=false,txnID=false-16 11.7ns ± 2% IntervalSklDecodeValue/logical=true,synthetic=false,txnID=true-16 11.7ns ± 1% IntervalSklDecodeValue/logical=true,synthetic=true,txnID=false-16 11.7ns ± 1% IntervalSklDecodeValue/logical=true,synthetic=true,txnID=true-16 11.7ns ± 2% IntervalSklEncodeValue/logical=false,synthetic=false,txnID=false-16 6.25ns ± 1% IntervalSklEncodeValue/logical=false,synthetic=false,txnID=true-16 6.25ns ± 1% IntervalSklEncodeValue/logical=false,synthetic=true,txnID=false-16 6.23ns ± 2% IntervalSklEncodeValue/logical=false,synthetic=true,txnID=true-16 6.23ns ± 2% IntervalSklEncodeValue/logical=true,synthetic=false,txnID=false-16 6.26ns ± 3% IntervalSklEncodeValue/logical=true,synthetic=false,txnID=true-16 6.24ns ± 1% IntervalSklEncodeValue/logical=true,synthetic=true,txnID=false-16 6.29ns ± 3% IntervalSklEncodeValue/logical=true,synthetic=true,txnID=true-16 6.23ns ± 2% name alloc/op IntervalSklDecodeValue/logical=false,synthetic=false,txnID=false-16 0.00B IntervalSklDecodeValue/logical=false,synthetic=false,txnID=true-16 0.00B IntervalSklDecodeValue/logical=false,synthetic=true,txnID=false-16 0.00B IntervalSklDecodeValue/logical=false,synthetic=true,txnID=true-16 0.00B IntervalSklDecodeValue/logical=true,synthetic=false,txnID=false-16 0.00B IntervalSklDecodeValue/logical=true,synthetic=false,txnID=true-16 0.00B IntervalSklDecodeValue/logical=true,synthetic=true,txnID=false-16 0.00B IntervalSklDecodeValue/logical=true,synthetic=true,txnID=true-16 0.00B IntervalSklEncodeValue/logical=false,synthetic=false,txnID=false-16 0.00B IntervalSklEncodeValue/logical=false,synthetic=false,txnID=true-16 0.00B IntervalSklEncodeValue/logical=false,synthetic=true,txnID=false-16 0.00B IntervalSklEncodeValue/logical=false,synthetic=true,txnID=true-16 0.00B IntervalSklEncodeValue/logical=true,synthetic=false,txnID=false-16 0.00B IntervalSklEncodeValue/logical=true,synthetic=false,txnID=true-16 0.00B IntervalSklEncodeValue/logical=true,synthetic=true,txnID=false-16 0.00B IntervalSklEncodeValue/logical=true,synthetic=true,txnID=true-16 0.00B name allocs/op IntervalSklDecodeValue/logical=false,synthetic=false,txnID=false-16 0.00 IntervalSklDecodeValue/logical=false,synthetic=false,txnID=true-16 0.00 IntervalSklDecodeValue/logical=false,synthetic=true,txnID=false-16 0.00 IntervalSklDecodeValue/logical=false,synthetic=true,txnID=true-16 0.00 IntervalSklDecodeValue/logical=true,synthetic=false,txnID=false-16 0.00 IntervalSklDecodeValue/logical=true,synthetic=false,txnID=true-16 0.00 IntervalSklDecodeValue/logical=true,synthetic=true,txnID=false-16 0.00 IntervalSklDecodeValue/logical=true,synthetic=true,txnID=true-16 0.00 IntervalSklEncodeValue/logical=false,synthetic=false,txnID=false-16 0.00 IntervalSklEncodeValue/logical=false,synthetic=false,txnID=true-16 0.00 IntervalSklEncodeValue/logical=false,synthetic=true,txnID=false-16 
0.00 IntervalSklEncodeValue/logical=false,synthetic=true,txnID=true-16 0.00 IntervalSklEncodeValue/logical=true,synthetic=false,txnID=false-16 0.00 IntervalSklEncodeValue/logical=true,synthetic=false,txnID=true-16 0.00 IntervalSklEncodeValue/logical=true,synthetic=true,txnID=false-16 0.00 IntervalSklEncodeValue/logical=true,synthetic=true,txnID=true-16 0.00 ``` --- pkg/kv/kvserver/tscache/interval_skl_test.go | 41 ++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/pkg/kv/kvserver/tscache/interval_skl_test.go b/pkg/kv/kvserver/tscache/interval_skl_test.go index 7598e8644201..603a4a01706b 100644 --- a/pkg/kv/kvserver/tscache/interval_skl_test.go +++ b/pkg/kv/kvserver/tscache/interval_skl_test.go @@ -1381,3 +1381,44 @@ func randRange(rng *rand.Rand, slots int) (from, middle, to []byte) { func randRangeOpt(rng *rand.Rand) rangeOptions { return rangeOptions(rng.Intn(int(excludeFrom|excludeTo) + 1)) } + +func BenchmarkIntervalSklDecodeValue(b *testing.B) { + runBenchWithVals(b, func(b *testing.B, val cacheValue) { + var arr [encodedValSize]byte + enc := encodeValue(arr[:0], val) + for i := 0; i < b.N; i++ { + _, _ = decodeValue(enc) + } + }) +} + +func BenchmarkIntervalSklEncodeValue(b *testing.B) { + runBenchWithVals(b, func(b *testing.B, val cacheValue) { + var arr [encodedValSize]byte + for i := 0; i < b.N; i++ { + _ = encodeValue(arr[:0], val) + } + }) +} + +func runBenchWithVals(b *testing.B, fn func(*testing.B, cacheValue)) { + for _, withLogical := range []bool{false, true} { + for _, withSynthetic := range []bool{false, true} { + for _, withTxnID := range []bool{false, true} { + var val cacheValue + val.ts.WallTime = 15 + if withLogical { + val.ts.Logical = 10 + } + if withSynthetic { + val.ts.Synthetic = true + } + if withTxnID { + val.txnID = uuid.MakeV4() + } + name := fmt.Sprintf("logical=%t,synthetic=%t,txnID=%t", withLogical, withSynthetic, withTxnID) + b.Run(name, func(b *testing.B) { fn(b, val) }) + } + } + } +} From cc61e09f579ad479bda695f60b0610d99a391fb6 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Thu, 10 Dec 2020 22:09:14 -0500 Subject: [PATCH 2/4] kv/tscache: propagate synthetic timestamps through timestamp cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #57683. Before this change, a read with a synthetic timestamp would not pass the synthetic flag through the timestamp cache to conflicting writes. This could cause issues where a write that was bumped by one of these future timestamps would not notice that its write timestamp was now synthetic. This change fixes this by propagating the synthetic flag through the timestamp cache. We have to play a few tricks to make this efficient around `sklPage.ratchetMaxTimestamp`, which attempts to keeping a running maximum of the timestamps in a given skiplist using atomics. The added complexity here is encapsulated in a new `ratchetingTime` type. The testing around here is actually pretty solid. `TestIntervalSklConcurrency` and `TestIntervalSklConcurrentVsSequential` both perform long series of randomized operations and assert against ratchet inversions. This change adds some randomization around synthetic timestamps to the tests and introduces synthetic timestamps to the ratcheting policy (which wasn't otherwise necessary) to get solid randomized test coverage. This made encoding and decoding timestamp cache values slightly slower, but on the order of nanoseconds. 
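For concreteness, here is a minimal standalone sketch of the ratcheting tie-break this relies on, using simplified stand-ins for `hlc.Timestamp` and `cacheValue` (it is an illustration, not the code in `cache.go`): when two values carry the same timestamp but disagree on the synthetic flag or the txnID, the disagreeing attribute is dropped from the result.

```
package main

import "fmt"

// tstamp and cval are simplified stand-ins for hlc.Timestamp and cacheValue.
type tstamp struct {
	WallTime  int64
	Logical   int32
	Synthetic bool
}

// less orders by wall time, then logical; the synthetic flag does not affect
// ordering.
func (a tstamp) less(b tstamp) bool {
	return a.WallTime < b.WallTime || (a.WallTime == b.WallTime && a.Logical < b.Logical)
}

type cval struct {
	ts    tstamp
	txnID string // "" plays the role of noTxnID
}

// ratchet keeps the strictly newer value; on equal timestamps it strips the
// synthetic flag and the txnID wherever the two sides disagree, so the result
// conservatively covers both inputs.
func ratchet(old, new cval) (cval, bool) {
	if old.ts.less(new.ts) {
		return new, true
	} else if new.ts.less(old.ts) {
		return old, false
	}
	if new.ts.Synthetic != old.ts.Synthetic {
		new.ts.Synthetic = false
	}
	if new.txnID != old.txnID {
		new.txnID = ""
	}
	return new, new != old
}

func main() {
	a := cval{ts: tstamp{WallTime: 10, Synthetic: true}, txnID: "txn-1"}
	b := cval{ts: tstamp{WallTime: 10}, txnID: "txn-2"}
	res, updated := ratchet(a, b)
	fmt.Printf("%+v %v\n", res, updated)
	// {ts:{WallTime:10 Logical:0 Synthetic:false} txnID:} true
}
```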
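And a similarly hedged sketch of the `ratchetingTime` compression itself. The `tstamp` type and helper names below are invented stand-ins, but the layout matches what the diff adds: bit 0 carries the inverted synthetic flag and bits 1-63 carry a wall time that is only ever rounded up, so the decoded maximum always covers the true maximum.

```
package main

import (
	"fmt"
	"sync/atomic"
)

// tstamp is a simplified stand-in for hlc.Timestamp.
type tstamp struct {
	WallTime  int64
	Logical   int32
	Synthetic bool
}

// ratchetingTime packs a timestamp into 64 bits so a page can keep a running
// maximum with plain atomics. Any information dropped by the packing only
// ever rounds the timestamp up, which is safe for a maximum.
type ratchetingTime int64

func makeRatchetingTime(ts tstamp) ratchetingTime {
	rt := ratchetingTime(ts.WallTime)
	if ts.Logical > 0 {
		rt++ // fold the logical part into the wall time by rounding up
	}
	if rt&1 == 1 {
		rt++ // free up bit 0, again rounding up
	}
	if !ts.Synthetic {
		rt |= 1 // non-synthetic sorts above synthetic at the same wall time
	}
	return rt
}

func (rt ratchetingTime) get() tstamp {
	return tstamp{WallTime: int64(rt &^ 1), Synthetic: rt&1 == 0}
}

// ratchetMax bumps *max to the encoding of ts if that encoding is larger.
func ratchetMax(max *int64, ts tstamp) {
	new := int64(makeRatchetingTime(ts))
	for {
		old := atomic.LoadInt64(max)
		if new <= old || atomic.CompareAndSwapInt64(max, old, new) {
			return
		}
	}
}

func main() {
	var max int64
	ratchetMax(&max, tstamp{WallTime: 200, Logical: 1, Synthetic: true})
	ratchetMax(&max, tstamp{WallTime: 150})
	fmt.Printf("%+v\n", ratchetingTime(max).get())
	// {WallTime:202 Logical:0 Synthetic:true}: rounded up, synthetic preserved.
}
```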
``` name old time/op new time/op delta IntervalSklDecodeValue/logical=false,synthetic=false,txnID=false-16 11.7ns ± 2% 12.1ns ± 2% +3.52% (p=0.000 n=9+10) IntervalSklDecodeValue/logical=false,synthetic=false,txnID=true-16 11.7ns ± 1% 12.0ns ± 1% +2.66% (p=0.000 n=10+9) IntervalSklDecodeValue/logical=false,synthetic=true,txnID=false-16 11.8ns ± 1% 12.1ns ± 2% +3.27% (p=0.000 n=10+9) IntervalSklDecodeValue/logical=false,synthetic=true,txnID=true-16 11.7ns ± 0% 12.0ns ± 1% +2.82% (p=0.000 n=6+10) IntervalSklDecodeValue/logical=true,synthetic=false,txnID=false-16 11.7ns ± 2% 12.1ns ± 1% +3.08% (p=0.000 n=10+10) IntervalSklDecodeValue/logical=true,synthetic=false,txnID=true-16 11.7ns ± 1% 12.1ns ± 2% +3.59% (p=0.000 n=10+10) IntervalSklDecodeValue/logical=true,synthetic=true,txnID=false-16 11.7ns ± 1% 12.2ns ± 4% +4.11% (p=0.000 n=10+10) IntervalSklDecodeValue/logical=true,synthetic=true,txnID=true-16 11.7ns ± 2% 12.1ns ± 1% +3.42% (p=0.000 n=10+10) IntervalSklEncodeValue/logical=false,synthetic=false,txnID=false-16 6.25ns ± 1% 6.52ns ± 1% +4.38% (p=0.000 n=9+10) IntervalSklEncodeValue/logical=false,synthetic=false,txnID=true-16 6.25ns ± 1% 6.56ns ± 1% +5.00% (p=0.000 n=9+9) IntervalSklEncodeValue/logical=false,synthetic=true,txnID=false-16 6.23ns ± 2% 6.55ns ± 2% +5.07% (p=0.000 n=10+10) IntervalSklEncodeValue/logical=false,synthetic=true,txnID=true-16 6.23ns ± 2% 6.56ns ± 4% +5.28% (p=0.000 n=10+10) IntervalSklEncodeValue/logical=true,synthetic=false,txnID=false-16 6.26ns ± 3% 6.55ns ± 1% +4.66% (p=0.000 n=10+10) IntervalSklEncodeValue/logical=true,synthetic=false,txnID=true-16 6.24ns ± 1% 6.52ns ± 1% +4.53% (p=0.000 n=9+10) IntervalSklEncodeValue/logical=true,synthetic=true,txnID=false-16 6.29ns ± 3% 6.53ns ± 2% +3.71% (p=0.000 n=9+10) IntervalSklEncodeValue/logical=true,synthetic=true,txnID=true-16 6.23ns ± 2% 6.56ns ± 0% +5.26% (p=0.000 n=10+8) name old alloc/op new alloc/op delta IntervalSklDecodeValue/logical=false,synthetic=false,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklDecodeValue/logical=false,synthetic=false,txnID=true-16 0.00B 0.00B ~ (all equal) IntervalSklDecodeValue/logical=false,synthetic=true,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklDecodeValue/logical=false,synthetic=true,txnID=true-16 0.00B 0.00B ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=false,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=false,txnID=true-16 0.00B 0.00B ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=true,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=true,txnID=true-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=false,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=false,txnID=true-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=true,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=true,txnID=true-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=false,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=false,txnID=true-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=true,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=true,txnID=true-16 0.00B 0.00B ~ (all equal) name old allocs/op new allocs/op delta IntervalSklDecodeValue/logical=false,synthetic=false,txnID=false-16 0.00 0.00 ~ (all equal) 
IntervalSklDecodeValue/logical=false,synthetic=false,txnID=true-16 0.00 0.00 ~ (all equal) IntervalSklDecodeValue/logical=false,synthetic=true,txnID=false-16 0.00 0.00 ~ (all equal) IntervalSklDecodeValue/logical=false,synthetic=true,txnID=true-16 0.00 0.00 ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=false,txnID=false-16 0.00 0.00 ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=false,txnID=true-16 0.00 0.00 ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=true,txnID=false-16 0.00 0.00 ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=true,txnID=true-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=false,txnID=false-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=false,txnID=true-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=true,txnID=false-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=true,txnID=true-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=false,txnID=false-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=false,txnID=true-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=true,txnID=false-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=true,txnID=true-16 0.00 0.00 ~ (all equal) ``` --- pkg/kv/kvserver/tscache/cache.go | 24 ++- pkg/kv/kvserver/tscache/interval_skl.go | 144 +++++++++---- pkg/kv/kvserver/tscache/interval_skl_test.go | 204 ++++++++++++------- 3 files changed, 247 insertions(+), 125 deletions(-) diff --git a/pkg/kv/kvserver/tscache/cache.go b/pkg/kv/kvserver/tscache/cache.go index f2872f614a6c..b9c38878e834 100644 --- a/pkg/kv/kvserver/tscache/cache.go +++ b/pkg/kv/kvserver/tscache/cache.go @@ -114,20 +114,26 @@ func (v cacheValue) String() string { // // This ratcheting policy is shared across all Cache implementations, even if // they do not use this function directly. -func ratchetValue(old, new cacheValue) (cacheValue, bool) { +func ratchetValue(old, new cacheValue) (res cacheValue, updated bool) { if old.ts.Less(new.ts) { - // Ratchet to new value. + // New value newer. Ratchet to new value. return new, true } else if new.ts.Less(old.ts) { - // Nothing to update. + // Old value newer. Nothing to update. return old, false - } else if new.txnID != old.txnID { + } + + // Equal times. + if new.ts.Synthetic != old.ts.Synthetic { + // old.ts == new.ts but the values have different synthetic flags. + // Remove the synthetic flag from the resulting value. + new.ts.Synthetic = false + } + if new.txnID != old.txnID { // old.ts == new.ts but the values have different txnIDs. Remove the - // transaction ID from the value so that it is no longer owned by any - // transaction. + // transaction ID from the resulting value so that it is no longer owned + // by any transaction. new.txnID = noTxnID - return new, old.txnID != noTxnID } - // old == new. 
- return old, false + return new, new != old } diff --git a/pkg/kv/kvserver/tscache/interval_skl.go b/pkg/kv/kvserver/tscache/interval_skl.go index 88d5217c4bbb..f51f0b174bd3 100644 --- a/pkg/kv/kvserver/tscache/interval_skl.go +++ b/pkg/kv/kvserver/tscache/interval_skl.go @@ -19,7 +19,6 @@ import ( "fmt" "sync/atomic" "time" - "unsafe" "github.com/andy-kimball/arenaskl" "github.com/cockroachdb/cockroach/pkg/util" @@ -90,8 +89,8 @@ const ( ) const ( - encodedTsSize = int(unsafe.Sizeof(int64(0)) + unsafe.Sizeof(int32(0))) - encodedTxnIDSize = int(unsafe.Sizeof(uuid.UUID{})) + encodedTsSize = 8 + 4 + 1 // walltime + logical + flags + encodedTxnIDSize = uuid.Size encodedValSize = encodedTsSize + encodedTxnIDSize // initialSklPageSize is the initial size of each page in the sklImpl's @@ -202,7 +201,7 @@ func newIntervalSkl(clock *hlc.Clock, minRet time.Duration, metrics sklMetrics) minPages: defaultMinSklPages, metrics: metrics, } - s.pushNewPage(0 /* maxWallTime */, nil /* arena */) + s.pushNewPage(0 /* maxTime */, nil /* arena */) s.metrics.Pages.Update(1) return &s } @@ -301,9 +300,10 @@ func (s *intervalSkl) addRange(from, to []byte, opt rangeOptions, val cacheValue s.rotMutex.RLock() defer s.rotMutex.RUnlock() - // If floor ts is >= requested timestamp, then no need to perform a search - // or add any records. - if val.ts.LessEq(s.floorTS) { + // If floor ts is greater than the requested timestamp, then no need to + // perform a search or add any records. We don't return early when the + // timestamps are equal, because their flags may differ. + if val.ts.Less(s.floorTS) { return nil } @@ -385,7 +385,7 @@ func (s *intervalSkl) frontPage() *sklPage { // pushNewPage prepends a new empty page to the front of the pages list. It // accepts an optional arena argument to facilitate re-use. -func (s *intervalSkl) pushNewPage(maxWallTime int64, arena *arenaskl.Arena) { +func (s *intervalSkl) pushNewPage(maxTime ratchetingTime, arena *arenaskl.Arena) { size := s.nextPageSize() if arena != nil && arena.Cap() == size { // Re-use the provided arena, if possible. @@ -395,7 +395,7 @@ func (s *intervalSkl) pushNewPage(maxWallTime int64, arena *arenaskl.Arena) { arena = arenaskl.NewArena(size) } p := newSklPage(arena) - p.maxWallTime = maxWallTime + p.maxTime = maxTime s.pages.PushFront(p) } @@ -455,7 +455,7 @@ func (s *intervalSkl) rotatePages(filledPage *sklPage) { var oldArena *arenaskl.Arena for s.pages.Len() >= s.minPages { bp := back.Value.(*sklPage) - bpMaxTS := hlc.Timestamp{WallTime: bp.maxWallTime} + bpMaxTS := bp.getMaxTimestamp() if minTSToRetain.LessEq(bpMaxTS) { // The back page's maximum timestamp is within the time // window we've promised to retain, so we can't evict it. @@ -473,12 +473,12 @@ func (s *intervalSkl) rotatePages(filledPage *sklPage) { } // Push a new empty page on the front of the pages list. We give this page - // the maxWallTime of the old front page. This assures that the maxWallTime - // for a page is always equal to or greater than that for all earlier pages. - // In other words, it assures that the maxWallTime for a page is not only - // the maximum timestamp for all values it contains, but also for all values - // any earlier pages contain. - s.pushNewPage(fp.maxWallTime, oldArena) + // the maxTime of the old front page. This assures that the maxTime for a + // page is always equal to or greater than that for all earlier pages. 
In + // other words, it assures that the maxTime for a page is not only the + // maximum timestamp for all values it contains, but also for all values any + // earlier pages contain. + s.pushNewPage(fp.maxTime, oldArena) // Update metrics. s.metrics.Pages.Update(int64(s.pages.Len())) @@ -526,7 +526,7 @@ func (s *intervalSkl) LookupTimestampRange(from, to []byte, opt rangeOptions) ca // different txnID than our current cacheValue result (val), then we // need to remove the txnID from our result, per the ratcheting policy // for cacheValues. This is tested in TestIntervalSklMaxPageTS. - maxTS := hlc.Timestamp{WallTime: atomic.LoadInt64(&p.maxWallTime)} + maxTS := p.getMaxTimestamp() if maxTS.Less(val.ts) { break } @@ -554,9 +554,9 @@ func (s *intervalSkl) FloorTS() hlc.Timestamp { // filled up, it returns arenaskl.ErrArenaFull. At that point, a new fixed page // must be allocated and used instead. type sklPage struct { - list *arenaskl.Skiplist - maxWallTime int64 // accessed atomically - isFull int32 // accessed atomically + list *arenaskl.Skiplist + maxTime ratchetingTime // accessed atomically + isFull int32 // accessed atomically } func newSklPage(arena *arenaskl.Arena) *sklPage { @@ -802,6 +802,54 @@ func (p *sklPage) ensureFloorValue(it *arenaskl.Iterator, to []byte, val cacheVa } func (p *sklPage) ratchetMaxTimestamp(ts hlc.Timestamp) { + new := makeRatchetingTime(ts) + for { + old := ratchetingTime(atomic.LoadInt64((*int64)(&p.maxTime))) + if new <= old { + break + } + + if atomic.CompareAndSwapInt64((*int64)(&p.maxTime), int64(old), int64(new)) { + break + } + } +} + +func (p *sklPage) getMaxTimestamp() hlc.Timestamp { + return ratchetingTime(atomic.LoadInt64((*int64)(&p.maxTime))).get() +} + +// ratchetingTime is a compressed representation of an hlc.Timestamp, reduced +// down to 64 bits to support atomic access. +// +// ratchetingTime implements compression such that any loss of information when +// passing through the type results in the resulting Timestamp being ratcheted +// to a larger value. This provides the guarantee that the following relation +// holds, regardless of the value of x: +// +// x.LessEq(makeRatchetingTime(x).get()) +// +// It also provides the guarantee that if the synthetic flag is set on the +// initial timestamp, then this flag is set on the resulting Timestamp. So the +// following relation is guaranteed to hold, regardless of the value of x: +// +// x.IsFlagSet(SYNTHETIC) == makeRatchetingTime(x).get().IsFlagSet(SYNTHETIC) +// +// Compressed ratchetingTime values compare such that taking the maximum of any +// two ratchetingTime values and converting that back to a Timestamp is always +// equal to or larger than the equivalent call through the Timestamp.Forward +// method. So the following relation is guaranteed to hold, regardless of the +// value of x or y: +// +// z := max(makeRatchetingTime(x), makeRatchetingTime(y)).get() +// x.Forward(y).LessEq(z) +// +// Bit layout (LSB to MSB): +// bits 0: inverted synthetic flag +// bits 1 - 63: upper 63 bits of wall time +type ratchetingTime int64 + +func makeRatchetingTime(ts hlc.Timestamp) ratchetingTime { // Cheat and just use the max wall time portion of the timestamp, since it's // fine for the max timestamp to be a bit too large. This is the case // because it's always safe to increase the timestamp in a range. 
It's also @@ -815,23 +863,38 @@ func (p *sklPage) ratchetMaxTimestamp(ts hlc.Timestamp) { // We could use an atomic.Value to store a "MaxValue" cacheValue for a given // page, but this would be more expensive and it's not clear that it would // be worth it. - new := ts.WallTime + rt := ratchetingTime(ts.WallTime) if ts.Logical > 0 { - new++ + rt++ } - // TODO(nvanbenschoten): propagate the timestamp synthetic bit through the - // page's max time. - for { - old := atomic.LoadInt64(&p.maxWallTime) - if new <= old { - break - } + // Similarly, cheat and use the last bit in the wall time to indicate + // whether the timestamp is synthetic or not. Do so by first rounding up the + // last bit of the wall time so that it is empty. This is safe for the same + // reason that rounding up the logical portion of the timestamp in the wall + // time is safe (see above). + // + // We use the last bit to indicate that the flag is NOT set. This ensures + // that if two timestamps have the same ordering but different values for + // the synthetic flag, the timestamp without the synthetic flag has a larger + // ratchetingTime value. This follows how Timestamp.Forward treats the flag. + if rt&1 == 1 { + rt++ + } + if !ts.Synthetic { + rt |= 1 + } - if atomic.CompareAndSwapInt64(&p.maxWallTime, old, new) { - break - } + return rt +} + +func (rt ratchetingTime) get() hlc.Timestamp { + var ts hlc.Timestamp + ts.WallTime = int64(rt &^ 1) + if rt&1 == 0 { + ts.Synthetic = true } + return ts } // ratchetPolicy defines the behavior a ratcheting attempt should take when @@ -899,12 +962,9 @@ func (p *sklPage) ratchetValueSet( // must handle with care. // Ratchet the max timestamp. - keyTs, gapTs := keyVal.ts, gapVal.ts - if gapTs.Less(keyTs) { - p.ratchetMaxTimestamp(keyTs) - } else { - p.ratchetMaxTimestamp(gapTs) - } + maxTs := keyVal.ts + maxTs.Forward(gapVal.ts) + p.ratchetMaxTimestamp(maxTs) // Remove the hasKey and hasGap flags from the meta. These will be // replaced below. @@ -1151,9 +1211,9 @@ func encodeValueSet(b []byte, keyVal, gapVal cacheValue) (ret []byte, meta uint1 } func decodeValue(b []byte) (ret []byte, val cacheValue) { - // TODO(nvanbenschoten): decode the timestamp synthetic bit. val.ts.WallTime = int64(binary.BigEndian.Uint64(b)) val.ts.Logical = int32(binary.BigEndian.Uint32(b[8:])) + val.ts.Synthetic = b[12] != 0 var err error if val.txnID, err = uuid.FromBytes(b[encodedTsSize:encodedValSize]); err != nil { panic(err) @@ -1163,11 +1223,15 @@ func decodeValue(b []byte) (ret []byte, val cacheValue) { } func encodeValue(b []byte, val cacheValue) []byte { - // TODO(nvanbenschoten): encode the timestamp synthetic bit. l := len(b) b = b[:l+encodedValSize] binary.BigEndian.PutUint64(b[l:], uint64(val.ts.WallTime)) binary.BigEndian.PutUint32(b[l+8:], uint32(val.ts.Logical)) + syn := byte(0) + if val.ts.Synthetic { + syn = 1 + } + b[l+12] = syn if _, err := val.txnID.MarshalTo(b[l+encodedTsSize:]); err != nil { panic(err) } diff --git a/pkg/kv/kvserver/tscache/interval_skl_test.go b/pkg/kv/kvserver/tscache/interval_skl_test.go index 603a4a01706b..1134d1a74ad4 100644 --- a/pkg/kv/kvserver/tscache/interval_skl_test.go +++ b/pkg/kv/kvserver/tscache/interval_skl_test.go @@ -69,7 +69,7 @@ func (s *intervalSkl) setFixedPageSize(pageSize uint32) { s.pageSize = pageSize s.pageSizeFixed = true s.pages.Init() // clear - s.pushNewPage(0 /* maxWallTime */, nil /* arena */) + s.pushNewPage(0 /* maxTime */, nil /* arena */) } // setMinPages sets the minimum number of pages intervalSkl will evict down to. 
@@ -80,27 +80,29 @@ func (s *intervalSkl) setMinPages(minPages int) { } func TestIntervalSklAdd(t *testing.T) { - ts1 := makeTS(200, 0) - ts2 := makeTS(200, 201) - ts3Ceil := makeTS(201, 0) + testutils.RunTrueAndFalse(t, "synthetic", func(t *testing.T, synthetic bool) { + ts1 := makeTS(200, 0).WithSynthetic(synthetic) + ts2 := makeTS(200, 201).WithSynthetic(synthetic) + ts2Ceil := makeTS(202, 0).WithSynthetic(synthetic) - val1 := makeVal(ts1, "1") - val2 := makeVal(ts2, "2") + val1 := makeVal(ts1, "1") + val2 := makeVal(ts2, "2") - s := newIntervalSkl(nil /* clock */, 0 /* minRet */, makeSklMetrics()) - - s.Add([]byte("apricot"), val1) - require.Equal(t, ts1.WallTime, s.frontPage().maxWallTime) - require.Equal(t, emptyVal, s.LookupTimestamp([]byte("apple"))) - require.Equal(t, val1, s.LookupTimestamp([]byte("apricot"))) - require.Equal(t, emptyVal, s.LookupTimestamp([]byte("banana"))) + s := newIntervalSkl(nil /* clock */, 0 /* minRet */, makeSklMetrics()) - s.Add([]byte("banana"), val2) - require.Equal(t, ts3Ceil.WallTime, s.frontPage().maxWallTime) - require.Equal(t, emptyVal, s.LookupTimestamp([]byte("apple"))) - require.Equal(t, val1, s.LookupTimestamp([]byte("apricot"))) - require.Equal(t, val2, s.LookupTimestamp([]byte("banana"))) - require.Equal(t, emptyVal, s.LookupTimestamp([]byte("cherry"))) + s.Add([]byte("apricot"), val1) + require.Equal(t, ts1, s.frontPage().maxTime.get()) + require.Equal(t, emptyVal, s.LookupTimestamp([]byte("apple"))) + require.Equal(t, val1, s.LookupTimestamp([]byte("apricot"))) + require.Equal(t, emptyVal, s.LookupTimestamp([]byte("banana"))) + + s.Add([]byte("banana"), val2) + require.Equal(t, ts2Ceil, s.frontPage().maxTime.get()) + require.Equal(t, emptyVal, s.LookupTimestamp([]byte("apple"))) + require.Equal(t, val1, s.LookupTimestamp([]byte("apricot"))) + require.Equal(t, val2, s.LookupTimestamp([]byte("banana"))) + require.Equal(t, emptyVal, s.LookupTimestamp([]byte("cherry"))) + }) } func TestIntervalSklSingleRange(t *testing.T) { @@ -450,7 +452,7 @@ func TestIntervalSklSingleKeyRanges(t *testing.T) { // Don't allow inverted ranges. require.Panics(t, func() { s.AddRange([]byte("kiwi"), []byte("apple"), 0, val1) }) - require.Equal(t, int64(0), s.frontPage().maxWallTime) + require.Equal(t, ratchetingTime(0), s.frontPage().maxTime) require.Equal(t, emptyVal, s.LookupTimestamp([]byte("apple"))) require.Equal(t, emptyVal, s.LookupTimestamp([]byte("banana"))) require.Equal(t, emptyVal, s.LookupTimestamp([]byte("kiwi"))) @@ -734,71 +736,76 @@ func TestIntervalSklLookupRangeSingleKeyRanges(t *testing.T) { }) } -// TestIntervalSklLookupEqualsEarlierMaxWallTime tests that we properly handle +// TestIntervalSklLookupEqualsEarlierMaxTime tests that we properly handle // the lookup when the timestamp for a range found in the later page is equal to -// the maxWallTime of the earlier page. -func TestIntervalSklLookupEqualsEarlierMaxWallTime(t *testing.T) { +// the maxTime of the earlier page. 
+func TestIntervalSklLookupEqualsEarlierMaxTime(t *testing.T) { ts1 := makeTS(200, 0) // without Logical part ts2 := makeTS(200, 1) // with Logical part - ts2Ceil := makeTS(201, 0) + ts2Ceil := makeTS(202, 0) txnID1 := "1" txnID2 := "2" - testutils.RunTrueAndFalse(t, "tsWithLogicalPart", func(t *testing.T, logicalPart bool) { - s := newIntervalSkl(nil /* clock */, 0 /* minRet */, makeSklMetrics()) - s.floorTS = floorTS + testutils.RunTrueAndFalse(t, "logical", func(t *testing.T, logical bool) { + testutils.RunTrueAndFalse(t, "synthetic", func(t *testing.T, synthetic bool) { - // Insert an initial value into intervalSkl. - initTS := ts1 - if logicalPart { - initTS = ts2 - } - origVal := makeVal(initTS, txnID1) - s.AddRange([]byte("banana"), []byte("orange"), 0, origVal) + s := newIntervalSkl(nil /* clock */, 0 /* minRet */, makeSklMetrics()) + s.floorTS = floorTS - // Verify the later page's maxWallTime is what we expect. - expMaxTS := ts1 - if logicalPart { - expMaxTS = ts2Ceil - } - require.Equal(t, expMaxTS.WallTime, s.frontPage().maxWallTime) - - // Rotate the page so that new writes will go to a different page. - s.rotatePages(s.frontPage()) - - // Write to overlapping and non-overlapping parts of the new page with - // the values that have the same timestamp as the maxWallTime of the - // earlier page. One value has the same txnID as the previous write in - // the earlier page and one has a different txnID. - valSameID := makeVal(expMaxTS, txnID1) - valDiffID := makeVal(expMaxTS, txnID2) - valNoID := makeValWithoutID(expMaxTS) - s.Add([]byte("apricot"), valSameID) - s.Add([]byte("banana"), valSameID) - s.Add([]byte("orange"), valDiffID) - s.Add([]byte("raspberry"), valDiffID) - - require.Equal(t, valSameID, s.LookupTimestamp([]byte("apricot"))) - require.Equal(t, valSameID, s.LookupTimestamp([]byte("banana"))) - if logicalPart { - // If the initial timestamp had a logical part then - // s.earlier.maxWallTime is inexact (see ratchetMaxTimestamp). When - // we search in the earlier page, we'll find the exact timestamp of - // the overlapping range and realize that its not the same as the - // timestamp of the range in the later page. Because of this, - // ratchetValue WON'T remove the txnID. - require.Equal(t, valDiffID, s.LookupTimestamp([]byte("orange"))) - } else { - // If the initial timestamp did not have a logical part then - // s.earlier.maxWallTime is exact. When we search in the earlier - // page, we'll find the overlapping range and realize that it is the - // same as the timestamp of the range in the later page. Because of - // this, ratchetValue WILL remove the txnID. - require.Equal(t, valNoID, s.LookupTimestamp([]byte("orange"))) - } - require.Equal(t, valDiffID, s.LookupTimestamp([]byte("raspberry"))) - require.Equal(t, floorVal, s.LookupTimestamp([]byte("tomato"))) + // Insert an initial value into intervalSkl. + initTS := ts1 + if logical { + initTS = ts2 + } + initTS = initTS.WithSynthetic(synthetic) + origVal := makeVal(initTS, txnID1) + s.AddRange([]byte("banana"), []byte("orange"), 0, origVal) + + // Verify the later page's maxTime is what we expect. + expMaxTS := ts1 + if logical { + expMaxTS = ts2Ceil + } + expMaxTS = expMaxTS.WithSynthetic(synthetic) + require.Equal(t, expMaxTS, s.frontPage().maxTime.get()) + + // Rotate the page so that new writes will go to a different page. 
+ s.rotatePages(s.frontPage()) + + // Write to overlapping and non-overlapping parts of the new page + // with the values that have the same timestamp as the maxTime of + // the earlier page. One value has the same txnID as the previous + // write in the earlier page and one has a different txnID. + valSameID := makeVal(expMaxTS, txnID1) + valDiffID := makeVal(expMaxTS, txnID2) + valNoID := makeValWithoutID(expMaxTS) + s.Add([]byte("apricot"), valSameID) + s.Add([]byte("banana"), valSameID) + s.Add([]byte("orange"), valDiffID) + s.Add([]byte("raspberry"), valDiffID) + + require.Equal(t, valSameID, s.LookupTimestamp([]byte("apricot"))) + require.Equal(t, valSameID, s.LookupTimestamp([]byte("banana"))) + if logical { + // If the initial timestamp had a logical part then + // s.earlier.maxTime is inexact (see ratchetMaxTimestamp). When + // we search in the earlier page, we'll find the exact timestamp + // of the overlapping range and realize that its not the same as + // the timestamp of the range in the later page. Because of + // this, ratchetValue WON'T remove the txnID. + require.Equal(t, valDiffID, s.LookupTimestamp([]byte("orange"))) + } else { + // If the initial timestamp did not have a logical part then + // s.earlier.maxTime is exact. When we search in the earlier + // page, we'll find the overlapping range and realize that it is + // the same as the timestamp of the range in the later page. + // Because of this, ratchetValue WILL remove the txnID. + require.Equal(t, valNoID, s.LookupTimestamp([]byte("orange"))) + } + require.Equal(t, valDiffID, s.LookupTimestamp([]byte("raspberry"))) + require.Equal(t, floorVal, s.LookupTimestamp([]byte("tomato"))) + }) }) } @@ -901,6 +908,45 @@ func TestIntervalSklMinRetentionWindow(t *testing.T) { require.Equal(t, s.pages.Len(), s.minPages) } +// TestIntervalSklRotateWithSyntheticTimestamps tests that if a page is evicted +// and subsumed by the floor timestamp, then the floor timestamp will continue +// to carry the synthtic flag, if necessary. +func TestIntervalSklRotateWithSyntheticTimestamps(t *testing.T) { + manual := hlc.NewManualClock(200) + clock := hlc.NewClock(manual.UnixNano, time.Nanosecond) + + const minRet = 500 + s := newIntervalSkl(clock, minRet, makeSklMetrics()) + s.setFixedPageSize(1500) + s.floorTS = floorTS + + // Add an initial value with a synthetic timestamp. + // Rotate the page so it's alone. + origKey := []byte("banana") + origTS := clock.Now().WithSynthetic(true) + origVal := makeVal(origTS, "1") + s.Add(origKey, origVal) + s.rotatePages(s.frontPage()) + + // We should still be able to look up the initial value. + require.Equal(t, origVal, s.LookupTimestamp(origKey)) + + // Increment the clock so that the original value is not in the minimum + // retention window. Rotate the pages and the back page should be evicted. + manual.Increment(600) + s.rotatePages(s.frontPage()) + + // The initial value's page was evicted, so it should no longer exist. + // However, since it had the highest timestamp of all values added, its + // timestamp should still exist. Critically, this timestamp should still + // be marked as synthetic. 
+ newVal := s.LookupTimestamp(origKey) + require.NotEqual(t, origVal, newVal, "the original value should be evicted") + require.Equal(t, uuid.Nil, newVal.txnID, "the original value's txn ID should be lost") + require.Equal(t, origVal.ts, newVal.ts, "the original value's timestamp should persist") + require.True(t, newVal.ts.Synthetic, "the synthetic flag should persist") +} + func TestIntervalSklConcurrency(t *testing.T) { defer leaktest.AfterTest(t)() defer util.EnableRacePreemptionPoints()() @@ -974,6 +1020,9 @@ func TestIntervalSklConcurrency(t *testing.T) { if useClock { ts = clock.Now() } + if rng.Intn(2) == 0 { + ts = ts.WithSynthetic(true) + } nowVal := cacheValue{ts: ts, txnID: txnID} s.AddRange(from, to, opt, nowVal) @@ -1075,6 +1124,9 @@ func TestIntervalSklConcurrentVsSequential(t *testing.T) { if useClock { ts = clock.Now() } + if rng.Intn(2) == 0 { + ts = ts.WithSynthetic(true) + } a.val = cacheValue{ts: ts, txnID: txnIDs[i]} // This is a lot of log output so only un-comment to debug. From cb1bb98ca8cb2bbe907b46a8acc7b0622d752c63 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Tue, 5 Jan 2021 14:21:01 -0500 Subject: [PATCH 3/4] kv/tscache: clean up per-op allocations in IntervalSklAdd benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These allocations in the benchmark harness were obscuring the behavior of the timestamp cache. Also, remove the unused `parallel` knob in BenchmarkIntervalSklAddAndLookup. ``` name old time/op new time/op delta IntervalSklAddAndLookup/frac_0-16 2.20µs ± 3% 0.61µs ± 5% -72.27% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_1-16 2.07µs ± 3% 0.61µs ± 6% -70.80% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_2-16 1.94µs ± 1% 0.60µs ± 8% -69.02% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_3-16 1.86µs ± 4% 0.59µs ± 9% -68.03% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_4-16 1.76µs ± 5% 0.61µs ± 7% -65.68% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_5-16 1.57µs ± 7% 0.60µs ± 4% -61.64% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_6-16 1.37µs ± 7% 0.60µs ± 6% -56.41% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_7-16 1.18µs ± 1% 0.60µs ± 7% -49.05% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_8-16 1.05µs ± 1% 0.61µs ± 9% -42.30% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_9-16 919ns ± 4% 643ns ±29% -30.02% (p=0.008 n=5+5) IntervalSklAdd/size_1-16 2.31µs ± 1% 1.77µs ± 2% -23.35% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_10-16 775ns ± 9% 603ns ± 7% -22.21% (p=0.008 n=5+5) IntervalSklAdd/size_10-16 3.96µs ± 2% 3.21µs ± 2% -18.80% (p=0.008 n=5+5) IntervalSklAdd/size_10000-16 4.31µs ± 1% 3.52µs ± 2% -18.43% (p=0.008 n=5+5) IntervalSklAdd/size_1000-16 3.96µs ± 2% 3.32µs ± 3% -16.21% (p=0.008 n=5+5) IntervalSklAdd/size_100-16 3.85µs ± 1% 3.25µs ± 4% -15.51% (p=0.008 n=5+5) IntervalSklAdd/size_1000000-16 8.02µs ± 3% 7.28µs ± 1% -9.27% (p=0.008 n=5+5) IntervalSklAdd/size_10000000-16 16.5µs ± 2% 15.8µs ± 2% -3.91% (p=0.008 n=5+5) IntervalSklAdd/size_100000-16 5.44µs ± 9% 4.82µs ±14% ~ (p=0.095 n=5+5) IntervalSklAdd/size_100000000-16 42.1µs ± 4% 41.4µs ± 2% ~ (p=0.421 n=5+5) name old alloc/op new alloc/op delta IntervalSklAddAndLookup/frac_0-16 789B ± 4% 0B -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_1-16 716B ± 3% 0B -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_2-16 632B ± 2% 0B -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_3-16 565B ± 5% 0B -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_4-16 473B ± 2% 0B -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_5-16 323B 
±74% 0B -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_6-16 75.8B ± 1% 0.0B -100.00% (p=0.000 n=4+5) IntervalSklAddAndLookup/frac_7-16 65.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_8-16 55.4B ± 1% 0.0B -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_9-16 45.6B ± 1% 0.0B -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_10-16 36.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) IntervalSklAdd/size_1-16 240B ± 2% 99B ± 0% -58.82% (p=0.008 n=5+5) IntervalSklAdd/size_10-16 373B ± 7% 202B ± 2% -45.73% (p=0.008 n=5+5) IntervalSklAdd/size_100-16 339B ± 4% 188B ± 3% -44.52% (p=0.008 n=5+5) IntervalSklAdd/size_10000-16 526B ± 8% 302B ±23% -42.63% (p=0.008 n=5+5) IntervalSklAdd/size_1000-16 365B ± 5% 215B ± 3% -41.02% (p=0.008 n=5+5) IntervalSklAdd/size_100000-16 919B ± 8% 763B ± 4% -16.99% (p=0.008 n=5+5) IntervalSklAdd/size_1000000-16 2.25kB ± 4% 1.92kB ± 6% -14.45% (p=0.008 n=5+5) IntervalSklAdd/size_10000000-16 6.11kB ± 6% 5.65kB ± 9% ~ (p=0.056 n=5+5) IntervalSklAdd/size_100000000-16 16.7kB ± 5% 17.0kB ±12% ~ (p=0.841 n=5+5) name old allocs/op new allocs/op delta IntervalSklAdd/size_1-16 6.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAdd/size_10-16 6.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAdd/size_100-16 6.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAdd/size_1000-16 6.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAdd/size_10000-16 6.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAdd/size_100000-16 6.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAdd/size_1000000-16 6.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAdd/size_10000000-16 6.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAdd/size_100000000-16 6.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_0-16 6.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_1-16 5.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_2-16 5.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_3-16 4.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_4-16 4.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_5-16 3.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_6-16 3.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_7-16 3.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_8-16 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_9-16 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) IntervalSklAddAndLookup/frac_10-16 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) ``` --- pkg/kv/kvserver/tscache/interval_skl_test.go | 76 ++++++++++++-------- 1 file changed, 47 insertions(+), 29 deletions(-) diff --git a/pkg/kv/kvserver/tscache/interval_skl_test.go b/pkg/kv/kvserver/tscache/interval_skl_test.go index 1134d1a74ad4..7eb02934877e 100644 --- a/pkg/kv/kvserver/tscache/interval_skl_test.go +++ b/pkg/kv/kvserver/tscache/interval_skl_test.go @@ -1326,11 +1326,23 @@ func BenchmarkIntervalSklAdd(b *testing.B) { size := 1 for i := 0; i < 9; i++ { b.Run(fmt.Sprintf("size_%d", size), func(b *testing.B) { - for iter := 0; iter < b.N; iter++ { + type op struct { + from, to []byte + val cacheValue + } + ops := make([]op, b.N) + for i := range ops { rnd := int64(rng.Int31n(max)) - from := []byte(fmt.Sprintf("%020d", rnd)) - to := []byte(fmt.Sprintf("%020d", rnd+int64(size-1))) - s.AddRange(from, to, 0, makeVal(clock.Now(), txnID)) + ops[i] = op{ + from: []byte(fmt.Sprintf("%020d", rnd)), + to: []byte(fmt.Sprintf("%020d", rnd+int64(size-1))), + val: makeVal(clock.Now(), txnID), 
+ } + } + + b.ResetTimer() + for _, op := range ops { + s.AddRange(op.from, op.to, 0, op.val) } }) @@ -1339,7 +1351,6 @@ func BenchmarkIntervalSklAdd(b *testing.B) { } func BenchmarkIntervalSklAddAndLookup(b *testing.B) { - const parallel = 1 const max = 1000000000 // max size of range const data = 500000 // number of ranges const txnID = "123" @@ -1356,33 +1367,40 @@ func BenchmarkIntervalSklAddAndLookup(b *testing.B) { for i := 0; i <= 10; i++ { b.Run(fmt.Sprintf("frac_%d", i), func(b *testing.B) { - var wg sync.WaitGroup - - for p := 0; p < parallel; p++ { - wg.Add(1) - - go func(i int) { - defer wg.Done() - - rng := rand.New(rand.NewSource(timeutil.Now().UnixNano())) - - for n := 0; n < b.N/parallel; n++ { - readFrac := rng.Int31n(10) - keyNum := rng.Int31n(max) - - if readFrac < int32(i) { - key := []byte(fmt.Sprintf("%020d", keyNum)) - s.LookupTimestamp(key) - } else { - from, to := makeRange(keyNum) - nowVal := makeVal(clock.Now(), txnID) - s.AddRange(from, to, excludeFrom|excludeTo, nowVal) - } + type op struct { + read bool + from, to []byte + val cacheValue + } + ops := make([]op, b.N) + for i := range ops { + readFrac := rng.Int31n(10) + keyNum := rng.Int31n(max) + + if readFrac < int32(i) { + ops[i] = op{ + read: true, + from: []byte(fmt.Sprintf("%020d", keyNum)), + } + } else { + from, to := makeRange(keyNum) + ops[i] = op{ + read: false, + from: from, + to: to, + val: makeVal(clock.Now(), txnID), } - }(i) + } } - wg.Wait() + b.ResetTimer() + for _, op := range ops { + if op.read { + s.LookupTimestamp(op.from) + } else { + s.AddRange(op.from, op.to, excludeFrom|excludeTo, op.val) + } + } }) } } From 1db343cb9c935adc946e408f73eb8f8319f21942 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Tue, 5 Jan 2021 17:25:02 -0500 Subject: [PATCH 4/4] kv/tscache: use unsafe transmute for cacheValue encoding/decoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit switches the byte encoding of a cacheValue from a custom encoding to an encoding that directly matches the in-memory representation of the struct. This allows for encoding and decoding to use an unsafe re-interpretation of the struct as a bytes slice, eliminating all bounds checks and byte assignments and replacing them with a single memcpy. The effect of this is dramatically faster encoding and decoding time in exchange for a slightly larger encoded size. Encoding time (run once when adding to the timestamp cache) drops from **9ns** to **.3ns**. Decoding time (run once per conflicting key when adding and reading to the timestamp cache) drops from **16ns** to **3ns**. In exchange, the actual encoded byte size increases from 29 bytes to 32 bytes. This isn't great as it means that skiplist pages will fill up **10%** faster, but that seems well worth the gains in encoding/decoding speed. 
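For a sense of the technique (the real change to `encodeValue`/`decodeValue` is in the diff below), here is a self-contained sketch that round-trips a simplified stand-in struct through an unsafe byte reinterpretation; the field names and the resulting 32-byte size are assumptions chosen to resemble `cacheValue`. A raw memory copy inherits the machine's field layout and endianness, which is acceptable here only because the bytes live in an in-process skiplist arena and are never persisted or sent over the wire.

```
package main

import (
	"fmt"
	"unsafe"
)

// val is a simplified stand-in for tscache's cacheValue: a timestamp plus a
// 16-byte transaction ID. With padding, unsafe.Sizeof(val{}) is 32 bytes on
// a 64-bit machine.
type val struct {
	wallTime  int64
	logical   int32
	synthetic bool
	txnID     [16]byte
}

// encodedSize is the full in-memory size of val, padding included; a raw
// memory copy necessarily encodes the padding bytes too.
const encodedSize = int(unsafe.Sizeof(val{}))

// encode appends the raw bytes of v to b: a single copy rather than
// per-field serialization.
func encode(b []byte, v val) []byte {
	p := (*[encodedSize]byte)(unsafe.Pointer(&v))
	return append(b, p[:]...)
}

// decode copies encodedSize bytes from b into a fresh val and returns the
// remainder of the buffer.
func decode(b []byte) ([]byte, val) {
	var v val
	p := (*[encodedSize]byte)(unsafe.Pointer(&v))
	copy(p[:], b)
	return b[encodedSize:], v
}

func main() {
	in := val{wallTime: 200, logical: 1, synthetic: true, txnID: [16]byte{0xde, 0xad}}
	enc := encode(nil, in)
	rest, out := decode(enc)
	fmt.Println(encodedSize, len(rest), out == in) // 32 0 true
}
```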
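A methodological note on the skiplist numbers further below: they use the harness from the previous patch, which materializes all randomized inputs before `b.ResetTimer()` so that harness formatting and allocation never land in the measured loop. A generic sketch of that pattern (not the tscache benchmarks themselves):

```
package bench

import (
	"fmt"
	"math/rand"
	"testing"
)

// insert stands in for whatever operation is being measured.
func insert(key []byte) {}

// BenchmarkInsert pre-generates every key up front so the measured loop does
// nothing but call the operation under test; otherwise rand and fmt.Sprintf
// dominate both time/op and allocs/op for cheap operations.
func BenchmarkInsert(b *testing.B) {
	rng := rand.New(rand.NewSource(1))
	keys := make([][]byte, b.N)
	for i := range keys {
		keys[i] = []byte(fmt.Sprintf("%020d", rng.Int63n(1000000000)))
	}

	b.ResetTimer()
	for _, k := range keys {
		insert(k)
	}
}
```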
``` name old time/op new time/op delta IntervalSklEncodeValue/logical=false,synthetic=true,txnID=true-16 9.27ns ±47% 0.32ns ±37% -96.58% (p=0.008 n=5+5) IntervalSklEncodeValue/logical=true,synthetic=true,txnID=true-16 9.66ns ±54% 0.33ns ±37% -96.57% (p=0.008 n=5+5) IntervalSklEncodeValue/logical=true,synthetic=false,txnID=true-16 9.31ns ±40% 0.32ns ±42% -96.56% (p=0.008 n=5+5) IntervalSklEncodeValue/logical=true,synthetic=true,txnID=false-16 8.73ns ±31% 0.30ns ±39% -96.53% (p=0.008 n=5+5) IntervalSklEncodeValue/logical=true,synthetic=false,txnID=false-16 8.93ns ±34% 0.32ns ±28% -96.44% (p=0.008 n=5+5) IntervalSklEncodeValue/logical=false,synthetic=false,txnID=true-16 9.03ns ±33% 0.32ns ±50% -96.42% (p=0.008 n=5+5) IntervalSklEncodeValue/logical=false,synthetic=true,txnID=false-16 9.14ns ±32% 0.33ns ±35% -96.36% (p=0.008 n=5+5) IntervalSklEncodeValue/logical=false,synthetic=false,txnID=false-16 8.72ns ±27% 0.33ns ±43% -96.17% (p=0.008 n=5+5) IntervalSklDecodeValue/logical=true,synthetic=false,txnID=true-16 16.5ns ±30% 2.8ns ±28% -82.77% (p=0.008 n=5+5) IntervalSklDecodeValue/logical=false,synthetic=true,txnID=true-16 16.0ns ±28% 2.8ns ±25% -82.73% (p=0.008 n=5+5) IntervalSklDecodeValue/logical=true,synthetic=true,txnID=true-16 16.3ns ±36% 2.8ns ±36% -82.64% (p=0.008 n=5+5) IntervalSklDecodeValue/logical=false,synthetic=false,txnID=false-16 16.4ns ±25% 2.9ns ±31% -82.59% (p=0.008 n=5+5) IntervalSklDecodeValue/logical=false,synthetic=true,txnID=false-16 16.2ns ±33% 2.8ns ±29% -82.52% (p=0.008 n=5+5) IntervalSklDecodeValue/logical=false,synthetic=false,txnID=true-16 16.2ns ±31% 2.8ns ±29% -82.43% (p=0.008 n=5+5) IntervalSklDecodeValue/logical=true,synthetic=false,txnID=false-16 15.9ns ±25% 2.8ns ±28% -82.20% (p=0.008 n=5+5) IntervalSklDecodeValue/logical=true,synthetic=true,txnID=false-16 16.4ns ±35% 3.1ns ±60% -81.25% (p=0.008 n=5+5) name old alloc/op new alloc/op delta IntervalSklDecodeValue/logical=false,synthetic=false,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklDecodeValue/logical=false,synthetic=false,txnID=true-16 0.00B 0.00B ~ (all equal) IntervalSklDecodeValue/logical=false,synthetic=true,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklDecodeValue/logical=false,synthetic=true,txnID=true-16 0.00B 0.00B ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=false,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=false,txnID=true-16 0.00B 0.00B ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=true,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=true,txnID=true-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=false,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=false,txnID=true-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=true,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=true,txnID=true-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=false,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=false,txnID=true-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=true,txnID=false-16 0.00B 0.00B ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=true,txnID=true-16 0.00B 0.00B ~ (all equal) name old allocs/op new allocs/op delta IntervalSklDecodeValue/logical=false,synthetic=false,txnID=false-16 0.00 0.00 ~ (all equal) 
IntervalSklDecodeValue/logical=false,synthetic=false,txnID=true-16 0.00 0.00 ~ (all equal) IntervalSklDecodeValue/logical=false,synthetic=true,txnID=false-16 0.00 0.00 ~ (all equal) IntervalSklDecodeValue/logical=false,synthetic=true,txnID=true-16 0.00 0.00 ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=false,txnID=false-16 0.00 0.00 ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=false,txnID=true-16 0.00 0.00 ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=true,txnID=false-16 0.00 0.00 ~ (all equal) IntervalSklDecodeValue/logical=true,synthetic=true,txnID=true-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=false,txnID=false-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=false,txnID=true-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=true,txnID=false-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=false,synthetic=true,txnID=true-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=false,txnID=false-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=false,txnID=true-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=true,txnID=false-16 0.00 0.00 ~ (all equal) IntervalSklEncodeValue/logical=true,synthetic=true,txnID=true-16 0.00 0.00 ~ (all equal) ``` We see these effects in the rest of the timestamp cache benchmarks. In the benchmarks with the most overlaps (most calls to decodeValue per op) we see up to a **22%** speedup. Meanwhile, over the lifetime of those benchmarks, we see up to a **21%** increase in total allocated bytes (amortized across all ops). ``` name old time/op new time/op delta IntervalSklAdd/size_100000000-16 41.4µs ± 2% 32.3µs ± 7% -22.12% (p=0.000 n=10+9) IntervalSklAdd/size_10000000-16 16.2µs ± 1% 12.7µs ± 4% -21.57% (p=0.000 n=9+10) IntervalSklAdd/size_1000000-16 7.33µs ± 1% 6.18µs ± 4% -15.62% (p=0.000 n=9+10) TimestampCacheInsertion-16 1.96µs ± 5% 1.72µs ± 6% -12.06% (p=0.000 n=10+10) IntervalSklAdd/size_100000-16 4.63µs ± 2% 4.18µs ± 1% -9.92% (p=0.000 n=10+8) IntervalSklAdd/size_1000-16 3.32µs ± 3% 3.16µs ± 3% -4.97% (p=0.000 n=10+10) IntervalSklAdd/size_100-16 3.21µs ± 2% 3.06µs ± 4% -4.84% (p=0.000 n=10+10) IntervalSklAdd/size_10-16 3.19µs ± 3% 3.05µs ± 3% -4.25% (p=0.000 n=10+10) IntervalSklAdd/size_10000-16 3.52µs ± 3% 3.43µs ± 3% -2.47% (p=0.005 n=10+10) IntervalSklAdd/size_1-16 1.70µs ± 1% 1.67µs ± 3% -1.57% (p=0.016 n=9+10) IntervalSklAddAndLookup/frac_0-16 592ns ± 6% 509ns ±33% ~ (p=0.469 n=10+10) IntervalSklAddAndLookup/frac_1-16 592ns ± 8% 509ns ±32% ~ (p=0.424 n=10+10) IntervalSklAddAndLookup/frac_2-16 590ns ± 5% 510ns ±33% ~ (p=0.481 n=10+10) IntervalSklAddAndLookup/frac_3-16 585ns ± 5% 514ns ±32% ~ (p=0.484 n=9+10) IntervalSklAddAndLookup/frac_4-16 592ns ± 4% 511ns ±34% ~ (p=0.483 n=9+10) IntervalSklAddAndLookup/frac_5-16 591ns ± 8% 504ns ±35% ~ (p=0.165 n=10+10) IntervalSklAddAndLookup/frac_6-16 586ns ± 6% 508ns ±36% ~ (p=0.210 n=10+10) IntervalSklAddAndLookup/frac_7-16 588ns ± 6% 516ns ±46% ~ (p=0.344 n=9+10) IntervalSklAddAndLookup/frac_8-16 593ns ± 5% 521ns ±54% ~ (p=0.240 n=10+10) IntervalSklAddAndLookup/frac_9-16 594ns ± 5% 546ns ±54% ~ (p=0.739 n=10+10) IntervalSklAddAndLookup/frac_10-16 595ns ± 4% 525ns ±64% ~ (p=0.138 n=10+10) name old alloc/op new alloc/op delta IntervalSklAdd/size_1-16 99.0B ± 0% 99.0B ± 0% ~ (all equal) IntervalSklAdd/size_10-16 201B ± 5% 217B ±17% ~ (p=0.986 n=10+10) IntervalSklAdd/size_1000-16 243B ±26% 220B ±15% ~ (p=0.913 n=10+8) 
IntervalSklAdd/size_10000-16 325B ±19% 352B ±24% ~ (p=0.159 n=10+10) IntervalSklAdd/size_100000-16 715B ±26% 737B ±23% ~ (p=0.670 n=10+10) IntervalSklAdd/size_1000000-16 1.92kB ±12% 1.93kB ±20% ~ (p=0.781 n=10+10) IntervalSklAddAndLookup/frac_0-16 0.00B 0.00B ~ (all equal) IntervalSklAddAndLookup/frac_1-16 0.00B 0.00B ~ (all equal) IntervalSklAddAndLookup/frac_2-16 0.00B 0.00B ~ (all equal) IntervalSklAddAndLookup/frac_3-16 0.00B 0.00B ~ (all equal) IntervalSklAddAndLookup/frac_4-16 0.00B 0.00B ~ (all equal) IntervalSklAddAndLookup/frac_5-16 0.00B 0.00B ~ (all equal) IntervalSklAddAndLookup/frac_6-16 0.00B 0.00B ~ (all equal) IntervalSklAddAndLookup/frac_7-16 0.00B 0.00B ~ (all equal) IntervalSklAddAndLookup/frac_8-16 0.00B 0.00B ~ (all equal) IntervalSklAddAndLookup/frac_9-16 0.00B 0.00B ~ (all equal) IntervalSklAddAndLookup/frac_10-16 0.00B 0.00B ~ (all equal) IntervalSklAdd/size_100000000-16 16.3kB ± 4% 17.3kB ± 2% +5.85% (p=0.000 n=9+9) IntervalSklAdd/size_10000000-16 5.47kB ± 8% 5.83kB ±10% +6.53% (p=0.010 n=9+10) TimestampCacheInsertion-16 631B ± 2% 677B ± 3% +7.21% (p=0.000 n=10+10) IntervalSklAdd/size_100-16 194B ± 6% 236B ±28% +21.71% (p=0.012 n=10+10) name old allocs/op new allocs/op delta TimestampCacheInsertion-16 8.00 ± 0% 8.00 ± 0% ~ (all equal) IntervalSklAdd/size_1-16 0.00 0.00 ~ (all equal) IntervalSklAdd/size_10-16 0.00 0.00 ~ (all equal) IntervalSklAdd/size_100-16 0.00 0.00 ~ (all equal) IntervalSklAdd/size_1000-16 0.00 0.00 ~ (all equal) IntervalSklAdd/size_10000-16 0.00 0.00 ~ (all equal) IntervalSklAdd/size_100000-16 0.00 0.00 ~ (all equal) IntervalSklAdd/size_1000000-16 0.00 0.00 ~ (all equal) IntervalSklAdd/size_10000000-16 0.00 0.00 ~ (all equal) IntervalSklAdd/size_100000000-16 0.00 0.00 ~ (all equal) IntervalSklAddAndLookup/frac_0-16 0.00 0.00 ~ (all equal) IntervalSklAddAndLookup/frac_1-16 0.00 0.00 ~ (all equal) IntervalSklAddAndLookup/frac_2-16 0.00 0.00 ~ (all equal) IntervalSklAddAndLookup/frac_3-16 0.00 0.00 ~ (all equal) IntervalSklAddAndLookup/frac_4-16 0.00 0.00 ~ (all equal) IntervalSklAddAndLookup/frac_5-16 0.00 0.00 ~ (all equal) IntervalSklAddAndLookup/frac_6-16 0.00 0.00 ~ (all equal) IntervalSklAddAndLookup/frac_7-16 0.00 0.00 ~ (all equal) IntervalSklAddAndLookup/frac_8-16 0.00 0.00 ~ (all equal) IntervalSklAddAndLookup/frac_9-16 0.00 0.00 ~ (all equal) IntervalSklAddAndLookup/frac_10-16 0.00 0.00 ~ (all equal) ``` --- pkg/kv/kvserver/tscache/interval_skl.go | 36 ++++++-------------- pkg/kv/kvserver/tscache/interval_skl_test.go | 9 +++++ 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/pkg/kv/kvserver/tscache/interval_skl.go b/pkg/kv/kvserver/tscache/interval_skl.go index f51f0b174bd3..450bad6866b5 100644 --- a/pkg/kv/kvserver/tscache/interval_skl.go +++ b/pkg/kv/kvserver/tscache/interval_skl.go @@ -15,17 +15,16 @@ import ( "bytes" "container/list" "context" - "encoding/binary" "fmt" "sync/atomic" "time" + "unsafe" "github.com/andy-kimball/arenaskl" "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/syncutil" - "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" ) @@ -89,9 +88,7 @@ const ( ) const ( - encodedTsSize = 8 + 4 + 1 // walltime + logical + flags - encodedTxnIDSize = uuid.Size - encodedValSize = encodedTsSize + encodedTxnIDSize + encodedValSize = int(unsafe.Sizeof(cacheValue{})) // initialSklPageSize is the initial size of each page in the sklImpl's // intervalSkl. 
The pages start small to limit the memory footprint of @@ -1211,30 +1208,19 @@ func encodeValueSet(b []byte, keyVal, gapVal cacheValue) (ret []byte, meta uint1 } func decodeValue(b []byte) (ret []byte, val cacheValue) { - val.ts.WallTime = int64(binary.BigEndian.Uint64(b)) - val.ts.Logical = int32(binary.BigEndian.Uint32(b[8:])) - val.ts.Synthetic = b[12] != 0 - var err error - if val.txnID, err = uuid.FromBytes(b[encodedTsSize:encodedValSize]); err != nil { - panic(err) - } + // Copy and interpret the byte slice as a cacheValue. + valPtr := (*[encodedValSize]byte)(unsafe.Pointer(&val)) + copy(valPtr[:], b) ret = b[encodedValSize:] - return + return ret, val } func encodeValue(b []byte, val cacheValue) []byte { - l := len(b) - b = b[:l+encodedValSize] - binary.BigEndian.PutUint64(b[l:], uint64(val.ts.WallTime)) - binary.BigEndian.PutUint32(b[l+8:], uint32(val.ts.Logical)) - syn := byte(0) - if val.ts.Synthetic { - syn = 1 - } - b[l+12] = syn - if _, err := val.txnID.MarshalTo(b[l+encodedTsSize:]); err != nil { - panic(err) - } + // Interpret the cacheValue as a byte slice and copy. + prev := len(b) + b = b[:prev+encodedValSize] + valPtr := (*[encodedValSize]byte)(unsafe.Pointer(&val)) + copy(b[prev:], valPtr[:]) return b } diff --git a/pkg/kv/kvserver/tscache/interval_skl_test.go b/pkg/kv/kvserver/tscache/interval_skl_test.go index 7eb02934877e..b6ef956a8c4c 100644 --- a/pkg/kv/kvserver/tscache/interval_skl_test.go +++ b/pkg/kv/kvserver/tscache/interval_skl_test.go @@ -20,6 +20,7 @@ import ( "sync" "testing" "time" + "unsafe" "github.com/andy-kimball/arenaskl" "github.com/cockroachdb/cockroach/pkg/testutils" @@ -1315,6 +1316,14 @@ func TestArenaReuse(t *testing.T) { require.Equal(t, expArenas, len(arenas)) } +// TestEncodedValSize tests that the encodedValSize does not change unexpectedly +// due to changes in the cacheValue struct size. If the struct size does change, +// if should be done so deliberately. +func TestEncodedValSize(t *testing.T) { + require.Equal(t, encodedValSize, int(unsafe.Sizeof(cacheValue{})), "encodedValSize should equal sizeof(cacheValue{})") + require.Equal(t, 32, encodedValSize, "encodedValSize should not change unexpectedly") +} + func BenchmarkIntervalSklAdd(b *testing.B) { const max = 500000000 // max size of range const txnID = "123"