Skip to content

Commit

Permalink
perf(blooms): Avoid tiny string allocations for insert cache (#13487)
Browse files Browse the repository at this point in the history
Signed-off-by: Christian Haudum <christian.haudum@gmail.com>
  • Loading branch information
chaudum authored Jul 11, 2024
1 parent 2c053ee commit 652ad24
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions pkg/storage/bloom/v1/bloom_tokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package v1

import (
"math"
"unsafe"

"github.com/go-kit/log/level"

Expand Down Expand Up @@ -216,10 +217,12 @@ outer:
for itr.Next() {
tok := itr.At()
tokens++

// TODO[owen-d]: [n]byte this
str := string(tok)
_, found := bt.cache[str] // A cache is used ahead of the SBF, as it cuts out the costly operations of scaling bloom filters
if found {
// To avoid allocations, an unsafe string that shares tok's bytes can be used to check membership in the cache.
str := unsafe.String(unsafe.SliceData(tok), len(tok))
// A cache is used ahead of the SBF, as it cuts out the costly operations of scaling bloom filters
if _, found := bt.cache[str]; found {
cachedInserts++
continue
}
Expand All @@ -246,6 +249,7 @@ outer:

// Only register the key in the cache if it was successfully added to the bloom,
// as this can prevent us from trying subsequent copies of the same token.
str = string(tok)
bt.cache[str] = nil
if len(bt.cache) >= cacheSize { // While crude, this has proven efficient in performance testing. This speaks to the similarity in log lines near each other
clear(bt.cache)
Expand Down

0 comments on commit 652ad24

Please sign in to comment.