Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Put a limit on the size of sampler keys #1364

Merged
merged 4 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion sample/dynamic.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ func (d *DynamicSampler) Start() error {
}

func (d *DynamicSampler) GetSampleRate(trace *types.Trace) (rate uint, keep bool, reason string, key string) {
key = d.key.build(trace)
key, n := d.key.build(trace)
if n == maxKeyLength {
d.Logger.Debug().Logf("trace key hit max length of %d, truncating", maxKeyLength)
}
count := int(trace.DescendantCount())
rate = uint(d.dynsampler.GetSampleRateMulti(key, count))
if rate < 1 { // protect against dynsampler being broken even though it shouldn't be
Expand Down
6 changes: 4 additions & 2 deletions sample/dynamic_ema.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ type EMADynamicSampler struct {
burstDetectionDelay uint
maxKeys int
prefix string
lastMetrics map[string]int64

key *traceKey
keyFields []string
Expand Down Expand Up @@ -74,7 +73,10 @@ func (d *EMADynamicSampler) Start() error {
}

func (d *EMADynamicSampler) GetSampleRate(trace *types.Trace) (rate uint, keep bool, reason string, key string) {
key = d.key.build(trace)
key, n := d.key.build(trace)
if n == maxKeyLength {
d.Logger.Debug().Logf("trace key hit max length of %d, truncating", maxKeyLength)
}
count := int(trace.DescendantCount())
rate = uint(d.dynsampler.GetSampleRateMulti(key, count))
if rate < 1 { // protect against dynsampler being broken even though it shouldn't be
Expand Down
5 changes: 4 additions & 1 deletion sample/ema_throughput.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ func (d *EMAThroughputSampler) SetClusterSize(size int) {
}

func (d *EMAThroughputSampler) GetSampleRate(trace *types.Trace) (rate uint, keep bool, reason string, key string) {
key = d.key.build(trace)
key, n := d.key.build(trace)
if n == maxKeyLength {
d.Logger.Debug().Logf("trace key hit max length of %d, truncating", maxKeyLength)
}
count := int(trace.DescendantCount())
rate = uint(d.dynsampler.GetSampleRateMulti(key, count))
if rate < 1 { // protect against dynsampler being broken even though it shouldn't be
Expand Down
5 changes: 4 additions & 1 deletion sample/totalthroughput.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,10 @@ func (d *TotalThroughputSampler) SetClusterSize(size int) {
}

func (d *TotalThroughputSampler) GetSampleRate(trace *types.Trace) (rate uint, keep bool, reason string, key string) {
key = d.key.build(trace)
key, n := d.key.build(trace)
if n == maxKeyLength {
d.Logger.Debug().Logf("trace key hit max length of %d, truncating", maxKeyLength)
}
count := int(trace.DescendantCount())
rate = uint(d.dynsampler.GetSampleRateMulti(key, count))
if rate < 1 { // protect against dynsampler being broken even though it shouldn't be
Expand Down
42 changes: 33 additions & 9 deletions sample/trace_key.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,16 @@ import (
"strconv"
"strings"

"github.com/honeycombio/refinery/generics"
"github.com/honeycombio/refinery/types"
)

// Once a key accumulates this many unique values, it is effectively unbounded
// in cardinality, so we stop looking for more.
// This is a safety valve to protect us from someone sending a high-cardinality
// field in a giant trace.
const maxKeyLength = 100

type traceKey struct {
fields []string
rootOnlyFields []string
Expand Down Expand Up @@ -37,49 +44,66 @@ func newTraceKey(fields []string, useTraceLength bool) *traceKey {
}

// build, builds the trace key based on the configuration of the traceKeyGenerator
func (d *traceKey) build(trace *types.Trace) string {
// returns the number of values used to build the key
func (d *traceKey) build(trace *types.Trace) (string, int) {
fieldCount := 0
// fieldCollector gets all values from the fields listed in the config, even
// if they happen multiple times.
fieldCollector := make(map[string][]string)

// for each field, for each span, get the value of that field
spans := trace.GetSpans()
uniques := generics.NewSetWithCapacity[string](maxKeyLength)
outer:
for _, field := range d.fields {
for _, span := range spans {
if val, ok := span.Data[field]; ok {
u := fmt.Sprintf("%s/%v", field, val)
// don't bother to add it if we've already seen it
if uniques.Contains(u) {
continue
}
uniques.Add(u)
if len(uniques) >= maxKeyLength {
MikeGoldsmith marked this conversation as resolved.
Show resolved Hide resolved
break outer
MikeGoldsmith marked this conversation as resolved.
Show resolved Hide resolved
}
fieldCollector[field] = append(fieldCollector[field], fmt.Sprintf("%v", val))
}
}
}
// ok, now we have a map of fields to a list of all values for that field.
// ok, now we have a map of fields to a list of all unique values for that field.
// (unless it was huge, in which case we have a bunch of them)

var key string
var key strings.Builder
for _, field := range d.fields {
// sort and collapse list
sort.Strings(fieldCollector[field])
var prevStr string
for _, str := range fieldCollector[field] {
if str != prevStr {
key += str + "•"
key.WriteString(str)
key.WriteRune('•')
fieldCount += 1
}
prevStr = str
}
// get ready for the next element
key += ","
key.WriteRune(',')
}

if trace.RootSpan != nil {
for _, field := range d.rootOnlyFields {

if val, ok := trace.RootSpan.Data[field]; ok {
key += fmt.Sprintf("%v,", val)
key.WriteString(fmt.Sprintf("%v,", val))
fieldCount += 1
}
}
}

if d.useTraceLength {
key += strconv.FormatInt(int64(len(spans)), 10)
key.WriteString(strconv.FormatInt(int64(len(spans)), 10))
fieldCount += 1
}

return key
return key.String(), fieldCount
}
50 changes: 46 additions & 4 deletions sample/trace_key_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package sample

import (
"fmt"
"testing"

"github.com/honeycombio/refinery/types"
Expand Down Expand Up @@ -28,7 +29,9 @@ func TestKeyGeneration(t *testing.T) {

expected := "2•,200•,true•,/{slug}/home•,1"

assert.Equal(t, expected, generator.build(trace))
key, n := generator.build(trace)
assert.Equal(t, expected, key)
assert.Equal(t, 5, n)

fields = []string{"http.status_code", "request.path", "app.team.id", "important_field"}
useTraceLength = true
Expand Down Expand Up @@ -71,7 +74,9 @@ func TestKeyGeneration(t *testing.T) {

expected = "2•,200•,true•,/{slug}/home•,4"

assert.Equal(t, expected, generator.build(trace))
key, n = generator.build(trace)
assert.Equal(t, expected, key)
assert.Equal(t, 5, n)

// now test that multiple values across spans are condensed correctly
fields = []string{"http.status_code"}
Expand Down Expand Up @@ -115,7 +120,9 @@ func TestKeyGeneration(t *testing.T) {

expected = "200•404•,4"

assert.Equal(t, expected, generator.build(trace))
key, n = generator.build(trace)
assert.Equal(t, expected, key)
assert.Equal(t, 3, n)

// test field list with root prefix, only include the field from on the root span
// if it exists
Expand Down Expand Up @@ -153,5 +160,40 @@ func TestKeyGeneration(t *testing.T) {

expected = "200•404•,test,2"

assert.Equal(t, expected, generator.build(trace))
key, n = generator.build(trace)
assert.Equal(t, expected, key)
assert.Equal(t, 4, n)
}

// TestKeyLimits verifies that key building stops collecting values once
// maxKeyLength unique field/value pairs have been observed.
func TestKeyLimits(t *testing.T) {
	generator := newTraceKey([]string{"fieldA", "fieldB"}, true)

	trace := &types.Trace{}

	// flood the trace with far more distinct values than the cap allows
	for i := 0; i < 160; i++ {
		span := &types.Span{
			Event: types.Event{
				Data: map[string]interface{}{
					"fieldA": fmt.Sprintf("value%d", i),
					"fieldB": i,
				},
			},
		}
		trace.AddSpan(span)
	}

	trace.RootSpan = &types.Span{
		Event: types.Event{
			Data: map[string]interface{}{
				"service_name": "test",
			},
		},
	}

	// the reported value count must be capped at exactly maxKeyLength
	_, n := generator.build(trace)
	assert.Equal(t, maxKeyLength, n)
}
5 changes: 4 additions & 1 deletion sample/windowed_throughput.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ func (d *WindowedThroughputSampler) SetClusterSize(size int) {
}

func (d *WindowedThroughputSampler) GetSampleRate(trace *types.Trace) (rate uint, keep bool, reason string, key string) {
key = d.key.build(trace)
key, n := d.key.build(trace)
if n == maxKeyLength {
d.Logger.Debug().Logf("trace key hit max length of %d, truncating", maxKeyLength)
}
count := int(trace.DescendantCount())
rate = uint(d.dynsampler.GetSampleRateMulti(key, count))
if rate < 1 { // protect against dynsampler being broken even though it shouldn't be
Expand Down
Loading