diff --git a/collect/cache/cuckoo.go b/collect/cache/cuckoo.go new file mode 100644 index 0000000000..4735c7a66b --- /dev/null +++ b/collect/cache/cuckoo.go @@ -0,0 +1,97 @@ +package cache + +import ( + "sync" + + "github.com/honeycombio/refinery/metrics" + cuckoo "github.com/panmari/cuckoofilter" +) + +// These are the names of metrics tracked for the cuckoo filter +const ( + CurrentLoadFactor = "cuckoo_current_load_factor" + FutureLoadFactor = "cuckoo_future_load_factor" + CurrentCapacity = "cuckoo_current_capacity" +) + +// This wraps a cuckoo filter implementation in a way that lets us keep it running forever +// without filling up. +// A cuckoo filter can't be emptied (you can delete individual items if you know what they are, +// but you can't get their names from the filter). Consequently, what we do is keep *two* filters, +// current and future. The current one is the one we use to check against, and when we add, we +// add to both. But the future one is started *after* the current one, so that when the current +// gets too full, we can discard it, replace it with future, and then start a new, empty future. +// This is why the future filter is nil until the current filter reaches .5. +// You must call Maintain() periodically, most likely from a goroutine. The call is cheap, +// and the timing isn't very critical. The effect of going above "capacity" is an increased +// false positive rate, but the filter continues to function. +type CuckooTraceChecker struct { + current *cuckoo.Filter + future *cuckoo.Filter + mut sync.RWMutex + capacity uint + met metrics.Metrics +} + +func NewCuckooTraceChecker(capacity uint, m metrics.Metrics) *CuckooTraceChecker { + return &CuckooTraceChecker{ + capacity: capacity, + current: cuckoo.NewFilter(capacity), + future: nil, + met: m, + } +} + +// Add puts a traceID into the filter. +func (c *CuckooTraceChecker) Add(traceID string) { + c.mut.Lock() + defer c.mut.Unlock() + c.current.Insert([]byte(traceID)) + // don't add anything to future if it doesn't exist yet + if c.future != nil { + c.future.Insert([]byte(traceID)) + } +} + +// Check tests if a traceID is (very probably) in the filter. +func (c *CuckooTraceChecker) Check(traceID string) bool { + b := []byte(traceID) + c.mut.RLock() + defer c.mut.RUnlock() + return c.current.Lookup(b) +} + +// Maintain should be called periodically; if the current filter is full, it replaces +// it with the future filter and creates a new future filter. +func (c *CuckooTraceChecker) Maintain() { + c.mut.RLock() + currentLoadFactor := c.current.LoadFactor() + c.met.Gauge(CurrentLoadFactor, currentLoadFactor) + if c.future != nil { + c.met.Gauge(FutureLoadFactor, c.future.LoadFactor()) + } + c.met.Gauge(CurrentCapacity, c.capacity) + c.mut.RUnlock() + + // once the current one is half loaded, we can start using the future one too + if c.future == nil && currentLoadFactor > 0.5 { + c.mut.Lock() + c.future = cuckoo.NewFilter(c.capacity) + c.mut.Unlock() + } + + // if the current one is full, cycle the filters + if currentLoadFactor > 0.99 { + c.mut.Lock() + defer c.mut.Unlock() + c.current = c.future + c.future = cuckoo.NewFilter(c.capacity) + } +} + +// SetNextCapacity adjusts the capacity that will be set for the future filter on the next replacement. +func (c *CuckooTraceChecker) SetNextCapacity(capacity uint) { + c.mut.Lock() + defer c.mut.Unlock() + c.capacity = capacity +} diff --git a/collect/cache/cuckooSentCache.go b/collect/cache/cuckooSentCache.go new file mode 100644 index 0000000000..820553b094 --- /dev/null +++ b/collect/cache/cuckooSentCache.go @@ -0,0 +1,189 @@ +package cache + +import ( + "sync" + "time" + + lru "github.com/hashicorp/golang-lru" + "github.com/honeycombio/refinery/config" + "github.com/honeycombio/refinery/metrics" + "github.com/honeycombio/refinery/types" +) + +// cuckooSentCache extends Refinery's legacy cache. It keeps the same records +// for kept traces but adds a pair of cuckoo filters to record dropped traces. +// This allows many more traces to be kept in the cache; now only kept records +// are retained in the cache of sentRecords. +// The size of the sent cache is still set based on the size of the live trace cache, +// and the size of the dropped cache is an independent value. + +// cuckooKeptRecord is an internal record we leave behind when keeping a trace to remember +// our decision for the future. We only store them if the record was kept. +type cuckooKeptRecord struct { + rate uint // sample rate used when sending the trace + spanCount uint // number of spans in the trace (we decorate the root span with this) +} + +func (t *cuckooKeptRecord) Kept() bool { + return true +} + +func (t *cuckooKeptRecord) Rate() uint { + return t.rate +} + +func (t *cuckooKeptRecord) DescendantCount() uint { + return uint(t.spanCount) +} + +func (t *cuckooKeptRecord) Count(*types.Span) { + t.spanCount++ +} + +// Make sure it implements TraceSentRecord +var _ TraceSentRecord = (*cuckooKeptRecord)(nil) + +// cuckooSentRecord is what we return when the trace was dropped. +// It's always the same one. +type cuckooDroppedRecord struct{} + +func (t *cuckooDroppedRecord) Kept() bool { + return false +} + +func (t *cuckooDroppedRecord) Rate() uint { + return 0 +} + +func (t *cuckooDroppedRecord) DescendantCount() uint { + return 0 +} + +func (t *cuckooDroppedRecord) Count(*types.Span) { +} + +// Make sure it implements TraceSentRecord +var _ TraceSentRecord = (*cuckooDroppedRecord)(nil) + +type cuckooSentCache struct { + kept *lru.Cache + dropped *CuckooTraceChecker + cfg config.SampleCacheConfig + + // The done channel is used to decide when to terminate the monitor + // goroutine. When resizing the cache, we write to the channel, but + // when terminating the system, call Stop() to close the channel. + // Either one causes the goroutine to shut down, and in resizing + // we then start a new monitor. + done chan struct{} + + // This mutex is for managing kept traces + keptMut sync.Mutex +} + +// Make sure it implements TraceSentCache +var _ TraceSentCache = (*cuckooSentCache)(nil) + +func NewCuckooSentCache(cfg config.SampleCacheConfig, met metrics.Metrics) (TraceSentCache, error) { + stc, err := lru.New(int(cfg.KeptSize)) + if err != nil { + return nil, err + } + dropped := NewCuckooTraceChecker(cfg.DroppedSize, met) + + cache := &cuckooSentCache{ + kept: stc, + dropped: dropped, + cfg: cfg, + done: make(chan struct{}), + } + go cache.monitor() + return cache, nil +} + +// goroutine to monitor the cache and cycle the size check periodically +func (c *cuckooSentCache) monitor() { + ticker := time.NewTicker(c.cfg.SizeCheckInterval) + for { + select { + case <-ticker.C: + c.dropped.Maintain() + case <-c.done: + return + } + } +} + +// Stop halts the monitor goroutine +func (c *cuckooSentCache) Stop() { + close(c.done) +} + +func (c *cuckooSentCache) Record(trace *types.Trace, keep bool) { + if keep { + // record this decision in the sent record LRU for future spans + sentRecord := cuckooKeptRecord{ + rate: trace.SampleRate, + spanCount: trace.DescendantCount(), + } + c.keptMut.Lock() + defer c.keptMut.Unlock() + c.kept.Add(trace.TraceID, &sentRecord) + return + } + // if we're not keeping it, save it in the dropped trace filter + c.dropped.Add(trace.TraceID) +} + +func (c *cuckooSentCache) Check(span *types.Span) (TraceSentRecord, bool) { + // was it dropped? + if c.dropped.Check(span.TraceID) { + // we recognize it as dropped, so just say so; there's nothing else to do + return &cuckooDroppedRecord{}, false + } + // was it kept? + c.keptMut.Lock() + defer c.keptMut.Unlock() + if sentRecord, found := c.kept.Get(span.TraceID); found { + if sr, ok := sentRecord.(*cuckooKeptRecord); ok { + // if we kept it, then this span being checked needs counting too + sr.Count(span) + return sr, true + } + } + // we have no memory of this place + return nil, false +} + +func (c *cuckooSentCache) Resize(cfg config.SampleCacheConfig) error { + stc, err := lru.New(int(cfg.KeptSize)) + if err != nil { + return err + } + + // grab all the items in the current cache; if it's larger than + // what will fit in the new one, discard the oldest ones + // (we don't have to do anything with the ones we discard, this is + // the trace decisions cache). + c.keptMut.Lock() + defer c.keptMut.Unlock() + keys := c.kept.Keys() + if len(keys) > int(cfg.KeptSize) { + keys = keys[len(keys)-int(cfg.KeptSize):] + } + // copy all the keys to the new cache in order + for _, k := range keys { + if v, found := c.kept.Get(k); found { + stc.Add(k, v) + } + } + c.kept = stc + + // also set up the drop cache size to change eventually + c.dropped.SetNextCapacity(cfg.DroppedSize) + + // shut down the old monitor and create a new one + c.done <- struct{}{} + go c.monitor() + return nil +} diff --git a/collect/cache/legacySentCache.go b/collect/cache/legacySentCache.go index b3a84834c0..b9e1f95c0d 100644 --- a/collect/cache/legacySentCache.go +++ b/collect/cache/legacySentCache.go @@ -2,6 +2,7 @@ package cache import ( lru "github.com/hashicorp/golang-lru" + "github.com/honeycombio/refinery/config" "github.com/honeycombio/refinery/types" ) @@ -71,3 +72,13 @@ func (c *legacySentCache) Check(span *types.Span) (TraceSentRecord, bool) { } return nil, false } + +// legacy Stop does nothing +// Stop halts the monitor goroutine +func (c *legacySentCache) Stop() { +} + +// legacy Resize does nothing +func (c *legacySentCache) Resize(cfg config.SampleCacheConfig) error { + return nil +} diff --git a/collect/cache/traceSentCache.go b/collect/cache/traceSentCache.go index e55e3b0def..b9e2d8f213 100644 --- a/collect/cache/traceSentCache.go +++ b/collect/cache/traceSentCache.go @@ -1,6 +1,7 @@ package cache import ( + "github.com/honeycombio/refinery/config" "github.com/honeycombio/refinery/types" ) @@ -21,4 +22,8 @@ type TraceSentCache interface { // Check tests if a trace corresponding to the span is in the cache; if found, it returns the appropriate TraceSentRecord and true, // else nil and false. Check(span *types.Span) (TraceSentRecord, bool) + // Stop halts the cache in preparation for shutdown + Stop() + // Resize adjusts the size of the cache according to the Config passed in + Resize(cfg config.SampleCacheConfig) error } diff --git a/collect/collect.go b/collect/collect.go index cf664bf9b9..62c9487946 100644 --- a/collect/collect.go +++ b/collect/collect.go @@ -72,7 +72,7 @@ type InMemCollector struct { cache cache.Cache datasetSamplers map[string]sample.Sampler - sentTraceCache cache.TraceSentCache + sampleTraceCache cache.TraceSentCache incoming chan *types.Span fromPeer chan *types.Span @@ -111,9 +111,23 @@ func (i *InMemCollector) Start() error { i.Metrics.Register(TraceSendEjectedFull, "counter") i.Metrics.Register(TraceSendEjectedMemsize, "counter") - i.sentTraceCache, err = cache.NewLegacySentCache(imcConfig.CacheCapacity * 5) // (keep 5x ring buffer size) - if err != nil { - return err + sampleCacheConfig := i.Config.GetSampleCacheConfig() + switch sampleCacheConfig.Type { + case "legacy", "": + i.sampleTraceCache, err = cache.NewLegacySentCache(imcConfig.CacheCapacity * 5) // (keep 5x ring buffer size) + if err != nil { + return err + } + case "cuckoo": + i.Metrics.Register(cache.CurrentCapacity, "gauge") + i.Metrics.Register(cache.FutureLoadFactor, "gauge") + i.Metrics.Register(cache.CurrentLoadFactor, "gauge") + i.sampleTraceCache, err = cache.NewCuckooSentCache(sampleCacheConfig, i.Metrics) + if err != nil { + return err + } + default: + return fmt.Errorf("validation failure - sampleTraceCache had invalid config type '%s'", sampleCacheConfig.Type) } i.incoming = make(chan *types.Span, imcConfig.CacheCapacity*3) @@ -165,8 +179,10 @@ func (i *InMemCollector) reloadConfigs() { } i.cache = c } else { - i.Logger.Debug().Logf("skipping reloading the cache on config reload because it hasn't changed capacity") + i.Logger.Debug().Logf("skipping reloading the in-memory cache on config reload because it hasn't changed capacity") } + + i.sampleTraceCache.Resize(i.Config.GetSampleCacheConfig()) } else { i.Logger.Error().WithField("cache", i.cache.(*cache.DefaultInMemCache)).Logf("skipping reloading the cache on config reload because it's not an in-memory cache") } @@ -414,7 +430,7 @@ func (i *InMemCollector) processSpan(sp *types.Span) { trace := i.cache.Get(sp.TraceID) if trace == nil { // if the trace has already been sent, just pass along the span - if sr, found := i.sentTraceCache.Check(sp); found { + if sr, found := i.sampleTraceCache.Check(sp); found { i.Metrics.Increment("trace_sent_cache_hit") // bump the count of records on this trace -- if the root span isn't // the last late span, then it won't be perfect, but it will be better than @@ -585,7 +601,7 @@ func (i *InMemCollector) send(trace *types.Trace, reason string) { trace.KeepSample = shouldSend logFields["reason"] = reason - i.sentTraceCache.Record(trace, shouldSend) + i.sampleTraceCache.Record(trace, shouldSend) // if we're supposed to drop this trace, and dry run mode is not enabled, then we're done. if !shouldSend && !i.Config.GetIsDryRun() { @@ -642,6 +658,9 @@ func (i *InMemCollector) Stop() error { if i.Transmission != nil { i.Transmission.Flush() } + + i.sampleTraceCache.Stop() + return nil } diff --git a/collect/collect_benchmark_test.go b/collect/collect_benchmark_test.go index 6171dd583d..2fccb5601b 100644 --- a/collect/collect_benchmark_test.go +++ b/collect/collect_benchmark_test.go @@ -46,12 +46,12 @@ func BenchmarkCollect(b *testing.B) { Config: conf, Logger: log, }, - BlockOnAddSpan: true, - cache: cache.NewInMemCache(3, metric, log), - incoming: make(chan *types.Span, 500), - fromPeer: make(chan *types.Span, 500), - datasetSamplers: make(map[string]sample.Sampler), - sentTraceCache: stc, + BlockOnAddSpan: true, + cache: cache.NewInMemCache(3, metric, log), + incoming: make(chan *types.Span, 500), + fromPeer: make(chan *types.Span, 500), + datasetSamplers: make(map[string]sample.Sampler), + sampleTraceCache: stc, } go coll.collect() diff --git a/collect/collect_test.go b/collect/collect_test.go index 0d444186ce..d7de953f37 100644 --- a/collect/collect_test.go +++ b/collect/collect_test.go @@ -49,7 +49,7 @@ func TestAddRootSpan(t *testing.T) { coll.cache = c stc, err := cache.NewLegacySentCache(15) assert.NoError(t, err, "lru cache should start") - coll.sentTraceCache = stc + coll.sampleTraceCache = stc coll.incoming = make(chan *types.Span, 5) coll.fromPeer = make(chan *types.Span, 5) @@ -127,7 +127,7 @@ func TestOriginalSampleRateIsNotedInMetaField(t *testing.T) { coll.cache = c stc, err := cache.NewLegacySentCache(15) assert.NoError(t, err, "lru cache should start") - coll.sentTraceCache = stc + coll.sampleTraceCache = stc coll.incoming = make(chan *types.Span, 5) coll.fromPeer = make(chan *types.Span, 5) @@ -185,7 +185,7 @@ func TestTransmittedSpansShouldHaveASampleRateOfAtLeastOne(t *testing.T) { coll.cache = c stc, err := cache.NewLegacySentCache(15) assert.NoError(t, err, "lru cache should start") - coll.sentTraceCache = stc + coll.sampleTraceCache = stc coll.incoming = make(chan *types.Span, 5) coll.fromPeer = make(chan *types.Span, 5) @@ -246,7 +246,7 @@ func TestAddSpan(t *testing.T) { coll.cache = c stc, err := cache.NewLegacySentCache(15) assert.NoError(t, err, "lru cache should start") - coll.sentTraceCache = stc + coll.sampleTraceCache = stc coll.incoming = make(chan *types.Span, 5) coll.fromPeer = make(chan *types.Span, 5) @@ -319,7 +319,7 @@ func TestDryRunMode(t *testing.T) { coll.cache = c stc, err := cache.NewLegacySentCache(15) assert.NoError(t, err, "lru cache should start") - coll.sentTraceCache = stc + coll.sampleTraceCache = stc coll.incoming = make(chan *types.Span, 5) coll.fromPeer = make(chan *types.Span, 5) @@ -586,7 +586,7 @@ func TestOldMaxAlloc(t *testing.T) { coll.cache = c stc, err := cache.NewLegacySentCache(15) assert.NoError(t, err, "lru cache should start") - coll.sentTraceCache = stc + coll.sampleTraceCache = stc coll.incoming = make(chan *types.Span, 1000) coll.fromPeer = make(chan *types.Span, 5) @@ -690,7 +690,7 @@ func TestStableMaxAlloc(t *testing.T) { coll.cache = c stc, err := cache.NewLegacySentCache(15) assert.NoError(t, err, "lru cache should start") - coll.sentTraceCache = stc + coll.sampleTraceCache = stc coll.incoming = make(chan *types.Span, 1000) coll.fromPeer = make(chan *types.Span, 5) @@ -778,7 +778,7 @@ func TestAddSpanNoBlock(t *testing.T) { coll.cache = c stc, err := cache.NewLegacySentCache(15) assert.NoError(t, err, "lru cache should start") - coll.sentTraceCache = stc + coll.sampleTraceCache = stc coll.incoming = make(chan *types.Span, 3) coll.fromPeer = make(chan *types.Span, 3) @@ -850,7 +850,7 @@ func TestAddSpanCount(t *testing.T) { coll.cache = c stc, err := cache.NewLegacySentCache(15) assert.NoError(t, err, "lru cache should start") - coll.sentTraceCache = stc + coll.sampleTraceCache = stc coll.incoming = make(chan *types.Span, 5) coll.fromPeer = make(chan *types.Span, 5) @@ -919,7 +919,7 @@ func TestLateRootGetsSpanCount(t *testing.T) { coll.cache = c stc, err := cache.NewLegacySentCache(15) assert.NoError(t, err, "lru cache should start") - coll.sentTraceCache = stc + coll.sampleTraceCache = stc coll.incoming = make(chan *types.Span, 5) coll.fromPeer = make(chan *types.Span, 5) diff --git a/config/config.go b/config/config.go index 54ef588297..f250314976 100644 --- a/config/config.go +++ b/config/config.go @@ -172,6 +172,8 @@ type Config interface { GetCacheOverrunStrategy() string GetConfigMetadata() []ConfigMetadata + + GetSampleCacheConfig() SampleCacheConfig } type ConfigMetadata struct { diff --git a/config/config_test.go b/config/config_test.go index b86d186633..b147052afc 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -814,3 +814,58 @@ func TestHoneycombAdditionalErrorDefaults(t *testing.T) { assert.Equal(t, []string{"trace.span_id"}, c.GetAdditionalErrorFields()) } + +func TestSampleCacheParameters(t *testing.T) { + config, rules := createTempConfigs(t, ` + [InMemCollector] + CacheCapacity=1000 + + [HoneycombMetrics] + MetricsHoneycombAPI="http://honeycomb.io" + MetricsAPIKey="1234" + MetricsDataset="testDatasetName" + MetricsReportingInterval=3 + + `, "") + defer os.Remove(rules) + defer os.Remove(config) + + c, err := NewConfig(config, rules, func(err error) {}) + assert.NoError(t, err) + + s := c.GetSampleCacheConfig() + assert.Equal(t, "legacy", s.Type) + assert.Equal(t, uint(10_000), s.KeptSize) + assert.Equal(t, uint(1_000_000), s.DroppedSize) + assert.Equal(t, 10*time.Second, s.SizeCheckInterval) +} + +func TestSampleCacheParametersCuckoo(t *testing.T) { + config, rules := createTempConfigs(t, ` + [InMemCollector] + CacheCapacity=1000 + + [HoneycombMetrics] + MetricsHoneycombAPI="http://honeycomb.io" + MetricsAPIKey="1234" + MetricsDataset="testDatasetName" + MetricsReportingInterval=3 + + [SampleCache] + Type="cuckoo" + KeptSize=100_000 + DroppedSize=10_000_000 + SizeCheckInterval="60s" + `, "") + defer os.Remove(rules) + defer os.Remove(config) + + c, err := NewConfig(config, rules, func(err error) {}) + assert.NoError(t, err) + + s := c.GetSampleCacheConfig() + assert.Equal(t, "cuckoo", s.Type) + assert.Equal(t, uint(100_000), s.KeptSize) + assert.Equal(t, uint(10_000_000), s.DroppedSize) + assert.Equal(t, 1*time.Minute, s.SizeCheckInterval) +} diff --git a/config/file_config.go b/config/file_config.go index a00441cba4..13a3d9e8ca 100644 --- a/config/file_config.go +++ b/config/file_config.go @@ -62,6 +62,7 @@ type configContents struct { AdditionalErrorFields []string AddSpanCountToRoot bool CacheOverrunStrategy string + SampleCache SampleCacheConfig `validate:"required"` } type InMemoryCollectorCacheCapacity struct { @@ -106,6 +107,13 @@ type PeerManagementConfig struct { Timeout time.Duration } +type SampleCacheConfig struct { + Type string `validate:"required,oneof= legacy cuckoo"` + KeptSize uint `validate:"gte=500"` + DroppedSize uint `validate:"gte=100_000"` + SizeCheckInterval time.Duration `validate:"gte=1_000_000_000"` // 1 second minimum +} + // GRPCServerParameters allow you to configure the GRPC ServerParameters used // by refinery's own GRPC server: // https://pkg.go.dev/google.golang.org/grpc/keepalive#ServerParameters @@ -164,6 +172,10 @@ func NewConfig(config, rules string, errorCallback func(error)) (Config, error) c.SetDefault("AdditionalErrorFields", []string{"trace.span_id"}) c.SetDefault("AddSpanCountToRoot", false) c.SetDefault("CacheOverrunStrategy", "resize") + c.SetDefault("SampleCache.Type", "legacy") + c.SetDefault("SampleCache.KeptSize", 10_000) + c.SetDefault("SampleCache.DroppedSize", 1_000_000) + c.SetDefault("SampleCache.SizeCheckInterval", 10*time.Second) c.SetConfigFile(config) err := c.ReadInConfig() @@ -914,6 +926,13 @@ func (f *fileConfig) GetCacheOverrunStrategy() string { return f.conf.CacheOverrunStrategy } +func (f *fileConfig) GetSampleCacheConfig() SampleCacheConfig { + f.mux.RLock() + defer f.mux.RUnlock() + + return f.conf.SampleCache +} + // calculates an MD5 sum for a file that returns the same result as the md5sum command func calcMD5For(filename string) string { f, err := os.Open(filename) diff --git a/config/mock.go b/config/mock.go index 31c3371f93..9b47219de1 100644 --- a/config/mock.go +++ b/config/mock.go @@ -85,6 +85,7 @@ type MockConfig struct { AdditionalErrorFields []string AddSpanCountToRoot bool CacheOverrunStrategy string + SampleCache SampleCacheConfig CfgMetadata []ConfigMetadata Mux sync.RWMutex @@ -469,6 +470,13 @@ func (f *MockConfig) GetCacheOverrunStrategy() string { return f.CacheOverrunStrategy } +func (f *MockConfig) GetSampleCacheConfig() SampleCacheConfig { + f.Mux.RLock() + defer f.Mux.RUnlock() + + return f.SampleCache +} + func (f *MockConfig) GetConfigMetadata() []ConfigMetadata { f.Mux.RLock() defer f.Mux.RUnlock() diff --git a/config_complete.toml b/config_complete.toml index 972883a4f5..793288edf1 100644 --- a/config_complete.toml +++ b/config_complete.toml @@ -356,9 +356,9 @@ MetricsDataset = "Refinery Metrics" MetricsReportingInterval = 3 -#####################@## +####################### ## Prometheus Metrics ## -#####################@## +####################### [PrometheusMetrics] @@ -416,3 +416,50 @@ MetricsReportingInterval = 3 # https://github.com/grpc/grpc-go/blob/60a3a7e969c401ca16dbcd0108ad544fb35aa61c/internal/transport/http2_server.go#L231-L233 # Not eligible for live reload. # Timeout = "2s" + + + +################################ +## Sample Cache Configuration ## +################################ + +# Sample Cache Configuration controls the sample cache used to retain information about trace +# status after the sampling decision has been made. + +[SampleCacheConfig] + +# Type controls the type of sample cache used. +# "legacy" is a strategy where both keep and drop decisions are stored in a circular buffer that is +# 5x the size of the trace cache. This is Refinery's original sample cache strategy. +# "cuckoo" is a strategy where dropped traces are preserved in a "Cuckoo Filter", which can remember +# a much larger number of dropped traces, leaving capacity to retain a much larger number of kept traces. +# It is also more configurable. The cuckoo filter is recommended for most installations. +# Default is "legacy". +# Not eligible for live reload (you cannot change the type of cache with reload). +# Type = "cuckoo" + +# KeptSize controls the number of traces preserved in the cuckoo kept traces cache. +# Refinery keeps a record of each trace that was kept and sent to Honeycomb, along with some +# statistical information. This is most useful in cases where the trace was sent before sending +# the root span, so that the root span can be decorated with accurate metadata. +# Default is 10_000 traces (each trace in this cache consumes roughly 200 bytes). +# Does not apply to the "legacy" type of cache. +# Eligible for live reload. +# KeptSize = 10_000 + +# DroppedSize controls the size of the cuckoo dropped traces cache. +# This cache consumes 4-6 bytes per trace at a scale of millions of traces. +# Changing its size with live reload sets a future limit, but does not have an immediate effect. +# Default is 1_000_000 traces. +# Does not apply to the "legacy" type of cache. +# Eligible for live reload. +# DroppedSize = 1_000_000 + +# SizeCheckInterval controls the duration of how often the cuckoo cache re-evaluates +# the remaining capacity of its dropped traces cache and possibly cycles it. +# This cache is quite resilient so it doesn't need to happen very often, but the +# operation is also inexpensive. +# Default is 10 seconds. +# Does not apply to the "legacy" type of cache. +# Eligible for live reload. +# SizeCheckInterval = "10s" diff --git a/go.mod b/go.mod index 7d21988296..a051989d3d 100644 --- a/go.mod +++ b/go.mod @@ -36,6 +36,7 @@ require ( require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.1.2 // indirect + github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 // indirect github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a // indirect github.com/facebookgo/limitgroup v0.0.0-20150612190941-6abd8d71ec01 // indirect github.com/facebookgo/muster v0.0.0-20150708232844-fd3d7953fd52 // indirect @@ -51,6 +52,7 @@ require ( github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/panmari/cuckoofilter v1.0.3 // indirect github.com/pelletier/go-toml v1.9.5 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_model v0.2.0 // indirect diff --git a/go.sum b/go.sum index b9d33b4a72..d6c0adf5f5 100644 --- a/go.sum +++ b/go.sum @@ -63,6 +63,8 @@ github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnht github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 h1:BS21ZUJ/B5X2UVUbczfmdWH7GapPWAhxcMsDnjJTU1E= +github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -232,6 +234,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/panmari/cuckoofilter v1.0.3 h1:MgTxXG2aP0YPWFyY1sKt1caWidUFREk9BaOnakDKZOU= +github.com/panmari/cuckoofilter v1.0.3/go.mod h1:O7+ZOHxwlADJ1So2/ZsKBExDwILNPZsyt77zN0ZTBLg= github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml/v2 v2.0.5 h1:ipoSadvV8oGUjnUbMub59IDPPwfxF694nG/jwbMiyQg=