Skip to content

Commit

Permalink
feat(config): add AddCountsToRoot to report counts data for traces (#910
Browse files Browse the repository at this point in the history
)

## Which problem is this PR solving?

- #555 

## Short description of the changes

This PR introduces a new config value `AddCountsToRoot`. If `true`, then
Refinery will ignore the `AddSpanCountToRoot` setting and add the
following fields to the root span based on the values at the time the
sampling decision was made:
- `meta.span_count`: the number of child spans on the trace 
- `meta.span_event_count`: the number of span events on the trace 
- `meta.span_link_count`: the number of span links on the trace 
- `meta.event_count`: the number of honeycomb events on the trace

fix #555
  • Loading branch information
VinozzZ authored Nov 30, 2023
1 parent dcae701 commit ba71dac
Show file tree
Hide file tree
Showing 14 changed files with 487 additions and 69 deletions.
90 changes: 71 additions & 19 deletions collect/cache/cuckooSentCache.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,73 @@ import (
// The size of the sent cache is still set based on the size of the live trace cache,
// and the size of the dropped cache is an independent value.

// cuckooKeptRecord is an internal record we leave behind when keeping a trace to remember
// keptTraceCacheEntry is an internal record we leave behind when keeping a trace to remember
// our decision for the future. We only store them if the record was kept.
type cuckooKeptRecord struct {
rate uint // sample rate used when sending the trace
spanCount uint // number of spans in the trace (we decorate the root span with this)
type keptTraceCacheEntry struct {
	// rate is the sample rate used when sending the trace.
	rate uint32
	// eventCount is the number of descendants in the trace; it is bumped for
	// every late span regardless of its annotation type.
	eventCount uint32
	// spanEventCount is the number of span events in the trace.
	spanEventCount uint32
	// spanLinkCount is the number of span links in the trace.
	spanLinkCount uint32
	// spanCount is the number of spans in the trace, i.e. everything that is
	// neither a span event nor a span link.
	spanCount uint32
}

func (t *cuckooKeptRecord) Kept() bool {
// NewKeptTraceCacheEntry builds a cache record from a trace, capturing its
// sample rate (converted to uint32) together with its current descendant,
// span event, span link, and span counts. A nil trace yields a zero-valued
// record.
//
// The returned record always reports Kept() == true; the trace's own
// keep/drop decision is not consulted here, so callers wrapping a trace that
// may have been dropped must not rely on Kept().
func NewKeptTraceCacheEntry(trace *types.Trace) *keptTraceCacheEntry {
	entry := new(keptTraceCacheEntry)
	if trace == nil {
		return entry
	}

	entry.rate = uint32(trace.SampleRate)
	entry.eventCount = trace.DescendantCount()
	entry.spanEventCount = trace.SpanEventCount()
	entry.spanLinkCount = trace.SpanLinkCount()
	entry.spanCount = trace.SpanCount()
	return entry
}

// Kept reports whether the trace was kept. This record type represents a kept
// trace, so the answer is unconditionally true.
func (t *keptTraceCacheEntry) Kept() bool {
	return true
}

func (t *cuckooKeptRecord) Rate() uint {
return t.rate
// Rate returns the sample rate stored for the trace, widened from the
// record's uint32 storage to uint.
func (t *keptTraceCacheEntry) Rate() uint {
	return uint(t.rate)
}

// DescendantCount returns the total number of items recorded for the trace:
// spans plus every other kind of child, such as span links and span events.
func (t *keptTraceCacheEntry) DescendantCount() uint {
	return uint(t.eventCount)
}

// SpanEventCount returns how many span events have been recorded for the trace.
func (t *keptTraceCacheEntry) SpanEventCount() uint {
	return uint(t.spanEventCount)
}

func (t *cuckooKeptRecord) DescendantCount() uint {
// SpanLinkCount returns how many span links have been recorded for the trace.
func (t *keptTraceCacheEntry) SpanLinkCount() uint {
	return uint(t.spanLinkCount)
}

// SpanCount returns how many spans have been recorded for the trace, not
// counting span events or span links.
func (t *keptTraceCacheEntry) SpanCount() uint {
	return uint(t.spanCount)
}

func (t *cuckooKeptRecord) Count(*types.Span) {
t.spanCount++
// Count folds one more span into the record. The overall event total is
// always incremented; in addition, exactly one of the per-kind counters is
// incremented, chosen by the span's annotation type (span event, span link,
// or plain span for anything else).
func (t *keptTraceCacheEntry) Count(s *types.Span) {
	t.eventCount++

	// Default to the plain-span counter and redirect for annotated spans.
	bucket := &t.spanCount
	switch s.AnnotationType() {
	case types.SpanAnnotationTypeSpanEvent:
		bucket = &t.spanEventCount
	case types.SpanAnnotationTypeLink:
		bucket = &t.spanLinkCount
	}
	*bucket++
}

// Make sure it implements TraceSentRecord
var _ TraceSentRecord = (*cuckooKeptRecord)(nil)
var _ TraceSentRecord = (*keptTraceCacheEntry)(nil)

// cuckooDroppedRecord is what we return when the trace was dropped.
// It's always the same one.
Expand All @@ -59,14 +101,26 @@ func (t *cuckooDroppedRecord) DescendantCount() uint {
return 0
}

// SpanEventCount always returns 0: no counts are tracked on the record
// returned for dropped traces.
func (t *cuckooDroppedRecord) SpanEventCount() uint {
	return 0
}

// SpanLinkCount always returns 0: no counts are tracked on the record
// returned for dropped traces.
func (t *cuckooDroppedRecord) SpanLinkCount() uint {
	return 0
}

// SpanCount always returns 0: no counts are tracked on the record returned
// for dropped traces.
func (t *cuckooDroppedRecord) SpanCount() uint {
	return 0
}

// Count is a no-op: the span is ignored and nothing is recorded for a
// dropped trace.
func (t *cuckooDroppedRecord) Count(*types.Span) {
	// Intentionally empty.
}

// Make sure it implements TraceSentRecord
var _ TraceSentRecord = (*cuckooDroppedRecord)(nil)

type cuckooSentCache struct {
kept *lru.Cache[string, *cuckooKeptRecord]
kept *lru.Cache[string, *keptTraceCacheEntry]
dropped *CuckooTraceChecker
cfg config.SampleCacheConfig

Expand All @@ -85,7 +139,7 @@ type cuckooSentCache struct {
var _ TraceSentCache = (*cuckooSentCache)(nil)

func NewCuckooSentCache(cfg config.SampleCacheConfig, met metrics.Metrics) (TraceSentCache, error) {
stc, err := lru.New[string, *cuckooKeptRecord](int(cfg.KeptSize))
stc, err := lru.New[string, *keptTraceCacheEntry](int(cfg.KeptSize))
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -122,13 +176,11 @@ func (c *cuckooSentCache) Stop() {
func (c *cuckooSentCache) Record(trace *types.Trace, keep bool) {
if keep {
// record this decision in the sent record LRU for future spans
sentRecord := cuckooKeptRecord{
rate: trace.SampleRate,
spanCount: trace.DescendantCount(),
}
sentRecord := NewKeptTraceCacheEntry(trace)

c.keptMut.Lock()
defer c.keptMut.Unlock()
c.kept.Add(trace.TraceID, &sentRecord)
c.kept.Add(trace.TraceID, sentRecord)
return
}
// if we're not keeping it, save it in the dropped trace filter
Expand All @@ -154,7 +206,7 @@ func (c *cuckooSentCache) Check(span *types.Span) (TraceSentRecord, bool) {
}

func (c *cuckooSentCache) Resize(cfg config.SampleCacheConfig) error {
stc, err := lru.New[string, *cuckooKeptRecord](int(cfg.KeptSize))
stc, err := lru.New[string, *keptTraceCacheEntry](int(cfg.KeptSize))
if err != nil {
return err
}
Expand Down
6 changes: 6 additions & 0 deletions collect/cache/traceSentCache.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ type TraceSentRecord interface {
Rate() uint
// DescendantCount returns the count of items associated with the trace, including all types of children like span links and span events.
DescendantCount() uint
// SpanEventCount returns the count of span events in the trace.
SpanEventCount() uint
// SpanLinkCount returns the count of span links in the trace.
SpanLinkCount() uint
// SpanCount returns the count of child spans in the trace.
SpanCount() uint
// Count records additional spans in the totals
Count(*types.Span)
}
Expand Down
45 changes: 35 additions & 10 deletions collect/collect.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ func (i *InMemCollector) processSpan(sp *types.Span) {
// bump the count of records on this trace -- if the root span isn't
// the last late span, then it won't be perfect, but it will be better than
// having none at all
i.dealWithSentTrace(sr.Kept(), sr.Rate(), sr.DescendantCount(), sp)
i.dealWithSentTrace(sr, sp)
return
}
// trace hasn't already been sent (or this span is really old); let's
Expand Down Expand Up @@ -421,7 +421,7 @@ func (i *InMemCollector) processSpan(sp *types.Span) {
// if the trace we got back from the cache has already been sent, deal with the
// span.
if trace.Sent {
i.dealWithSentTrace(trace.KeepSample, trace.SampleRate, trace.DescendantCount(), sp)
i.dealWithSentTrace(cache.NewKeptTraceCacheEntry(trace), sp)
}

// great! trace is live. add the span.
Expand Down Expand Up @@ -483,14 +483,16 @@ func (i *InMemCollector) ProcessSpanImmediately(sp *types.Span, keep bool, sampl
// dealWithSentTrace handles a span that has arrived after the sampling decision
// on the trace has already been made, and it obeys that decision by either
// sending the span immediately or dropping it.
func (i *InMemCollector) dealWithSentTrace(keep bool, sampleRate uint, spanCount uint, sp *types.Span) {
func (i *InMemCollector) dealWithSentTrace(tr cache.TraceSentRecord, sp *types.Span) {
if i.Config.GetAddRuleReasonToTrace() {
sp.Data["meta.refinery.reason"] = "late"
}
if i.hostname != "" {
sp.Data["meta.refinery.local_hostname"] = i.hostname
}
isDryRun := i.Config.GetIsDryRun()
keep := tr.Kept()

if isDryRun {
// if dry run mode is enabled, we keep all traces and mark the spans with the sampling decision
sp.Data[config.DryRunFieldName] = keep
Expand All @@ -503,10 +505,18 @@ func (i *InMemCollector) dealWithSentTrace(keep bool, sampleRate uint, spanCount
}
if keep {
i.Logger.Debug().WithField("trace_id", sp.TraceID).Logf("Sending span because of previous decision to send trace")
mergeTraceAndSpanSampleRates(sp, sampleRate, isDryRun)
mergeTraceAndSpanSampleRates(sp, tr.Rate(), isDryRun)
// if this span is a late root span, possibly update it with our current span count
if i.Config.GetAddSpanCountToRoot() && i.isRootSpan(sp) {
sp.Data["meta.span_count"] = int64(spanCount)
if i.isRootSpan(sp) {
if i.Config.GetAddCountsToRoot() {
sp.Data["meta.span_event_count"] = int64(tr.SpanEventCount())
sp.Data["meta.span_link_count"] = int64(tr.SpanLinkCount())
sp.Data["meta.span_count"] = int64(tr.SpanCount())
sp.Data["meta.event_count"] = int64(tr.DescendantCount())
} else if i.Config.GetAddSpanCountToRoot() {
sp.Data["meta.span_count"] = int64(tr.DescendantCount())
}

}
i.addAdditionalAttributes(sp)
i.Transmission.EnqueueSpan(sp)
Expand Down Expand Up @@ -589,8 +599,16 @@ func (i *InMemCollector) send(trace *types.Trace, sendReason string) {
}

// If we have a root span, update it with the count before determining the SampleRate.
if i.Config.GetAddSpanCountToRoot() && trace.RootSpan != nil {
trace.RootSpan.Data["meta.span_count"] = int64(trace.DescendantCount())
if trace.RootSpan != nil {
rs := trace.RootSpan
if i.Config.GetAddCountsToRoot() {
rs.Data["meta.span_event_count"] = int64(trace.SpanEventCount())
rs.Data["meta.span_link_count"] = int64(trace.SpanLinkCount())
rs.Data["meta.span_count"] = int64(trace.SpanCount())
rs.Data["meta.event_count"] = int64(trace.DescendantCount())
} else if i.Config.GetAddSpanCountToRoot() {
rs.Data["meta.span_count"] = int64(trace.DescendantCount())
}
}

// use sampler key to find sampler; create and cache if not found
Expand Down Expand Up @@ -638,8 +656,15 @@ func (i *InMemCollector) send(trace *types.Trace, sendReason string) {

// update the root span (if we have one, which we might not if the trace timed out)
// with the final total as of our send time
if i.Config.GetAddSpanCountToRoot() && i.isRootSpan(sp) {
sp.Data["meta.span_count"] = int64(trace.DescendantCount())
if i.isRootSpan(sp) {
if i.Config.GetAddCountsToRoot() {
sp.Data["meta.span_event_count"] = int64(trace.SpanEventCount())
sp.Data["meta.span_link_count"] = int64(trace.SpanLinkCount())
sp.Data["meta.span_count"] = int64(trace.SpanCount())
sp.Data["meta.event_count"] = int64(trace.DescendantCount())
} else if i.Config.GetAddSpanCountToRoot() {
sp.Data["meta.span_count"] = int64(trace.DescendantCount())
}
}

isDryRun := i.Config.GetIsDryRun()
Expand Down
Loading

0 comments on commit ba71dac

Please sign in to comment.