Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[exporter/datadog]: updated remove sublayer stats calc and mutex #3378

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 0 additions & 17 deletions exporter/datadogexporter/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ import (
"github.com/DataDog/datadog-agent/pkg/trace/exportable/obfuscate"
"github.com/DataDog/datadog-agent/pkg/trace/exportable/pb"
"github.com/DataDog/datadog-agent/pkg/trace/exportable/sampler"
"github.com/DataDog/datadog-agent/pkg/trace/exportable/stats"
"github.com/DataDog/datadog-agent/pkg/trace/exportable/traceutil"
)

// obfuscatePayload applies obfuscator rules to the trace payloads
Expand Down Expand Up @@ -74,18 +72,3 @@ func getAnalyzedSpans(sps []*pb.Span) []*pb.Span {
}
return top
}

// Compute Sublayers updates a spans metrics with relevant metadata so that it's duration and breakdown between different services can
// be accurately displayed in the Datadog UI
func computeSublayerMetrics(calculator *sublayerCalculator, t pb.Trace) {
root := traceutil.GetRoot(t)
traceutil.ComputeTopLevel(t)

subtraces := stats.ExtractSubtraces(t, root)
sublayers := make(map[*pb.Span][]stats.SublayerValue)
for _, subtrace := range subtraces {
subtraceSublayers := calculator.computeSublayers(subtrace.Trace)
sublayers[subtrace.Root] = subtraceSublayers
stats.SetSublayersOnSpan(subtrace.Root, subtraceSublayers)
}
}
9 changes: 6 additions & 3 deletions exporter/datadogexporter/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,16 @@ const (
)

// ComputeAPMStats calculates the stats that should be submitted to APM about a given trace
func computeAPMStats(tracePayload *pb.TracePayload, calculator *sublayerCalculator, pushTime int64) *stats.Payload {
func computeAPMStats(tracePayload *pb.TracePayload, pushTime int64) *stats.Payload {
statsRawBuckets := make(map[int64]*stats.RawBucket)

// removing sublayer calc as part of work to port
// https://github.com/DataDog/datadog-agent/pull/7450/files
emptySublayer := make([]stats.SublayerValue, 0)

bucketTS := pushTime - statsBucketDuration
for _, trace := range tracePayload.Traces {
spans := getAnalyzedSpans(trace.Spans)
sublayers := calculator.computeSublayers(trace.Spans)
for _, span := range spans {

// TODO: While this is hardcoded to assume a single 10s buckets for now,
Expand All @@ -58,7 +61,7 @@ func computeAPMStats(tracePayload *pb.TracePayload, calculator *sublayerCalculat
Weight: 1,
TopLevel: true,
}
statsRawBucket.HandleSpan(weightedSpan, tracePayload.Env, []string{versionAggregationTag}, sublayers)
statsRawBucket.HandleSpan(weightedSpan, tracePayload.Env, []string{versionAggregationTag}, emptySublayer)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it equivalent that we pass an empty sublayer here (and thus leave empty the statsRawBucket fields) to using the new payload format from the Datadog Agent?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

😞 This is a good point. Upon closer inspection, no, it is not equivalent. I think due to changes in RawBucket and StatsRawBucket we're pinned here until the exportables package can get updated. I think our payloads would not be handled properly. fml. I'm going to close this...again lol...and followup with the impacted customers and provide some workarounds involving lowering batching time. Ultimately I think we're going to continue encountering issues in Fargate and other resource constrained environments until we both reduce the size of the collector itself (since contrib is large), as well as the resource consumption.

}
}

Expand Down
24 changes: 2 additions & 22 deletions exporter/datadogexporter/traces_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,11 @@ package datadogexporter

import (
"context"
"sync"
"time"

"github.com/DataDog/datadog-agent/pkg/trace/exportable/config/configdefs"
"github.com/DataDog/datadog-agent/pkg/trace/exportable/obfuscate"
"github.com/DataDog/datadog-agent/pkg/trace/exportable/pb"
"github.com/DataDog/datadog-agent/pkg/trace/exportable/stats"
"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/consumer/pdata"
"go.uber.org/zap"
Expand All @@ -33,28 +31,12 @@ import (
"github.com/open-telemetry/opentelemetry-collector-contrib/exporter/datadogexporter/utils"
)

// sublayerCalculator is thread safe wrapper of a sublayer
// calculator. Each trace exporter has a single sublayer
// calculator that is reused by each push
type sublayerCalculator struct {
sc *stats.SublayerCalculator
mutex sync.Mutex
}

// ComputeSublayers computes the sublayers of a trace
func (s *sublayerCalculator) computeSublayers(trace pb.Trace) []stats.SublayerValue {
s.mutex.Lock()
defer s.mutex.Unlock()
return s.sc.ComputeSublayers(trace)
}

type traceExporter struct {
params component.ExporterCreateParams
cfg *config.Config
ctx context.Context
edgeConnection TraceEdgeConnection
obfuscator *obfuscate.Obfuscator
calculator *sublayerCalculator
client *datadog.Client
denylister *Denylister
}
Expand Down Expand Up @@ -89,14 +71,12 @@ func newTracesExporter(ctx context.Context, params component.ExporterCreateParam
// a denylist for dropping ignored resources
denylister := NewDenylister(cfg.Traces.IgnoreResources)

calculator := &sublayerCalculator{sc: stats.NewSublayerCalculator()}
exporter := &traceExporter{
params: params,
cfg: cfg,
ctx: ctx,
edgeConnection: createTraceEdgeConnection(cfg.Traces.TCPAddr.Endpoint, cfg.API.Key, params.BuildInfo),
obfuscator: obfuscator,
calculator: calculator,
client: client,
denylister: denylister,
}
Expand Down Expand Up @@ -137,7 +117,7 @@ func (exp *traceExporter) pushTraceData(
// we largely apply the same logic as the serverless implementation, simplified a bit
// https://github.com/DataDog/datadog-serverless-functions/blob/f5c3aedfec5ba223b11b76a4239fcbf35ec7d045/aws/logs_monitoring/trace_forwarder/cmd/trace/main.go#L61-L83
fallbackHost := metadata.GetHost(exp.params.Logger, exp.cfg)
ddTraces, ms := convertToDatadogTd(td, fallbackHost, exp.calculator, exp.cfg, exp.denylister)
ddTraces, ms := convertToDatadogTd(td, fallbackHost, exp.cfg, exp.denylister)

// group the traces by env to reduce the number of flushes
aggregatedTraces := aggregateTracePayloadsByEnv(ddTraces)
Expand Down Expand Up @@ -169,7 +149,7 @@ func (exp *traceExporter) pushWithRetry(ctx context.Context, ddTracePayload *pb.
}

// this is for generating metrics like hits, errors, and latency, it uses a separate endpoint than Traces
stats := computeAPMStats(ddTracePayload, exp.calculator, pushTime)
stats := computeAPMStats(ddTracePayload, pushTime)
errStats := exp.edgeConnection.SendStats(context.Background(), stats, maxRetries)

if errStats != nil {
Expand Down
10 changes: 3 additions & 7 deletions exporter/datadogexporter/translate_traces.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ const (
)

// converts Traces into an array of datadog trace payloads grouped by env
func convertToDatadogTd(td pdata.Traces, fallbackHost string, calculator *sublayerCalculator, cfg *config.Config, blk *Denylister) ([]*pb.TracePayload, []datadog.Metric) {
func convertToDatadogTd(td pdata.Traces, fallbackHost string, cfg *config.Config, blk *Denylister) ([]*pb.TracePayload, []datadog.Metric) {
// TODO:
// do we apply other global tags, like version+service, to every span or only root spans of a service
// should globalTags['service'] take precedence over a trace's resource.service.name? I don't believe so, need to confirm
Expand All @@ -81,7 +81,7 @@ func convertToDatadogTd(td pdata.Traces, fallbackHost string, calculator *sublay
host = fallbackHost
}

payload := resourceSpansToDatadogSpans(rs, calculator, host, cfg, blk)
payload := resourceSpansToDatadogSpans(rs, host, cfg, blk)
traces = append(traces, &payload)

ms := metrics.DefaultMetrics("traces", host, uint64(pushTime))
Expand Down Expand Up @@ -118,7 +118,7 @@ func aggregateTracePayloadsByEnv(tracePayloads []*pb.TracePayload) []*pb.TracePa
}

// converts a Trace's resource spans into a trace payload
func resourceSpansToDatadogSpans(rs pdata.ResourceSpans, calculator *sublayerCalculator, hostname string, cfg *config.Config, blk *Denylister) pb.TracePayload {
func resourceSpansToDatadogSpans(rs pdata.ResourceSpans, hostname string, cfg *config.Config, blk *Denylister) pb.TracePayload {
// get env tag
env := utils.NormalizeTag(cfg.Env)

Expand Down Expand Up @@ -198,10 +198,6 @@ func resourceSpansToDatadogSpans(rs pdata.ResourceSpans, calculator *sublayerCal
// TODO: allow users to configure specific spans to be marked as an analyzed spans for app analytics
top := getAnalyzedSpans(apiTrace.Spans)

// calculates span metrics for representing direction and timing among it's different services for display in
// service overview graphs
// see: https://github.com/DataDog/datadog-agent/blob/f69a7d35330c563e9cad4c5b8865a357a87cd0dc/pkg/trace/stats/sublayers.go#L204
computeSublayerMetrics(calculator, apiTrace.Spans)
payload.Transactions = append(payload.Transactions, top...)
payload.Traces = append(payload.Traces, apiTrace)
}
Expand Down
Loading