Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql: only create real spans when session tracing/sampling #61777

Merged
merged 2 commits into from
Mar 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/sql/conn_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -835,7 +835,7 @@ func (ex *connExecutor) close(ctx context.Context, closeType closeType) {
panic(errors.AssertionFailedf("unexpected state in conn executor after ApplyWithPayload %T", t))
}
}
if util.CrdbTestBuild && ex.state.sp != nil {
if util.CrdbTestBuild && ex.state.Ctx != nil {
panic(errors.AssertionFailedf("txn span not closed in state %s", ex.machine.CurState()))
}
} else if closeType == externalTxnClose {
Expand Down
5 changes: 1 addition & 4 deletions pkg/sql/conn_executor_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ func (ex *connExecutor) execStmtInOpenState(
alreadyRecording := ex.transitionCtx.sessionTracing.Enabled()
stmtTraceThreshold := traceStmtThreshold.Get(&ex.planner.execCfg.Settings.SV)
if !alreadyRecording && stmtTraceThreshold > 0 {
ctx, stmtThresholdSpan = createRootOrChildSpan(ctx, "trace-stmt-threshold", ex.transitionCtx.tracer)
ctx, stmtThresholdSpan = createRootOrChildSpan(ctx, "trace-stmt-threshold", ex.transitionCtx.tracer, tracing.WithForceRealSpan())
stmtThresholdSpan.SetVerbose(true)
}

Expand Down Expand Up @@ -1544,9 +1544,6 @@ func (ex *connExecutor) recordTransaction(ev txnEvent, implicit bool, txnStart t
func createRootOrChildSpan(
parentCtx context.Context, opName string, tr *tracing.Tracer, os ...tracing.SpanOption,
) (context.Context, *tracing.Span) {
// WithForceRealSpan is used to support the use of session tracing, which
// may start recording on this span.
os = append(os, tracing.WithForceRealSpan())
return tracing.EnsureChildSpan(parentCtx, tr, opName, os...)
}

Expand Down
14 changes: 8 additions & 6 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -1642,16 +1642,18 @@ func (st *SessionTracing) StartTracing(
return nil
}

// If we're inside a transaction, start recording on the txn span.
// If we're inside a transaction, hijack the txn's ctx with one that has a
// recording span.
if _, ok := st.ex.machine.CurState().(stateNoTxn); !ok {
sp := tracing.SpanFromContext(st.ex.state.Ctx)
if sp == nil {
txnCtx := st.ex.state.Ctx
if sp := tracing.SpanFromContext(txnCtx); sp == nil {
return errors.Errorf("no txn span for SessionTracing")
}
// We want to clear out any existing recordings so they don't show up in
// future traces.
sp.ResetRecording()

newTxnCtx, sp := tracing.EnsureChildSpan(txnCtx, st.ex.server.cfg.AmbientCtx.Tracer,
"session tracing", tracing.WithForceRealSpan())
sp.SetVerbose(true)
st.ex.state.Ctx = newTxnCtx
st.firstTxnSpan = sp
}

Expand Down
30 changes: 9 additions & 21 deletions pkg/sql/instrumentation.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,9 @@ type instrumentationHelper struct {
finishCollectionDiagnostics func()
withStatementTrace func(trace tracing.Recording, stmt string)

sp *tracing.Span
// If true, the instrumentation helper is responsible for finishing sp.
finishSpan bool
origCtx context.Context
evalCtx *tree.EvalContext
sp *tracing.Span
origCtx context.Context
evalCtx *tree.EvalContext

// If savePlanForStats is true, the explainPlan will be collected and returned
// via PlanForStats().
Expand Down Expand Up @@ -167,8 +165,6 @@ func (ih *instrumentationHelper) Setup(
ih.savePlanForStats = appStats.shouldSaveLogicalPlanDescription(fingerprint, implicitTxn)

if sp := tracing.SpanFromContext(ctx); sp != nil {
ih.sp = sp
ih.finishSpan = false
if sp.IsVerbose() {
// If verbose tracing was enabled at a higher level, stats
// collection is enabled so that stats are shown in the traces, but
Expand All @@ -186,18 +182,10 @@ func (ih *instrumentationHelper) Setup(

if !ih.collectBundle && ih.withStatementTrace == nil && ih.outputMode == unmodifiedOutput {
if ih.collectExecStats {
// If we need to collect stats, create a non-verbose child span. Stats
// will be added as structured metadata and processed in Finish.
ih.origCtx = ctx
if ih.sp != nil {
// The span present in the context is sufficient for us to
// record stats as structured metadata, so there is nothing else
// to do.
return ctx, true
}
// This is an edge case when the span is not present in the original
// context for some reason. This should *never* happen, but we
// choose to be defensive about it.
newCtx, ih.sp = tracing.EnsureChildSpan(ctx, cfg.AmbientCtx.Tracer, "traced statement")
ih.finishSpan = true
newCtx, ih.sp = tracing.EnsureChildSpan(ctx, cfg.AmbientCtx.Tracer, "traced statement", tracing.WithForceRealSpan())
return newCtx, true
}
return ctx, false
Expand All @@ -208,7 +196,6 @@ func (ih *instrumentationHelper) Setup(
ih.origCtx = ctx
ih.evalCtx = p.EvalContext()
newCtx, ih.sp = tracing.StartVerboseTrace(ctx, cfg.AmbientCtx.Tracer, "traced statement")
ih.finishSpan = true
return newCtx, true
}

Expand All @@ -224,9 +211,10 @@ func (ih *instrumentationHelper) Finish(
retErr error,
) error {
ctx := ih.origCtx
if ih.finishSpan {
ih.sp.Finish()
if ih.sp == nil {
return retErr
}
ih.sp.Finish()

// Record the statement information that we've collected.
// Note that in case of implicit transactions, the trace contains the auto-commit too.
Expand Down
6 changes: 3 additions & 3 deletions pkg/sql/logictest/testdata/logic_test/contention_event
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ INSERT INTO kv VALUES('k', 'v')

user root

statement ok
SET TRACING = on

# Scan all ranges of the table (note that we have intentionally
# split it into at least six ranges). This is better than a point
# lookup in that it gives tracing more of a chance to get things
Expand All @@ -47,9 +50,6 @@ user root
#
# NB: the contention event is not in our trace span but in one of its
# children, so it wouldn't be found if we filtered by the trace span ID.
#
# NB: this needs the 5node-pretend59315 config because otherwise the span is not
# tracked.
query B
WITH spans AS (
SELECT span_id
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inflight_trace_spans
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ user testuser
statement ok
BEGIN

statement ok
SET TRACING = on

let $curr_trace_id
SELECT * FROM crdb_internal.trace_id()

Expand Down
7 changes: 6 additions & 1 deletion pkg/sql/sem/builtins/builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -3634,7 +3634,12 @@ may increase either contention or retry errors, or both.`,
if sp == nil {
return tree.DNull, nil
}
return tree.NewDInt(tree.DInt(sp.GetRecording()[0].TraceID)), nil

traceID := sp.TraceID()
if traceID == 0 {
return tree.DNull, nil
}
return tree.NewDInt(tree.DInt(traceID)), nil
},
Info: "Returns the current trace ID or an error if no trace is open.",
// NB: possibly this is or could be made stable, but it's not worth it.
Expand Down
40 changes: 24 additions & 16 deletions pkg/sql/txn_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,12 @@ type txnState struct {

// Ctx is the context for everything running in this SQL txn.
// This is only set while the session's state is not stateNoTxn.
//
// It also embeds the tracing span associated with the SQL txn. These are
// often root spans, as SQL txns are frequently the level at which we do
// tracing. This context is hijacked when session tracing is enabled.
Ctx context.Context

// sp is the span corresponding to the SQL txn. These are often root spans, as
// SQL txns are frequently the level at which we do tracing.
sp *tracing.Span
// recordingThreshold, is not zero, indicates that sp is recording and that
// the recording should be dumped to the log if execution of the transaction
// took more than this.
Expand Down Expand Up @@ -154,25 +155,30 @@ func (ts *txnState) resetForNewSQLTxn(
// Create a context for this transaction. It will include a root span that
// will contain everything executed as part of the upcoming SQL txn, including
// (automatic or user-directed) retries. The span is closed by finishSQLTxn().
// TODO(andrei): figure out how to close these spans on server shutdown? Ties
// into a larger discussion about how to drain SQL and rollback open txns.
opName := sqlTxnName
txnCtx, sp := createRootOrChildSpan(connCtx, opName, tranCtx.tracer)
alreadyRecording := tranCtx.sessionTracing.Enabled()

var txnCtx context.Context
var sp *tracing.Span
if alreadyRecording {
// WithForceRealSpan is used to support the use of session tracing,
// which will start recording on this span.
txnCtx, sp = createRootOrChildSpan(connCtx, opName, tranCtx.tracer, tracing.WithForceRealSpan())
} else {
txnCtx, sp = createRootOrChildSpan(connCtx, opName, tranCtx.tracer)
}
if txnType == implicitTxn {
sp.SetTag("implicit", "true")
}

alreadyRecording := tranCtx.sessionTracing.Enabled()
duration := traceTxnThreshold.Get(&tranCtx.settings.SV)
if !alreadyRecording && (duration > 0) {
sp.SetVerbose(true)
ts.recordingThreshold = duration
ts.recordingStart = timeutil.Now()
}

ts.sp = sp
ts.Ctx, ts.cancel = contextutil.WithCancel(txnCtx)

ts.mon.Start(ts.Ctx, tranCtx.connMon, mon.BoundAccount{} /* reserved */)
ts.mu.Lock()
ts.mu.stmtCount = 0
Expand Down Expand Up @@ -207,16 +213,16 @@ func (ts *txnState) finishSQLTxn() {
ts.cancel()
ts.cancel = nil
}
if ts.sp == nil {
sp := tracing.SpanFromContext(ts.Ctx)
if sp == nil {
panic(errors.AssertionFailedf("No span in context? Was resetForNewSQLTxn() called previously?"))
}

if ts.recordingThreshold > 0 {
logTraceAboveThreshold(ts.Ctx, ts.sp.GetRecording(), "SQL txn", ts.recordingThreshold, timeutil.Since(ts.recordingStart))
logTraceAboveThreshold(ts.Ctx, sp.GetRecording(), "SQL txn", ts.recordingThreshold, timeutil.Since(ts.recordingStart))
}

ts.sp.Finish()
ts.sp = nil
sp.Finish()
ts.Ctx = nil
ts.mu.Lock()
ts.mu.txn = nil
Expand All @@ -239,10 +245,12 @@ func (ts *txnState) finishExternalTxn() {
ts.cancel()
ts.cancel = nil
}
if ts.sp != nil {
ts.sp.Finish()

if ts.Ctx != nil {
if sp := tracing.SpanFromContext(ts.Ctx); sp != nil {
sp.Finish()
}
}
ts.sp = nil
ts.Ctx = nil
ts.mu.Lock()
ts.mu.txn = nil
Expand Down
1 change: 0 additions & 1 deletion pkg/sql/txn_state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ func (tc *testContext) createOpenState(typ txnType) (fsm.State, *txnState) {
ts := txnState{
Ctx: ctx,
connCtx: tc.ctx,
sp: sp,
cancel: cancel,
sqlTimestamp: timeutil.Now(),
priority: roachpb.NormalUserPriority,
Expand Down
2 changes: 2 additions & 0 deletions pkg/util/tracing/span.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ func (sp *Span) SetVerboseRecursively(to bool) {

// ResetRecording clears any previously recorded information. This doesn't
// affect any auxiliary trace sinks such as net/trace or zipkin.
//
// TODO(irfansharif): Remove this, it's no longer used.
func (sp *Span) ResetRecording() {
sp.i.ResetRecording()
}
Expand Down
6 changes: 6 additions & 0 deletions pkg/util/tracing/span_inner.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ type spanInner struct {
}

func (s *spanInner) TraceID() uint64 {
if s.isNoop() {
return 0
}
return s.crdb.traceID
}

Expand Down Expand Up @@ -73,6 +76,9 @@ func (s *spanInner) ResetRecording() {
}

func (s *spanInner) GetRecording() Recording {
if s.isNoop() {
return nil
}
// If the span is not verbose, optimize by avoiding the tags.
// This span is likely only used to carry payloads around.
wantTags := s.crdb.recordingType() == RecordingVerbose
Expand Down