Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

util/tracing: simplify log messages in trace recordings #70502

Merged
merged 3 commits into from
Sep 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ require (
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.0.1
github.com/opennota/wd v0.0.0-20180911144301-b446539ab1e7 // indirect
github.com/opentracing/opentracing-go v1.2.0
github.com/peterbourgon/g2s v0.0.0-20170223122336-d4e7ad98afea // indirect
github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5
github.com/pierrre/geohash v1.0.0
Expand Down
11 changes: 2 additions & 9 deletions pkg/kv/bulk/sst_batcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import (
"math/rand"
"reflect"
"runtime"
"strings"
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
Expand Down Expand Up @@ -225,14 +224,8 @@ func runTestImport(t *testing.T, batchSizeValue int64) {
}
}
var splitRetries int
for _, rec := range getRec() {
for _, l := range rec.Logs {
for _, line := range l.Fields {
if strings.Contains(line.Value.StripMarkers(), "SSTable cannot be added spanning range bounds") {
splitRetries++
}
}
}
for _, sp := range getRec() {
splitRetries += tracing.CountLogMessages(sp, "SSTable cannot be added spanning range bounds")
}
if splitRetries != expectedSplitRetries {
t.Fatalf("expected %d split-caused retries, got %d", expectedSplitRetries, splitRetries)
Expand Down
22 changes: 8 additions & 14 deletions pkg/kv/kvserver/concurrency/concurrency_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1018,16 +1018,14 @@ func (m *monitor) collectRecordings() string {
rec := g.collect()
for _, span := range rec {
for _, log := range span.Logs {
for _, field := range log.Fields {
if prev > 0 {
prev--
continue
}
logs = append(logs, logRecord{
g: g, value: field.Value.StripMarkers(),
})
g.prevEvents++
if prev > 0 {
prev--
continue
}
logs = append(logs, logRecord{
g: g, value: log.Msg().StripMarkers(),
})
g.prevEvents++
}
}
if atomic.LoadInt32(&g.finished) == 1 {
Expand Down Expand Up @@ -1068,11 +1066,7 @@ func (m *monitor) hasNewEvents(g *monitoredGoroutine) bool {
events := 0
rec := g.collect()
for _, span := range rec {
for _, log := range span.Logs {
for range log.Fields {
events++
}
}
events += len(span.Logs)
}
return events > g.prevEvents
}
Expand Down
16 changes: 10 additions & 6 deletions pkg/server/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -916,11 +916,11 @@ func (n *Node) batchInternal(

var br *roachpb.BatchResponse
if err := n.stopper.RunTaskWithErr(ctx, "node.Node: batch", func(ctx context.Context) error {
var finishSpan func(*roachpb.BatchResponse)
var finishSpan func(context.Context, *roachpb.BatchResponse)
// Shadow ctx from the outer function. Written like this to pass the linter.
ctx, finishSpan = n.setupSpanForIncomingRPC(ctx, tenID)
// NB: wrapped to delay br evaluation to its value when returning.
defer func() { finishSpan(br) }()
defer func() { finishSpan(ctx, br) }()
if log.HasSpanOrEvent(ctx) {
log.Eventf(ctx, "node received request: %s", args.Summary())
}
Expand Down Expand Up @@ -1062,7 +1062,7 @@ func (n *Node) Batch(
// be nil in case no response is to be returned to the rpc caller.
func (n *Node) setupSpanForIncomingRPC(
ctx context.Context, tenID roachpb.TenantID,
) (context.Context, func(*roachpb.BatchResponse)) {
) (context.Context, func(context.Context, *roachpb.BatchResponse)) {
// The operation name matches the one created by the interceptor in the
// remoteTrace case below.
const opName = "/cockroach.roachpb.Internal/Batch"
Expand All @@ -1083,7 +1083,7 @@ func (n *Node) setupSpanForIncomingRPC(
}
}

finishSpan := func(br *roachpb.BatchResponse) {
finishSpan := func(ctx context.Context, br *roachpb.BatchResponse) {
if newSpan != nil {
newSpan.Finish()
}
Expand All @@ -1098,8 +1098,12 @@ func (n *Node) setupSpanForIncomingRPC(
// sensitive stripped out of the verbose messages. However,
// structured payloads stay untouched.
if rec := grpcSpan.GetRecording(); rec != nil {
maybeRedactRecording(tenID, rec)
br.CollectedSpans = append(br.CollectedSpans, rec...)
err := redactRecordingForTenant(tenID, rec)
if err == nil {
br.CollectedSpans = append(br.CollectedSpans, rec...)
} else {
log.Errorf(ctx, "error redacting trace recording: %s", err)
}
}
}
}
Expand Down
27 changes: 21 additions & 6 deletions pkg/server/node_tenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,38 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/redact"
)

var sRedactedMarker = redact.RedactableString(redact.EscapeBytes(nil))

func maybeRedactRecording(tenID roachpb.TenantID, rec tracing.Recording) {
// redactRecordingForTenant redacts the sensitive parts of log messages in the
// recording if the tenant to which this recording is intended is not the system
// tenant (the system tenant gets an. See https://github.com/cockroachdb/cockroach/issues/70407.
// The recording is modified in place.
//
// tenID is the tenant that will receive this recording.
func redactRecordingForTenant(tenID roachpb.TenantID, rec tracing.Recording) error {
if tenID == roachpb.SystemTenantID {
return
return nil
}
// For tenants, strip the verbose log messages. See:
// https://github.com/cockroachdb/cockroach/issues/70407
for i := range rec {
sp := &rec[i]
sp.Tags = nil
for j := range sp.Logs {
record := &sp.Logs[j]
for k := range record.Fields {
field := &record.Fields[k]
if record.Message != "" && !sp.RedactableLogs {
// If Message is set, the record should have been produced by a 22.1
// node that also sets RedactableLogs.
return errors.AssertionFailedf(
"recording has non-redactable span with the Message field set: %s", sp)
}
record.Message = record.Message.Redact()

// For compatibility with old versions, also redact DeprecatedFields.
for k := range record.DeprecatedFields {
field := &record.DeprecatedFields[k]
if field.Key != tracingpb.LogMessageField {
// We don't have any of these fields, but let's not take any
// chances (our dependencies might slip them in).
Expand All @@ -51,4 +65,5 @@ func maybeRedactRecording(tenID roachpb.TenantID, rec tracing.Recording) {
}
}
}
return nil
}
14 changes: 7 additions & 7 deletions pkg/server/node_tenant_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ import (
"go.opentelemetry.io/otel/attribute"
)

// TestMaybeRedactRecording verifies that maybeRedactRecording strips
// TestMaybeRedactRecording verifies that redactRecordingForTenant strips
// sensitive details for recordings consumed by tenants.
//
// See kvccl.TestTenantTracesAreRedacted for an end-to-end test of this.
func TestMaybeRedactRecording(t *testing.T) {
func TestRedactRecordingForTenant(t *testing.T) {
defer leaktest.AfterTest(t)()

const (
Expand Down Expand Up @@ -59,10 +59,10 @@ func TestMaybeRedactRecording(t *testing.T) {

t.Run("regular-tenant", func(t *testing.T) {
rec := mkRec()
maybeRedactRecording(roachpb.MakeTenantID(100), rec)
require.NoError(t, redactRecordingForTenant(roachpb.MakeTenantID(100), rec))
require.Zero(t, rec[0].Tags)
require.Len(t, rec[0].Logs, 1)
msg := rec[0].Logs[0].Fields[0].Value
msg := rec[0].Logs[0].Msg().StripMarkers()
t.Log(msg)
require.NotContains(t, msg, msgSensitive)
require.NotContains(t, msg, tagSensitive)
Expand All @@ -72,15 +72,15 @@ func TestMaybeRedactRecording(t *testing.T) {

t.Run("system-tenant", func(t *testing.T) {
rec := mkRec()
maybeRedactRecording(roachpb.SystemTenantID, rec)
require.NoError(t, redactRecordingForTenant(roachpb.SystemTenantID, rec))
require.Equal(t, map[string]string{
"_verbose": "1",
"all_span_tags_are_stripped": "because_no_redactability",
"tag_not_sensitive": tagNotSensitive,
"tag_sensitive": tagSensitive,
}, rec[0].Tags)
require.Len(t, rec[0].Logs, 1)
msg := rec[0].Logs[0].Fields[0].Value
msg := rec[0].Logs[0].Msg().StripMarkers()
t.Log(msg)
require.Contains(t, msg, msgSensitive)
require.Contains(t, msg, tagSensitive)
Expand All @@ -93,7 +93,7 @@ func TestMaybeRedactRecording(t *testing.T) {
// you're here to see why this test failed to compile, ensure that the
// change you're making to RecordedSpan does not include new sensitive data
// that may leak from the KV layer to tenants. If it does, update
// maybeRedactRecording appropriately.
// redactRecordingForTenant appropriately.
type calcifiedRecordedSpan struct {
TraceID uint64
SpanID uint64
Expand Down
16 changes: 2 additions & 14 deletions pkg/server/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -659,23 +659,11 @@ func (s *statusServer) Allocator(

func recordedSpansToTraceEvents(spans []tracingpb.RecordedSpan) []*serverpb.TraceEvent {
var output []*serverpb.TraceEvent
var buf bytes.Buffer
for _, sp := range spans {
for _, entry := range sp.Logs {
event := &serverpb.TraceEvent{
Time: entry.Time,
}
if len(entry.Fields) == 1 {
event.Message = entry.Fields[0].Value.StripMarkers()
} else {
buf.Reset()
for i, f := range entry.Fields {
if i != 0 {
buf.WriteByte(' ')
}
fmt.Fprintf(&buf, "%s:%v", f.Key, f.Value)
}
event.Message = buf.String()
Time: entry.Time,
Message: entry.Msg().StripMarkers(),
}
output = append(output, event)
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -2453,7 +2453,7 @@ func getMessagesForSubtrace(
allLogs = append(allLogs,
logRecordRow{
timestamp: logTime,
msg: span.Logs[i].Msg(),
msg: span.Logs[i].Msg().StripMarkers(),
span: span,
// Add 1 to the index to account for the first dummy message in a
// span.
Expand Down
10 changes: 4 additions & 6 deletions pkg/sql/rowexec/tablereader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,13 +457,11 @@ func TestLimitScans(t *testing.T) {
}
}
for _, l := range span.Logs {
for _, f := range l.Fields {
match := re.FindStringSubmatch(f.Value.StripMarkers())
if match == nil {
continue
}
ranges[match[1]] = struct{}{}
match := re.FindStringSubmatch(l.Msg().StripMarkers())
if match == nil {
continue
}
ranges[match[1]] = struct{}{}
}
}
if len(ranges) != 1 {
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/log/ambient_context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func TestAnnotateCtxSpan(t *testing.T) {
sp2.Finish()
sp1.Finish()

if err := tracing.TestingCheckRecordedSpans(sp1.GetRecording(), `
if err := tracing.CheckRecordedSpans(sp1.GetRecording(), `
span: root
tags: _verbose=1
event: a
Expand Down
4 changes: 2 additions & 2 deletions pkg/util/log/trace_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func TestTrace(t *testing.T) {
return ctxWithSpan, sp
},
check: func(t *testing.T, _ context.Context, sp *tracing.Span) {
if err := tracing.TestingCheckRecordedSpans(sp.GetRecording(), `
if err := tracing.CheckRecordedSpans(sp.GetRecording(), `
span: s
tags: _verbose=1
event: test1
Expand Down Expand Up @@ -105,7 +105,7 @@ func TestTraceWithTags(t *testing.T) {
log.Info(ctxWithSpan, "log")

sp.Finish()
if err := tracing.TestingCheckRecordedSpans(sp.GetRecording(), `
if err := tracing.CheckRecordedSpans(sp.GetRecording(), `
span: s
tags: _verbose=1
event: [tag=1] test1
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/log/trace_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ func TestEventLogAndTrace(t *testing.T) {
sp.Finish()
el.Finish()

if err := tracing.TestingCheckRecordedSpans(sp.GetRecording(), `
if err := tracing.CheckRecordedSpans(sp.GetRecording(), `
span: s
tags: _verbose=1
event: test3
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/stop/stopper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,7 @@ func TestStopperRunAsyncTaskTracing(t *testing.T) {

s.Stop(ctx)
finish()
require.NoError(t, tracing.TestingCheckRecordedSpans(getRecording(), `
require.NoError(t, tracing.CheckRecordedSpans(getRecording(), `
span: parent
span: async child same trace
event: async 2`))
Expand Down
2 changes: 0 additions & 2 deletions pkg/util/tracing/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ go_library(
"@com_github_gogo_protobuf//proto",
"@com_github_gogo_protobuf//types",
"@com_github_jaegertracing_jaeger//model/json",
"@com_github_opentracing_opentracing_go//:opentracing-go",
"@com_github_opentracing_opentracing_go//log",
"@com_github_petermattis_goid//:goid",
"@com_github_pmezard_go_difflib//difflib",
"@io_opentelemetry_go_otel//attribute",
Expand Down
10 changes: 5 additions & 5 deletions pkg/util/tracing/collector/collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ func TestTracingCollectorGetSpanRecordings(t *testing.T) {
nodeRecordings := getSpansFromAllNodes(localTraceID)
node1Recordings := nodeRecordings[roachpb.NodeID(1)]
require.Equal(t, 1, len(node1Recordings))
require.NoError(t, tracing.TestingCheckRecordedSpans(node1Recordings[0], `
require.NoError(t, tracing.CheckRecordedSpans(node1Recordings[0], `
span: root
tags: _unfinished=1 _verbose=1
event: structured=root
Expand All @@ -158,7 +158,7 @@ func TestTracingCollectorGetSpanRecordings(t *testing.T) {
`))
node2Recordings := nodeRecordings[roachpb.NodeID(2)]
require.Equal(t, 1, len(node2Recordings))
require.NoError(t, tracing.TestingCheckRecordedSpans(node2Recordings[0], `
require.NoError(t, tracing.CheckRecordedSpans(node2Recordings[0], `
span: root.child.remotechild
tags: _unfinished=1 _verbose=1
event: structured=root.child.remotechild
Expand All @@ -171,18 +171,18 @@ func TestTracingCollectorGetSpanRecordings(t *testing.T) {
nodeRecordings := getSpansFromAllNodes(remoteTraceID)
node1Recordings := nodeRecordings[roachpb.NodeID(1)]
require.Equal(t, 2, len(node1Recordings))
require.NoError(t, tracing.TestingCheckRecordedSpans(node1Recordings[0], `
require.NoError(t, tracing.CheckRecordedSpans(node1Recordings[0], `
span: root2.child.remotechild
tags: _unfinished=1 _verbose=1
`))
require.NoError(t, tracing.TestingCheckRecordedSpans(node1Recordings[1], `
require.NoError(t, tracing.CheckRecordedSpans(node1Recordings[1], `
span: root2.child.remotechild2
tags: _unfinished=1 _verbose=1
`))

node2Recordings := nodeRecordings[roachpb.NodeID(2)]
require.Equal(t, 1, len(node2Recordings))
require.NoError(t, tracing.TestingCheckRecordedSpans(node2Recordings[0], `
require.NoError(t, tracing.CheckRecordedSpans(node2Recordings[0], `
span: root2
tags: _unfinished=1 _verbose=1
event: structured=root2
Expand Down
6 changes: 4 additions & 2 deletions pkg/util/tracing/crdbspan.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,8 +297,10 @@ func (s *crdbSpan) record(msg redact.RedactableString) {
now = time.Now()
}
logRecord := &tracingpb.LogRecord{
Time: now,
Fields: []tracingpb.LogRecord_Field{
Time: now,
Message: msg,
// Compatibility with 21.2.
DeprecatedFields: []tracingpb.LogRecord_Field{
{Key: tracingpb.LogMessageField, Value: msg},
},
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/tracing/grpc_interceptor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ func TestGRPCInterceptors(t *testing.T) {
span: /cockroach.testutils.grpcutils.GRPCTest/%[1]s
tags: span.kind=server test-baggage-key=test-baggage-value
event: structured=magic-value`, tc.name)
require.NoError(t, tracing.TestingCheckRecordedSpans(finalRecs, exp))
require.NoError(t, tracing.CheckRecordedSpans(finalRecs, exp))
})
}
testutils.SucceedsSoon(t, func() error {
Expand Down
Loading