Skip to content

Commit

Permalink
tracing: add execution tracing
Browse files Browse the repository at this point in the history
This commit adds the ability to capture execution traces from the past
few seconds of execution when something seems wrong. Often when a timer
fires and we detect something is wrong, the relevant information is
already lost. The new flight recorder in Go
(golang/go#63185) creates a ring buffer that
enables capturing these traces. This commit adds the capability to
capture traces but doesn't enable it anywhere.

There is a small performance cost of having the flight recorder always
enabled, so some performance testing is required to determine if we need
to protect this behind a cluster setting.

Epic: none

Release note: None
  • Loading branch information
andrewbaptist committed Jan 22, 2025
1 parent 2f2c9b0 commit 2cee08f
Show file tree
Hide file tree
Showing 13 changed files with 148 additions and 28 deletions.
24 changes: 12 additions & 12 deletions DEPS.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -11659,10 +11659,10 @@ def go_deps():
name = "org_golang_x_exp",
build_file_proto_mode = "disable_global",
importpath = "golang.org/x/exp",
sha256 = "3e3717f5151e8c2ebf267b4d53698b97847c0de144683c51b74ab7edf5039fa8",
strip_prefix = "golang.org/x/exp@v0.0.0-20231110203233-9a3e6036ecaa",
sha256 = "e45242309fed2820ca11b50763ec50022317c841c604b61b7c56268fb948ea00",
strip_prefix = "golang.org/x/exp@v0.0.0-20241217172543-b2144cdd0a67",
urls = [
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/exp/org_golang_x_exp-v0.0.0-20231110203233-9a3e6036ecaa.zip",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/exp/org_golang_x_exp-v0.0.0-20241217172543-b2144cdd0a67.zip",
],
)
go_repository(
Expand Down Expand Up @@ -11719,20 +11719,20 @@ def go_deps():
name = "org_golang_x_mod",
build_file_proto_mode = "disable_global",
importpath = "golang.org/x/mod",
sha256 = "3c3528c39639b7cd699c121c100ddb71ab49f94bff257a4a3935e3ae9e8571fc",
strip_prefix = "golang.org/x/mod@v0.20.0",
sha256 = "0d0a96054b32dfa40cec2a2868bf87cdda31e3bb2dac0a5e2218efe3ef3ef3cc",
strip_prefix = "golang.org/x/mod@v0.22.0",
urls = [
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/mod/org_golang_x_mod-v0.20.0.zip",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/mod/org_golang_x_mod-v0.22.0.zip",
],
)
go_repository(
name = "org_golang_x_net",
build_file_proto_mode = "disable_global",
importpath = "golang.org/x/net",
sha256 = "c6f7bde4bb418d1f5ee5dc437d09ce9f10743ddba043cdca82eb57ddeb18d6da",
strip_prefix = "golang.org/x/net@v0.28.0",
sha256 = "ced2b8369a529613741b5bfe1f0df0dc98545d9203341cfcef5f824cdd961016",
strip_prefix = "golang.org/x/net@v0.32.0",
urls = [
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/net/org_golang_x_net-v0.28.0.zip",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/net/org_golang_x_net-v0.32.0.zip",
],
)
go_repository(
Expand Down Expand Up @@ -11819,10 +11819,10 @@ def go_deps():
name = "org_golang_x_tools",
build_file_proto_mode = "disable_global",
importpath = "golang.org/x/tools",
sha256 = "92607be1cacf4647fd31b19ee64b1a7c198178f1005c75371e38e7b08fb138e7",
strip_prefix = "golang.org/x/tools@v0.24.0",
sha256 = "3ac1c5530724296b8ed1f5b3df6c2aa1f6312371ab20812ad5fcb35ceaf2a177",
strip_prefix = "golang.org/x/tools@v0.28.0",
urls = [
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/tools/org_golang_x_tools-v0.24.0.zip",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/tools/org_golang_x_tools-v0.28.0.zip",
],
)
go_repository(
Expand Down
8 changes: 4 additions & 4 deletions build/bazelutil/distdir_files.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -1097,14 +1097,14 @@ DISTDIR_FILES = {
"https://storage.googleapis.com/cockroach-godeps/gomod/goji.io/io_goji-v2.0.2+incompatible.zip": "1ea69b28e356cb91381ce2339004fcf144ad1b268c9e3497c9ef304751ae0bb3",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/arch/org_golang_x_arch-v0.0.0-20180920145803-b19384d3c130.zip": "9f67b677a3fefc503111d9aa7df8bacd2677411b0fcb982eb1654aa6d14cc3f8",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/crypto/org_golang_x_crypto-v0.31.0.zip": "7cbc5fbc206b07db3c691ad59bc1b31f2c32a1dae2b04c9a7772f93a696381bf",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/exp/org_golang_x_exp-v0.0.0-20231110203233-9a3e6036ecaa.zip": "3e3717f5151e8c2ebf267b4d53698b97847c0de144683c51b74ab7edf5039fa8",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/exp/org_golang_x_exp-v0.0.0-20241217172543-b2144cdd0a67.zip": "e45242309fed2820ca11b50763ec50022317c841c604b61b7c56268fb948ea00",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/exp/shiny/org_golang_x_exp_shiny-v0.0.0-20230801115018-d63ba01acd4b.zip": "3b9053a5c76c778ca05061df763a8e9aa8a6cac9d5f0f80d18d81922f98a001d",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/exp/typeparams/org_golang_x_exp_typeparams-v0.0.0-20221208152030-732eee02a75a.zip": "9bd73f186851c6229484f486981f608d16e2b86acbbef6f4f7cc0480a508a4a4",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/image/org_golang_x_image-v0.21.0.zip": "7ca937a1f9501b5d0b46631a6813f833292e33a9c5070f03630f18ab8d65bba3",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/lint/org_golang_x_lint-v0.0.0-20210508222113-6edffad5e616.zip": "0a4a5ebd2b1d79e7f480cbf5a54b45a257ae1ec9d11f01688efc5c35268d4603",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/mobile/org_golang_x_mobile-v0.0.0-20190719004257-d2bd2a29d028.zip": "6b946c7da47acf3b6195336fd071bfc73d543cefab73f2d27528c5dc1dc829ec",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/mod/org_golang_x_mod-v0.20.0.zip": "3c3528c39639b7cd699c121c100ddb71ab49f94bff257a4a3935e3ae9e8571fc",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/net/org_golang_x_net-v0.28.0.zip": "c6f7bde4bb418d1f5ee5dc437d09ce9f10743ddba043cdca82eb57ddeb18d6da",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/mod/org_golang_x_mod-v0.22.0.zip": "0d0a96054b32dfa40cec2a2868bf87cdda31e3bb2dac0a5e2218efe3ef3ef3cc",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/net/org_golang_x_net-v0.32.0.zip": "ced2b8369a529613741b5bfe1f0df0dc98545d9203341cfcef5f824cdd961016",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/oauth2/org_golang_x_oauth2-v0.7.0.zip": "b682f8cf62ed36f3bec9f8a832ff61a2af1124f31f42c4e1e3f3efd23d88f93f",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/perf/org_golang_x_perf-v0.0.0-20230113213139-801c7ef9e5c5.zip": "bc1b902e645fdd5d210b7db8f3280833af225b131dab5842d7a6d32a676f80f5",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/sync/org_golang_x_sync-v0.10.0.zip": "94ea75ea625ecb8d81ab473a2d7e03433e63083768cd27d48a03f8c1c9da3d8d",
Expand All @@ -1113,7 +1113,7 @@ DISTDIR_FILES = {
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/term/org_golang_x_term-v0.27.0.zip": "a4cf3d2cab860c5aad555d58c2545ebced118c1610d854fdb302b9c2866d858e",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/text/org_golang_x_text-v0.21.0.zip": "be3db791651af6f2cb0225aa5d5578c23149b2017246ba8e59586080baadd612",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/tools/go/vcs/org_golang_x_tools_go_vcs-v0.1.0-deprecated.zip": "ab155d94f90a98a5112967b89bfcd26b5825c1cd6875a5246c7905a568387260",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/tools/org_golang_x_tools-v0.24.0.zip": "92607be1cacf4647fd31b19ee64b1a7c198178f1005c75371e38e7b08fb138e7",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/tools/org_golang_x_tools-v0.28.0.zip": "3ac1c5530724296b8ed1f5b3df6c2aa1f6312371ab20812ad5fcb35ceaf2a177",
"https://storage.googleapis.com/cockroach-godeps/gomod/golang.org/x/xerrors/org_golang_x_xerrors-v0.0.0-20220907171357-04be3eba64a2.zip": "b9c481db33c4b682ba8ba348018ddbd2155bd227cc38ff9f6b4cb2b74bbc3c14",
"https://storage.googleapis.com/cockroach-godeps/gomod/gonum.org/v1/gonum/org_gonum_v1_gonum-v0.15.1.zip": "7a1b124a144b2c97a29829464d4b7258e04235c1fb14bbcea902086618414a43",
"https://storage.googleapis.com/cockroach-godeps/gomod/gonum.org/v1/netlib/org_gonum_v1_netlib-v0.0.0-20190331212654-76723241ea4e.zip": "ed4dca5026c9ab5410d23bbe21c089433ca58a19bd2902311c6a91791142a687",
Expand Down
8 changes: 4 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@ go 1.22.8
// for behavior changes, just like we would after a go upgrade.
require (
golang.org/x/crypto v0.31.0
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa
golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67
golang.org/x/exp/typeparams v0.0.0-20221208152030-732eee02a75a // indirect
golang.org/x/mod v0.20.0 // indirect
golang.org/x/net v0.28.0
golang.org/x/mod v0.22.0 // indirect
golang.org/x/net v0.32.0
golang.org/x/oauth2 v0.7.0
golang.org/x/sync v0.10.0
golang.org/x/sys v0.28.0
golang.org/x/text v0.21.0
golang.org/x/time v0.3.0
golang.org/x/tools v0.24.0
golang.org/x/tools v0.28.0
)

// The following dependencies are key infrastructure dependencies and
Expand Down
16 changes: 8 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2564,8 +2564,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ=
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE=
golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67 h1:1UoZQm6f0P/ZO0w1Ri+f+ifG/gXhegadRdwBIXEFWDo=
golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67/go.mod h1:qj5a5QZpwLU2NLQudwIN5koi3beDhSAlJwa67PuM98c=
golang.org/x/exp/typeparams v0.0.0-20221208152030-732eee02a75a h1:Jw5wfR+h9mnIYH+OtGT2im5wV1YGGDora5vTv/aa5bE=
golang.org/x/exp/typeparams v0.0.0-20221208152030-732eee02a75a/go.mod h1:AbB0pIl9nAr9wVwH+Z2ZpaocVmF5I4GyWCDIsVjR0bk=
golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
Expand Down Expand Up @@ -2608,8 +2608,8 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4=
golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
golang.org/x/net v0.0.0-20170114055629-f2499483f923/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180530234432-1e491301e022/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
Expand Down Expand Up @@ -2687,8 +2687,8 @@ golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI=
golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs=
golang.org/x/oauth2 v0.0.0-20170207211851-4464e7848382/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
Expand Down Expand Up @@ -3001,8 +3001,8 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps=
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8=
golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw=
golang.org/x/tools/go/vcs v0.1.0-deprecated h1:cOIJqWBl99H1dH5LWizPa+0ImeeJq3t3cJjaeOWUAL4=
golang.org/x/tools/go/vcs v0.1.0-deprecated/go.mod h1:zUrvATBAvEI9535oC0yWYsLsHIV4Z7g63sNPVMtuBy8=
golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
Expand Down
3 changes: 3 additions & 0 deletions pkg/base/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,7 @@ const (
// InflightTraceDir is the directory name where the job trace dumper stores traces
// when a job opts in to dumping its execution traces.
InflightTraceDir = "inflight_trace_dump"

// ExecutionTraceDir is the directory name where execution traces are stored. These traces are generated when code calls Tracer.FlightRecorderSnapshot().
ExecutionTraceDir = "execution_trace_dump"
)
1 change: 1 addition & 0 deletions pkg/cli/log_flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ func setupLogging(ctx context.Context, cmd *cobra.Command, isServerCmd, applyCon
serverCfg.HeapProfileDirName = filepath.Join(outputDirectory, base.HeapProfileDir)
serverCfg.CPUProfileDirName = filepath.Join(outputDirectory, base.CPUProfileDir)
serverCfg.InflightTraceDirName = filepath.Join(outputDirectory, base.InflightTraceDir)
serverCfg.ExecutionTraceDirName = filepath.Join(outputDirectory, base.ExecutionTraceDir)

return nil
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,9 @@ type BaseConfig struct {
// InflightTraceDirName is the directory name for job traces.
InflightTraceDirName string

// ExecutionTraceDirName is the directory name for execution traces.
ExecutionTraceDirName string

// DefaultZoneConfig is used to set the default zone config inside the server.
// It can be overridden during tests by setting the DefaultZoneConfigOverride
// server testing knob. Whatever is installed here is in turn used to
Expand Down
8 changes: 8 additions & 0 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,14 @@ func NewServer(cfg Config, stopper *stop.Stopper) (serverctl.ServerStartupInterf
panic(errors.New("no tracer set in AmbientCtx"))
}

// Set up and start the execution tracer.
fr, err := tracing.NewFlightRecorder(ctx, cfg.ExecutionTraceDirName)
if err != nil {
return nil, err
}
stopper.AddCloser(fr)
cfg.AmbientCtx.Tracer.SetFlightRecorder(fr)

clock, err := newClockFromConfig(ctx, cfg.BaseConfig)
if err != nil {
return nil, err
Expand Down
2 changes: 2 additions & 0 deletions pkg/server/server_controller_new_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ func makeSharedProcessTenantServerConfig(
baseCfg.HeapProfileDirName = kvServerCfg.BaseConfig.HeapProfileDirName
baseCfg.CPUProfileDirName = kvServerCfg.BaseConfig.CPUProfileDirName
baseCfg.GoroutineDumpDirName = kvServerCfg.BaseConfig.GoroutineDumpDirName
baseCfg.ExecutionTraceDirName = kvServerCfg.BaseConfig.ExecutionTraceDirName

// The ListenerFactory allows us to dynamically choose a
// listen port based on the user's configuration.
Expand Down Expand Up @@ -291,6 +292,7 @@ func makeSharedProcessTenantServerConfig(
baseCfg.GoroutineDumpDirName = ""
baseCfg.HeapProfileDirName = ""
baseCfg.CPUProfileDirName = ""
baseCfg.ExecutionTraceDirName = ""

// Expose the process-wide runtime metrics to the tenant's metric
// collector. Since they are process-wide, all tenants can see them.
Expand Down
4 changes: 4 additions & 0 deletions pkg/server/testserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,9 @@ func makeTestConfigFromParams(params base.TestServerArgs) Config {
if cfg.CPUProfileDirName == "" {
cfg.CPUProfileDirName = filepath.Join(storeSpec.Path, "logs", base.CPUProfileDir)
}
if cfg.ExecutionTraceDirName == "" {
cfg.ExecutionTraceDirName = filepath.Join(storeSpec.Path, "logs", base.ExecutionTraceDir)
}
}
}
cfg.Stores = base.StoreSpecList{Specs: params.StoreSpecs}
Expand Down Expand Up @@ -1710,6 +1713,7 @@ func (ts *testServer) StartTenant(
baseCfg.GoroutineDumpDirName = ts.Cfg.BaseConfig.GoroutineDumpDirName
baseCfg.ExternalIODirConfig = params.ExternalIODirConfig
baseCfg.ExternalIODir = params.ExternalIODir
baseCfg.ExecutionTraceDirName = ts.Cfg.BaseConfig.ExecutionTraceDirName

// Grant the tenant the default capabilities.
if err := ts.grantDefaultTenantCapabilities(ctx, params.TenantID, params.SkipTenantCheck); err != nil {
Expand Down
2 changes: 2 additions & 0 deletions pkg/util/tracing/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ go_library(
"context.go",
"crdbspan.go",
"doc.go",
"flight_recorder.go",
"span.go",
"span_finalizer_race_off.go",
"span_finalizer_race_on.go",
Expand Down Expand Up @@ -51,6 +52,7 @@ go_library(
"@io_opentelemetry_go_otel_sdk//trace",
"@io_opentelemetry_go_otel_trace//:trace",
"@org_golang_google_grpc//metadata",
"@org_golang_x_exp//trace",
"@org_golang_x_net//trace",
],
)
Expand Down
82 changes: 82 additions & 0 deletions pkg/util/tracing/flight_recorder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package tracing

import (
"bytes"
"context"
"fmt"
"os"
"time"

"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
"golang.org/x/exp/trace"
)

const (
	// timeFormat is the time layout embedded in snapshot file names. It uses
	// underscores instead of colons in the time-of-day portion.
	timeFormat = "2006-01-02T15_04_05.000"

	// min_record_interval is the minimum time that must elapse between two
	// snapshot attempts.
	min_record_interval = time.Minute
)

// FlightRecorder wraps a trace.FlightRecorder and writes snapshots of its
// execution trace buffer to files under outputDir. Snapshot attempts are rate
// limited to one per min_record_interval.
type FlightRecorder struct {
// fr is the underlying execution trace recorder.
fr *trace.FlightRecorder
// outputDir is the directory that snapshot files are written to.
outputDir string
// lastSnapshot prevents the flight recorder from being called too frequently.
// NOTE(review): MaybeSnapshot reads and writes this field without any
// locking; confirm callers never snapshot concurrently.
lastSnapshot time.Time
}

// NewFlightRecorder builds a FlightRecorder that writes its snapshots into
// outputDir and immediately starts the underlying trace.FlightRecorder.
//
// It returns an error if outputDir is empty, if the directory (including any
// missing parents) cannot be created, or if the recorder fails to start. The
// ctx argument is currently unused.
func NewFlightRecorder(ctx context.Context, outputDir string) (*FlightRecorder, error) {
	if len(outputDir) == 0 {
		return nil, errors.Newf("output directory not set: %s", outputDir)
	}

	// Make sure the destination exists before any tracing is started.
	mkdirErr := os.MkdirAll(outputDir, 0755)
	if mkdirErr != nil {
		return nil, mkdirErr
	}

	rec := &FlightRecorder{outputDir: outputDir}
	rec.fr = trace.NewFlightRecorder()

	// TODO: Only start if the cluster setting is enabled.
	startErr := rec.fr.Start()
	if startErr != nil {
		return nil, startErr
	}
	return rec, nil
}

// MaybeSnapshot writes the flight recorder's current trace buffer to a
// timestamped file in the output directory, unless a snapshot was already
// attempted within the last min_record_interval. It returns true only if a
// trace file was written; rate limiting and any failure to collect or write
// the trace both yield false.
//
// A nil receiver is treated as "flight recording disabled" and returns false,
// so a Tracer that never had a recorder installed can call this safely.
//
// NOTE(review): lastSnapshot is read and written here without
// synchronization; confirm callers serialize calls or add locking.
func (fr *FlightRecorder) MaybeSnapshot() bool {
	if fr == nil {
		return false
	}

	now := timeutil.Now()
	// Don't take the snapshot if there was already one in the past interval.
	if now.Sub(fr.lastSnapshot) < min_record_interval {
		return false
	}
	// Record the attempt up front so that a failing snapshot is rate limited
	// just like a successful one.
	fr.lastSnapshot = now

	// Attempt to grab the snapshot.
	var b bytes.Buffer
	if _, err := fr.fr.WriteTo(&b); err != nil {
		// Best effort: the caller only learns that no snapshot was taken.
		return false
	}
	filename := fmt.Sprintf(
		"%s/trace.%s",
		fr.outputDir,
		now.Format(timeFormat),
	)
	// Write it to a file. Trace files are plain data, so they are created
	// without executable bits.
	if err := os.WriteFile(filename, b.Bytes(), 0o644); err != nil {
		return false
	}
	return true
}

// Close stops the underlying flight recorder, discarding any error returned
// by Stop.
func (fr *FlightRecorder) Close() {
// We don't care about any errors from stopping the flight recorder as we
// are likely shutting down.
_ = fr.fr.Stop()
}
15 changes: 15 additions & 0 deletions pkg/util/tracing/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ type Tracer struct {
stack debugutil.SafeStack
// closed is set on Close().
_closed int32 // accessed atomically
fr *FlightRecorder
}

// SpanRegistry is a map that references all non-Finish'ed local root spans,
Expand Down Expand Up @@ -674,6 +675,20 @@ func NewTracerWithOpt(ctx context.Context, opts ...TracerOption) *Tracer {
return t
}

// SetFlightRecorder installs fr as the flight recorder used by
// FlightRecorderSnapshot, replacing any recorder that was set before. The
// assignment is a plain field write with no synchronization.
func (t *Tracer) SetFlightRecorder(fr *FlightRecorder) {
t.fr = fr
}

// FlightRecorderSnapshot will take an execution trace snapshot of the last few
// seconds of the system if execution traces are enabled and sufficient time has
// passed since the last snapshot. Typically this method should only be called
// if something took unusually long to complete and you want to save and analyze
// what happened before this.
//
// It returns true if a snapshot was written. If no flight recorder has been
// installed via SetFlightRecorder it returns false without doing anything.
func (t *Tracer) FlightRecorderSnapshot() bool {
	// Tracers that never had a recorder installed have a nil fr; treat that
	// as "execution traces disabled" instead of dereferencing it.
	if t.fr == nil {
		return false
	}
	return t.fr.MaybeSnapshot()
}

// tracerOptions groups configuration for Tracer construction.
type tracerOptions struct {
sv *settings.Values
Expand Down

0 comments on commit 2cee08f

Please sign in to comment.