Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

util: add tiflash wait time in execution summary (#57058) #57371

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions DEPS.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -5815,13 +5815,13 @@ def go_deps():
name = "com_github_pingcap_tipb",
build_file_proto_mode = "disable_global",
importpath = "github.com/pingcap/tipb",
sha256 = "b39e154272ba36d145c6049947a012a76be740b32a44a46d7253caa145c56cc9",
strip_prefix = "github.com/pingcap/tipb@v0.0.0-20241008083645-0bcddae67837",
sha256 = "1b707429b5b938a05b250b5770be2a6aa243d6a4983d23b01bbca164e86b3e3c",
strip_prefix = "github.com/pingcap/tipb@v0.0.0-20241022082558-0607513e7fa4",
urls = [
"http://bazel-cache.pingcap.net:8080/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241008083645-0bcddae67837.zip",
"http://ats.apps.svc/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241008083645-0bcddae67837.zip",
"https://cache.hawkingrei.com/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241008083645-0bcddae67837.zip",
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241008083645-0bcddae67837.zip",
"http://bazel-cache.pingcap.net:8080/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241022082558-0607513e7fa4.zip",
"http://ats.apps.svc/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241022082558-0607513e7fa4.zip",
"https://cache.hawkingrei.com/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241022082558-0607513e7fa4.zip",
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241022082558-0607513e7fa4.zip",
],
)
go_repository(
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ require (
github.com/pingcap/log v1.1.1-0.20240314023424-862ccc32f18d
github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5
github.com/pingcap/tidb/pkg/parser v0.0.0-20211011031125-9b13dc409c5e
github.com/pingcap/tipb v0.0.0-20241008083645-0bcddae67837
github.com/pingcap/tipb v0.0.0-20241022082558-0607513e7fa4
github.com/prometheus/client_golang v1.20.5
github.com/prometheus/client_model v0.6.1
github.com/prometheus/common v0.57.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -678,8 +678,8 @@ github.com/pingcap/log v1.1.1-0.20240314023424-862ccc32f18d h1:y3EueKVfVykdpTyfU
github.com/pingcap/log v1.1.1-0.20240314023424-862ccc32f18d/go.mod h1:ORfBOFp1eteu2odzsyaxI+b8TzJwgjwyQcGhI+9SfEA=
github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5 h1:T4pXRhBflzDeAhmOQHNPRRogMYxP13V7BkYw3ZsoSfE=
github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5/go.mod h1:rlimy0GcTvjiJqvD5mXTRr8O2eNZPBrcUgiWVYp9530=
github.com/pingcap/tipb v0.0.0-20241008083645-0bcddae67837 h1:tyIymn821fB8gUmqafdvLlcFkVOpgyJXImoYJ8n9oJE=
github.com/pingcap/tipb v0.0.0-20241008083645-0bcddae67837/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs=
github.com/pingcap/tipb v0.0.0-20241022082558-0607513e7fa4 h1:wvaUybJT0fUReCDcFtV3CEvMuI9iu+G7IW72tbSlil4=
github.com/pingcap/tipb v0.0.0-20241022082558-0607513e7fa4/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs=
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
Expand Down
102 changes: 96 additions & 6 deletions pkg/util/execdetails/execdetails.go
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,7 @@ type basicCopRuntimeStats struct {
procTimes Percentile[Duration]
// executor extra infos
tiflashScanContext TiFlashScanContext
tiflashWaitSummary TiFlashWaitSummary
}

type canGetFloat64 interface {
Expand Down Expand Up @@ -682,7 +683,10 @@ func (p *Percentile[valueType]) Sum() float64 {
// String implements the RuntimeStats interface.
func (e *basicCopRuntimeStats) String() string {
if e.storeType == "tiflash" {
return fmt.Sprintf("time:%v, loops:%d, threads:%d, ", FormatDuration(time.Duration(e.consume.Load())), e.loop.Load(), e.threads) + e.tiflashScanContext.String()
if e.tiflashWaitSummary.CanBeIgnored() {
return fmt.Sprintf("time:%v, loops:%d, threads:%d, %s", FormatDuration(time.Duration(e.consume.Load())), e.loop.Load(), e.threads, e.tiflashScanContext.String())
}
return fmt.Sprintf("time:%v, loops:%d, threads:%d, %s, %s", FormatDuration(time.Duration(e.consume.Load())), e.loop.Load(), e.threads, e.tiflashWaitSummary.String(), e.tiflashScanContext.String())
}
return fmt.Sprintf("time:%v, loops:%d", FormatDuration(time.Duration(e.consume.Load())), e.loop.Load())
}
Expand All @@ -700,6 +704,7 @@ func (e *basicCopRuntimeStats) Clone() RuntimeStats {
stats.consume.Store(e.consume.Load())
stats.rows.Store(e.rows.Load())
stats.tiflashScanContext = e.tiflashScanContext.Clone()
stats.tiflashWaitSummary = e.tiflashWaitSummary.Clone()
return stats
}

Expand All @@ -720,6 +725,7 @@ func (e *basicCopRuntimeStats) Merge(rs RuntimeStats) {
e.procTimes.MergePercentile(&tmp.procTimes)
}
e.tiflashScanContext.Merge(tmp.tiflashScanContext)
e.tiflashWaitSummary.Merge(tmp.tiflashWaitSummary)
}

// Tp implements the RuntimeStats interface.
Expand Down Expand Up @@ -799,7 +805,14 @@ func (crs *CopRuntimeStats) RecordOneCopTask(address string, summary *tipb.Execu
totalVectorIdxReadVecTimeMs: summary.GetTiflashScanContext().GetTotalVectorIdxReadVecTimeMs(),
totalVectorIdxReadOthersTimeMs: summary.GetTiflashScanContext().GetTotalVectorIdxReadOthersTimeMs(),
},
tiflashWaitSummary: TiFlashWaitSummary{
executionTime: *summary.TimeProcessedNs,
minTSOWaitTime: summary.GetTiflashWaitSummary().GetMinTSOWaitNs(),
pipelineBreakerWaitTime: summary.GetTiflashWaitSummary().GetPipelineBreakerWaitNs(),
pipelineQueueWaitTime: summary.GetTiflashWaitSummary().GetPipelineQueueWaitNs(),
},
}

for _, instance := range summary.GetTiflashScanContext().GetRegionsOfInstance() {
data.tiflashScanContext.regionsOfInstance[instance.GetInstanceId()] = instance.GetRegionNum()
}
Expand All @@ -826,7 +839,7 @@ func (crs *CopRuntimeStats) GetTasks() (totalTasks int32) {
}

// MergeBasicStats traverses basicCopRuntimeStats in the CopRuntimeStats and collects some useful information.
func (crs *CopRuntimeStats) MergeBasicStats() (procTimes Percentile[Duration], totalTime time.Duration, totalTasks, totalLoops, totalThreads int32, totalTiFlashScanContext TiFlashScanContext) {
func (crs *CopRuntimeStats) MergeBasicStats() (procTimes Percentile[Duration], totalTime time.Duration, totalTasks, totalLoops, totalThreads int32, totalTiFlashScanContext TiFlashScanContext, totalTiFlashWaitSummary TiFlashWaitSummary) {
totalTiFlashScanContext = TiFlashScanContext{
regionsOfInstance: make(map[string]uint64),
}
Expand All @@ -836,6 +849,7 @@ func (crs *CopRuntimeStats) MergeBasicStats() (procTimes Percentile[Duration], t
totalLoops += instanceStats.loop.Load()
totalThreads += instanceStats.threads
totalTiFlashScanContext.Merge(instanceStats.tiflashScanContext)
totalTiFlashWaitSummary.Merge(instanceStats.tiflashWaitSummary)
totalTasks += instanceStats.totalTasks
}
return
Expand All @@ -846,15 +860,18 @@ func (crs *CopRuntimeStats) String() string {
return ""
}

procTimes, totalTime, totalTasks, totalLoops, totalThreads, totalTiFlashScanContext := crs.MergeBasicStats()
procTimes, totalTime, totalTasks, totalLoops, totalThreads, totalTiFlashScanContext, totalTiFlashWaitSummary := crs.MergeBasicStats()
avgTime := time.Duration(totalTime.Nanoseconds() / int64(totalTasks))
isTiFlashCop := crs.storeType == "tiflash"

buf := bytes.NewBuffer(make([]byte, 0, 16))
{
printTiFlashScanContext := func() {
printTiFlashSpecificInfo := func() {
if isTiFlashCop {
fmt.Fprintf(buf, ", threads:%d}", totalThreads)
if !totalTiFlashWaitSummary.CanBeIgnored() {
buf.WriteString(", " + totalTiFlashWaitSummary.String())
}
if !totalTiFlashScanContext.Empty() {
buf.WriteString(", " + totalTiFlashScanContext.String())
}
Expand All @@ -864,12 +881,12 @@ func (crs *CopRuntimeStats) String() string {
}
if totalTasks == 1 {
fmt.Fprintf(buf, "%v_task:{time:%v, loops:%d", crs.storeType, FormatDuration(time.Duration(procTimes.GetPercentile(0))), totalLoops)
printTiFlashScanContext()
printTiFlashSpecificInfo()
} else {
fmt.Fprintf(buf, "%v_task:{proc max:%v, min:%v, avg: %v, p80:%v, p95:%v, iters:%v, tasks:%v",
crs.storeType, FormatDuration(time.Duration(procTimes.GetMax().GetFloat64())), FormatDuration(time.Duration(procTimes.GetMin().GetFloat64())), FormatDuration(avgTime),
FormatDuration(time.Duration(procTimes.GetPercentile(0.8))), FormatDuration(time.Duration(procTimes.GetPercentile(0.95))), totalLoops, totalTasks)
printTiFlashScanContext()
printTiFlashSpecificInfo()
}
}
if !isTiFlashCop {
Expand Down Expand Up @@ -1211,6 +1228,79 @@ func (context *TiFlashScanContext) Empty() bool {
return res
}

// TiFlashWaitSummary is used to express all kinds of wait information in tiflash
type TiFlashWaitSummary struct {
// keep execution time to do merge work, always record the wait time with largest execution time
executionTime uint64
minTSOWaitTime uint64
pipelineBreakerWaitTime uint64
pipelineQueueWaitTime uint64
}

// Clone implements the deep copy of * TiFlashWaitSummary
func (waitSummary *TiFlashWaitSummary) Clone() TiFlashWaitSummary {
newSummary := TiFlashWaitSummary{
executionTime: waitSummary.executionTime,
minTSOWaitTime: waitSummary.minTSOWaitTime,
pipelineBreakerWaitTime: waitSummary.pipelineBreakerWaitTime,
pipelineQueueWaitTime: waitSummary.pipelineQueueWaitTime,
}
return newSummary
}

// String dumps TiFlashWaitSummary info as string
func (waitSummary *TiFlashWaitSummary) String() string {
if waitSummary.CanBeIgnored() {
return ""
}
buf := bytes.NewBuffer(make([]byte, 0, 32))
buf.WriteString("tiflash_wait: {")
empty := true
if waitSummary.minTSOWaitTime >= uint64(time.Millisecond) {
buf.WriteString("minTSO_wait: ")
buf.WriteString(strconv.FormatInt(time.Duration(waitSummary.minTSOWaitTime).Milliseconds(), 10))
buf.WriteString("ms")
empty = false
}
if waitSummary.pipelineBreakerWaitTime >= uint64(time.Millisecond) {
if !empty {
buf.WriteString(", ")
}
buf.WriteString("pipeline_breaker_wait: ")
buf.WriteString(strconv.FormatInt(time.Duration(waitSummary.pipelineBreakerWaitTime).Milliseconds(), 10))
buf.WriteString("ms")
empty = false
}
if waitSummary.pipelineQueueWaitTime >= uint64(time.Millisecond) {
if !empty {
buf.WriteString(", ")
}
buf.WriteString("pipeline_queue_wait: ")
buf.WriteString(strconv.FormatInt(time.Duration(waitSummary.pipelineQueueWaitTime).Milliseconds(), 10))
buf.WriteString("ms")
}
buf.WriteString("}")
return buf.String()
}

// Merge make sum to merge the information in TiFlashWaitSummary
func (waitSummary *TiFlashWaitSummary) Merge(other TiFlashWaitSummary) {
if waitSummary.executionTime < other.executionTime {
waitSummary.executionTime = other.executionTime
waitSummary.minTSOWaitTime = other.minTSOWaitTime
waitSummary.pipelineBreakerWaitTime = other.pipelineBreakerWaitTime
waitSummary.pipelineQueueWaitTime = other.pipelineQueueWaitTime
}
}

// CanBeIgnored check whether TiFlashWaitSummary can be ignored, not all tidb executors have significant tiflash wait summary
func (waitSummary *TiFlashWaitSummary) CanBeIgnored() bool {
res := waitSummary.minTSOWaitTime < uint64(time.Millisecond) &&
waitSummary.pipelineBreakerWaitTime < uint64(time.Millisecond) &&
waitSummary.pipelineQueueWaitTime < uint64(time.Millisecond)
return res
}

// BasicRuntimeStats is the basic runtime stats.
type BasicRuntimeStats struct {
// executor's Next() called times.
Expand Down
Loading