Skip to content

Commit

Permalink
runtime,runtime/metrics: track running time per goroutine
Browse files Browse the repository at this point in the history
Fixes golang#41554.

This commit introduces a /sched/goroutine/running:nanoseconds metric,
defined as the total time spent by a goroutine in the running state.
This measurement is useful for systems that would benefit from
fine-grained CPU attribution.

An alternative to scheduler-backed CPU attribution would be the use of
profiler labels. Given it's common to spawn multiple goroutines for the
same task, goroutine-backed statistics can easily become misleading.
Profiler labels instead let you measure CPU performance across a set of
cooperating goroutines. That said, it has two downsides:
- performance overhead; for high-performance systems that care about
  fine-grained CPU attribution (databases for e.g. that want to measure
  total CPU time spent processing each request), profiler labels are too
  cost-prohibitive, especially given the Go runtime has a much cheaper
  and more granular view of the data needed
- inaccuracy and imprecision, as evaluated in
  golang#36821

It's worth noting that we already export /sched/latencies:seconds to
track scheduling latencies, i.e. time spent by a goroutine in the
runnable state (go-review.googlesource.com/c/go/+/308933). This commit
does effectively the same except for the running state on the requesting
goroutine. Users are free to use this metric to power histograms or
tracking on-CPU time across a set of goroutines.

Change-Id: Ie78336a3ddeca0521ae29cce57bc7a5ea67da297
  • Loading branch information
irfansharif committed May 13, 2022
1 parent 7e33e9e commit 2b9dc3c
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 5 deletions.
6 changes: 6 additions & 0 deletions src/runtime/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,12 @@ func initMetrics() {
out.scalar = uint64(gomaxprocs)
},
},
"/sched/goroutine/running:nanoseconds": {
compute: func(_ *statAggregate, out *metricValue) {
out.kind = metricKindUint64
out.scalar = uint64(grunningnanos())
},
},
"/sched/goroutines:goroutines": {
compute: func(_ *statAggregate, out *metricValue) {
out.kind = metricKindUint64
Expand Down
5 changes: 5 additions & 0 deletions src/runtime/metrics/description.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,11 @@ var allDesc = []Description{
Description: "The current runtime.GOMAXPROCS setting, or the number of operating system threads that can execute user-level Go code simultaneously.",
Kind: KindUint64,
},
{
Name: "/sched/goroutine/running:nanoseconds",
Description: "Wall time spent by the current goroutine in the running state.",
Kind: KindUint64,
},
{
Name: "/sched/goroutines:goroutines",
Description: "Count of live goroutines.",
Expand Down
3 changes: 3 additions & 0 deletions src/runtime/metrics/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,9 @@ Below is the full list of supported metrics, ordered lexicographically.
operating system threads that can execute user-level Go code
simultaneously.
/sched/goroutine/running:nanoseconds
Wall time spent by the current goroutine in the running state.
/sched/goroutines:goroutines
Count of live goroutines.
Expand Down
24 changes: 21 additions & 3 deletions src/runtime/proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -994,8 +994,18 @@ func casgstatus(gp *g, oldval, newval uint32) {
}
}

// Handle tracking for scheduling latencies.
// Handle tracking for scheduling and running latencies.
now := nanotime()
if newval == _Grunning {
// We're transitioning into the running state, record the timestamp for
// subsequent use.
gp.lastsched = now
}
if oldval == _Grunning {
// We're transitioning out of running, record how long we were in the
// state.
gp.runningnanos += now - gp.lastsched

// Track every 8th time a goroutine transitions out of running.
if gp.trackingSeq%gTrackingPeriod == 0 {
gp.tracking = true
Expand All @@ -1007,14 +1017,12 @@ func casgstatus(gp *g, oldval, newval uint32) {
// We transitioned out of runnable, so measure how much
// time we spent in this state and add it to
// runnableTime.
now := nanotime()
gp.runnableTime += now - gp.runnableStamp
gp.runnableStamp = 0
}
if newval == _Grunnable {
// We just transitioned into runnable, so record what
// time that happened.
now := nanotime()
gp.runnableStamp = now
} else if newval == _Grunning {
// We're transitioning into running, so turn off
Expand Down Expand Up @@ -3258,6 +3266,14 @@ func dropg() {
setGNoWB(&_g_.m.curg, nil)
}

// grunningnanos returns the wall time spent by current g in the running state.
// A goroutine may be running on an OS thread that's descheduled by the OS
// scheduler, this time still counts towards the metric.
func grunningnanos() int64 {
gp := getg()
return gp.runningnanos + nanotime() - gp.lastsched
}

// checkTimers runs any timers for the P that are ready.
// If now is not 0 it is the current time.
// It returns the passed time or the current time if now was passed as 0.
Expand Down Expand Up @@ -3491,6 +3507,8 @@ func goexit0(gp *g) {
gp.param = nil
gp.labels = nil
gp.timer = nil
gp.lastsched = 0
gp.runningnanos = 0

if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 {
// Flush assist credit to the global pool. This gives
Expand Down
4 changes: 3 additions & 1 deletion src/runtime/runtime2.go
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,6 @@ type g struct {
traceseq uint64 // trace event sequencer
tracelastp puintptr // last P emitted an event for this goroutine
lockedm muintptr
sig uint32
writebuf []byte
sigcode0 uintptr
sigcode1 uintptr
Expand All @@ -488,6 +487,9 @@ type g struct {
labels unsafe.Pointer // profiler labels
timer *timer // cached timer for time.Sleep
selectDone uint32 // are we participating in a select and did someone win the race?
sig uint32
lastsched int64 // timestamp when the G last started running
runningnanos int64 // wall time spent in the running state

// goroutineProfiled indicates the status of this goroutine's stack for the
// current in-progress goroutine profile
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/sizeof_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) {
_32bit uintptr // size on 32bit platforms
_64bit uintptr // size on 64bit platforms
}{
{runtime.G{}, 240, 392}, // g, but exported for testing
{runtime.G{}, 256, 408}, // g, but exported for testing
{runtime.Sudog{}, 56, 88}, // sudog, but exported for testing
}

Expand Down

0 comments on commit 2b9dc3c

Please sign in to comment.