From 445187c636223afb44fd2331b0807163ae856733 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 28 Sep 2022 00:01:33 +0200 Subject: [PATCH 01/30] metrics: WIP use runtime/metrics --- metrics/metrics.go | 102 ++++++++++--- metrics/metrics_test.go | 32 +---- metrics/runtime.go | 212 ---------------------------- metrics/runtime_cgo.go | 10 -- metrics/runtime_gccpufraction.go | 10 -- metrics/runtime_no_cgo.go | 8 -- metrics/runtime_no_gccpufraction.go | 10 -- metrics/runtime_test.go | 88 ------------ 8 files changed, 89 insertions(+), 383 deletions(-) delete mode 100644 metrics/runtime.go delete mode 100644 metrics/runtime_cgo.go delete mode 100644 metrics/runtime_gccpufraction.go delete mode 100644 metrics/runtime_no_cgo.go delete mode 100644 metrics/runtime_no_gccpufraction.go delete mode 100644 metrics/runtime_test.go diff --git a/metrics/metrics.go b/metrics/metrics.go index 747d6471a764..db05319080b1 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -8,6 +8,8 @@ package metrics import ( "os" "runtime" + "runtime/metrics" + "runtime/pprof" "strings" "time" @@ -54,8 +56,69 @@ func init() { } } -// CollectProcessMetrics periodically collects various metrics about the running -// process. +var threadCreateProfile = pprof.Lookup("threadcreate") + +type runtimeValues struct { + GCPauses float64 + GCAllocs float64 + GCFrees float64 + MemTotal uint64 + HeapFree uint64 + HeapReleased uint64 + HeapUnused uint64 +} + +var runtimeSamples = []metrics.Sample{ + {Name: "/gc/pauses:seconds"}, // Histogram + {Name: "/gc/heap/allocs-by-size:bytes"}, // Histogram + {Name: "/gc/heap/frees-by-size:bytes"}, // Histogram + {Name: "/memory/classes/total:bytes"}, + {Name: "/memory/classes/heap/free:bytes"}, + {Name: "/memory/classes/heap/released:bytes"}, + {Name: "/memory/classes/heap/unused:bytes"}, +} + +func readRuntimeMetrics(v *runtimeValues) { + metrics.Read(runtimeSamples) + for _, s := range runtimeSamples { + switch s.Name { + case "/gc/pauses:seconds": + v.GCPauses = medianBucket(s.Value.Float64Histogram()) + case "/gc/heap/allocs-by-size:bytes": + v.GCAllocs = medianBucket(s.Value.Float64Histogram()) + case "/gc/heap/frees-by-size:bytes": + v.GCFrees = medianBucket(s.Value.Float64Histogram()) + case "/memory/classes/total:bytes": + v.MemTotal = s.Value.Uint64() + case "/memory/classes/heap/free:bytes": + v.HeapFree = s.Value.Uint64() + case "/memory/classes/heap/released:bytes": + v.HeapReleased = s.Value.Uint64() + case "/memory/classes/heap/unused:bytes": + v.HeapUnused = s.Value.Uint64() + } + } +} + +// medianBucket gives the median of a histogram. +// This is taken from the runtime/metrics example code. +func medianBucket(h *metrics.Float64Histogram) float64 { + total := uint64(0) + for _, count := range h.Counts { + total += count + } + thresh := total / 2 + total = 0 + for i, count := range h.Counts { + total += count + if total >= thresh { + return h.Buckets[i] + } + } + panic("should not happen") +} + +// CollectProcessMetrics periodically collects various metrics about the running process. func CollectProcessMetrics(refresh time.Duration) { // Short circuit if the metrics system is disabled if !Enabled { @@ -64,14 +127,12 @@ func CollectProcessMetrics(refresh time.Duration) { refreshFreq := int64(refresh / time.Second) // Create the various data collectors - cpuStats := make([]*CPUStats, 2) - memstats := make([]*runtime.MemStats, 2) - diskstats := make([]*DiskStats, 2) - for i := 0; i < len(memstats); i++ { - cpuStats[i] = new(CPUStats) - memstats[i] = new(runtime.MemStats) - diskstats[i] = new(DiskStats) - } + var ( + cpuStats = make([]CPUStats, 2) + diskstats = make([]DiskStats, 2) + memstats = make([]runtimeValues, 2) + ) + // Define the various metrics to collect var ( cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry) @@ -93,26 +154,31 @@ func CollectProcessMetrics(refresh time.Duration) { diskWriteBytes = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry) diskWriteBytesCounter = GetOrRegisterCounter("system/disk/writebytes", DefaultRegistry) ) + // Iterate loading the different stats and updating the meters for i := 1; ; i++ { location1 := i % 2 location2 := (i - 1) % 2 - ReadCPUStats(cpuStats[location1]) + ReadCPUStats(&cpuStats[location1]) + cpuSysLoad.Update((cpuStats[location1].GlobalTime - cpuStats[location2].GlobalTime) / refreshFreq) cpuSysWait.Update((cpuStats[location1].GlobalWait - cpuStats[location2].GlobalWait) / refreshFreq) cpuProcLoad.Update((cpuStats[location1].LocalTime - cpuStats[location2].LocalTime) / refreshFreq) + cpuThreads.Update(int64(threadCreateProfile.Count())) cpuGoroutines.Update(int64(runtime.NumGoroutine())) - runtime.ReadMemStats(memstats[location1]) - memPauses.Mark(int64(memstats[location1].PauseTotalNs - memstats[location2].PauseTotalNs)) - memAllocs.Mark(int64(memstats[location1].Mallocs - memstats[location2].Mallocs)) - memFrees.Mark(int64(memstats[location1].Frees - memstats[location2].Frees)) - memHeld.Update(int64(memstats[location1].HeapSys - memstats[location1].HeapReleased)) - memUsed.Update(int64(memstats[location1].Alloc)) + readRuntimeMetrics(&memstats[location1]) + + memPauses.Mark(int64(memstats[location1].GCPauses - memstats[location2].GCPauses)) + memAllocs.Mark(int64(memstats[location1].GCAllocs - memstats[location2].GCAllocs)) + memFrees.Mark(int64(memstats[location1].GCAllocs - memstats[location2].GCAllocs)) + + memHeld.Update(int64(memstats[location1].MemTotal - memstats[location1].HeapReleased)) + memUsed.Update(int64(memstats[location1].MemTotal)) - if ReadDiskStats(diskstats[location1]) == nil { + if ReadDiskStats(&diskstats[location1]) == nil { diskReads.Mark(diskstats[location1].ReadCount - diskstats[location2].ReadCount) diskReadBytes.Mark(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes) diskWrites.Mark(diskstats[location1].WriteCount - diskstats[location2].WriteCount) diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go index 029c99870eba..0faf07c4fe59 100644 --- a/metrics/metrics_test.go +++ b/metrics/metrics_test.go @@ -2,8 +2,6 @@ package metrics import ( "fmt" - "io" - "log" "sync" "testing" "time" @@ -11,11 +9,11 @@ import ( const FANOUT = 128 -// Stop the compiler from complaining during debugging. -var ( - _ = io.Discard - _ = log.LstdFlags -) +func TestReadRuntimeValues(t *testing.T) { + var v runtimeValues + readRuntimeMetrics(&v) + t.Logf("%+v", v) +} func BenchmarkMetrics(b *testing.B) { r := NewRegistry() @@ -26,7 +24,6 @@ func BenchmarkMetrics(b *testing.B) { m := NewRegisteredMeter("meter", r) t := NewRegisteredTimer("timer", r) RegisterDebugGCStats(r) - RegisterRuntimeMemStats(r) b.ResetTimer() ch := make(chan bool) @@ -48,24 +45,6 @@ func BenchmarkMetrics(b *testing.B) { }() //*/ - wgR := &sync.WaitGroup{} - //* - wgR.Add(1) - go func() { - defer wgR.Done() - //log.Println("go CaptureRuntimeMemStats") - for { - select { - case <-ch: - //log.Println("done CaptureRuntimeMemStats") - return - default: - CaptureRuntimeMemStatsOnce(r) - } - } - }() - //*/ - wgW := &sync.WaitGroup{} /* wgW.Add(1) @@ -104,7 +83,6 @@ func BenchmarkMetrics(b *testing.B) { wg.Wait() close(ch) wgD.Wait() - wgR.Wait() wgW.Wait() } diff --git a/metrics/runtime.go b/metrics/runtime.go deleted file mode 100644 index 9450c479bad7..000000000000 --- a/metrics/runtime.go +++ /dev/null @@ -1,212 +0,0 @@ -package metrics - -import ( - "runtime" - "runtime/pprof" - "time" -) - -var ( - memStats runtime.MemStats - runtimeMetrics struct { - MemStats struct { - Alloc Gauge - BuckHashSys Gauge - DebugGC Gauge - EnableGC Gauge - Frees Gauge - HeapAlloc Gauge - HeapIdle Gauge - HeapInuse Gauge - HeapObjects Gauge - HeapReleased Gauge - HeapSys Gauge - LastGC Gauge - Lookups Gauge - Mallocs Gauge - MCacheInuse Gauge - MCacheSys Gauge - MSpanInuse Gauge - MSpanSys Gauge - NextGC Gauge - NumGC Gauge - GCCPUFraction GaugeFloat64 - PauseNs Histogram - PauseTotalNs Gauge - StackInuse Gauge - StackSys Gauge - Sys Gauge - TotalAlloc Gauge - } - NumCgoCall Gauge - NumGoroutine Gauge - NumThread Gauge - ReadMemStats Timer - } - frees uint64 - lookups uint64 - mallocs uint64 - numGC uint32 - numCgoCalls int64 - - threadCreateProfile = pprof.Lookup("threadcreate") -) - -// Capture new values for the Go runtime statistics exported in -// runtime.MemStats. This is designed to be called as a goroutine. -func CaptureRuntimeMemStats(r Registry, d time.Duration) { - for range time.Tick(d) { - CaptureRuntimeMemStatsOnce(r) - } -} - -// Capture new values for the Go runtime statistics exported in -// runtime.MemStats. This is designed to be called in a background -// goroutine. Giving a registry which has not been given to -// RegisterRuntimeMemStats will panic. -// -// Be very careful with this because runtime.ReadMemStats calls the C -// functions runtime·semacquire(&runtime·worldsema) and runtime·stoptheworld() -// and that last one does what it says on the tin. -func CaptureRuntimeMemStatsOnce(r Registry) { - t := time.Now() - runtime.ReadMemStats(&memStats) // This takes 50-200us. - runtimeMetrics.ReadMemStats.UpdateSince(t) - - runtimeMetrics.MemStats.Alloc.Update(int64(memStats.Alloc)) - runtimeMetrics.MemStats.BuckHashSys.Update(int64(memStats.BuckHashSys)) - if memStats.DebugGC { - runtimeMetrics.MemStats.DebugGC.Update(1) - } else { - runtimeMetrics.MemStats.DebugGC.Update(0) - } - if memStats.EnableGC { - runtimeMetrics.MemStats.EnableGC.Update(1) - } else { - runtimeMetrics.MemStats.EnableGC.Update(0) - } - - runtimeMetrics.MemStats.Frees.Update(int64(memStats.Frees - frees)) - runtimeMetrics.MemStats.HeapAlloc.Update(int64(memStats.HeapAlloc)) - runtimeMetrics.MemStats.HeapIdle.Update(int64(memStats.HeapIdle)) - runtimeMetrics.MemStats.HeapInuse.Update(int64(memStats.HeapInuse)) - runtimeMetrics.MemStats.HeapObjects.Update(int64(memStats.HeapObjects)) - runtimeMetrics.MemStats.HeapReleased.Update(int64(memStats.HeapReleased)) - runtimeMetrics.MemStats.HeapSys.Update(int64(memStats.HeapSys)) - runtimeMetrics.MemStats.LastGC.Update(int64(memStats.LastGC)) - runtimeMetrics.MemStats.Lookups.Update(int64(memStats.Lookups - lookups)) - runtimeMetrics.MemStats.Mallocs.Update(int64(memStats.Mallocs - mallocs)) - runtimeMetrics.MemStats.MCacheInuse.Update(int64(memStats.MCacheInuse)) - runtimeMetrics.MemStats.MCacheSys.Update(int64(memStats.MCacheSys)) - runtimeMetrics.MemStats.MSpanInuse.Update(int64(memStats.MSpanInuse)) - runtimeMetrics.MemStats.MSpanSys.Update(int64(memStats.MSpanSys)) - runtimeMetrics.MemStats.NextGC.Update(int64(memStats.NextGC)) - runtimeMetrics.MemStats.NumGC.Update(int64(memStats.NumGC - numGC)) - runtimeMetrics.MemStats.GCCPUFraction.Update(gcCPUFraction(&memStats)) - - // - i := numGC % uint32(len(memStats.PauseNs)) - ii := memStats.NumGC % uint32(len(memStats.PauseNs)) - if memStats.NumGC-numGC >= uint32(len(memStats.PauseNs)) { - for i = 0; i < uint32(len(memStats.PauseNs)); i++ { - runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) - } - } else { - if i > ii { - for ; i < uint32(len(memStats.PauseNs)); i++ { - runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) - } - i = 0 - } - for ; i < ii; i++ { - runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) - } - } - frees = memStats.Frees - lookups = memStats.Lookups - mallocs = memStats.Mallocs - numGC = memStats.NumGC - - runtimeMetrics.MemStats.PauseTotalNs.Update(int64(memStats.PauseTotalNs)) - runtimeMetrics.MemStats.StackInuse.Update(int64(memStats.StackInuse)) - runtimeMetrics.MemStats.StackSys.Update(int64(memStats.StackSys)) - runtimeMetrics.MemStats.Sys.Update(int64(memStats.Sys)) - runtimeMetrics.MemStats.TotalAlloc.Update(int64(memStats.TotalAlloc)) - - currentNumCgoCalls := numCgoCall() - runtimeMetrics.NumCgoCall.Update(currentNumCgoCalls - numCgoCalls) - numCgoCalls = currentNumCgoCalls - - runtimeMetrics.NumGoroutine.Update(int64(runtime.NumGoroutine())) - - runtimeMetrics.NumThread.Update(int64(threadCreateProfile.Count())) -} - -// Register runtimeMetrics for the Go runtime statistics exported in runtime and -// specifically runtime.MemStats. The runtimeMetrics are named by their -// fully-qualified Go symbols, i.e. runtime.MemStats.Alloc. -func RegisterRuntimeMemStats(r Registry) { - runtimeMetrics.MemStats.Alloc = NewGauge() - runtimeMetrics.MemStats.BuckHashSys = NewGauge() - runtimeMetrics.MemStats.DebugGC = NewGauge() - runtimeMetrics.MemStats.EnableGC = NewGauge() - runtimeMetrics.MemStats.Frees = NewGauge() - runtimeMetrics.MemStats.HeapAlloc = NewGauge() - runtimeMetrics.MemStats.HeapIdle = NewGauge() - runtimeMetrics.MemStats.HeapInuse = NewGauge() - runtimeMetrics.MemStats.HeapObjects = NewGauge() - runtimeMetrics.MemStats.HeapReleased = NewGauge() - runtimeMetrics.MemStats.HeapSys = NewGauge() - runtimeMetrics.MemStats.LastGC = NewGauge() - runtimeMetrics.MemStats.Lookups = NewGauge() - runtimeMetrics.MemStats.Mallocs = NewGauge() - runtimeMetrics.MemStats.MCacheInuse = NewGauge() - runtimeMetrics.MemStats.MCacheSys = NewGauge() - runtimeMetrics.MemStats.MSpanInuse = NewGauge() - runtimeMetrics.MemStats.MSpanSys = NewGauge() - runtimeMetrics.MemStats.NextGC = NewGauge() - runtimeMetrics.MemStats.NumGC = NewGauge() - runtimeMetrics.MemStats.GCCPUFraction = NewGaugeFloat64() - runtimeMetrics.MemStats.PauseNs = NewHistogram(NewExpDecaySample(1028, 0.015)) - runtimeMetrics.MemStats.PauseTotalNs = NewGauge() - runtimeMetrics.MemStats.StackInuse = NewGauge() - runtimeMetrics.MemStats.StackSys = NewGauge() - runtimeMetrics.MemStats.Sys = NewGauge() - runtimeMetrics.MemStats.TotalAlloc = NewGauge() - runtimeMetrics.NumCgoCall = NewGauge() - runtimeMetrics.NumGoroutine = NewGauge() - runtimeMetrics.NumThread = NewGauge() - runtimeMetrics.ReadMemStats = NewTimer() - - r.Register("runtime.MemStats.Alloc", runtimeMetrics.MemStats.Alloc) - r.Register("runtime.MemStats.BuckHashSys", runtimeMetrics.MemStats.BuckHashSys) - r.Register("runtime.MemStats.DebugGC", runtimeMetrics.MemStats.DebugGC) - r.Register("runtime.MemStats.EnableGC", runtimeMetrics.MemStats.EnableGC) - r.Register("runtime.MemStats.Frees", runtimeMetrics.MemStats.Frees) - r.Register("runtime.MemStats.HeapAlloc", runtimeMetrics.MemStats.HeapAlloc) - r.Register("runtime.MemStats.HeapIdle", runtimeMetrics.MemStats.HeapIdle) - r.Register("runtime.MemStats.HeapInuse", runtimeMetrics.MemStats.HeapInuse) - r.Register("runtime.MemStats.HeapObjects", runtimeMetrics.MemStats.HeapObjects) - r.Register("runtime.MemStats.HeapReleased", runtimeMetrics.MemStats.HeapReleased) - r.Register("runtime.MemStats.HeapSys", runtimeMetrics.MemStats.HeapSys) - r.Register("runtime.MemStats.LastGC", runtimeMetrics.MemStats.LastGC) - r.Register("runtime.MemStats.Lookups", runtimeMetrics.MemStats.Lookups) - r.Register("runtime.MemStats.Mallocs", runtimeMetrics.MemStats.Mallocs) - r.Register("runtime.MemStats.MCacheInuse", runtimeMetrics.MemStats.MCacheInuse) - r.Register("runtime.MemStats.MCacheSys", runtimeMetrics.MemStats.MCacheSys) - r.Register("runtime.MemStats.MSpanInuse", runtimeMetrics.MemStats.MSpanInuse) - r.Register("runtime.MemStats.MSpanSys", runtimeMetrics.MemStats.MSpanSys) - r.Register("runtime.MemStats.NextGC", runtimeMetrics.MemStats.NextGC) - r.Register("runtime.MemStats.NumGC", runtimeMetrics.MemStats.NumGC) - r.Register("runtime.MemStats.GCCPUFraction", runtimeMetrics.MemStats.GCCPUFraction) - r.Register("runtime.MemStats.PauseNs", runtimeMetrics.MemStats.PauseNs) - r.Register("runtime.MemStats.PauseTotalNs", runtimeMetrics.MemStats.PauseTotalNs) - r.Register("runtime.MemStats.StackInuse", runtimeMetrics.MemStats.StackInuse) - r.Register("runtime.MemStats.StackSys", runtimeMetrics.MemStats.StackSys) - r.Register("runtime.MemStats.Sys", runtimeMetrics.MemStats.Sys) - r.Register("runtime.MemStats.TotalAlloc", runtimeMetrics.MemStats.TotalAlloc) - r.Register("runtime.NumCgoCall", runtimeMetrics.NumCgoCall) - r.Register("runtime.NumGoroutine", runtimeMetrics.NumGoroutine) - r.Register("runtime.NumThread", runtimeMetrics.NumThread) - r.Register("runtime.ReadMemStats", runtimeMetrics.ReadMemStats) -} diff --git a/metrics/runtime_cgo.go b/metrics/runtime_cgo.go deleted file mode 100644 index 4307ebdba689..000000000000 --- a/metrics/runtime_cgo.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build cgo && !appengine && !js -// +build cgo,!appengine,!js - -package metrics - -import "runtime" - -func numCgoCall() int64 { - return runtime.NumCgoCall() -} diff --git a/metrics/runtime_gccpufraction.go b/metrics/runtime_gccpufraction.go deleted file mode 100644 index 28cd44752b45..000000000000 --- a/metrics/runtime_gccpufraction.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build go1.5 -// +build go1.5 - -package metrics - -import "runtime" - -func gcCPUFraction(memStats *runtime.MemStats) float64 { - return memStats.GCCPUFraction -} diff --git a/metrics/runtime_no_cgo.go b/metrics/runtime_no_cgo.go deleted file mode 100644 index 1799bef63bfb..000000000000 --- a/metrics/runtime_no_cgo.go +++ /dev/null @@ -1,8 +0,0 @@ -//go:build !cgo || appengine || js -// +build !cgo appengine js - -package metrics - -func numCgoCall() int64 { - return 0 -} diff --git a/metrics/runtime_no_gccpufraction.go b/metrics/runtime_no_gccpufraction.go deleted file mode 100644 index af1a4b63c809..000000000000 --- a/metrics/runtime_no_gccpufraction.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build !go1.5 -// +build !go1.5 - -package metrics - -import "runtime" - -func gcCPUFraction(memStats *runtime.MemStats) float64 { - return 0 -} diff --git a/metrics/runtime_test.go b/metrics/runtime_test.go deleted file mode 100644 index f85f7868f71a..000000000000 --- a/metrics/runtime_test.go +++ /dev/null @@ -1,88 +0,0 @@ -package metrics - -import ( - "runtime" - "testing" - "time" -) - -func BenchmarkRuntimeMemStats(b *testing.B) { - r := NewRegistry() - RegisterRuntimeMemStats(r) - b.ResetTimer() - for i := 0; i < b.N; i++ { - CaptureRuntimeMemStatsOnce(r) - } -} - -func TestRuntimeMemStats(t *testing.T) { - r := NewRegistry() - RegisterRuntimeMemStats(r) - CaptureRuntimeMemStatsOnce(r) - zero := runtimeMetrics.MemStats.PauseNs.Count() // Get a "zero" since GC may have run before these tests. - runtime.GC() - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 1 { - t.Fatal(count - zero) - } - runtime.GC() - runtime.GC() - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 3 { - t.Fatal(count - zero) - } - for i := 0; i < 256; i++ { - runtime.GC() - } - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 259 { - t.Fatal(count - zero) - } - for i := 0; i < 257; i++ { - runtime.GC() - } - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 515 { // We lost one because there were too many GCs between captures. - t.Fatal(count - zero) - } -} - -func TestRuntimeMemStatsNumThread(t *testing.T) { - r := NewRegistry() - RegisterRuntimeMemStats(r) - CaptureRuntimeMemStatsOnce(r) - - if value := runtimeMetrics.NumThread.Value(); value < 1 { - t.Fatalf("got NumThread: %d, wanted at least 1", value) - } -} - -func TestRuntimeMemStatsBlocking(t *testing.T) { - if g := runtime.GOMAXPROCS(0); g < 2 { - t.Skipf("skipping TestRuntimeMemStatsBlocking with GOMAXPROCS=%d\n", g) - } - ch := make(chan int) - go testRuntimeMemStatsBlocking(ch) - var memStats runtime.MemStats - t0 := time.Now() - runtime.ReadMemStats(&memStats) - t1 := time.Now() - t.Log("i++ during runtime.ReadMemStats:", <-ch) - go testRuntimeMemStatsBlocking(ch) - d := t1.Sub(t0) - t.Log(d) - time.Sleep(d) - t.Log("i++ during time.Sleep:", <-ch) -} - -func testRuntimeMemStatsBlocking(ch chan int) { - i := 0 - for { - select { - case ch <- i: - return - default: - i++ - } - } -} From 41cc6128b818798c24464ad26b9961d0256ef7dc Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 28 Sep 2022 00:10:50 +0200 Subject: [PATCH 02/30] metrics: fixup --- metrics/metrics.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index db05319080b1..cc3b56732786 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -173,9 +173,9 @@ func CollectProcessMetrics(refresh time.Duration) { memPauses.Mark(int64(memstats[location1].GCPauses - memstats[location2].GCPauses)) memAllocs.Mark(int64(memstats[location1].GCAllocs - memstats[location2].GCAllocs)) - memFrees.Mark(int64(memstats[location1].GCAllocs - memstats[location2].GCAllocs)) + memFrees.Mark(int64(memstats[location1].GCFrees - memstats[location2].GCFrees)) - memHeld.Update(int64(memstats[location1].MemTotal - memstats[location1].HeapReleased)) + memHeld.Update(int64(memstats[location1].MemTotal - memstats[location1].HeapFree - memstats[location1].HeapReleased)) memUsed.Update(int64(memstats[location1].MemTotal)) if ReadDiskStats(&diskstats[location1]) == nil { From 993a51b86cdca80870a9719049a5c1c38022f5f8 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 28 Sep 2022 00:16:38 +0200 Subject: [PATCH 03/30] metrics: use gauge for gc metrics --- metrics/metrics.go | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index cc3b56732786..b8bbc2121f8e 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -130,7 +130,7 @@ func CollectProcessMetrics(refresh time.Duration) { var ( cpuStats = make([]CPUStats, 2) diskstats = make([]DiskStats, 2) - memstats = make([]runtimeValues, 2) + memstats runtimeValues ) // Define the various metrics to collect @@ -141,9 +141,9 @@ func CollectProcessMetrics(refresh time.Duration) { cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry) cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) - memPauses = GetOrRegisterMeter("system/memory/pauses", DefaultRegistry) - memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) - memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) + memPauses = GetOrRegisterGaugeFloat64("system/memory/pauses", DefaultRegistry) + memAllocs = GetOrRegisterGaugeFloat64("system/memory/allocs", DefaultRegistry) + memFrees = GetOrRegisterGaugeFloat64("system/memory/frees", DefaultRegistry) memHeld = GetOrRegisterGauge("system/memory/held", DefaultRegistry) memUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) @@ -169,14 +169,12 @@ func CollectProcessMetrics(refresh time.Duration) { cpuThreads.Update(int64(threadCreateProfile.Count())) cpuGoroutines.Update(int64(runtime.NumGoroutine())) - readRuntimeMetrics(&memstats[location1]) - - memPauses.Mark(int64(memstats[location1].GCPauses - memstats[location2].GCPauses)) - memAllocs.Mark(int64(memstats[location1].GCAllocs - memstats[location2].GCAllocs)) - memFrees.Mark(int64(memstats[location1].GCFrees - memstats[location2].GCFrees)) - - memHeld.Update(int64(memstats[location1].MemTotal - memstats[location1].HeapFree - memstats[location1].HeapReleased)) - memUsed.Update(int64(memstats[location1].MemTotal)) + readRuntimeMetrics(&memstats) + memPauses.Update(memstats.GCPauses) + memAllocs.Update(memstats.GCAllocs) + memFrees.Update(memstats.GCFrees) + memHeld.Update(int64(memstats.MemTotal - memstats.HeapFree - memstats.HeapReleased)) + memUsed.Update(int64(memstats.MemTotal)) if ReadDiskStats(&diskstats[location1]) == nil { diskReads.Mark(diskstats[location1].ReadCount - diskstats[location2].ReadCount) From 516f5a91355169b92594dc46d84b6bcc0e98970c Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 28 Sep 2022 00:38:00 +0200 Subject: [PATCH 04/30] metrics: update --- metrics/metrics.go | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index b8bbc2121f8e..be34573ff9a8 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -60,8 +60,9 @@ var threadCreateProfile = pprof.Lookup("threadcreate") type runtimeValues struct { GCPauses float64 - GCAllocs float64 - GCFrees float64 + GCAllocBytes uint64 + GCFreedBytes uint64 + MemTotal uint64 HeapFree uint64 HeapReleased uint64 @@ -69,9 +70,9 @@ type runtimeValues struct { } var runtimeSamples = []metrics.Sample{ - {Name: "/gc/pauses:seconds"}, // Histogram - {Name: "/gc/heap/allocs-by-size:bytes"}, // Histogram - {Name: "/gc/heap/frees-by-size:bytes"}, // Histogram + {Name: "/gc/pauses:seconds"}, // Histogram + {Name: "/gc/heap/allocs:bytes"}, + {Name: "/gc/heap/frees:bytes"}, {Name: "/memory/classes/total:bytes"}, {Name: "/memory/classes/heap/free:bytes"}, {Name: "/memory/classes/heap/released:bytes"}, @@ -84,10 +85,14 @@ func readRuntimeMetrics(v *runtimeValues) { switch s.Name { case "/gc/pauses:seconds": v.GCPauses = medianBucket(s.Value.Float64Histogram()) - case "/gc/heap/allocs-by-size:bytes": - v.GCAllocs = medianBucket(s.Value.Float64Histogram()) - case "/gc/heap/frees-by-size:bytes": - v.GCFrees = medianBucket(s.Value.Float64Histogram()) + case "/gc/heap/allocs:bytes": + if s.Value.Kind() == metrics.KindUint64 { + v.GCAllocBytes = s.Value.Uint64() + } + case "/gc/heap/frees:bytes": + if s.Value.Kind() == metrics.KindUint64 { + v.GCFreedBytes = s.Value.Uint64() + } case "/memory/classes/total:bytes": v.MemTotal = s.Value.Uint64() case "/memory/classes/heap/free:bytes": @@ -130,7 +135,7 @@ func CollectProcessMetrics(refresh time.Duration) { var ( cpuStats = make([]CPUStats, 2) diskstats = make([]DiskStats, 2) - memstats runtimeValues + memstats = make([]runtimeValues, 2) ) // Define the various metrics to collect @@ -142,8 +147,8 @@ func CollectProcessMetrics(refresh time.Duration) { cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) memPauses = GetOrRegisterGaugeFloat64("system/memory/pauses", DefaultRegistry) - memAllocs = GetOrRegisterGaugeFloat64("system/memory/allocs", DefaultRegistry) - memFrees = GetOrRegisterGaugeFloat64("system/memory/frees", DefaultRegistry) + memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) + memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) memHeld = GetOrRegisterGauge("system/memory/held", DefaultRegistry) memUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) @@ -169,12 +174,12 @@ func CollectProcessMetrics(refresh time.Duration) { cpuThreads.Update(int64(threadCreateProfile.Count())) cpuGoroutines.Update(int64(runtime.NumGoroutine())) - readRuntimeMetrics(&memstats) - memPauses.Update(memstats.GCPauses) - memAllocs.Update(memstats.GCAllocs) - memFrees.Update(memstats.GCFrees) - memHeld.Update(int64(memstats.MemTotal - memstats.HeapFree - memstats.HeapReleased)) - memUsed.Update(int64(memstats.MemTotal)) + readRuntimeMetrics(&memstats[location1]) + memAllocs.Mark(int64(memstats[location1].GCAllocBytes - memstats[location2].GCAllocBytes)) + memFrees.Mark(int64(memstats[location1].GCFreedBytes - memstats[location2].GCFreedBytes)) + memPauses.Update(memstats[location1].GCPauses) + memHeld.Update(int64(memstats[location1].MemTotal - memstats[location1].HeapFree - memstats[location1].HeapReleased)) + memUsed.Update(int64(memstats[location1].MemTotal)) if ReadDiskStats(&diskstats[location1]) == nil { diskReads.Mark(diskstats[location1].ReadCount - diskstats[location2].ReadCount) From 1a4bbce8d14df433df15a5ca0c41f7a24f0a4746 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 28 Sep 2022 00:53:51 +0200 Subject: [PATCH 05/30] metrics: update --- metrics/metrics.go | 53 ++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index be34573ff9a8..0fe3edd27a9a 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -7,7 +7,6 @@ package metrics import ( "os" - "runtime" "runtime/metrics" "runtime/pprof" "strings" @@ -67,6 +66,8 @@ type runtimeValues struct { HeapFree uint64 HeapReleased uint64 HeapUnused uint64 + + Goroutines uint64 } var runtimeSamples = []metrics.Sample{ @@ -77,6 +78,7 @@ var runtimeSamples = []metrics.Sample{ {Name: "/memory/classes/heap/free:bytes"}, {Name: "/memory/classes/heap/released:bytes"}, {Name: "/memory/classes/heap/unused:bytes"}, + {Name: "/sched/goroutines:goroutines"}, } func readRuntimeMetrics(v *runtimeValues) { @@ -101,6 +103,8 @@ func readRuntimeMetrics(v *runtimeValues) { v.HeapReleased = s.Value.Uint64() case "/memory/classes/heap/unused:bytes": v.HeapUnused = s.Value.Uint64() + case "/sched/goroutines:goroutines": + v.Goroutines = s.Value.Uint64() } } } @@ -162,33 +166,32 @@ func CollectProcessMetrics(refresh time.Duration) { // Iterate loading the different stats and updating the meters for i := 1; ; i++ { - location1 := i % 2 - location2 := (i - 1) % 2 - - ReadCPUStats(&cpuStats[location1]) + now := i % 2 + prev := (i - 1) % 2 - cpuSysLoad.Update((cpuStats[location1].GlobalTime - cpuStats[location2].GlobalTime) / refreshFreq) - cpuSysWait.Update((cpuStats[location1].GlobalWait - cpuStats[location2].GlobalWait) / refreshFreq) - cpuProcLoad.Update((cpuStats[location1].LocalTime - cpuStats[location2].LocalTime) / refreshFreq) + ReadCPUStats(&cpuStats[now]) + cpuSysLoad.Update((cpuStats[now].GlobalTime - cpuStats[prev].GlobalTime) / refreshFreq) + cpuSysWait.Update((cpuStats[now].GlobalWait - cpuStats[prev].GlobalWait) / refreshFreq) + cpuProcLoad.Update((cpuStats[now].LocalTime - cpuStats[prev].LocalTime) / refreshFreq) cpuThreads.Update(int64(threadCreateProfile.Count())) - cpuGoroutines.Update(int64(runtime.NumGoroutine())) - - readRuntimeMetrics(&memstats[location1]) - memAllocs.Mark(int64(memstats[location1].GCAllocBytes - memstats[location2].GCAllocBytes)) - memFrees.Mark(int64(memstats[location1].GCFreedBytes - memstats[location2].GCFreedBytes)) - memPauses.Update(memstats[location1].GCPauses) - memHeld.Update(int64(memstats[location1].MemTotal - memstats[location1].HeapFree - memstats[location1].HeapReleased)) - memUsed.Update(int64(memstats[location1].MemTotal)) - - if ReadDiskStats(&diskstats[location1]) == nil { - diskReads.Mark(diskstats[location1].ReadCount - diskstats[location2].ReadCount) - diskReadBytes.Mark(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes) - diskWrites.Mark(diskstats[location1].WriteCount - diskstats[location2].WriteCount) - diskWriteBytes.Mark(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes) - - diskReadBytesCounter.Inc(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes) - diskWriteBytesCounter.Inc(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes) + + readRuntimeMetrics(&memstats[now]) + cpuGoroutines.Update(int64(memstats[now].Goroutines)) + memAllocs.Mark(int64(memstats[now].GCAllocBytes - memstats[prev].GCAllocBytes)) + memFrees.Mark(int64(memstats[now].GCFreedBytes - memstats[prev].GCFreedBytes)) + memPauses.Update(memstats[now].GCPauses) + memUsed.Update(int64(memstats[now].MemTotal - memstats[now].HeapFree - memstats[now].HeapReleased)) + memHeld.Update(int64(memstats[now].MemTotal)) + + if ReadDiskStats(&diskstats[now]) == nil { + diskReads.Mark(diskstats[now].ReadCount - diskstats[prev].ReadCount) + diskReadBytes.Mark(diskstats[now].ReadBytes - diskstats[prev].ReadBytes) + diskWrites.Mark(diskstats[now].WriteCount - diskstats[prev].WriteCount) + diskWriteBytes.Mark(diskstats[now].WriteBytes - diskstats[prev].WriteBytes) + + diskReadBytesCounter.Inc(diskstats[now].ReadBytes - diskstats[prev].ReadBytes) + diskWriteBytesCounter.Inc(diskstats[now].WriteBytes - diskstats[prev].WriteBytes) } time.Sleep(refresh) } From 7c1a4ac295f54e656191dfd0e37d2b61500e5e60 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 28 Sep 2022 01:04:41 +0200 Subject: [PATCH 06/30] metrics: improve loop --- metrics/metrics.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index 0fe3edd27a9a..cf34880a4804 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -164,10 +164,9 @@ func CollectProcessMetrics(refresh time.Duration) { diskWriteBytesCounter = GetOrRegisterCounter("system/disk/writebytes", DefaultRegistry) ) - // Iterate loading the different stats and updating the meters - for i := 1; ; i++ { - now := i % 2 - prev := (i - 1) % 2 + // Iterate loading the different stats and updating the meters. + now, prev := 0, 1 + for ; ; now, prev = prev, now { ReadCPUStats(&cpuStats[now]) cpuSysLoad.Update((cpuStats[now].GlobalTime - cpuStats[prev].GlobalTime) / refreshFreq) From 41706c815c1c6347753631a443d5bb4cd6f27ff1 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 28 Sep 2022 01:05:43 +0200 Subject: [PATCH 07/30] metrics: track alloc/free in objects not gc --- metrics/metrics.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index cf34880a4804..a874bc6164fe 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -60,7 +60,9 @@ var threadCreateProfile = pprof.Lookup("threadcreate") type runtimeValues struct { GCPauses float64 GCAllocBytes uint64 + GCAllocObj uint64 GCFreedBytes uint64 + GCFreedObj uint64 MemTotal uint64 HeapFree uint64 @@ -73,7 +75,9 @@ type runtimeValues struct { var runtimeSamples = []metrics.Sample{ {Name: "/gc/pauses:seconds"}, // Histogram {Name: "/gc/heap/allocs:bytes"}, + {Name: "/gc/heap/allocs:objects"}, {Name: "/gc/heap/frees:bytes"}, + {Name: "/gc/heap/frees:objects"}, {Name: "/memory/classes/total:bytes"}, {Name: "/memory/classes/heap/free:bytes"}, {Name: "/memory/classes/heap/released:bytes"}, @@ -91,10 +95,18 @@ func readRuntimeMetrics(v *runtimeValues) { if s.Value.Kind() == metrics.KindUint64 { v.GCAllocBytes = s.Value.Uint64() } + case "/gc/heap/allocs:objects": + if s.Value.Kind() == metrics.KindUint64 { + v.GCAllocObj = s.Value.Uint64() + } case "/gc/heap/frees:bytes": if s.Value.Kind() == metrics.KindUint64 { v.GCFreedBytes = s.Value.Uint64() } + case "/gc/heap/frees:objects": + if s.Value.Kind() == metrics.KindUint64 { + v.GCFreedObj = s.Value.Uint64() + } case "/memory/classes/total:bytes": v.MemTotal = s.Value.Uint64() case "/memory/classes/heap/free:bytes": @@ -177,8 +189,8 @@ func CollectProcessMetrics(refresh time.Duration) { readRuntimeMetrics(&memstats[now]) cpuGoroutines.Update(int64(memstats[now].Goroutines)) - memAllocs.Mark(int64(memstats[now].GCAllocBytes - memstats[prev].GCAllocBytes)) - memFrees.Mark(int64(memstats[now].GCFreedBytes - memstats[prev].GCFreedBytes)) + memAllocs.Mark(int64(memstats[now].GCAllocObj - memstats[prev].GCAllocObj)) + memFrees.Mark(int64(memstats[now].GCFreedObj - memstats[prev].GCFreedObj)) memPauses.Update(memstats[now].GCPauses) memUsed.Update(int64(memstats[now].MemTotal - memstats[now].HeapFree - memstats[now].HeapReleased)) memHeld.Update(int64(memstats[now].MemTotal)) From c0b8c7b727912e60293284908a3f0d8146934bd7 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 28 Sep 2022 01:16:26 +0200 Subject: [PATCH 08/30] metrics: rename variables --- metrics/metrics.go | 31 ++++++++++++++++--------------- metrics/metrics_test.go | 2 +- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index a874bc6164fe..a767447d303b 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -57,7 +57,7 @@ func init() { var threadCreateProfile = pprof.Lookup("threadcreate") -type runtimeValues struct { +type runtimeStats struct { GCPauses float64 GCAllocBytes uint64 GCAllocObj uint64 @@ -85,7 +85,7 @@ var runtimeSamples = []metrics.Sample{ {Name: "/sched/goroutines:goroutines"}, } -func readRuntimeMetrics(v *runtimeValues) { +func readRuntimeMetrics(v *runtimeStats) { metrics.Read(runtimeSamples) for _, s := range runtimeSamples { switch s.Name { @@ -145,13 +145,14 @@ func CollectProcessMetrics(refresh time.Duration) { if !Enabled { return } + refreshFreq := int64(refresh / time.Second) // Create the various data collectors var ( - cpuStats = make([]CPUStats, 2) + cpustats = make([]CPUStats, 2) diskstats = make([]DiskStats, 2) - memstats = make([]runtimeValues, 2) + rstats = make([]runtimeStats, 2) ) // Define the various metrics to collect @@ -180,20 +181,20 @@ func CollectProcessMetrics(refresh time.Duration) { now, prev := 0, 1 for ; ; now, prev = prev, now { - ReadCPUStats(&cpuStats[now]) - cpuSysLoad.Update((cpuStats[now].GlobalTime - cpuStats[prev].GlobalTime) / refreshFreq) - cpuSysWait.Update((cpuStats[now].GlobalWait - cpuStats[prev].GlobalWait) / refreshFreq) - cpuProcLoad.Update((cpuStats[now].LocalTime - cpuStats[prev].LocalTime) / refreshFreq) + ReadCPUStats(&cpustats[now]) + cpuSysLoad.Update((cpustats[now].GlobalTime - cpustats[prev].GlobalTime) / refreshFreq) + cpuSysWait.Update((cpustats[now].GlobalWait - cpustats[prev].GlobalWait) / refreshFreq) + cpuProcLoad.Update((cpustats[now].LocalTime - cpustats[prev].LocalTime) / refreshFreq) cpuThreads.Update(int64(threadCreateProfile.Count())) - readRuntimeMetrics(&memstats[now]) - cpuGoroutines.Update(int64(memstats[now].Goroutines)) - memAllocs.Mark(int64(memstats[now].GCAllocObj - memstats[prev].GCAllocObj)) - memFrees.Mark(int64(memstats[now].GCFreedObj - memstats[prev].GCFreedObj)) - memPauses.Update(memstats[now].GCPauses) - memUsed.Update(int64(memstats[now].MemTotal - memstats[now].HeapFree - memstats[now].HeapReleased)) - memHeld.Update(int64(memstats[now].MemTotal)) + readRuntimeMetrics(&rstats[now]) + cpuGoroutines.Update(int64(rstats[now].Goroutines)) + memAllocs.Mark(int64(rstats[now].GCAllocObj - rstats[prev].GCAllocObj)) + memFrees.Mark(int64(rstats[now].GCFreedObj - rstats[prev].GCFreedObj)) + memPauses.Update(rstats[now].GCPauses) + memUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapFree - rstats[now].HeapReleased)) + memHeld.Update(int64(rstats[now].MemTotal)) if ReadDiskStats(&diskstats[now]) == nil { diskReads.Mark(diskstats[now].ReadCount - diskstats[prev].ReadCount) diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go index 0faf07c4fe59..9af6d3e525bd 100644 --- a/metrics/metrics_test.go +++ b/metrics/metrics_test.go @@ -10,7 +10,7 @@ import ( const FANOUT = 128 func TestReadRuntimeValues(t *testing.T) { - var v runtimeValues + var v runtimeStats readRuntimeMetrics(&v) t.Logf("%+v", v) } From 3705d41cc6c82acbaeb07d18a2846ad8ffdc2092 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 28 Sep 2022 01:17:12 +0200 Subject: [PATCH 09/30] metrics: fixup --- metrics/metrics.go | 4 ++-- metrics/metrics_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index a767447d303b..596aa31eec02 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -85,7 +85,7 @@ var runtimeSamples = []metrics.Sample{ {Name: "/sched/goroutines:goroutines"}, } -func readRuntimeMetrics(v *runtimeStats) { +func readRuntimeStats(v *runtimeStats) { metrics.Read(runtimeSamples) for _, s := range runtimeSamples { switch s.Name { @@ -188,7 +188,7 @@ func CollectProcessMetrics(refresh time.Duration) { cpuThreads.Update(int64(threadCreateProfile.Count())) - readRuntimeMetrics(&rstats[now]) + readRuntimeStats(&rstats[now]) cpuGoroutines.Update(int64(rstats[now].Goroutines)) memAllocs.Mark(int64(rstats[now].GCAllocObj - rstats[prev].GCAllocObj)) memFrees.Mark(int64(rstats[now].GCFreedObj - rstats[prev].GCFreedObj)) diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go index 9af6d3e525bd..e3fde1ea62ce 100644 --- a/metrics/metrics_test.go +++ b/metrics/metrics_test.go @@ -11,7 +11,7 @@ const FANOUT = 128 func TestReadRuntimeValues(t *testing.T) { var v runtimeStats - readRuntimeMetrics(&v) + readRuntimeStats(&v) t.Logf("%+v", v) } From 69e9bbfef4afdc54e3d87ecd53f97ae5816cadd6 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 28 Sep 2022 17:22:52 +0200 Subject: [PATCH 10/30] metrics: track GC alloc/free in bytes The bytes-based metric is better because it includes 'tiny' objects. It's also easier to understand. --- metrics/metrics.go | 47 ++++++++++++++++++---------------------------- 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index 596aa31eec02..ac6f7b2e1418 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -60,9 +60,7 @@ var threadCreateProfile = pprof.Lookup("threadcreate") type runtimeStats struct { GCPauses float64 GCAllocBytes uint64 - GCAllocObj uint64 GCFreedBytes uint64 - GCFreedObj uint64 MemTotal uint64 HeapFree uint64 @@ -73,11 +71,9 @@ type runtimeStats struct { } var runtimeSamples = []metrics.Sample{ - {Name: "/gc/pauses:seconds"}, // Histogram + {Name: "/gc/pauses:seconds"}, // histogram {Name: "/gc/heap/allocs:bytes"}, - {Name: "/gc/heap/allocs:objects"}, {Name: "/gc/heap/frees:bytes"}, - {Name: "/gc/heap/frees:objects"}, {Name: "/memory/classes/total:bytes"}, {Name: "/memory/classes/heap/free:bytes"}, {Name: "/memory/classes/heap/released:bytes"}, @@ -95,18 +91,10 @@ func readRuntimeStats(v *runtimeStats) { if s.Value.Kind() == metrics.KindUint64 { v.GCAllocBytes = s.Value.Uint64() } - case "/gc/heap/allocs:objects": - if s.Value.Kind() == metrics.KindUint64 { - v.GCAllocObj = s.Value.Uint64() - } case "/gc/heap/frees:bytes": if s.Value.Kind() == metrics.KindUint64 { v.GCFreedBytes = s.Value.Uint64() } - case "/gc/heap/frees:objects": - if s.Value.Kind() == metrics.KindUint64 { - v.GCFreedObj = s.Value.Uint64() - } case "/memory/classes/total:bytes": v.MemTotal = s.Value.Uint64() case "/memory/classes/heap/free:bytes": @@ -157,18 +145,16 @@ func CollectProcessMetrics(refresh time.Duration) { // Define the various metrics to collect var ( - cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry) - cpuSysWait = GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry) - cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) - cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry) - cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) - - memPauses = GetOrRegisterGaugeFloat64("system/memory/pauses", DefaultRegistry) - memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) - memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) - memHeld = GetOrRegisterGauge("system/memory/held", DefaultRegistry) - memUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) - + cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry) + cpuSysWait = GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry) + cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) + cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry) + cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) + memPauses = GetOrRegisterGaugeFloat64("system/memory/pauses", DefaultRegistry) + memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) + memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) + memHeld = GetOrRegisterGauge("system/memory/held", DefaultRegistry) + memUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry) @@ -180,31 +166,34 @@ func CollectProcessMetrics(refresh time.Duration) { // Iterate loading the different stats and updating the meters. now, prev := 0, 1 for ; ; now, prev = prev, now { - + // CPU ReadCPUStats(&cpustats[now]) cpuSysLoad.Update((cpustats[now].GlobalTime - cpustats[prev].GlobalTime) / refreshFreq) cpuSysWait.Update((cpustats[now].GlobalWait - cpustats[prev].GlobalWait) / refreshFreq) cpuProcLoad.Update((cpustats[now].LocalTime - cpustats[prev].LocalTime) / refreshFreq) + // Threads cpuThreads.Update(int64(threadCreateProfile.Count())) + // Go runtime metrics readRuntimeStats(&rstats[now]) cpuGoroutines.Update(int64(rstats[now].Goroutines)) - memAllocs.Mark(int64(rstats[now].GCAllocObj - rstats[prev].GCAllocObj)) - memFrees.Mark(int64(rstats[now].GCFreedObj - rstats[prev].GCFreedObj)) + memAllocs.Mark(int64(rstats[now].GCAllocBytes - rstats[prev].GCAllocBytes)) + memFrees.Mark(int64(rstats[now].GCFreedBytes - rstats[prev].GCFreedBytes)) memPauses.Update(rstats[now].GCPauses) memUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapFree - rstats[now].HeapReleased)) memHeld.Update(int64(rstats[now].MemTotal)) + // Disk if ReadDiskStats(&diskstats[now]) == nil { diskReads.Mark(diskstats[now].ReadCount - diskstats[prev].ReadCount) diskReadBytes.Mark(diskstats[now].ReadBytes - diskstats[prev].ReadBytes) diskWrites.Mark(diskstats[now].WriteCount - diskstats[prev].WriteCount) diskWriteBytes.Mark(diskstats[now].WriteBytes - diskstats[prev].WriteBytes) - diskReadBytesCounter.Inc(diskstats[now].ReadBytes - diskstats[prev].ReadBytes) diskWriteBytesCounter.Inc(diskstats[now].WriteBytes - diskstats[prev].WriteBytes) } + time.Sleep(refresh) } } From 182856deaa20985be5ca447986f53ae0e0f497d4 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 28 Sep 2022 20:00:42 +0200 Subject: [PATCH 11/30] metrics: skip invalid/unknown Go runtime metrics --- metrics/metrics.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index ac6f7b2e1418..0eb66d58d041 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -84,17 +84,17 @@ var runtimeSamples = []metrics.Sample{ func readRuntimeStats(v *runtimeStats) { metrics.Read(runtimeSamples) for _, s := range runtimeSamples { + if s.Value.Kind() == metrics.KindBad { + continue // skip invalid/unknown metrics + } + switch s.Name { case "/gc/pauses:seconds": - v.GCPauses = medianBucket(s.Value.Float64Histogram()) + v.GCPauses = median(s.Value.Float64Histogram()) case "/gc/heap/allocs:bytes": - if s.Value.Kind() == metrics.KindUint64 { - v.GCAllocBytes = s.Value.Uint64() - } + v.GCAllocBytes = s.Value.Uint64() case "/gc/heap/frees:bytes": - if s.Value.Kind() == metrics.KindUint64 { - v.GCFreedBytes = s.Value.Uint64() - } + v.GCFreedBytes = s.Value.Uint64() case "/memory/classes/total:bytes": v.MemTotal = s.Value.Uint64() case "/memory/classes/heap/free:bytes": @@ -109,9 +109,8 @@ func readRuntimeStats(v *runtimeStats) { } } -// medianBucket gives the median of a histogram. -// This is taken from the runtime/metrics example code. -func medianBucket(h *metrics.Float64Histogram) float64 { +// median gives an approximation of the median value of a histogram. +func median(h *metrics.Float64Histogram) float64 { total := uint64(0) for _, count := range h.Counts { total += count From 87ca27dc8ae561698f5646f04767311e93b00d27 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 28 Sep 2022 20:02:37 +0200 Subject: [PATCH 12/30] metrics: explain why skipped --- metrics/metrics.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index 0eb66d58d041..4b07de3fdd65 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -84,8 +84,11 @@ var runtimeSamples = []metrics.Sample{ func readRuntimeStats(v *runtimeStats) { metrics.Read(runtimeSamples) for _, s := range runtimeSamples { + // Skip invalid/unknown metrics. This is needed because some metrics + // are unavailable in older Go versions, and attempting to read a 'bad' + // metric panics. if s.Value.Kind() == metrics.KindBad { - continue // skip invalid/unknown metrics + continue } switch s.Name { From 58194f57b77305f98065f9405d7177342ba1cfaa Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 28 Sep 2022 20:45:09 +0200 Subject: [PATCH 13/30] metrics: change GC pauses back to meter --- metrics/metrics.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index 4b07de3fdd65..99f439d982b3 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -152,7 +152,7 @@ func CollectProcessMetrics(refresh time.Duration) { cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry) cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) - memPauses = GetOrRegisterGaugeFloat64("system/memory/pauses", DefaultRegistry) + memPauses = GetOrRegisterMeter("system/memory/pauses", DefaultRegistry) memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) memHeld = GetOrRegisterGauge("system/memory/held", DefaultRegistry) @@ -182,7 +182,7 @@ func CollectProcessMetrics(refresh time.Duration) { cpuGoroutines.Update(int64(rstats[now].Goroutines)) memAllocs.Mark(int64(rstats[now].GCAllocBytes - rstats[prev].GCAllocBytes)) memFrees.Mark(int64(rstats[now].GCFreedBytes - rstats[prev].GCFreedBytes)) - memPauses.Update(rstats[now].GCPauses) + memPauses.Mark(int64(rstats[now].GCPauses - rstats[prev].GCPauses)) memUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapFree - rstats[now].HeapReleased)) memHeld.Update(int64(rstats[now].MemTotal)) From 432760bc993873e7d7062cf8d87cf9086b937b07 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 14:08:02 +0100 Subject: [PATCH 14/30] metrics: add Float64Histogram adapter --- metrics/metrics.go | 25 +--- metrics/runtimehistogram.go | 279 ++++++++++++++++++++++++++++++++++++ 2 files changed, 283 insertions(+), 21 deletions(-) create mode 100644 metrics/runtimehistogram.go diff --git a/metrics/metrics.go b/metrics/metrics.go index 99f439d982b3..d5cbebf57cc7 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -58,7 +58,7 @@ func init() { var threadCreateProfile = pprof.Lookup("threadcreate") type runtimeStats struct { - GCPauses float64 + GCPauses *metrics.Float64Histogram GCAllocBytes uint64 GCFreedBytes uint64 @@ -93,7 +93,7 @@ func readRuntimeStats(v *runtimeStats) { switch s.Name { case "/gc/pauses:seconds": - v.GCPauses = median(s.Value.Float64Histogram()) + v.GCPauses = s.Value.Float64Histogram() case "/gc/heap/allocs:bytes": v.GCAllocBytes = s.Value.Uint64() case "/gc/heap/frees:bytes": @@ -112,23 +112,6 @@ func readRuntimeStats(v *runtimeStats) { } } -// median gives an approximation of the median value of a histogram. -func median(h *metrics.Float64Histogram) float64 { - total := uint64(0) - for _, count := range h.Counts { - total += count - } - thresh := total / 2 - total = 0 - for i, count := range h.Counts { - total += count - if total >= thresh { - return h.Buckets[i] - } - } - panic("should not happen") -} - // CollectProcessMetrics periodically collects various metrics about the running process. func CollectProcessMetrics(refresh time.Duration) { // Short circuit if the metrics system is disabled @@ -152,7 +135,7 @@ func CollectProcessMetrics(refresh time.Duration) { cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry) cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) - memPauses = GetOrRegisterMeter("system/memory/pauses", DefaultRegistry) + memPauses = getOrRegisterRuntimeHistogram("system/memory/pauses", nil) memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) memHeld = GetOrRegisterGauge("system/memory/held", DefaultRegistry) @@ -182,7 +165,7 @@ func CollectProcessMetrics(refresh time.Duration) { cpuGoroutines.Update(int64(rstats[now].Goroutines)) memAllocs.Mark(int64(rstats[now].GCAllocBytes - rstats[prev].GCAllocBytes)) memFrees.Mark(int64(rstats[now].GCFreedBytes - rstats[prev].GCFreedBytes)) - memPauses.Mark(int64(rstats[now].GCPauses - rstats[prev].GCPauses)) + memPauses.update(rstats[now].GCPauses) memUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapFree - rstats[now].HeapReleased)) memHeld.Update(int64(rstats[now].MemTotal)) diff --git a/metrics/runtimehistogram.go b/metrics/runtimehistogram.go new file mode 100644 index 000000000000..48153af63eaa --- /dev/null +++ b/metrics/runtimehistogram.go @@ -0,0 +1,279 @@ +package metrics + +import ( + "math" + "runtime/metrics" + "sort" + "sync/atomic" +) + +func getOrRegisterRuntimeHistogram(name string, r Registry) *runtimeHistogram { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, newRuntimeHistogram).(*runtimeHistogram) +} + +// runtimeHistogram wraps a runtime/metrics histogram. +type runtimeHistogram struct { + v atomic.Value +} + +func newRuntimeHistogram() Histogram { + h := new(runtimeHistogram) + h.update(&metrics.Float64Histogram{}) + return h +} + +func (h *runtimeHistogram) load() *runtimeHistogramSnapshot { + return h.v.Load().(*runtimeHistogramSnapshot) +} + +func (h *runtimeHistogram) update(mh *metrics.Float64Histogram) { + s := runtimeHistogramSnapshot{ + Float64Histogram: metrics.Float64Histogram{ + Counts: make([]uint64, len(mh.Counts)), + Buckets: make([]float64, len(mh.Buckets)), + }, + } + copy(s.Counts, mh.Counts) + copy(s.Buckets, mh.Buckets) + h.v.Store(&s) +} + +func (h *runtimeHistogram) Clear() { + panic("runtimeHistogram does not support Clear") +} +func (h *runtimeHistogram) Update(int64) { + panic("runtimeHistogram does not support Update") +} +func (h *runtimeHistogram) Sample() Sample { + return NilSample{} +} + +// Snapshot returns a non-changing cop of the histogram. +func (h *runtimeHistogram) Snapshot() Histogram { + return h.load() +} + +// Count returns the sample count. +func (h *runtimeHistogram) Count() int64 { + return h.load().Count() +} + +// Mean returns an approximation of the mean. +func (h *runtimeHistogram) Mean() float64 { + return h.load().Mean() +} + +// StdDev approximates the standard deviation of the histogram. +func (h *runtimeHistogram) StdDev() float64 { + return h.load().StdDev() +} + +// Variance approximates the variance of the histogram. +func (h *runtimeHistogram) Variance() float64 { + return h.load().Variance() +} + +// Percentile computes the p'th percentile value. +func (h *runtimeHistogram) Percentile(p float64) float64 { + return h.load().Percentile(p) +} + +// Percentiles computes all requested percentile values. +func (h *runtimeHistogram) Percentiles(ps []float64) []float64 { + return h.load().Percentiles(ps) +} + +// Max returns the highest sample value. +func (h *runtimeHistogram) Max() int64 { + return h.load().Max() +} + +// Min returns the lowest sample value. +func (h *runtimeHistogram) Min() int64 { + return h.load().Min() +} + +// Sum returns the sum of all sample values. +func (h *runtimeHistogram) Sum() int64 { + return h.load().Sum() +} + +type runtimeHistogramSnapshot struct { + metrics.Float64Histogram +} + +func (h *runtimeHistogramSnapshot) Clear() { + panic("runtimeHistogram does not support Clear") +} +func (h *runtimeHistogramSnapshot) Update(int64) { + panic("runtimeHistogram does not support Update") +} +func (h *runtimeHistogramSnapshot) Sample() Sample { + return NilSample{} +} + +func (h *runtimeHistogramSnapshot) Snapshot() Histogram { + return h +} + +// Count returns the sample count. +func (h *runtimeHistogramSnapshot) Count() int64 { + var count int64 + for _, c := range h.Counts { + count += int64(c) + } + return count +} + +// Mean returns an approximation of the mean. +func (h *runtimeHistogramSnapshot) Mean() float64 { + sum, count := h.mean() + return sum / count +} + +// mean computes the mean and also the total sample count. +func (h *runtimeHistogramSnapshot) mean() (mean, totalCount float64) { + var sum float64 + for i, c := range h.Counts { + midpoint := (h.Buckets[i] + h.Buckets[i+1]) / 2 + sum += midpoint * float64(c) + totalCount += float64(c) + } + return sum / totalCount, totalCount +} + +// StdDev approximates the standard deviation of the histogram. +func (h *runtimeHistogramSnapshot) StdDev() float64 { + return math.Sqrt(h.Variance()) +} + +// Variance approximates the variance of the histogram. +func (h *runtimeHistogramSnapshot) Variance() float64 { + mean, totalCount := h.mean() + var sum float64 + for i, c := range h.Counts { + midpoint := (h.Buckets[i] + h.Buckets[i+1]) / 2 + d := midpoint - mean + sum += float64(c) * (d * d) + } + return sum / totalCount +} + +// Percentile computes the p'th percentile value. +func (h *runtimeHistogramSnapshot) Percentile(p float64) float64 { + threshold := float64(h.Count()) * p + values := [1]float64{threshold} + h.computePercentiles(values[:]) + return values[0] +} + +// Percentiles computes all requested percentile values. +func (h *runtimeHistogramSnapshot) Percentiles(ps []float64) []float64 { + // Compute threshold values. We need these to be sorted + // for the percentile computation, but restore the original + // order later, so keep the indexes as well. + count := float64(h.Count()) + thresholds := make([]float64, len(ps)) + indexes := make([]int, len(ps)) + for i, percentile := range ps { + thresholds[i] = count * percentile + indexes[i] = i + } + sort.Sort(floatsAscendingKeepingIndex{thresholds, indexes}) + + // Now compute. The result is stored back into the thresholds slice. + h.computePercentiles(thresholds) + + // Put the result back into the requested order. + sort.Sort(floatsByIndex{thresholds, indexes}) + return thresholds +} + +func (h *runtimeHistogramSnapshot) computePercentiles(thresh []float64) { + var totalCount float64 + for i, count := range h.Counts { + totalCount += float64(count) + + for len(thresh) > 0 && thresh[0] < totalCount { + thresh[0] = h.Buckets[i] + thresh = thresh[1:] + } + if len(thresh) == 0 { + return + } + } +} + +// The operations below return rounded results, because +// runtime/metrics.Float64Histogram is a collection of float64s. +// It also doesn't keep track of individual samples, so the results +// are approximated. + +// Max returns the highest sample value. +func (h *runtimeHistogramSnapshot) Max() int64 { + for i := len(h.Counts) - 1; i >= 0; i-- { + count := h.Counts[i] + if count > 0 { + return int64(math.Ceil(h.Buckets[i+1])) + } + } + return 0 +} + +// Min returns the lowest sample value. +func (h *runtimeHistogramSnapshot) Min() int64 { + for i, count := range h.Counts { + if count > 0 { + return int64(math.Floor(h.Buckets[i])) + } + } + return 0 +} + +// Sum returns the sum of all sample values. +func (h *runtimeHistogramSnapshot) Sum() int64 { + var sum float64 + for i := range h.Counts { + sum += h.Buckets[i] * float64(h.Counts[i]) + } + return int64(math.Round(sum)) +} + +type floatsAscendingKeepingIndex struct { + values []float64 + indexes []int +} + +func (s floatsAscendingKeepingIndex) Len() int { + return len(s.values) +} + +func (s floatsAscendingKeepingIndex) Less(i, j int) bool { + return s.values[i] < s.values[j] +} + +func (s floatsAscendingKeepingIndex) Swap(i, j int) { + s.values[i], s.values[j] = s.values[j], s.values[i] + s.indexes[i], s.indexes[j] = s.indexes[j], s.indexes[i] +} + +type floatsByIndex struct { + values []float64 + indexes []int +} + +func (s floatsByIndex) Len() int { + return len(s.values) +} + +func (s floatsByIndex) Less(i, j int) bool { + return s.indexes[i] < s.indexes[j] +} + +func (s floatsByIndex) Swap(i, j int) { + s.values[i], s.values[j] = s.values[j], s.values[i] + s.indexes[i], s.indexes[j] = s.indexes[j], s.indexes[i] +} From 32a49ce6e1f9a4a63299d98e0f859f568911c070 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 17:49:24 +0100 Subject: [PATCH 15/30] metrics: fix some issues in runtimeHistogram --- metrics/runtimehistogram.go | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/metrics/runtimehistogram.go b/metrics/runtimehistogram.go index 48153af63eaa..51b32922b7f3 100644 --- a/metrics/runtimehistogram.go +++ b/metrics/runtimehistogram.go @@ -31,10 +31,8 @@ func (h *runtimeHistogram) load() *runtimeHistogramSnapshot { func (h *runtimeHistogram) update(mh *metrics.Float64Histogram) { s := runtimeHistogramSnapshot{ - Float64Histogram: metrics.Float64Histogram{ - Counts: make([]uint64, len(mh.Counts)), - Buckets: make([]float64, len(mh.Buckets)), - }, + Counts: make([]uint64, len(mh.Counts)), + Buckets: make([]float64, len(mh.Buckets)), } copy(s.Counts, mh.Counts) copy(s.Buckets, mh.Buckets) @@ -101,9 +99,7 @@ func (h *runtimeHistogram) Sum() int64 { return h.load().Sum() } -type runtimeHistogramSnapshot struct { - metrics.Float64Histogram -} +type runtimeHistogramSnapshot metrics.Float64Histogram func (h *runtimeHistogramSnapshot) Clear() { panic("runtimeHistogram does not support Clear") @@ -130,8 +126,11 @@ func (h *runtimeHistogramSnapshot) Count() int64 { // Mean returns an approximation of the mean. func (h *runtimeHistogramSnapshot) Mean() float64 { - sum, count := h.mean() - return sum / count + if len(h.Counts) == 0 { + return 0 + } + mean, _ := h.mean() + return mean } // mean computes the mean and also the total sample count. @@ -152,6 +151,10 @@ func (h *runtimeHistogramSnapshot) StdDev() float64 { // Variance approximates the variance of the histogram. func (h *runtimeHistogramSnapshot) Variance() float64 { + if len(h.Counts) == 0 { + return 0 + } + mean, totalCount := h.mean() var sum float64 for i, c := range h.Counts { @@ -159,7 +162,7 @@ func (h *runtimeHistogramSnapshot) Variance() float64 { d := midpoint - mean sum += float64(c) * (d * d) } - return sum / totalCount + return sum / (totalCount - 1) } // Percentile computes the p'th percentile value. @@ -179,7 +182,7 @@ func (h *runtimeHistogramSnapshot) Percentiles(ps []float64) []float64 { thresholds := make([]float64, len(ps)) indexes := make([]int, len(ps)) for i, percentile := range ps { - thresholds[i] = count * percentile + thresholds[i] = count * math.Max(0, math.Min(1.0, percentile)) indexes[i] = i } sort.Sort(floatsAscendingKeepingIndex{thresholds, indexes}) @@ -207,10 +210,9 @@ func (h *runtimeHistogramSnapshot) computePercentiles(thresh []float64) { } } -// The operations below return rounded results, because -// runtime/metrics.Float64Histogram is a collection of float64s. -// It also doesn't keep track of individual samples, so the results -// are approximated. +// Note: runtime/metrics.Float64Histogram is a collection of float64s, but the methods +// below need to return int64 to satisfy the interface. The histogram provided by runtime +// also doesn't keep track of individual samples, so results are approximated. // Max returns the highest sample value. func (h *runtimeHistogramSnapshot) Max() int64 { @@ -239,7 +241,7 @@ func (h *runtimeHistogramSnapshot) Sum() int64 { for i := range h.Counts { sum += h.Buckets[i] * float64(h.Counts[i]) } - return int64(math.Round(sum)) + return int64(math.Ceil(sum)) } type floatsAscendingKeepingIndex struct { From 42afa6824b1bbc8ce253b9e4561b569ac35a5da5 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 17:49:40 +0100 Subject: [PATCH 16/30] metrics: add tests --- metrics/runtimehistogram_test.go | 131 +++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 metrics/runtimehistogram_test.go diff --git a/metrics/runtimehistogram_test.go b/metrics/runtimehistogram_test.go new file mode 100644 index 000000000000..08a9aeb6686d --- /dev/null +++ b/metrics/runtimehistogram_test.go @@ -0,0 +1,131 @@ +package metrics + +import ( + "fmt" + "math" + "reflect" + "runtime/metrics" + "testing" +) + +type runtimeHistogramTest struct { + h metrics.Float64Histogram + + Count int64 + Min int64 + Max int64 + Sum int64 + Mean float64 + Variance float64 + StdDev float64 + Percentiles []float64 // .5 .8 .9 .99 .995 +} + +// This test checks the results of statistical functions implemented +// by runtimeHistogramSnapshot. +func TestRuntimeHistogramStats(t *testing.T) { + tests := []runtimeHistogramTest{ + 0: { + h: metrics.Float64Histogram{ + Counts: []uint64{}, + Buckets: []float64{}, + }, + Count: 0, + Max: 0, + Min: 0, + Sum: 0, + Mean: 0, + Variance: 0, + StdDev: 0, + Percentiles: []float64{0, 0, 0, 0, 0}, + }, + 1: { + h: metrics.Float64Histogram{ + Counts: []uint64{0, 1}, + Buckets: []float64{0, 1, math.Inf(1)}, + }, + Count: 1, + Max: math.MaxInt64, + Min: 1, + Sum: 1, + Mean: math.Inf(1), + Percentiles: []float64{1, 1, 1, 1, 1}, + // Not sure if these should be a different value. + Variance: math.NaN(), + StdDev: math.NaN(), + }, + 2: { + h: metrics.Float64Histogram{ + Counts: []uint64{8, 6, 3, 1}, + Buckets: []float64{12, 16, 18, 24, 25}, + }, + Count: 18, + Max: 25, + Min: 12, + Sum: 270, + Mean: 16.75, + Variance: 10.3015, + StdDev: 3.2096, + Percentiles: []float64{16, 18, 18, 24, 24}, + }, + } + + for i, test := range tests { + t.Run(fmt.Sprint(i), func(t *testing.T) { + s := runtimeHistogramSnapshot(test.h) + + if v := s.Count(); v != test.Count { + t.Errorf("Count() = %v, want %v", v, test.Count) + } + if v := s.Min(); v != test.Min { + t.Errorf("Min() = %v, want %v", v, test.Min) + } + if v := s.Max(); v != test.Max { + t.Errorf("Max() = %v, want %v", v, test.Max) + } + if v := s.Sum(); v != test.Sum { + t.Errorf("Sum() = %v, want %v", v, test.Sum) + } + if v := s.Mean(); !approxEqual(v, test.Mean, 0.0001) { + t.Errorf("Mean() = %v, want %v", v, test.Mean) + } + if v := s.Variance(); !approxEqual(v, test.Variance, 0.0001) { + t.Errorf("Variance() = %v, want %v", v, test.Variance) + } + if v := s.StdDev(); !approxEqual(v, test.StdDev, 0.0001) { + t.Errorf("StdDev() = %v, want %v", v, test.StdDev) + } + ps := []float64{.5, .8, .9, .99, .995} + if v := s.Percentiles(ps); !reflect.DeepEqual(v, test.Percentiles) { + t.Errorf("Percentiles(%v) = %v, want %v", ps, v, test.Percentiles) + } + }) + } +} + +func approxEqual(x, y, ε float64) bool { + if math.IsInf(x, -1) && math.IsInf(y, -1) { + return true + } + if math.IsInf(x, 1) && math.IsInf(y, 1) { + return true + } + if math.IsNaN(x) && math.IsNaN(y) { + return true + } + return math.Abs(x-y) < ε +} + +// This test verifies that requesting Percentiles in unsorted order +// returns them in the requested order. +func TestRuntimeHistogramStatsPercentileOrder(t *testing.T) { + p := runtimeHistogramSnapshot{ + Counts: []uint64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + Buckets: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + } + result := p.Percentiles([]float64{1, 0.2, 0.5, 0.1, 0.2}) + expected := []float64{10, 2, 5, 1, 2} + if !reflect.DeepEqual(result, expected) { + t.Fatal("wrong result:", result) + } +} From e223b0399f1bcd181723bca3ee0fb96d58e3e589 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 18:00:57 +0100 Subject: [PATCH 17/30] metrics: handle nil histogram updates --- metrics/runtimehistogram.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/metrics/runtimehistogram.go b/metrics/runtimehistogram.go index 51b32922b7f3..69212afec4f7 100644 --- a/metrics/runtimehistogram.go +++ b/metrics/runtimehistogram.go @@ -30,6 +30,13 @@ func (h *runtimeHistogram) load() *runtimeHistogramSnapshot { } func (h *runtimeHistogram) update(mh *metrics.Float64Histogram) { + if mh == nil { + // The update value can be nil if the current Go version doesn't support a + // requested metric. It's just easier to handle nil here than putting + // conditionals everywhere. + return + } + s := runtimeHistogramSnapshot{ Counts: make([]uint64, len(mh.Counts)), Buckets: make([]float64, len(mh.Buckets)), From d5142b097afb1adbedf36007ae3c2083768c1121 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 18:01:24 +0100 Subject: [PATCH 18/30] metrics: add some more runtime metrics --- metrics/metrics.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index d5cbebf57cc7..3e0169eccefa 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -61,24 +61,28 @@ type runtimeStats struct { GCPauses *metrics.Float64Histogram GCAllocBytes uint64 GCFreedBytes uint64 + GCAllocSizes *metrics.Float64Histogram MemTotal uint64 HeapFree uint64 HeapReleased uint64 HeapUnused uint64 - Goroutines uint64 + Goroutines uint64 + SchedLatency *metrics.Float64Histogram } var runtimeSamples = []metrics.Sample{ {Name: "/gc/pauses:seconds"}, // histogram {Name: "/gc/heap/allocs:bytes"}, + {Name: "/gc/heap/allocs-by-size:bytes"}, // histogram {Name: "/gc/heap/frees:bytes"}, {Name: "/memory/classes/total:bytes"}, {Name: "/memory/classes/heap/free:bytes"}, {Name: "/memory/classes/heap/released:bytes"}, {Name: "/memory/classes/heap/unused:bytes"}, {Name: "/sched/goroutines:goroutines"}, + {Name: "/sched/latencies:seconds"}, // histogram } func readRuntimeStats(v *runtimeStats) { @@ -135,7 +139,9 @@ func CollectProcessMetrics(refresh time.Duration) { cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry) cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) + cpuSchedLatency = getOrRegisterRuntimeHistogram("system/cpu/schedlatency", nil) memPauses = getOrRegisterRuntimeHistogram("system/memory/pauses", nil) + memAllocsBySize = getOrRegisterRuntimeHistogram("system/memory/allocs-bysize", nil) memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) memHeld = GetOrRegisterGauge("system/memory/held", DefaultRegistry) @@ -163,11 +169,13 @@ func CollectProcessMetrics(refresh time.Duration) { // Go runtime metrics readRuntimeStats(&rstats[now]) cpuGoroutines.Update(int64(rstats[now].Goroutines)) + cpuSchedLatency.update(rstats[now].SchedLatency) memAllocs.Mark(int64(rstats[now].GCAllocBytes - rstats[prev].GCAllocBytes)) memFrees.Mark(int64(rstats[now].GCFreedBytes - rstats[prev].GCFreedBytes)) - memPauses.update(rstats[now].GCPauses) memUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapFree - rstats[now].HeapReleased)) memHeld.Update(int64(rstats[now].MemTotal)) + memPauses.update(rstats[now].GCPauses) + memAllocsBySize.update(rstats[now].GCAllocSizes) // Disk if ReadDiskStats(&diskstats[now]) == nil { From a8dd515af73e0efd00df7c1908a1a638914db37d Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 18:59:29 +0100 Subject: [PATCH 19/30] metrics: improve handling of +Inf value in bucket The result of Histogram methods should never be Inf or NaN because the metrics exporters can't handle these values. --- metrics/runtimehistogram.go | 26 ++++++++++++++++++++++++-- metrics/runtimehistogram_test.go | 20 +++++++++++--------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/metrics/runtimehistogram.go b/metrics/runtimehistogram.go index 69212afec4f7..d651ef1dfc56 100644 --- a/metrics/runtimehistogram.go +++ b/metrics/runtimehistogram.go @@ -144,13 +144,30 @@ func (h *runtimeHistogramSnapshot) Mean() float64 { func (h *runtimeHistogramSnapshot) mean() (mean, totalCount float64) { var sum float64 for i, c := range h.Counts { - midpoint := (h.Buckets[i] + h.Buckets[i+1]) / 2 + midpoint := h.midpoint(i) sum += midpoint * float64(c) totalCount += float64(c) } return sum / totalCount, totalCount } +func (h *runtimeHistogramSnapshot) midpoint(bucket int) float64 { + high := h.Buckets[bucket+1] + low := h.Buckets[bucket] + if math.IsInf(high, 1) { + // The edge of the highest bucket can be +Inf, and it's supposed to mean that this + // bucket contains all remaining samples > low. We can't get the middle of an + // infinite range, so just return the lower bound of this bucket instead. + return low + } + if math.IsInf(low, -1) { + // Similarly, we can get -Inf in the left edge of the lowest bucket, + // and it means the bucket contains all remaining values < high. + return high + } + return (low + high) / 2 +} + // StdDev approximates the standard deviation of the histogram. func (h *runtimeHistogramSnapshot) StdDev() float64 { return math.Sqrt(h.Variance()) @@ -163,9 +180,14 @@ func (h *runtimeHistogramSnapshot) Variance() float64 { } mean, totalCount := h.mean() + if totalCount <= 1 { + // There is no variance when there are zero or one items. + return 0 + } + var sum float64 for i, c := range h.Counts { - midpoint := (h.Buckets[i] + h.Buckets[i+1]) / 2 + midpoint := h.midpoint(i) d := midpoint - mean sum += float64(c) * (d * d) } diff --git a/metrics/runtimehistogram_test.go b/metrics/runtimehistogram_test.go index 08a9aeb6686d..6cf2d50d9a3b 100644 --- a/metrics/runtimehistogram_test.go +++ b/metrics/runtimehistogram_test.go @@ -8,6 +8,8 @@ import ( "testing" ) +var _ Histogram = (*runtimeHistogram)(nil) + type runtimeHistogramTest struct { h metrics.Float64Histogram @@ -40,19 +42,19 @@ func TestRuntimeHistogramStats(t *testing.T) { Percentiles: []float64{0, 0, 0, 0, 0}, }, 1: { + // This checks the case where the highest bucket is +Inf. h: metrics.Float64Histogram{ - Counts: []uint64{0, 1}, - Buckets: []float64{0, 1, math.Inf(1)}, + Counts: []uint64{0, 1, 2}, + Buckets: []float64{0, 0.5, 1, math.Inf(1)}, }, - Count: 1, + Count: 3, Max: math.MaxInt64, - Min: 1, - Sum: 1, - Mean: math.Inf(1), + Min: 0, + Sum: 3, + Mean: 0.9166666, Percentiles: []float64{1, 1, 1, 1, 1}, - // Not sure if these should be a different value. - Variance: math.NaN(), - StdDev: math.NaN(), + Variance: 0.020833, + StdDev: 0.144433, }, 2: { h: metrics.Float64Histogram{ From e0712aa5a7ac980cbac67c7bc7aa0a6138be7768 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 20:02:09 +0100 Subject: [PATCH 20/30] metrics: actually read new runtime metrics --- metrics/metrics.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metrics/metrics.go b/metrics/metrics.go index 3e0169eccefa..3c081e1d96c5 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -100,6 +100,8 @@ func readRuntimeStats(v *runtimeStats) { v.GCPauses = s.Value.Float64Histogram() case "/gc/heap/allocs:bytes": v.GCAllocBytes = s.Value.Uint64() + case "/gc/heap/allocs-by-size:bytes": + v.GCAllocSizes = s.Value.Float64Histogram() case "/gc/heap/frees:bytes": v.GCFreedBytes = s.Value.Uint64() case "/memory/classes/total:bytes": @@ -112,6 +114,8 @@ func readRuntimeStats(v *runtimeStats) { v.HeapUnused = s.Value.Uint64() case "/sched/goroutines:goroutines": v.Goroutines = s.Value.Uint64() + case "/sched/latencies:seconds": + v.SchedLatency = s.Value.Float64Histogram() } } } From 4419d879d31b89d42e259e94f22d786ebf4e0b59 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 20:06:29 +0100 Subject: [PATCH 21/30] metrics: add scale factor --- metrics/metrics.go | 11 ++++++++--- metrics/runtimehistogram.go | 25 +++++++++++++++---------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index 3c081e1d96c5..1ee5b6df56b1 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -136,6 +136,11 @@ func CollectProcessMetrics(refresh time.Duration) { rstats = make([]runtimeStats, 2) ) + // This scale factor is used for the runtime's time metrics. It's useful to convert to + // ns here because the runtime gives times in float seconds, but runtimeHistogram can + // only provide integers for the minimum and maximum values. + const secondsToNs = float64(time.Second) + // Define the various metrics to collect var ( cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry) @@ -143,9 +148,9 @@ func CollectProcessMetrics(refresh time.Duration) { cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry) cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) - cpuSchedLatency = getOrRegisterRuntimeHistogram("system/cpu/schedlatency", nil) - memPauses = getOrRegisterRuntimeHistogram("system/memory/pauses", nil) - memAllocsBySize = getOrRegisterRuntimeHistogram("system/memory/allocs-bysize", nil) + cpuSchedLatency = getOrRegisterRuntimeHistogram("system/cpu/schedlatency", secondsToNs, nil) + memPauses = getOrRegisterRuntimeHistogram("system/memory/pauses", secondsToNs, nil) + memAllocsBySize = getOrRegisterRuntimeHistogram("system/memory/allocs-bysize", 1, nil) memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) memHeld = GetOrRegisterGauge("system/memory/held", DefaultRegistry) diff --git a/metrics/runtimehistogram.go b/metrics/runtimehistogram.go index d651ef1dfc56..480102fac662 100644 --- a/metrics/runtimehistogram.go +++ b/metrics/runtimehistogram.go @@ -7,28 +7,26 @@ import ( "sync/atomic" ) -func getOrRegisterRuntimeHistogram(name string, r Registry) *runtimeHistogram { - if nil == r { +func getOrRegisterRuntimeHistogram(name string, scale float64, r Registry) *runtimeHistogram { + if r == nil { r = DefaultRegistry } - return r.GetOrRegister(name, newRuntimeHistogram).(*runtimeHistogram) + constructor := func() Histogram { return newRuntimeHistogram(scale) } + return r.GetOrRegister(name, constructor).(*runtimeHistogram) } // runtimeHistogram wraps a runtime/metrics histogram. type runtimeHistogram struct { - v atomic.Value + v atomic.Value + scaleFactor float64 } -func newRuntimeHistogram() Histogram { - h := new(runtimeHistogram) +func newRuntimeHistogram(scale float64) *runtimeHistogram { + h := &runtimeHistogram{scaleFactor: scale} h.update(&metrics.Float64Histogram{}) return h } -func (h *runtimeHistogram) load() *runtimeHistogramSnapshot { - return h.v.Load().(*runtimeHistogramSnapshot) -} - func (h *runtimeHistogram) update(mh *metrics.Float64Histogram) { if mh == nil { // The update value can be nil if the current Go version doesn't support a @@ -43,9 +41,16 @@ func (h *runtimeHistogram) update(mh *metrics.Float64Histogram) { } copy(s.Counts, mh.Counts) copy(s.Buckets, mh.Buckets) + for i, b := range s.Buckets { + s.Buckets[i] = b * h.scaleFactor + } h.v.Store(&s) } +func (h *runtimeHistogram) load() *runtimeHistogramSnapshot { + return h.v.Load().(*runtimeHistogramSnapshot) +} + func (h *runtimeHistogram) Clear() { panic("runtimeHistogram does not support Clear") } From 0db04a100cd1d930152c6ebfebfd5b5bec0c4ca9 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 20:07:08 +0100 Subject: [PATCH 22/30] metrics: add +Inf workaround in Max --- metrics/runtimehistogram.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/metrics/runtimehistogram.go b/metrics/runtimehistogram.go index 480102fac662..c68939af1ef7 100644 --- a/metrics/runtimehistogram.go +++ b/metrics/runtimehistogram.go @@ -253,7 +253,11 @@ func (h *runtimeHistogramSnapshot) Max() int64 { for i := len(h.Counts) - 1; i >= 0; i-- { count := h.Counts[i] if count > 0 { - return int64(math.Ceil(h.Buckets[i+1])) + edge := h.Buckets[i+1] + if math.IsInf(edge, 1) { + edge = h.Buckets[i] + } + return int64(math.Ceil(edge)) } } return 0 From 825acd1f350c93a175f6f6975771e2e3af1e03d5 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 20:45:53 +0100 Subject: [PATCH 23/30] metrics/influxdb: add .25 percentile in histogram output --- metrics/influxdb/influxdb.go | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go index e99717aeebf9..1bf0c355edfe 100644 --- a/metrics/influxdb/influxdb.go +++ b/metrics/influxdb/influxdb.go @@ -160,27 +160,28 @@ func (r *reporter) send() error { }) case metrics.Histogram: ms := metric.Snapshot() - if ms.Count() > 0 { - ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + ps := ms.Percentiles([]float64{0.25, 0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + fields := map[string]interface{}{ + "count": ms.Count(), + "max": ms.Max(), + "mean": ms.Mean(), + "min": ms.Min(), + "stddev": ms.StdDev(), + "variance": ms.Variance(), + "p25": ps[0], + "p50": ps[1], + "p75": ps[2], + "p95": ps[3], + "p99": ps[4], + "p999": ps[5], + "p9999": ps[6], + } pts = append(pts, client.Point{ Measurement: fmt.Sprintf("%s%s.histogram", namespace, name), Tags: r.tags, - Fields: map[string]interface{}{ - "count": ms.Count(), - "max": ms.Max(), - "mean": ms.Mean(), - "min": ms.Min(), - "stddev": ms.StdDev(), - "variance": ms.Variance(), - "p50": ps[0], - "p75": ps[1], - "p95": ps[2], - "p99": ps[3], - "p999": ps[4], - "p9999": ps[5], - }, - Time: now, + Fields: fields, + Time: now, }) } case metrics.Meter: From 3503c1189977db9c1ddef65fe8b5f9b03a9b26c1 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 23:04:38 +0100 Subject: [PATCH 24/30] metrics: remove allocs-bysize --- metrics/metrics.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index 1ee5b6df56b1..5b9f565b84c2 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -61,7 +61,6 @@ type runtimeStats struct { GCPauses *metrics.Float64Histogram GCAllocBytes uint64 GCFreedBytes uint64 - GCAllocSizes *metrics.Float64Histogram MemTotal uint64 HeapFree uint64 @@ -75,7 +74,6 @@ type runtimeStats struct { var runtimeSamples = []metrics.Sample{ {Name: "/gc/pauses:seconds"}, // histogram {Name: "/gc/heap/allocs:bytes"}, - {Name: "/gc/heap/allocs-by-size:bytes"}, // histogram {Name: "/gc/heap/frees:bytes"}, {Name: "/memory/classes/total:bytes"}, {Name: "/memory/classes/heap/free:bytes"}, @@ -100,8 +98,6 @@ func readRuntimeStats(v *runtimeStats) { v.GCPauses = s.Value.Float64Histogram() case "/gc/heap/allocs:bytes": v.GCAllocBytes = s.Value.Uint64() - case "/gc/heap/allocs-by-size:bytes": - v.GCAllocSizes = s.Value.Float64Histogram() case "/gc/heap/frees:bytes": v.GCFreedBytes = s.Value.Uint64() case "/memory/classes/total:bytes": @@ -150,7 +146,6 @@ func CollectProcessMetrics(refresh time.Duration) { cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) cpuSchedLatency = getOrRegisterRuntimeHistogram("system/cpu/schedlatency", secondsToNs, nil) memPauses = getOrRegisterRuntimeHistogram("system/memory/pauses", secondsToNs, nil) - memAllocsBySize = getOrRegisterRuntimeHistogram("system/memory/allocs-bysize", 1, nil) memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) memHeld = GetOrRegisterGauge("system/memory/held", DefaultRegistry) @@ -184,7 +179,6 @@ func CollectProcessMetrics(refresh time.Duration) { memUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapFree - rstats[now].HeapReleased)) memHeld.Update(int64(rstats[now].MemTotal)) memPauses.update(rstats[now].GCPauses) - memAllocsBySize.update(rstats[now].GCAllocSizes) // Disk if ReadDiskStats(&diskstats[now]) == nil { From 152df4e7674bbf4153efdcd2a057ddeaea8f0983 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 23:09:10 +0100 Subject: [PATCH 25/30] metrics: add more heap metrics --- metrics/metrics.go | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index 5b9f565b84c2..c81229deaa6b 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -61,6 +61,7 @@ type runtimeStats struct { GCPauses *metrics.Float64Histogram GCAllocBytes uint64 GCFreedBytes uint64 + GCHeapGoal uint64 MemTotal uint64 HeapFree uint64 @@ -75,6 +76,7 @@ var runtimeSamples = []metrics.Sample{ {Name: "/gc/pauses:seconds"}, // histogram {Name: "/gc/heap/allocs:bytes"}, {Name: "/gc/heap/frees:bytes"}, + {Name: "/gc/heap/goal:bytes"}, {Name: "/memory/classes/total:bytes"}, {Name: "/memory/classes/heap/free:bytes"}, {Name: "/memory/classes/heap/released:bytes"}, @@ -100,6 +102,8 @@ func readRuntimeStats(v *runtimeStats) { v.GCAllocBytes = s.Value.Uint64() case "/gc/heap/frees:bytes": v.GCFreedBytes = s.Value.Uint64() + case "/gc/heap/goal:bytes": + v.GCHeapGoal = s.Value.Uint64() case "/memory/classes/total:bytes": v.MemTotal = s.Value.Uint64() case "/memory/classes/heap/free:bytes": @@ -148,8 +152,11 @@ func CollectProcessMetrics(refresh time.Duration) { memPauses = getOrRegisterRuntimeHistogram("system/memory/pauses", secondsToNs, nil) memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) - memHeld = GetOrRegisterGauge("system/memory/held", DefaultRegistry) - memUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) + heapGCGoal = GetOrRegisterGauge("system/memory/gcgoal", DefaultRegistry) + heapTotal = GetOrRegisterGauge("system/memory/held", DefaultRegistry) + heapUnused = GetOrRegisterGauge("system/memory/unused", DefaultRegistry) + heapUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) + heapReleased = GetOrRegisterGauge("system/memory/used", DefaultRegistry) diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry) @@ -172,13 +179,19 @@ func CollectProcessMetrics(refresh time.Duration) { // Go runtime metrics readRuntimeStats(&rstats[now]) + cpuGoroutines.Update(int64(rstats[now].Goroutines)) cpuSchedLatency.update(rstats[now].SchedLatency) + memPauses.update(rstats[now].GCPauses) + memAllocs.Mark(int64(rstats[now].GCAllocBytes - rstats[prev].GCAllocBytes)) memFrees.Mark(int64(rstats[now].GCFreedBytes - rstats[prev].GCFreedBytes)) - memUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapFree - rstats[now].HeapReleased)) - memHeld.Update(int64(rstats[now].MemTotal)) - memPauses.update(rstats[now].GCPauses) + + heapTotal.Update(int64(rstats[now].MemTotal)) + heapUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapUnused - rstats[now].HeapFree - rstats[now].HeapReleased)) + heapUnused.Update(int64(rstats[now].HeapUnused)) + heapReleased.Update(int64(rstats[now].HeapReleased)) + heapGCGoal.Update(int64(rstats[now].GCHeapGoal)) // Disk if ReadDiskStats(&diskstats[now]) == nil { From fdc0ba35901a2984046e5bd03306be4e09433665 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 23:09:18 +0100 Subject: [PATCH 26/30] metrics: fix test --- metrics/runtimehistogram_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/runtimehistogram_test.go b/metrics/runtimehistogram_test.go index 6cf2d50d9a3b..d53a01438311 100644 --- a/metrics/runtimehistogram_test.go +++ b/metrics/runtimehistogram_test.go @@ -48,7 +48,7 @@ func TestRuntimeHistogramStats(t *testing.T) { Buckets: []float64{0, 0.5, 1, math.Inf(1)}, }, Count: 3, - Max: math.MaxInt64, + Max: 1, Min: 0, Sum: 3, Mean: 0.9166666, From 2aed4e30d1c0f265bcca08606343d977db651a90 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 23:25:45 +0100 Subject: [PATCH 27/30] metrics: fix heapReleased metric name --- metrics/metrics.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index c81229deaa6b..8254908714d6 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -156,7 +156,7 @@ func CollectProcessMetrics(refresh time.Duration) { heapTotal = GetOrRegisterGauge("system/memory/held", DefaultRegistry) heapUnused = GetOrRegisterGauge("system/memory/unused", DefaultRegistry) heapUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) - heapReleased = GetOrRegisterGauge("system/memory/used", DefaultRegistry) + heapReleased = GetOrRegisterGauge("system/memory/released", DefaultRegistry) diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry) From 2952f6cfd475b0dceb4b31d9ee8983ff7e675c5f Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 23:27:29 +0100 Subject: [PATCH 28/30] metrics: add heap free metric --- metrics/metrics.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metrics/metrics.go b/metrics/metrics.go index 8254908714d6..f69929b32d9a 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -157,6 +157,7 @@ func CollectProcessMetrics(refresh time.Duration) { heapUnused = GetOrRegisterGauge("system/memory/unused", DefaultRegistry) heapUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) heapReleased = GetOrRegisterGauge("system/memory/released", DefaultRegistry) + heapFree = GetOrRegisterGauge("system/memory/free", DefaultRegistry) diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry) @@ -191,6 +192,7 @@ func CollectProcessMetrics(refresh time.Duration) { heapUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapUnused - rstats[now].HeapFree - rstats[now].HeapReleased)) heapUnused.Update(int64(rstats[now].HeapUnused)) heapReleased.Update(int64(rstats[now].HeapReleased)) + heapFree.Update(int64(rstats[now].HeapFree)) heapGCGoal.Update(int64(rstats[now].GCHeapGoal)) // Disk From 924584b6ef72faadd8e55ea79e35ce9ea7873b73 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 9 Nov 2022 23:46:45 +0100 Subject: [PATCH 29/30] metrics: change memory metrics again --- metrics/metrics.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index f69929b32d9a..06d819787c22 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -64,6 +64,7 @@ type runtimeStats struct { GCHeapGoal uint64 MemTotal uint64 + HeapObjects uint64 HeapFree uint64 HeapReleased uint64 HeapUnused uint64 @@ -78,6 +79,7 @@ var runtimeSamples = []metrics.Sample{ {Name: "/gc/heap/frees:bytes"}, {Name: "/gc/heap/goal:bytes"}, {Name: "/memory/classes/total:bytes"}, + {Name: "/memory/classes/heap/objects:bytes"}, {Name: "/memory/classes/heap/free:bytes"}, {Name: "/memory/classes/heap/released:bytes"}, {Name: "/memory/classes/heap/unused:bytes"}, @@ -106,6 +108,8 @@ func readRuntimeStats(v *runtimeStats) { v.GCHeapGoal = s.Value.Uint64() case "/memory/classes/total:bytes": v.MemTotal = s.Value.Uint64() + case "/memory/classes/heap/objects:bytes": + v.HeapObjects = s.Value.Uint64() case "/memory/classes/heap/free:bytes": v.HeapFree = s.Value.Uint64() case "/memory/classes/heap/released:bytes": @@ -152,12 +156,10 @@ func CollectProcessMetrics(refresh time.Duration) { memPauses = getOrRegisterRuntimeHistogram("system/memory/pauses", secondsToNs, nil) memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) + memTotal = GetOrRegisterGauge("system/memory/held", DefaultRegistry) heapGCGoal = GetOrRegisterGauge("system/memory/gcgoal", DefaultRegistry) - heapTotal = GetOrRegisterGauge("system/memory/held", DefaultRegistry) - heapUnused = GetOrRegisterGauge("system/memory/unused", DefaultRegistry) heapUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) - heapReleased = GetOrRegisterGauge("system/memory/released", DefaultRegistry) - heapFree = GetOrRegisterGauge("system/memory/free", DefaultRegistry) + heapObjects = GetOrRegisterGauge("system/memory/objects", DefaultRegistry) diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry) @@ -188,11 +190,9 @@ func CollectProcessMetrics(refresh time.Duration) { memAllocs.Mark(int64(rstats[now].GCAllocBytes - rstats[prev].GCAllocBytes)) memFrees.Mark(int64(rstats[now].GCFreedBytes - rstats[prev].GCFreedBytes)) - heapTotal.Update(int64(rstats[now].MemTotal)) + memTotal.Update(int64(rstats[now].MemTotal)) heapUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapUnused - rstats[now].HeapFree - rstats[now].HeapReleased)) - heapUnused.Update(int64(rstats[now].HeapUnused)) - heapReleased.Update(int64(rstats[now].HeapReleased)) - heapFree.Update(int64(rstats[now].HeapFree)) + heapObjects.Update(int64(rstats[now].HeapObjects)) heapGCGoal.Update(int64(rstats[now].GCHeapGoal)) // Disk From 39aad8bd1a4666104620c899796b9b6333897680 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Fri, 11 Nov 2022 11:24:03 +0100 Subject: [PATCH 30/30] metrics: remove gc goal --- metrics/metrics.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index 06d819787c22..2edf8e35f151 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -61,7 +61,6 @@ type runtimeStats struct { GCPauses *metrics.Float64Histogram GCAllocBytes uint64 GCFreedBytes uint64 - GCHeapGoal uint64 MemTotal uint64 HeapObjects uint64 @@ -77,7 +76,6 @@ var runtimeSamples = []metrics.Sample{ {Name: "/gc/pauses:seconds"}, // histogram {Name: "/gc/heap/allocs:bytes"}, {Name: "/gc/heap/frees:bytes"}, - {Name: "/gc/heap/goal:bytes"}, {Name: "/memory/classes/total:bytes"}, {Name: "/memory/classes/heap/objects:bytes"}, {Name: "/memory/classes/heap/free:bytes"}, @@ -104,8 +102,6 @@ func readRuntimeStats(v *runtimeStats) { v.GCAllocBytes = s.Value.Uint64() case "/gc/heap/frees:bytes": v.GCFreedBytes = s.Value.Uint64() - case "/gc/heap/goal:bytes": - v.GCHeapGoal = s.Value.Uint64() case "/memory/classes/total:bytes": v.MemTotal = s.Value.Uint64() case "/memory/classes/heap/objects:bytes": @@ -157,7 +153,6 @@ func CollectProcessMetrics(refresh time.Duration) { memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) memTotal = GetOrRegisterGauge("system/memory/held", DefaultRegistry) - heapGCGoal = GetOrRegisterGauge("system/memory/gcgoal", DefaultRegistry) heapUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) heapObjects = GetOrRegisterGauge("system/memory/objects", DefaultRegistry) diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) @@ -193,7 +188,6 @@ func CollectProcessMetrics(refresh time.Duration) { memTotal.Update(int64(rstats[now].MemTotal)) heapUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapUnused - rstats[now].HeapFree - rstats[now].HeapReleased)) heapObjects.Update(int64(rstats[now].HeapObjects)) - heapGCGoal.Update(int64(rstats[now].GCHeapGoal)) // Disk if ReadDiskStats(&diskstats[now]) == nil {