From f53c14a31acf6fdc8c193e46baa9bcb4b0136d2e Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Wed, 8 May 2019 18:32:58 -0400 Subject: [PATCH] server/heapprofiler: don't consider "goIdle" memory Before this patch, we were taking a heap profile once RSS got above 85% of system memory, and we wouldn't take another profile until RSS dipped below that threshold and rose above it again. The problem with RSS is that it includes "goIdle" memory (memory allocated from the OS that's not actually in use); I've seen it be up to several gigs. With this patch, we now take a profile every time the heap reaches a new high-water mark. The recorded high-water mark is reset once an hour, ensuring that one big measurement doesn't stop us forever from taking another profile. This is simpler, and it stands a chance of catching further heap increases. The rationale behind the patch is that we want profiles taken when the heap is large more than profiles taken when the RSS is large. I'm looking at a case where we took a heap profile when the heap was 4.5 gigs with 2 gigs idle, and then never took one again because of how the heuristic worked. We then OOMed when the heap was larger and the idle space was smaller, but the RSS was about the same. With this patch, we would have taken a profile at a more interesting time. Release note: None --- docs/generated/settings/settings.html | 1 - pkg/base/constants.go | 4 + pkg/cli/start.go | 4 +- pkg/server/config.go | 3 +- pkg/server/heapprofiler/heapprofiler.go | 160 ++++++++----------- pkg/server/heapprofiler/heapprofiler_test.go | 96 ++++------- pkg/server/server.go | 101 +++++++----- pkg/server/status.go | 6 +- pkg/server/status/runtime.go | 83 +++------- pkg/server/status_test.go | 7 +- pkg/settings/registry.go | 5 +- 11 files changed, 209 insertions(+), 261 deletions(-) diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index b39b874bcaac..69bbf27633fd 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -76,7 +76,6 @@ server.goroutine_dump.num_goroutines_thresholdinteger1000a threshold beyond which if number of goroutines increases, then goroutine dump can be triggered server.goroutine_dump.total_dump_size_limitbyte size500 MiBtotal size of goroutine dumps to be kept. Dumps are GC'ed in the order of creation time. The latest dump is always kept even if its size exceeds the limit. server.heap_profile.max_profilesinteger5maximum number of profiles to be kept. Profiles with lower score are GC'ed, but latest profile is always kept. -server.heap_profile.system_memory_threshold_fractionfloat0.85fraction of system memory beyond which if Rss increases, then heap profile is triggered server.host_based_authentication.configurationstringhost-based authentication configuration to use during connection authentication server.rangelog.ttlduration720h0m0sif nonzero, range log entries older than this duration are deleted every 10m0s. Should not be lowered below 24 hours. server.remote_debugging.modestringlocalset to enable remote debugging, localhost-only or disable (any, local, off) diff --git a/pkg/base/constants.go b/pkg/base/constants.go index 4aeb2845f6d2..4a0db40a6fba 100644 --- a/pkg/base/constants.go +++ b/pkg/base/constants.go @@ -37,4 +37,8 @@ const ( // ChunkRaftCommandThresholdBytes is the threshold in bytes at which // to chunk or otherwise limit commands being sent to Raft.
ChunkRaftCommandThresholdBytes = 256 * 1000 + + // HeapProfileDir is the directory name where the heap profiler stores profiles + // when there is a potential OOM situation. + HeapProfileDir = "heap_profiler" ) diff --git a/pkg/cli/start.go b/pkg/cli/start.go index c359ea49f94d..ee7c9d35addb 100644 --- a/pkg/cli/start.go +++ b/pkg/cli/start.go @@ -448,7 +448,9 @@ func runStart(cmd *cobra.Command, args []string) error { } serverCfg.GoroutineDumpDirName = logOutputDirectory() - serverCfg.HeapProfileDirName = logOutputDirectory() + + heapProfileDir := filepath.Join(logOutputDirectory(), base.HeapProfileDir) + serverCfg.HeapProfileDirName = heapProfileDir // We don't care about GRPCs fairly verbose logs in most client commands, // but when actually starting a server, we enable them. grpcutil.SetSeverity(log.Severity_WARNING) diff --git a/pkg/server/config.go b/pkg/server/config.go index 6315614b62c4..c2fa1396317c 100644 --- a/pkg/server/config.go +++ b/pkg/server/config.go @@ -179,8 +179,9 @@ type Config struct { GoroutineDumpDirName string // HeapProfileDirName is the directory name for heap profiles using - // heapprofiler. + // heapprofiler. If empty, no heap profiles will be collected. HeapProfileDirName string + // Parsed values. // NodeAttributes is the parsed representation of Attrs. diff --git a/pkg/server/heapprofiler/heapprofiler.go b/pkg/server/heapprofiler/heapprofiler.go index 9302147870eb..0dfc33dd9254 100644 --- a/pkg/server/heapprofiler/heapprofiler.go +++ b/pkg/server/heapprofiler/heapprofiler.go @@ -20,6 +20,7 @@ import ( "io/ioutil" "os" "path/filepath" + "runtime" "runtime/pprof" "strings" "time" @@ -31,15 +32,11 @@ import ( "github.com/pkg/errors" ) -const minProfileInterval = time.Minute +// resetHighWaterMarkInterval specifies how often the high-water mark value will +// be reset. Immediately after it is reset, a new profile will be taken. +const resetHighWaterMarkInterval = time.Hour var ( - systemMemoryThresholdFraction = settings.RegisterFloatSetting( - "server.heap_profile.system_memory_threshold_fraction", - "fraction of system memory beyond which if Rss increases, "+ - "then heap profile is triggered", - .85, - ) maxProfiles = settings.RegisterIntSetting( "server.heap_profile.max_profiles", "maximum number of profiles to be kept. "+ @@ -48,99 +45,80 @@ var ( ) ) -type stats struct { - rssBytes int64 - systemMemoryBytes int64 - lastProfileTime time.Time - aboveSysMemThresholdSinceLastProfile bool - currentTime func() time.Time +type testingKnobs struct { + dontWriteProfiles bool + maybeTakeProfileHook func(willTakeProfile bool) + now func() time.Time } -type heuristic struct { - name string - isTrue func(s *stats, st *cluster.Settings) (score int64, isTrue bool) -} +// HeapProfiler is used to take heap profiles. +// +// MaybeTakeProfile() is supposed to be called periodically. A profile is taken +// every time Go heap allocated bytes exceeds the previous high-water mark. The +// recorded high-water mark is also reset periodically, so that profiles are +// still taken from time to time even if the heap stops growing. +// Profiles are also GCed periodically. The latest is always kept, and a couple +// of the ones with the largest heap are also kept. +type HeapProfiler struct { + dir string + st *cluster.Settings + // lastProfileTime marks the time when we took the last profile. + lastProfileTime time.Time + // highwaterMarkBytes represents the maximum heap size that we've seen since + // resetting the field (which happens periodically).
+ highwaterMarkBytes uint64 -// fractionSystemMemoryHeuristic is true if latest Rss is more than -// systemMemoryThresholdFraction of system memory. No new profile is -// taken if Rss has been above threshold since the last time profile was taken, -// but a new profile will be triggered if Rss has dipped below threshold since -// the last profile. score is the latest value of Rss. -// At max one profile will be taken in minProfileInterval. -var fractionSystemMemoryHeuristic = heuristic{ - name: "fraction_system_memory", - isTrue: func(s *stats, st *cluster.Settings) (score int64, isTrue bool) { - currentValue := s.rssBytes - if float64(currentValue)/float64(s.systemMemoryBytes) > systemMemoryThresholdFraction.Get(&st.SV) { - if s.currentTime().Sub(s.lastProfileTime) < minProfileInterval || - s.aboveSysMemThresholdSinceLastProfile { - return 0, false - } - s.aboveSysMemThresholdSinceLastProfile = true - return currentValue, true - } - s.aboveSysMemThresholdSinceLastProfile = false - return 0, false - }, + knobs testingKnobs } -// HeapProfiler is used to take heap profiles if an OOM situation is -// detected. It stores relevant functions and stats for heuristics to use. -type HeapProfiler struct { - *stats - heuristics []heuristic - takeHeapProfile func(ctx context.Context, dir string, prefix string, suffix string) - gcProfiles func(ctx context.Context, dir, prefix string, maxCount int64) - dir string +// NewHeapProfiler creates a HeapProfiler. dir is the directory in which +// profiles are to be stored. +func NewHeapProfiler(dir string, st *cluster.Settings) (*HeapProfiler, error) { + if dir == "" { + return nil, errors.Errorf("need to specify dir for NewHeapProfiler") + } + hp := &HeapProfiler{ + dir: dir, + st: st, + } + return hp, nil } -const memprof = "memprof." - -// MaybeTakeProfile takes a heap profile if an OOM situation is detected using -// heuristics enabled in o. At max one profile is taken in a call of this -// function. This function is also responsible for updating stats in o. -func (o *HeapProfiler) MaybeTakeProfile(ctx context.Context, st *cluster.Settings, rssBytes int64) { - o.rssBytes = rssBytes - profileTaken := false - for _, h := range o.heuristics { - if score, isTrue := h.isTrue(o.stats, st); isTrue { - if !profileTaken { - prefix := memprof + h.name + "." - const format = "2006-01-02T15_04_05.999" - suffix := fmt.Sprintf("%018d_%s", score, o.currentTime().Format(format)) - o.takeHeapProfile(ctx, o.dir, prefix, suffix) - o.lastProfileTime = o.currentTime() - profileTaken = true - if o.gcProfiles != nil { - o.gcProfiles(ctx, o.dir, memprof, maxProfiles.Get(&st.SV)) - } - } - } +// MaybeTakeProfile takes a heap profile if the heap is big enough. +func (o *HeapProfiler) MaybeTakeProfile(ctx context.Context, ms runtime.MemStats) { + // If it's been too long since we took a profile, make sure we'll take one now. + if o.now().Sub(o.lastProfileTime) > resetHighWaterMarkInterval { + o.highwaterMarkBytes = 0 } -} -// NewHeapProfiler returns a HeapProfiler which has -// systemMemoryThresholdFraction heuristic enabled. dir is the directory in -// which profiles are stored. 
-func NewHeapProfiler(dir string, systemMemoryBytes int64) (*HeapProfiler, error) { - if dir == "" { - return nil, errors.New("directory to store profiles could not be determined") + curHeap := ms.HeapAlloc + takeProfile := curHeap > o.highwaterMarkBytes + if hook := o.knobs.maybeTakeProfileHook; hook != nil { + hook(takeProfile) } - dir = filepath.Join(dir, "heap_profiler") - if err := os.MkdirAll(dir, 0755); err != nil { - return nil, err + if !takeProfile { + return } - hp := &HeapProfiler{ - stats: &stats{ - systemMemoryBytes: systemMemoryBytes, - currentTime: timeutil.Now, - }, - heuristics: []heuristic{fractionSystemMemoryHeuristic}, - takeHeapProfile: takeHeapProfile, - gcProfiles: gcProfiles, - dir: dir, + + o.highwaterMarkBytes = curHeap + o.lastProfileTime = o.now() + + if o.knobs.dontWriteProfiles { + return } - return hp, nil + const format = "2006-01-02T15_04_05.999" + filePrefix := "memprof." + fileName := fmt.Sprintf("%s%018d_%s", filePrefix, curHeap, o.now().Format(format)) + path := filepath.Join(o.dir, fileName) + takeHeapProfile(ctx, path) + o.gcProfiles(ctx, o.dir, filePrefix) +} + +func (o *HeapProfiler) now() time.Time { + if o.knobs.now != nil { + return o.knobs.now() + } + return timeutil.Now() } // gcProfiles removes least score profile matching the specified prefix when the @@ -148,7 +126,8 @@ func NewHeapProfiler(dir string, systemMemoryBytes int64) (*HeapProfiler, error) // the profiles indicates score such that sorting the filenames corresponds to // ordering the profiles from least to max score. // Latest profile in the directory is not considered for GC. -func gcProfiles(ctx context.Context, dir, prefix string, maxCount int64) { +func (o *HeapProfiler) gcProfiles(ctx context.Context, dir, prefix string) { + maxCount := maxProfiles.Get(&o.st.SV) files, err := ioutil.ReadDir(dir) if err != nil { log.Warning(ctx, err) @@ -184,8 +163,7 @@ func gcProfiles(ctx context.Context, dir, prefix string, maxCount int64) { } } -func takeHeapProfile(ctx context.Context, dir string, prefix string, suffix string) { - path := filepath.Join(dir, prefix+suffix) +func takeHeapProfile(ctx context.Context, path string) { // Try writing a go heap profile. f, err := os.Create(path) if err != nil { diff --git a/pkg/server/heapprofiler/heapprofiler_test.go b/pkg/server/heapprofiler/heapprofiler_test.go index 6bdbe9e14b41..b6677d3c2abd 100644 --- a/pkg/server/heapprofiler/heapprofiler_test.go +++ b/pkg/server/heapprofiler/heapprofiler_test.go @@ -16,84 +16,54 @@ package heapprofiler import ( "context" - "strconv" - "strings" + "runtime" "testing" "time" "github.com/cockroachdb/cockroach/pkg/settings/cluster" - "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/stretchr/testify/assert" ) -type rssVal struct { - secs time.Duration // secs is the time at which this rss value was emitted - rss int64 -} +func TestHeapProfiler(t *testing.T) { + ctx := context.Background() + type test struct { + secs int // The measurement's timestamp. 
+ heapBytes uint64 -func testHelper( - t *testing.T, - hp *HeapProfiler, - st *cluster.Settings, - rssValues []rssVal, - expectedScores []int64, - expectedPrefixes []string, -) { - baseTime := time.Time{} - numProfiles := 0 - mockHeapProfile := func( - ctx context.Context, dir string, prefix string, suffix string, - ) { - assert.Equal(t, prefix, expectedPrefixes[numProfiles]) - score, err := strconv.ParseInt(strings.Split(suffix, "_")[0], 10, 64) - assert.Nil(t, err) - assert.Equal(t, expectedScores[numProfiles], score) - numProfiles++ + expProfile bool + } + tests := []test{ + {0, 30, true}, // we always take the first profile + {10, 40, true}, // new high-water mark + {20, 30, false}, // below high-water mark; no profile + {4000, 10, true}, // new hour; should trigger regardless of the usage } var currentTime time.Time - var timeLocker syncutil.RWMutex now := func() time.Time { - timeLocker.RLock() - defer timeLocker.RUnlock() return currentTime } - hp.takeHeapProfile = mockHeapProfile - hp.currentTime = now - // set a large negative time so that first profile is triggered correctly - // since we start time from 0 in test. - // Not needed in main code as time will never be 0. - hp.lastProfileTime = time.Time{}.Add(-1000 * time.Second) - ctx := context.TODO() - for _, r := range rssValues { - currentTime = baseTime.Add(time.Second * r.secs) - hp.MaybeTakeProfile(ctx, st, r.rss) + var tookProfile bool + hp, err := NewHeapProfiler("dummy_dir", cluster.MakeTestingClusterSettings()) + if err != nil { + t.Fatal(err) } - assert.Equal(t, numProfiles, len(expectedScores)) -} - -func TestPercentSystemMemoryHeuristic(t *testing.T) { - rssValues := []rssVal{ - {0, 30}, {20, 40}, // random small values - {30, 88}, // should trigger - {80, 89}, // should not trigger as less than 60s before last profile - {130, 10}, {140, 4}, // random small values - {150, 90}, // should trigger - {260, 92}, // should not trigger as continues above threshold - {290, 30}, // random small value - {380, 99}, // should trigger - {390, 30}, // random small value - {430, 91}, // should not trigger as less than 60s before last profile - {500, 95}, // should trigger + hp.knobs = testingKnobs{ + now: now, + dontWriteProfiles: true, + maybeTakeProfileHook: func(willTakeProfile bool) { + tookProfile = willTakeProfile + }, } - expectedScores := []int64{88, 90, 99, 95} - prefix := "memprof.fraction_system_memory." - expectedPrefixes := []string{prefix, prefix, prefix, prefix} - hp := &HeapProfiler{ - stats: &stats{systemMemoryBytes: 100}, - heuristics: []heuristic{fractionSystemMemoryHeuristic}, + + for i, r := range tests { + currentTime = (time.Time{}).Add(time.Second * time.Duration(r.secs)) + + // Initialize enough of ms for the purposes of the HeapProfiler. + var ms runtime.MemStats + ms.HeapAlloc = r.heapBytes + + hp.MaybeTakeProfile(ctx, ms) + assert.Equal(t, r.expProfile, tookProfile, i) } - st := &cluster.Settings{} - systemMemoryThresholdFraction.Override(&st.SV, .85) - testHelper(t, hp, st, rssValues, expectedScores, expectedPrefixes) } diff --git a/pkg/server/server.go b/pkg/server/server.go index e7caf66b1458..918621a26abb 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -26,6 +26,7 @@ import ( "net/http" "os" "path/filepath" + "runtime" "strings" "sync" "sync/atomic" @@ -1548,7 +1549,7 @@ func (s *Server) Start(ctx context.Context) error { s.recorder.AddNode(s.registry, s.node.Descriptor, s.node.startedAt, s.cfg.AdvertiseAddr, s.cfg.HTTPAdvertiseAddr) // Begin recording runtime statistics. 
- s.startSampleEnvironment(DefaultMetricsSampleInterval) + s.startSampleEnvironment(ctx, DefaultMetricsSampleInterval) // Begin recording time series data collected by the status monitor. s.tsDB.PollSource( @@ -1904,68 +1905,92 @@ func (s *Server) Decommission(ctx context.Context, setTo bool, nodeIDs []roachpb return nil } -// startSampleEnvironment begins the heap profiler worker and a worker that -// periodically instructs the runtime stat sampler to sample the environment. -func (s *Server) startSampleEnvironment(frequency time.Duration) { +// startSampleEnvironment begins the heap profiler worker. +func (s *Server) startSampleEnvironment(ctx context.Context, frequency time.Duration) { // Immediately record summaries once on server startup. - ctx := s.AnnotateCtx(context.Background()) + ctx = s.AnnotateCtx(ctx) goroutineDumper, err := goroutinedumper.NewGoroutineDumper(s.cfg.GoroutineDumpDirName) if err != nil { log.Infof(ctx, "Could not start goroutine dumper worker due to: %s", err) } - var heapProfiler *heapprofiler.HeapProfiler - { - systemMemoryBytes, err := status.GetTotalMemory(ctx) + // We're not going to take heap profiles if running with in-memory stores. + // This helps some tests that can't write any files. + allStoresInMem := true + for _, storeSpec := range s.cfg.Stores.Specs { + if !storeSpec.InMemory { + allStoresInMem = false + break + } + } + + var heapProfiler *heapprofiler.HeapProfiler + if s.cfg.HeapProfileDirName != "" && !allStoresInMem { + if err := os.MkdirAll(s.cfg.HeapProfileDirName, 0755); err != nil { + log.Fatalf(ctx, "Could not create heap profiles dir: %s", err) + } + heapProfiler, err = heapprofiler.NewHeapProfiler( + s.cfg.HeapProfileDirName, s.ClusterSettings()) if err != nil { - log.Warningf(ctx, "Could not compute system memory due to: %s", err) - } else { - heapProfiler, err = heapprofiler.NewHeapProfiler(s.cfg.HeapProfileDirName, systemMemoryBytes) - if err != nil { - log.Infof(ctx, "Could not start heap profiler worker due to: %s", err) - } + log.Fatalf(ctx, "Could not start heap profiler worker due to: %s", err) } } - // We run two separate sampling loops, one for memory stats (via - // ReadMemStats) and one for all other runtime stats. This is necessary - // because as of go1.11, runtime.ReadMemStats() "stops the world" and - // requires waiting for any current GC run to finish. With a large heap, a - // single GC run may take longer than the default sampling period (10s). s.stopper.RunWorker(ctx, func(ctx context.Context) { + var goMemStats atomic.Value // *status.GoMemStats + goMemStats.Store(&status.GoMemStats{}) + var collectingMemStats int32 // atomic, 1 when stats call is ongoing + timer := timeutil.NewTimer() defer timer.Stop() timer.Reset(frequency) + for { select { - case <-timer.C: - timer.Read = true - s.runtime.SampleMemStats(ctx) - timer.Reset(frequency) case <-s.stopper.ShouldStop(): return - } - } - }) - - s.stopper.RunWorker(ctx, func(ctx context.Context) { - timer := timeutil.NewTimer() - defer timer.Stop() - timer.Reset(frequency) - for { - select { case <-timer.C: timer.Read = true - s.runtime.SampleEnvironment(ctx) + timer.Reset(frequency) + + // We read the heap stats on another goroutine and give up after 1s. + // This is necessary because as of Go 1.12, runtime.ReadMemStats() + // "stops the world" and that requires first waiting for any current GC + // run to finish. With a large heap and under extreme conditions, a + // single GC run may take longer than the default sampling period of + // 10s. 
Under normal operations and with more recent versions of Go, + // this hasn't been observed to be a problem. + statsCollected := make(chan struct{}) + if atomic.CompareAndSwapInt32(&collectingMemStats, 0, 1) { + if err := s.stopper.RunAsyncTask(ctx, "get-mem-stats", func(ctx context.Context) { + var ms status.GoMemStats + runtime.ReadMemStats(&ms.MemStats) + ms.Collected = timeutil.Now() + log.VEventf(ctx, 2, "memstats: %+v", ms) + + goMemStats.Store(&ms) + atomic.StoreInt32(&collectingMemStats, 0) + close(statsCollected) + }); err != nil { + close(statsCollected) + } + } + + select { + case <-statsCollected: + // Good; we managed to read the Go memory stats quickly enough. + case <-time.After(time.Second): + } + + curStats := goMemStats.Load().(*status.GoMemStats) + s.runtime.SampleEnvironment(ctx, *curStats) if goroutineDumper != nil { goroutineDumper.MaybeDump(ctx, s.ClusterSettings(), s.runtime.Goroutines.Value()) } if heapProfiler != nil { - heapProfiler.MaybeTakeProfile(ctx, s.ClusterSettings(), s.runtime.RSSBytes.Value()) + heapProfiler.MaybeTakeProfile(ctx, curStats.MemStats) } - timer.Reset(frequency) - case <-s.stopper.ShouldStop(): - return + } } }) diff --git a/pkg/server/status.go b/pkg/server/status.go index 5dc1863715ce..6cfec7194ec5 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -95,10 +95,6 @@ const ( // permitted in responses. omittedKeyStr = "omitted (due to the 'server.remote_debugging.mode' setting)" - // heapDir is the directory name where the heap profiler stores profiles - // when there is a potential OOM situation. - heapDir = "heap_profiler" - // goroutineDir is the directory name where the goroutinedumper stores // goroutine dumps. goroutinesDir = "goroutine_dump" @@ -681,7 +677,7 @@ func (s *statusServer) GetFiles( //TODO(ridwanmsharif): Serve logfiles so debug-zip can fetch them // intead of reading indididual entries. case serverpb.FileType_HEAP: // Requesting for saved Heap Profiles. - dir = filepath.Join(s.admin.server.cfg.HeapProfileDirName, heapDir) + dir = filepath.Join(s.admin.server.cfg.HeapProfileDirName, base.HeapProfileDir) case serverpb.FileType_GOROUTINES: // Requesting for saved Goroutine dumps. dir = filepath.Join(s.admin.server.cfg.GoroutineDumpDirName, goroutinesDir) default: diff --git a/pkg/server/status/runtime.go b/pkg/server/status/runtime.go index 35cf2d95e226..ffab35d6ef2a 100644 --- a/pkg/server/status/runtime.go +++ b/pkg/server/status/runtime.go @@ -19,15 +19,14 @@ import ( "os" "runtime" "runtime/debug" - "sync/atomic" "time" - "unsafe" "github.com/cockroachdb/cockroach/pkg/build" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/metric" - humanize "github.com/dustin/go-humanize" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/dustin/go-humanize" "github.com/elastic/gosigar" "github.com/shirou/gopsutil/net" ) @@ -226,12 +225,6 @@ var ( } ) -type memStats struct { - goAllocated uint64 - goIdle uint64 - goTotal uint64 -} - // getCgoMemStats is a function that fetches stats for the C++ portion of the code. // We will not necessarily have implementations for all builds, so check for nil first. // Returns the following: @@ -261,9 +254,6 @@ type RuntimeStatSampler struct { net net.IOCountersStat } - // Memory stats that are updated atomically by SampleMemStats. 
- memStats unsafe.Pointer - initialDiskCounters diskStats initialNetCounters net.IOCountersStat @@ -387,6 +377,14 @@ func NewRuntimeStatSampler(ctx context.Context, clock *hlc.Clock) *RuntimeStatSa return rsr } +// GoMemStats groups a runtime.MemStats structure with the timestamp when it +// was collected. +type GoMemStats struct { + runtime.MemStats + // Collected is the timestamp at which these values were collected. + Collected time.Time +} + // SampleEnvironment queries the runtime system for various interesting metrics, // storing the resulting values in the set of metric gauges maintained by // RuntimeStatSampler. This makes runtime statistics more convenient for @@ -394,7 +392,10 @@ func NewRuntimeStatSampler(ctx context.Context, clock *hlc.Clock) *RuntimeStatSa // // This method should be called periodically by a higher level system in order // to keep runtime statistics current. -func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context) { +// +// SampleEnvironment takes GoMemStats as input because that is collected +// separately, on a different schedule. +func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context, ms GoMemStats) { // Note that debug.ReadGCStats() does not suffer the same problem as // runtime.ReadMemStats(). The only way you can know that is by reading the // source. @@ -490,18 +491,20 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context) { } } - ms := (*memStats)(atomic.LoadPointer(&rsr.memStats)) - if ms == nil { - ms = &memStats{} - } - // Log summary of statistics to console. cgoRate := float64((numCgoCall-rsr.last.cgoCall)*int64(time.Second)) / dur - log.Infof(ctx, "runtime stats: %s RSS, %d goroutines, %s/%s/%s GO alloc/idle/total, "+ + goMemStatsStale := timeutil.Now().Sub(ms.Collected) > time.Second + var staleMsg = "" + if goMemStatsStale { + staleMsg = "(stale)" + } + goTotal := ms.Sys - ms.HeapReleased + log.Infof(ctx, "runtime stats: %s RSS, %d goroutines, %s/%s/%s GO alloc/idle/total%s, "+ "%s/%s CGO alloc/total, %.1f CGO/sec, %.1f/%.1f %%(u/s)time, %.1f %%gc (%dx), "+ "%s/%s (r/w)net", humanize.IBytes(mem.Resident), numGoroutine, - humanize.IBytes(ms.goAllocated), humanize.IBytes(ms.goIdle), humanize.IBytes(ms.goTotal), + humanize.IBytes(ms.HeapAlloc), humanize.IBytes(ms.HeapIdle), humanize.IBytes(goTotal), + staleMsg, humanize.IBytes(uint64(cgoAllocated)), humanize.IBytes(uint64(cgoTotal)), cgoRate, 100*uPerc, 100*sPerc, 100*gcPausePercent, gc.NumGC-rsr.last.gcCount, humanize.IBytes(deltaNet.BytesRecv), humanize.IBytes(deltaNet.BytesSent), @@ -509,6 +512,8 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context) { rsr.last.cgoCall = numCgoCall rsr.last.gcCount = gc.NumGC + rsr.GoAllocBytes.Update(int64(ms.HeapAlloc)) + rsr.GoTotalBytes.Update(int64(goTotal)) rsr.CgoCalls.Update(numCgoCall) rsr.Goroutines.Update(int64(numGoroutine)) rsr.CgoAllocBytes.Update(int64(cgoAllocated)) @@ -527,44 +532,6 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context) { rsr.Uptime.Update((now - rsr.startTimeNanos) / 1e9) } -// SampleMemStats queries the runtime system for memory metrics, updating the -// memory metric gauges and making these metrics available for logging by -// SampleEnvironment. -// -// This method should be called periodically by a higher level system in order -// to keep runtime statistics current. 
It is distinct from SampleEnvironment -// due to a limitation in the Go runtime which causes runtime.ReadMemStats() to -// block waiting for a GC cycle to finish which can take upwards of 10 seconds -// on a large heap. -func (rsr *RuntimeStatSampler) SampleMemStats(ctx context.Context) { - // Record memory and call stats from the runtime package. It might be useful - // to call ReadMemStats() more often, but it stops the world while collecting - // stats so shouldn't be called too often. For a similar reason, we want this - // call to come first because ReadMemStats() needs to wait for an existing GC - // to finish which can take multiple seconds on large heaps. - // - // NOTE: the MemStats fields do not get decremented when memory is released, - // to get accurate numbers, be sure to subtract. eg: ms.Sys - ms.HeapReleased - // for current memory reserved. - ms := &runtime.MemStats{} - runtime.ReadMemStats(ms) - - goAllocated := ms.Alloc - goTotal := ms.Sys - ms.HeapReleased - atomic.StorePointer(&rsr.memStats, unsafe.Pointer(&memStats{ - goAllocated: goAllocated, - goIdle: ms.HeapIdle - ms.HeapReleased, - goTotal: goTotal, - })) - - rsr.GoAllocBytes.Update(int64(goAllocated)) - rsr.GoTotalBytes.Update(int64(goTotal)) - - if log.V(2) { - log.Infof(ctx, "memstats: %+v", ms) - } -} - // GetCPUCombinedPercentNorm is part of the distsqlrun.RuntimeStats interface. func (rsr *RuntimeStatSampler) GetCPUCombinedPercentNorm() float64 { return rsr.CPUCombinedPercentNorm.Value() diff --git a/pkg/server/status_test.go b/pkg/server/status_test.go index 786c5bfc29d0..c48ad6173415 100644 --- a/pkg/server/status_test.go +++ b/pkg/server/status_test.go @@ -20,6 +20,7 @@ import ( "fmt" "io/ioutil" "math" + "os" "path/filepath" "reflect" "regexp" @@ -288,7 +289,11 @@ func TestStatusGetFiles(t *testing.T) { t.Run("heap", func(t *testing.T) { const testFilesNo = 3 for i := 0; i < testFilesNo; i++ { - testHeapFile := filepath.Join(storeSpec.Path, "logs", heapDir, fmt.Sprintf("heap%d.pprof", i)) + testHeapDir := filepath.Join(storeSpec.Path, "logs", base.HeapProfileDir) + testHeapFile := filepath.Join(testHeapDir, fmt.Sprintf("heap%d.pprof", i)) + if err := os.MkdirAll(testHeapDir, os.ModePerm); err != nil { + t.Fatal(err) + } if err := ioutil.WriteFile(testHeapFile, []byte(fmt.Sprintf("I'm heap file %d", i)), 0644); err != nil { t.Fatal(err) } diff --git a/pkg/settings/registry.go b/pkg/settings/registry.go index 4beadedd9452..b7d82249c27a 100644 --- a/pkg/settings/registry.go +++ b/pkg/settings/registry.go @@ -44,8 +44,9 @@ var retiredSettings = map[string]struct{}{ // removed as of 2.2. "kv.raft_log.synchronize": {}, // removed as of 19.2. - "schemachanger.bulk_index_backfill.enabled": {}, - "rocksdb.ingest_backpressure.delay_l0_file": {}, // never used + "schemachanger.bulk_index_backfill.enabled": {}, + "rocksdb.ingest_backpressure.delay_l0_file": {}, // never used + "server.heap_profile.system_memory_threshold_fraction": {}, } // Register adds a setting to the registry.
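For reference, the core of the new heuristic in HeapProfiler.MaybeTakeProfile boils down to the sketch below. This is a minimal, self-contained illustration rather than the code in the patch: the type and function names are made up, and the actual writing of the pprof file and the GC of old profile files are omitted.

package main

import (
	"fmt"
	"runtime"
	"time"
)

// profiler keeps the largest HeapAlloc value seen so far and resets that
// high-water mark once an hour, so that a single large measurement cannot
// suppress profiling forever.
type profiler struct {
	lastProfileTime    time.Time
	highWaterMarkBytes uint64
}

const resetInterval = time.Hour

func (p *profiler) maybeTakeProfile(now time.Time, heapAllocBytes uint64) bool {
	if now.Sub(p.lastProfileTime) > resetInterval {
		p.highWaterMarkBytes = 0 // forget the old maximum
	}
	if heapAllocBytes <= p.highWaterMarkBytes {
		return false // heap is not at a new maximum; nothing to do
	}
	p.highWaterMarkBytes = heapAllocBytes
	p.lastProfileTime = now
	// The real code writes a heap profile named after the heap size and
	// timestamp here, and then GCs old profile files.
	return true
}

func main() {
	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)
	p := &profiler{}
	// The first call always profiles: the zero high-water mark is below any heap size.
	fmt.Println(p.maybeTakeProfile(time.Now(), ms.HeapAlloc))
}

Resetting the mark hourly trades an occasional redundant profile for the guarantee that one unusually large heap measurement doesn't stop all future profiles, which is exactly the failure mode described in the commit message.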
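The sampling loop in startSampleEnvironment reads runtime.MemStats on a separate goroutine and only waits one second for it, otherwise carrying on with the previously collected (possibly stale) value. Below is a stripped-down sketch of that pattern, under the assumption of a single sampling tick and without the stopper/async-task machinery used in the patch.

package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
	"time"
)

func main() {
	var latest atomic.Value // holds the most recent *runtime.MemStats
	latest.Store(&runtime.MemStats{})
	var collecting int32 // 1 while a ReadMemStats call is in flight

	statsCollected := make(chan struct{})
	if atomic.CompareAndSwapInt32(&collecting, 0, 1) {
		go func() {
			var ms runtime.MemStats
			runtime.ReadMemStats(&ms) // may block behind an in-progress GC
			latest.Store(&ms)
			atomic.StoreInt32(&collecting, 0)
			close(statsCollected)
		}()
	} else {
		close(statsCollected) // a previous collection is still running; don't wait on it
	}

	select {
	case <-statsCollected:
		// The stats were read quickly enough; use them right away.
	case <-time.After(time.Second):
		// Too slow; carry on with the previously stored, possibly stale, stats.
	}

	ms := latest.Load().(*runtime.MemStats)
	fmt.Printf("HeapAlloc=%d bytes\n", ms.HeapAlloc)
}

The CompareAndSwap guard is what keeps a slow ReadMemStats from piling up: at most one collection is in flight, and later ticks simply reuse whatever the last successful collection stored.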
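The "goIdle" memory that motivates this change, as well as the "GO alloc/idle/total" figures in the runtime stats log line, are all derived from runtime.MemStats. A small sketch of that arithmetic follows; note that the patched SampleEnvironment logs HeapIdle as-is, while the removed SampleMemStats subtracted HeapReleased from it as shown here.

package main

import (
	"fmt"
	"runtime"
)

func main() {
	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)

	// MemStats counters are not decremented when memory is returned to the OS,
	// so the interesting figures are obtained by subtraction.
	goAlloc := ms.HeapAlloc                 // bytes of live Go heap objects ("GO alloc")
	goIdle := ms.HeapIdle - ms.HeapReleased // heap spans held from the OS but currently unused
	goTotal := ms.Sys - ms.HeapReleased     // total memory still held from the OS ("GO total")

	fmt.Printf("go alloc/idle/total: %d/%d/%d bytes\n", goAlloc, goIdle, goTotal)
}

RSS is closer to goTotal (plus CGo and other non-Go memory) than to goAlloc, which is why, as the commit message describes, RSS can stay about the same while the live heap grows and the idle portion shrinks.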