diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index b39b874bcaac..69bbf27633fd 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -76,7 +76,6 @@ server.goroutine_dump.num_goroutines_thresholdinteger1000a threshold beyond which if number of goroutines increases, then goroutine dump can be triggered server.goroutine_dump.total_dump_size_limitbyte size500 MiBtotal size of goroutine dumps to be kept. Dumps are GC'ed in the order of creation time. The latest dump is always kept even if its size exceeds the limit. server.heap_profile.max_profilesinteger5maximum number of profiles to be kept. Profiles with lower score are GC'ed, but latest profile is always kept. -server.heap_profile.system_memory_threshold_fractionfloat0.85fraction of system memory beyond which if Rss increases, then heap profile is triggered server.host_based_authentication.configurationstringhost-based authentication configuration to use during connection authentication server.rangelog.ttlduration720h0m0sif nonzero, range log entries older than this duration are deleted every 10m0s. Should not be lowered below 24 hours. server.remote_debugging.modestringlocalset to enable remote debugging, localhost-only or disable (any, local, off) diff --git a/pkg/base/constants.go b/pkg/base/constants.go index 4aeb2845f6d2..4a0db40a6fba 100644 --- a/pkg/base/constants.go +++ b/pkg/base/constants.go @@ -37,4 +37,8 @@ const ( // ChunkRaftCommandThresholdBytes is the threshold in bytes at which // to chunk or otherwise limit commands being sent to Raft. ChunkRaftCommandThresholdBytes = 256 * 1000 + + // HeapProfileDir is the directory name where the heap profiler stores profiles + // when there is a potential OOM situation. + HeapProfileDir = "heap_profiler" ) diff --git a/pkg/cli/start.go b/pkg/cli/start.go index c359ea49f94d..ee7c9d35addb 100644 --- a/pkg/cli/start.go +++ b/pkg/cli/start.go @@ -448,7 +448,9 @@ func runStart(cmd *cobra.Command, args []string) error { } serverCfg.GoroutineDumpDirName = logOutputDirectory() - serverCfg.HeapProfileDirName = logOutputDirectory() + + heapProfileDir := filepath.Join(logOutputDirectory(), base.HeapProfileDir) + serverCfg.HeapProfileDirName = heapProfileDir // We don't care about GRPCs fairly verbose logs in most client commands, // but when actually starting a server, we enable them. grpcutil.SetSeverity(log.Severity_WARNING) diff --git a/pkg/server/config.go b/pkg/server/config.go index 6315614b62c4..c2fa1396317c 100644 --- a/pkg/server/config.go +++ b/pkg/server/config.go @@ -179,8 +179,9 @@ type Config struct { GoroutineDumpDirName string // HeapProfileDirName is the directory name for heap profiles using - // heapprofiler. + // heapprofiler. If empty, no heap profiles will be collected. HeapProfileDirName string + // Parsed values. // NodeAttributes is the parsed representation of Attrs. diff --git a/pkg/server/heapprofiler/heapprofiler.go b/pkg/server/heapprofiler/heapprofiler.go index a579c9ab3acb..0dfc33dd9254 100644 --- a/pkg/server/heapprofiler/heapprofiler.go +++ b/pkg/server/heapprofiler/heapprofiler.go @@ -20,6 +20,7 @@ import ( "io/ioutil" "os" "path/filepath" + "runtime" "runtime/pprof" "strings" "time" @@ -31,15 +32,11 @@ import ( "github.com/pkg/errors" ) -const minProfileInterval = time.Minute +// resetHighWaterMarkInterval specifies how often the high-water mark value will +// be reset. Immediately after it is reset, a new profile will be taken. +const resetHighWaterMarkInterval = time.Hour var ( - systemMemoryThresholdFraction = settings.RegisterFloatSetting( - "server.heap_profile.system_memory_threshold_fraction", - "fraction of system memory beyond which if Rss increases, "+ - "then heap profile is triggered", - .85, - ) maxProfiles = settings.RegisterIntSetting( "server.heap_profile.max_profiles", "maximum number of profiles to be kept. "+ @@ -48,99 +45,80 @@ var ( ) ) -type stats struct { - rss int64 - systemMemory int64 - lastProfileTime time.Time - aboveSysMemThresholdSinceLastProfile bool - currentTime func() time.Time +type testingKnobs struct { + dontWriteProfiles bool + maybeTakeProfileHook func(willTakeProfile bool) + now func() time.Time } -type heuristic struct { - name string - isTrue func(s *stats, st *cluster.Settings) (score int64, isTrue bool) -} +// HeapProfiler is used to take heap profiles. +// +// MaybeTakeProfile() is supposed to be called periodically. A profile is taken +// every time Go heap allocated bytes exceeds the previous high-water mark. The +// recorded high-water mark is also reset periodically, so that we take some +// profiles periodically. +// Profiles are also GCed periodically. The latest is always kept, and a couple +// of the ones with the largest heap are also kept. +type HeapProfiler struct { + dir string + st *cluster.Settings + // lastProfileTime marks the time when we took the last profile. + lastProfileTime time.Time + // highwaterMarkBytes represents the maximum heap size that we've seen since + // resetting the filed (which happens periodically). + highwaterMarkBytes uint64 -// fractionSystemMemoryHeuristic is true if latest Rss is more than -// systemMemoryThresholdFraction of system memory. No new profile is -// taken if Rss has been above threshold since the last time profile was taken, -// but a new profile will be triggered if Rss has dipped below threshold since -// the last profile. score is the latest value of Rss. -// At max one profile will be taken in minProfileInterval. -var fractionSystemMemoryHeuristic = heuristic{ - name: "fraction_system_memory", - isTrue: func(s *stats, st *cluster.Settings) (score int64, isTrue bool) { - currentValue := s.rss - if float64(currentValue)/float64(s.systemMemory) > systemMemoryThresholdFraction.Get(&st.SV) { - if s.currentTime().Sub(s.lastProfileTime) < minProfileInterval || - s.aboveSysMemThresholdSinceLastProfile { - return 0, false - } - s.aboveSysMemThresholdSinceLastProfile = true - return currentValue, true - } - s.aboveSysMemThresholdSinceLastProfile = false - return 0, false - }, + knobs testingKnobs } -// HeapProfiler is used to take heap profiles if an OOM situation is -// detected. It stores relevant functions and stats for heuristics to use. -type HeapProfiler struct { - *stats - heuristics []heuristic - takeHeapProfile func(ctx context.Context, dir string, prefix string, suffix string) - gcProfiles func(ctx context.Context, dir, prefix string, maxCount int64) - dir string +// NewHeapProfiler creates a HeapProfiler. dir is the directory in which +// profiles are to be stored. +func NewHeapProfiler(dir string, st *cluster.Settings) (*HeapProfiler, error) { + if dir == "" { + return nil, errors.Errorf("need to specify dir for NewHeapProfiler") + } + hp := &HeapProfiler{ + dir: dir, + st: st, + } + return hp, nil } -const memprof = "memprof." - -// MaybeTakeProfile takes a heap profile if an OOM situation is detected using -// heuristics enabled in o. At max one profile is taken in a call of this -// function. This function is also responsible for updating stats in o. -func (o *HeapProfiler) MaybeTakeProfile(ctx context.Context, st *cluster.Settings, rssValue int64) { - o.rss = rssValue - profileTaken := false - for _, h := range o.heuristics { - if score, isTrue := h.isTrue(o.stats, st); isTrue { - if !profileTaken { - prefix := memprof + h.name + "." - const format = "2006-01-02T15_04_05.999" - suffix := fmt.Sprintf("%018d_%s", score, o.currentTime().Format(format)) - o.takeHeapProfile(ctx, o.dir, prefix, suffix) - o.lastProfileTime = o.currentTime() - profileTaken = true - if o.gcProfiles != nil { - o.gcProfiles(ctx, o.dir, memprof, maxProfiles.Get(&st.SV)) - } - } - } +// MaybeTakeProfile takes a heap profile if the heap is big enough. +func (o *HeapProfiler) MaybeTakeProfile(ctx context.Context, ms runtime.MemStats) { + // If it's been too long since we took a profile, make sure we'll take one now. + if o.now().Sub(o.lastProfileTime) > resetHighWaterMarkInterval { + o.highwaterMarkBytes = 0 } -} -// NewHeapProfiler returns a HeapProfiler which has -// systemMemoryThresholdFraction heuristic enabled. dir is the directory in -// which profiles are stored. -func NewHeapProfiler(dir string, systemMemory int64) (*HeapProfiler, error) { - if dir == "" { - return nil, errors.New("directory to store profiles could not be determined") + curHeap := ms.HeapAlloc + takeProfile := curHeap > o.highwaterMarkBytes + if hook := o.knobs.maybeTakeProfileHook; hook != nil { + hook(takeProfile) } - dir = filepath.Join(dir, "heap_profiler") - if err := os.MkdirAll(dir, 0755); err != nil { - return nil, err + if !takeProfile { + return } - hp := &HeapProfiler{ - stats: &stats{ - systemMemory: systemMemory, - currentTime: timeutil.Now, - }, - heuristics: []heuristic{fractionSystemMemoryHeuristic}, - takeHeapProfile: takeHeapProfile, - gcProfiles: gcProfiles, - dir: dir, + + o.highwaterMarkBytes = curHeap + o.lastProfileTime = o.now() + + if o.knobs.dontWriteProfiles { + return } - return hp, nil + const format = "2006-01-02T15_04_05.999" + filePrefix := "memprof." + fileName := fmt.Sprintf("%s%018d_%s", filePrefix, curHeap, o.now().Format(format)) + path := filepath.Join(o.dir, fileName) + takeHeapProfile(ctx, path) + o.gcProfiles(ctx, o.dir, filePrefix) +} + +func (o *HeapProfiler) now() time.Time { + if o.knobs.now != nil { + return o.knobs.now() + } + return timeutil.Now() } // gcProfiles removes least score profile matching the specified prefix when the @@ -148,7 +126,8 @@ func NewHeapProfiler(dir string, systemMemory int64) (*HeapProfiler, error) { // the profiles indicates score such that sorting the filenames corresponds to // ordering the profiles from least to max score. // Latest profile in the directory is not considered for GC. -func gcProfiles(ctx context.Context, dir, prefix string, maxCount int64) { +func (o *HeapProfiler) gcProfiles(ctx context.Context, dir, prefix string) { + maxCount := maxProfiles.Get(&o.st.SV) files, err := ioutil.ReadDir(dir) if err != nil { log.Warning(ctx, err) @@ -184,8 +163,7 @@ func gcProfiles(ctx context.Context, dir, prefix string, maxCount int64) { } } -func takeHeapProfile(ctx context.Context, dir string, prefix string, suffix string) { - path := filepath.Join(dir, prefix+suffix) +func takeHeapProfile(ctx context.Context, path string) { // Try writing a go heap profile. f, err := os.Create(path) if err != nil { diff --git a/pkg/server/heapprofiler/heapprofiler_test.go b/pkg/server/heapprofiler/heapprofiler_test.go index 9c58a8235e03..b6677d3c2abd 100644 --- a/pkg/server/heapprofiler/heapprofiler_test.go +++ b/pkg/server/heapprofiler/heapprofiler_test.go @@ -16,84 +16,54 @@ package heapprofiler import ( "context" - "strconv" - "strings" + "runtime" "testing" "time" "github.com/cockroachdb/cockroach/pkg/settings/cluster" - "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/stretchr/testify/assert" ) -type rssVal struct { - secs time.Duration // secs is the time at which this rss value was emitted - rss int64 -} +func TestHeapProfiler(t *testing.T) { + ctx := context.Background() + type test struct { + secs int // The measurement's timestamp. + heapBytes uint64 -func testHelper( - t *testing.T, - hp *HeapProfiler, - st *cluster.Settings, - rssValues []rssVal, - expectedScores []int64, - expectedPrefixes []string, -) { - baseTime := time.Time{} - numProfiles := 0 - mockHeapProfile := func( - ctx context.Context, dir string, prefix string, suffix string, - ) { - assert.Equal(t, prefix, expectedPrefixes[numProfiles]) - score, err := strconv.ParseInt(strings.Split(suffix, "_")[0], 10, 64) - assert.Nil(t, err) - assert.Equal(t, expectedScores[numProfiles], score) - numProfiles++ + expProfile bool + } + tests := []test{ + {0, 30, true}, // we always take the first profile + {10, 40, true}, // new high-water mark + {20, 30, false}, // below high-water mark; no profile + {4000, 10, true}, // new hour; should trigger regardless of the usage } var currentTime time.Time - var timeLocker syncutil.RWMutex now := func() time.Time { - timeLocker.RLock() - defer timeLocker.RUnlock() return currentTime } - hp.takeHeapProfile = mockHeapProfile - hp.currentTime = now - // set a large negative time so that first profile is triggered correctly - // since we start time from 0 in test. - // Not needed in main code as time will never be 0. - hp.lastProfileTime = time.Time{}.Add(-1000 * time.Second) - ctx := context.TODO() - for _, r := range rssValues { - currentTime = baseTime.Add(time.Second * r.secs) - hp.MaybeTakeProfile(ctx, st, r.rss) + var tookProfile bool + hp, err := NewHeapProfiler("dummy_dir", cluster.MakeTestingClusterSettings()) + if err != nil { + t.Fatal(err) } - assert.Equal(t, numProfiles, len(expectedScores)) -} - -func TestPercentSystemMemoryHeuristic(t *testing.T) { - rssValues := []rssVal{ - {0, 30}, {20, 40}, // random small values - {30, 88}, // should trigger - {80, 89}, // should not trigger as less than 60s before last profile - {130, 10}, {140, 4}, // random small values - {150, 90}, // should trigger - {260, 92}, // should not trigger as continues above threshold - {290, 30}, // random small value - {380, 99}, // should trigger - {390, 30}, // random small value - {430, 91}, // should not trigger as less than 60s before last profile - {500, 95}, // should trigger + hp.knobs = testingKnobs{ + now: now, + dontWriteProfiles: true, + maybeTakeProfileHook: func(willTakeProfile bool) { + tookProfile = willTakeProfile + }, } - expectedScores := []int64{88, 90, 99, 95} - prefix := "memprof.fraction_system_memory." - expectedPrefixes := []string{prefix, prefix, prefix, prefix} - hp := &HeapProfiler{ - stats: &stats{systemMemory: 100}, - heuristics: []heuristic{fractionSystemMemoryHeuristic}, + + for i, r := range tests { + currentTime = (time.Time{}).Add(time.Second * time.Duration(r.secs)) + + // Initialize enough of ms for the purposes of the HeapProfiler. + var ms runtime.MemStats + ms.HeapAlloc = r.heapBytes + + hp.MaybeTakeProfile(ctx, ms) + assert.Equal(t, r.expProfile, tookProfile, i) } - st := &cluster.Settings{} - systemMemoryThresholdFraction.Override(&st.SV, .85) - testHelper(t, hp, st, rssValues, expectedScores, expectedPrefixes) } diff --git a/pkg/server/server.go b/pkg/server/server.go index ef85569e0170..918621a26abb 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -26,6 +26,7 @@ import ( "net/http" "os" "path/filepath" + "runtime" "strings" "sync" "sync/atomic" @@ -1548,7 +1549,7 @@ func (s *Server) Start(ctx context.Context) error { s.recorder.AddNode(s.registry, s.node.Descriptor, s.node.startedAt, s.cfg.AdvertiseAddr, s.cfg.HTTPAdvertiseAddr) // Begin recording runtime statistics. - s.startSampleEnvironment(DefaultMetricsSampleInterval) + s.startSampleEnvironment(ctx, DefaultMetricsSampleInterval) // Begin recording time series data collected by the status monitor. s.tsDB.PollSource( @@ -1904,68 +1905,92 @@ func (s *Server) Decommission(ctx context.Context, setTo bool, nodeIDs []roachpb return nil } -// startSampleEnvironment begins the heap profiler worker and a worker that -// periodically instructs the runtime stat sampler to sample the environment. -func (s *Server) startSampleEnvironment(frequency time.Duration) { +// startSampleEnvironment begins the heap profiler worker. +func (s *Server) startSampleEnvironment(ctx context.Context, frequency time.Duration) { // Immediately record summaries once on server startup. - ctx := s.AnnotateCtx(context.Background()) + ctx = s.AnnotateCtx(ctx) goroutineDumper, err := goroutinedumper.NewGoroutineDumper(s.cfg.GoroutineDumpDirName) if err != nil { log.Infof(ctx, "Could not start goroutine dumper worker due to: %s", err) } - var heapProfiler *heapprofiler.HeapProfiler - { - systemMemory, err := status.GetTotalMemory(ctx) + // We're not going to take heap profiles if running with in-memory stores. + // This helps some tests that can't write any files. + allStoresInMem := true + for _, storeSpec := range s.cfg.Stores.Specs { + if !storeSpec.InMemory { + allStoresInMem = false + break + } + } + + var heapProfiler *heapprofiler.HeapProfiler + if s.cfg.HeapProfileDirName != "" && !allStoresInMem { + if err := os.MkdirAll(s.cfg.HeapProfileDirName, 0755); err != nil { + log.Fatalf(ctx, "Could not create heap profiles dir: %s", err) + } + heapProfiler, err = heapprofiler.NewHeapProfiler( + s.cfg.HeapProfileDirName, s.ClusterSettings()) if err != nil { - log.Warningf(ctx, "Could not compute system memory due to: %s", err) - } else { - heapProfiler, err = heapprofiler.NewHeapProfiler(s.cfg.HeapProfileDirName, systemMemory) - if err != nil { - log.Infof(ctx, "Could not start heap profiler worker due to: %s", err) - } + log.Fatalf(ctx, "Could not start heap profiler worker due to: %s", err) } } - // We run two separate sampling loops, one for memory stats (via - // ReadMemStats) and one for all other runtime stats. This is necessary - // because as of go1.11, runtime.ReadMemStats() "stops the world" and - // requires waiting for any current GC run to finish. With a large heap, a - // single GC run may take longer than the default sampling period (10s). s.stopper.RunWorker(ctx, func(ctx context.Context) { + var goMemStats atomic.Value // *status.GoMemStats + goMemStats.Store(&status.GoMemStats{}) + var collectingMemStats int32 // atomic, 1 when stats call is ongoing + timer := timeutil.NewTimer() defer timer.Stop() timer.Reset(frequency) + for { select { - case <-timer.C: - timer.Read = true - s.runtime.SampleMemStats(ctx) - timer.Reset(frequency) case <-s.stopper.ShouldStop(): return - } - } - }) - - s.stopper.RunWorker(ctx, func(ctx context.Context) { - timer := timeutil.NewTimer() - defer timer.Stop() - timer.Reset(frequency) - for { - select { case <-timer.C: timer.Read = true - s.runtime.SampleEnvironment(ctx) + timer.Reset(frequency) + + // We read the heap stats on another goroutine and give up after 1s. + // This is necessary because as of Go 1.12, runtime.ReadMemStats() + // "stops the world" and that requires first waiting for any current GC + // run to finish. With a large heap and under extreme conditions, a + // single GC run may take longer than the default sampling period of + // 10s. Under normal operations and with more recent versions of Go, + // this hasn't been observed to be a problem. + statsCollected := make(chan struct{}) + if atomic.CompareAndSwapInt32(&collectingMemStats, 0, 1) { + if err := s.stopper.RunAsyncTask(ctx, "get-mem-stats", func(ctx context.Context) { + var ms status.GoMemStats + runtime.ReadMemStats(&ms.MemStats) + ms.Collected = timeutil.Now() + log.VEventf(ctx, 2, "memstats: %+v", ms) + + goMemStats.Store(&ms) + atomic.StoreInt32(&collectingMemStats, 0) + close(statsCollected) + }); err != nil { + close(statsCollected) + } + } + + select { + case <-statsCollected: + // Good; we managed to read the Go memory stats quickly enough. + case <-time.After(time.Second): + } + + curStats := goMemStats.Load().(*status.GoMemStats) + s.runtime.SampleEnvironment(ctx, *curStats) if goroutineDumper != nil { goroutineDumper.MaybeDump(ctx, s.ClusterSettings(), s.runtime.Goroutines.Value()) } if heapProfiler != nil { - heapProfiler.MaybeTakeProfile(ctx, s.ClusterSettings(), s.runtime.Rss.Value()) + heapProfiler.MaybeTakeProfile(ctx, curStats.MemStats) } - timer.Reset(frequency) - case <-s.stopper.ShouldStop(): - return + } } }) diff --git a/pkg/server/status.go b/pkg/server/status.go index 5dc1863715ce..6cfec7194ec5 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -95,10 +95,6 @@ const ( // permitted in responses. omittedKeyStr = "omitted (due to the 'server.remote_debugging.mode' setting)" - // heapDir is the directory name where the heap profiler stores profiles - // when there is a potential OOM situation. - heapDir = "heap_profiler" - // goroutineDir is the directory name where the goroutinedumper stores // goroutine dumps. goroutinesDir = "goroutine_dump" @@ -681,7 +677,7 @@ func (s *statusServer) GetFiles( //TODO(ridwanmsharif): Serve logfiles so debug-zip can fetch them // intead of reading indididual entries. case serverpb.FileType_HEAP: // Requesting for saved Heap Profiles. - dir = filepath.Join(s.admin.server.cfg.HeapProfileDirName, heapDir) + dir = filepath.Join(s.admin.server.cfg.HeapProfileDirName, base.HeapProfileDir) case serverpb.FileType_GOROUTINES: // Requesting for saved Goroutine dumps. dir = filepath.Join(s.admin.server.cfg.GoroutineDumpDirName, goroutinesDir) default: diff --git a/pkg/server/status/recorder.go b/pkg/server/status/recorder.go index 4c249cb4f634..f132c9db86e0 100644 --- a/pkg/server/status/recorder.go +++ b/pkg/server/status/recorder.go @@ -601,8 +601,8 @@ func (rr registryRecorder) record(dest *[]tspb.TimeSeriesData) { }) } -// GetTotalMemory returns either the total system memory or if possible the -// cgroups available memory. +// GetTotalMemory returns either the total system memory (in bytes) or if +// possible the cgroups available memory. func GetTotalMemory(ctx context.Context) (int64, error) { memory, warning, err := GetTotalMemoryWithoutLogging() if err != nil { diff --git a/pkg/server/status/runtime.go b/pkg/server/status/runtime.go index e1521d4be5e5..ffab35d6ef2a 100644 --- a/pkg/server/status/runtime.go +++ b/pkg/server/status/runtime.go @@ -19,15 +19,14 @@ import ( "os" "runtime" "runtime/debug" - "sync/atomic" "time" - "unsafe" "github.com/cockroachdb/cockroach/pkg/build" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/metric" - humanize "github.com/dustin/go-humanize" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/dustin/go-humanize" "github.com/elastic/gosigar" "github.com/shirou/gopsutil/net" ) @@ -117,7 +116,7 @@ var ( Measurement: "CPU Time", Unit: metric.Unit_PERCENT, } - metaRSS = metric.Metadata{ + metaRSSBytes = metric.Metadata{ Name: "sys.rss", Help: "Current process RSS", Measurement: "RSS", @@ -226,12 +225,6 @@ var ( } ) -type memStats struct { - goAllocated uint64 - goIdle uint64 - goTotal uint64 -} - // getCgoMemStats is a function that fetches stats for the C++ portion of the code. // We will not necessarily have implementations for all builds, so check for nil first. // Returns the following: @@ -261,9 +254,6 @@ type RuntimeStatSampler struct { net net.IOCountersStat } - // Memory stats that are updated atomically by SampleMemStats. - memStats unsafe.Pointer - initialDiskCounters diskStats initialNetCounters net.IOCountersStat @@ -288,7 +278,7 @@ type RuntimeStatSampler struct { CPUSysPercent *metric.GaugeFloat64 CPUCombinedPercentNorm *metric.GaugeFloat64 // Memory stats. - Rss *metric.Gauge + RSSBytes *metric.Gauge // File descriptor stats. FDOpen *metric.Gauge FDSoftLimit *metric.Gauge @@ -363,7 +353,7 @@ func NewRuntimeStatSampler(ctx context.Context, clock *hlc.Clock) *RuntimeStatSa CPUSysNS: metric.NewGauge(metaCPUSysNS), CPUSysPercent: metric.NewGaugeFloat64(metaCPUSysPercent), CPUCombinedPercentNorm: metric.NewGaugeFloat64(metaCPUCombinedPercentNorm), - Rss: metric.NewGauge(metaRSS), + RSSBytes: metric.NewGauge(metaRSSBytes), HostDiskReadBytes: metric.NewGauge(metaHostDiskReadBytes), HostDiskReadCount: metric.NewGauge(metaHostDiskReadCount), HostDiskReadTime: metric.NewGauge(metaHostDiskReadTime), @@ -387,6 +377,14 @@ func NewRuntimeStatSampler(ctx context.Context, clock *hlc.Clock) *RuntimeStatSa return rsr } +// GoMemStats groups a runtime.MemStats structure with the timestamp when it +// was collected. +type GoMemStats struct { + runtime.MemStats + // Collected is the timestamp at which these values were collected. + Collected time.Time +} + // SampleEnvironment queries the runtime system for various interesting metrics, // storing the resulting values in the set of metric gauges maintained by // RuntimeStatSampler. This makes runtime statistics more convenient for @@ -394,7 +392,10 @@ func NewRuntimeStatSampler(ctx context.Context, clock *hlc.Clock) *RuntimeStatSa // // This method should be called periodically by a higher level system in order // to keep runtime statistics current. -func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context) { +// +// SampleEnvironment takes GoMemStats as input because that is collected +// separately, on a different schedule. +func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context, ms GoMemStats) { // Note that debug.ReadGCStats() does not suffer the same problem as // runtime.ReadMemStats(). The only way you can know that is by reading the // source. @@ -490,18 +491,20 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context) { } } - ms := (*memStats)(atomic.LoadPointer(&rsr.memStats)) - if ms == nil { - ms = &memStats{} - } - // Log summary of statistics to console. cgoRate := float64((numCgoCall-rsr.last.cgoCall)*int64(time.Second)) / dur - log.Infof(ctx, "runtime stats: %s RSS, %d goroutines, %s/%s/%s GO alloc/idle/total, "+ + goMemStatsStale := timeutil.Now().Sub(ms.Collected) > time.Second + var staleMsg = "" + if goMemStatsStale { + staleMsg = "(stale)" + } + goTotal := ms.Sys - ms.HeapReleased + log.Infof(ctx, "runtime stats: %s RSS, %d goroutines, %s/%s/%s GO alloc/idle/total%s, "+ "%s/%s CGO alloc/total, %.1f CGO/sec, %.1f/%.1f %%(u/s)time, %.1f %%gc (%dx), "+ "%s/%s (r/w)net", humanize.IBytes(mem.Resident), numGoroutine, - humanize.IBytes(ms.goAllocated), humanize.IBytes(ms.goIdle), humanize.IBytes(ms.goTotal), + humanize.IBytes(ms.HeapAlloc), humanize.IBytes(ms.HeapIdle), humanize.IBytes(goTotal), + staleMsg, humanize.IBytes(uint64(cgoAllocated)), humanize.IBytes(uint64(cgoTotal)), cgoRate, 100*uPerc, 100*sPerc, 100*gcPausePercent, gc.NumGC-rsr.last.gcCount, humanize.IBytes(deltaNet.BytesRecv), humanize.IBytes(deltaNet.BytesSent), @@ -509,6 +512,8 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context) { rsr.last.cgoCall = numCgoCall rsr.last.gcCount = gc.NumGC + rsr.GoAllocBytes.Update(int64(ms.HeapAlloc)) + rsr.GoTotalBytes.Update(int64(goTotal)) rsr.CgoCalls.Update(numCgoCall) rsr.Goroutines.Update(int64(numGoroutine)) rsr.CgoAllocBytes.Update(int64(cgoAllocated)) @@ -523,48 +528,10 @@ func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context) { rsr.CPUCombinedPercentNorm.Update(combinedNormalizedPerc) rsr.FDOpen.Update(int64(fds.Open)) rsr.FDSoftLimit.Update(int64(fds.SoftLimit)) - rsr.Rss.Update(int64(mem.Resident)) + rsr.RSSBytes.Update(int64(mem.Resident)) rsr.Uptime.Update((now - rsr.startTimeNanos) / 1e9) } -// SampleMemStats queries the runtime system for memory metrics, updating the -// memory metric gauges and making these metrics available for logging by -// SampleEnvironment. -// -// This method should be called periodically by a higher level system in order -// to keep runtime statistics current. It is distinct from SampleEnvironment -// due to a limitation in the Go runtime which causes runtime.ReadMemStats() to -// block waiting for a GC cycle to finish which can take upwards of 10 seconds -// on a large heap. -func (rsr *RuntimeStatSampler) SampleMemStats(ctx context.Context) { - // Record memory and call stats from the runtime package. It might be useful - // to call ReadMemStats() more often, but it stops the world while collecting - // stats so shouldn't be called too often. For a similar reason, we want this - // call to come first because ReadMemStats() needs to wait for an existing GC - // to finish which can take multiple seconds on large heaps. - // - // NOTE: the MemStats fields do not get decremented when memory is released, - // to get accurate numbers, be sure to subtract. eg: ms.Sys - ms.HeapReleased - // for current memory reserved. - ms := &runtime.MemStats{} - runtime.ReadMemStats(ms) - - goAllocated := ms.Alloc - goTotal := ms.Sys - ms.HeapReleased - atomic.StorePointer(&rsr.memStats, unsafe.Pointer(&memStats{ - goAllocated: goAllocated, - goIdle: ms.HeapIdle - ms.HeapReleased, - goTotal: goTotal, - })) - - rsr.GoAllocBytes.Update(int64(goAllocated)) - rsr.GoTotalBytes.Update(int64(goTotal)) - - if log.V(2) { - log.Infof(ctx, "memstats: %+v", ms) - } -} - // GetCPUCombinedPercentNorm is part of the distsqlrun.RuntimeStats interface. func (rsr *RuntimeStatSampler) GetCPUCombinedPercentNorm() float64 { return rsr.CPUCombinedPercentNorm.Value() diff --git a/pkg/server/status_test.go b/pkg/server/status_test.go index 786c5bfc29d0..c48ad6173415 100644 --- a/pkg/server/status_test.go +++ b/pkg/server/status_test.go @@ -20,6 +20,7 @@ import ( "fmt" "io/ioutil" "math" + "os" "path/filepath" "reflect" "regexp" @@ -288,7 +289,11 @@ func TestStatusGetFiles(t *testing.T) { t.Run("heap", func(t *testing.T) { const testFilesNo = 3 for i := 0; i < testFilesNo; i++ { - testHeapFile := filepath.Join(storeSpec.Path, "logs", heapDir, fmt.Sprintf("heap%d.pprof", i)) + testHeapDir := filepath.Join(storeSpec.Path, "logs", base.HeapProfileDir) + testHeapFile := filepath.Join(testHeapDir, fmt.Sprintf("heap%d.pprof", i)) + if err := os.MkdirAll(testHeapDir, os.ModePerm); err != nil { + t.Fatal(err) + } if err := ioutil.WriteFile(testHeapFile, []byte(fmt.Sprintf("I'm heap file %d", i)), 0644); err != nil { t.Fatal(err) } diff --git a/pkg/settings/registry.go b/pkg/settings/registry.go index 4beadedd9452..b7d82249c27a 100644 --- a/pkg/settings/registry.go +++ b/pkg/settings/registry.go @@ -44,8 +44,9 @@ var retiredSettings = map[string]struct{}{ // removed as of 2.2. "kv.raft_log.synchronize": {}, // removed as of 19.2. - "schemachanger.bulk_index_backfill.enabled": {}, - "rocksdb.ingest_backpressure.delay_l0_file": {}, // never used + "schemachanger.bulk_index_backfill.enabled": {}, + "rocksdb.ingest_backpressure.delay_l0_file": {}, // never used + "server.heap_profile.system_memory_threshold_fraction": {}, } // Register adds a setting to the registry.