-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
runtime.go
674 lines (622 loc) · 23.9 KB
/
runtime.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
// Copyright 2015 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
package status
import (
"context"
"os"
"runtime"
"runtime/debug"
"time"
"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/build"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
humanize "github.com/dustin/go-humanize"
"github.com/elastic/gosigar"
"github.com/shirou/gopsutil/net"
)
// Metadata for every gauge maintained by RuntimeStatSampler. Each entry gives
// the time series name, a human-readable help string, the measurement label
// and the unit of the value stored in the corresponding gauge.
var (
	// Go runtime and cgo statistics.
	metaCgoCalls = metric.Metadata{
		Name:        "sys.cgocalls",
		Help:        "Total number of cgo calls",
		Measurement: "cgo Calls",
		Unit:        metric.Unit_COUNT,
	}
	metaGoroutines = metric.Metadata{
		Name:        "sys.goroutines",
		Help:        "Current number of goroutines",
		Measurement: "goroutines",
		Unit:        metric.Unit_COUNT,
	}
	metaGoAllocBytes = metric.Metadata{
		Name:        "sys.go.allocbytes",
		Help:        "Current bytes of memory allocated by go",
		Measurement: "Memory",
		Unit:        metric.Unit_BYTES,
	}
	metaGoTotalBytes = metric.Metadata{
		Name:        "sys.go.totalbytes",
		Help:        "Total bytes of memory allocated by go, but not released",
		Measurement: "Memory",
		Unit:        metric.Unit_BYTES,
	}
	metaCgoAllocBytes = metric.Metadata{
		Name:        "sys.cgo.allocbytes",
		Help:        "Current bytes of memory allocated by cgo",
		Measurement: "Memory",
		Unit:        metric.Unit_BYTES,
	}
	metaCgoTotalBytes = metric.Metadata{
		Name:        "sys.cgo.totalbytes",
		Help:        "Total bytes of memory allocated by cgo, but not released",
		Measurement: "Memory",
		Unit:        metric.Unit_BYTES,
	}

	// Garbage collector statistics.
	metaGCCount = metric.Metadata{
		Name:        "sys.gc.count",
		Help:        "Total number of GC runs",
		Measurement: "GC Runs",
		Unit:        metric.Unit_COUNT,
	}
	metaGCPauseNS = metric.Metadata{
		Name:        "sys.gc.pause.ns",
		Help:        "Total GC pause",
		Measurement: "GC Pause",
		Unit:        metric.Unit_NANOSECONDS,
	}
	metaGCPausePercent = metric.Metadata{
		Name:        "sys.gc.pause.percent",
		Help:        "Current GC pause percentage",
		Measurement: "GC Pause",
		Unit:        metric.Unit_PERCENT,
	}

	// Process CPU statistics.
	metaCPUUserNS = metric.Metadata{
		Name:        "sys.cpu.user.ns",
		Help:        "Total user cpu time",
		Measurement: "CPU Time",
		Unit:        metric.Unit_NANOSECONDS,
	}
	metaCPUUserPercent = metric.Metadata{
		Name:        "sys.cpu.user.percent",
		Help:        "Current user cpu percentage",
		Measurement: "CPU Time",
		Unit:        metric.Unit_PERCENT,
	}
	metaCPUSysNS = metric.Metadata{
		Name:        "sys.cpu.sys.ns",
		Help:        "Total system cpu time",
		Measurement: "CPU Time",
		Unit:        metric.Unit_NANOSECONDS,
	}
	metaCPUSysPercent = metric.Metadata{
		Name:        "sys.cpu.sys.percent",
		Help:        "Current system cpu percentage",
		Measurement: "CPU Time",
		Unit:        metric.Unit_PERCENT,
	}
	metaCPUCombinedPercentNorm = metric.Metadata{
		Name:        "sys.cpu.combined.percent-normalized",
		Help:        "Current user+system cpu percentage, normalized 0-1 by number of cores",
		Measurement: "CPU Time",
		Unit:        metric.Unit_PERCENT,
	}

	// Process memory, file descriptor and uptime statistics.
	metaRSSBytes = metric.Metadata{
		Name:        "sys.rss",
		Help:        "Current process RSS",
		Measurement: "RSS",
		Unit:        metric.Unit_BYTES,
	}
	metaFDOpen = metric.Metadata{
		Name:        "sys.fd.open",
		Help:        "Process open file descriptors",
		Measurement: "File Descriptors",
		Unit:        metric.Unit_COUNT,
	}
	metaFDSoftLimit = metric.Metadata{
		Name:        "sys.fd.softlimit",
		Help:        "Process open FD soft limit",
		Measurement: "File Descriptors",
		Unit:        metric.Unit_COUNT,
	}
	metaUptime = metric.Metadata{
		Name:        "sys.uptime",
		Help:        "Process uptime",
		Measurement: "Uptime",
		Unit:        metric.Unit_SECONDS,
	}

	// These disk and network stats are counters of the number of operations,
	// packets, bytes, and cumulative time of the disk and net IO that has been
	// done across the whole host *since this Cockroach process started up*. By
	// taking the derivatives of these metrics, we can see the IO throughput.
	metaHostDiskReadCount = metric.Metadata{
		Name:        "sys.host.disk.read.count",
		Unit:        metric.Unit_COUNT,
		Measurement: "Operations",
		Help:        "Disk read operations across all disks since this process started",
	}
	metaHostDiskReadBytes = metric.Metadata{
		Name:        "sys.host.disk.read.bytes",
		Unit:        metric.Unit_BYTES,
		Measurement: "Bytes",
		Help:        "Bytes read from all disks since this process started",
	}
	metaHostDiskReadTime = metric.Metadata{
		Name:        "sys.host.disk.read.time",
		Unit:        metric.Unit_NANOSECONDS,
		Measurement: "Time",
		Help:        "Time spent reading from all disks since this process started",
	}
	metaHostDiskWriteCount = metric.Metadata{
		Name:        "sys.host.disk.write.count",
		Unit:        metric.Unit_COUNT,
		Measurement: "Operations",
		Help:        "Disk write operations across all disks since this process started",
	}
	metaHostDiskWriteBytes = metric.Metadata{
		Name:        "sys.host.disk.write.bytes",
		Unit:        metric.Unit_BYTES,
		Measurement: "Bytes",
		Help:        "Bytes written to all disks since this process started",
	}
	metaHostDiskWriteTime = metric.Metadata{
		Name:        "sys.host.disk.write.time",
		Unit:        metric.Unit_NANOSECONDS,
		Measurement: "Time",
		Help:        "Time spent writing to all disks since this process started",
	}
	metaHostDiskIOTime = metric.Metadata{
		Name:        "sys.host.disk.io.time",
		Unit:        metric.Unit_NANOSECONDS,
		Measurement: "Time",
		Help:        "Time spent reading from or writing to all disks since this process started",
	}
	metaHostDiskWeightedIOTime = metric.Metadata{
		Name:        "sys.host.disk.weightedio.time",
		Unit:        metric.Unit_NANOSECONDS,
		Measurement: "Time",
		Help:        "Weighted time spent reading from or writing to all disks since this process started",
	}
	// Unlike the other host disk metrics, this one is a point-in-time value
	// rather than a counter accumulated since process start.
	metaHostIopsInProgress = metric.Metadata{
		Name:        "sys.host.disk.iopsinprogress",
		Unit:        metric.Unit_COUNT,
		Measurement: "Operations",
		Help:        "IO operations currently in progress on this host",
	}
	metaHostNetRecvBytes = metric.Metadata{
		Name:        "sys.host.net.recv.bytes",
		Unit:        metric.Unit_BYTES,
		Measurement: "Bytes",
		Help:        "Bytes received on all network interfaces since this process started",
	}
	metaHostNetRecvPackets = metric.Metadata{
		Name:        "sys.host.net.recv.packets",
		Unit:        metric.Unit_COUNT,
		Measurement: "Packets",
		Help:        "Packets received on all network interfaces since this process started",
	}
	metaHostNetSendBytes = metric.Metadata{
		Name:        "sys.host.net.send.bytes",
		Unit:        metric.Unit_BYTES,
		Measurement: "Bytes",
		Help:        "Bytes sent on all network interfaces since this process started",
	}
	metaHostNetSendPackets = metric.Metadata{
		Name:        "sys.host.net.send.packets",
		Unit:        metric.Unit_COUNT,
		Measurement: "Packets",
		Help:        "Packets sent on all network interfaces since this process started",
	}
)
// getCgoMemStats is an optional hook that fetches memory statistics for the
// C++ portion of the code. Not every build necessarily provides an
// implementation, so callers must check the variable for nil before calling it.
//
// Results:
//   allocated: bytes allocated by the application
//   total:     total bytes requested from the system
//   err:       any issue fetching the stats; this should be treated as a
//              warning only
var getCgoMemStats func(ctx context.Context) (allocated, total uint, err error)
// RuntimeStatSampler periodically samples the runtime environment for useful
// statistics. It performs some rudimentary calculations on the raw samples
// and stores the results in metric gauges, a format that is easily consumed
// by status logging systems.
type RuntimeStatSampler struct {
	// clock supplies the physical timestamps used for uptime and for the
	// interval between samples.
	clock *hlc.Clock
	// startTimeNanos is the physical clock reading taken when the sampler
	// was constructed.
	startTimeNanos int64

	// last holds the previously sampled values of some statistics. They are
	// retained only so that derivative statistics (deltas and rates) can be
	// computed on the next sample.
	last struct {
		now, utime, stime int64
		cgoCall, gcCount  int64
		gcPauseTime       uint64
		disk              diskStats
		net               net.IOCountersStat
	}

	// Host-wide disk and network counters captured at construction time;
	// they are subtracted from later readings so the exported values are
	// relative to process start.
	initialDiskCounters diskStats
	initialNetCounters  net.IOCountersStat

	// fdUsageNotImplemented records that the "not implemented" error for
	// file descriptor usage has already been logged, so it is only shown
	// once and does not spam the log.
	fdUsageNotImplemented bool

	// Metric gauges maintained by the sampler.

	// Go runtime stats.
	CgoCalls       *metric.Gauge
	Goroutines     *metric.Gauge
	GoAllocBytes   *metric.Gauge
	GoTotalBytes   *metric.Gauge
	CgoAllocBytes  *metric.Gauge
	CgoTotalBytes  *metric.Gauge
	GcCount        *metric.Gauge
	GcPauseNS      *metric.Gauge
	GcPausePercent *metric.GaugeFloat64

	// CPU stats.
	CPUUserNS              *metric.Gauge
	CPUUserPercent         *metric.GaugeFloat64
	CPUSysNS               *metric.Gauge
	CPUSysPercent          *metric.GaugeFloat64
	CPUCombinedPercentNorm *metric.GaugeFloat64

	// Memory stats.
	RSSBytes *metric.Gauge

	// File descriptor stats.
	FDOpen      *metric.Gauge
	FDSoftLimit *metric.Gauge

	// Disk and network stats.
	HostDiskReadBytes      *metric.Gauge
	HostDiskReadCount      *metric.Gauge
	HostDiskReadTime       *metric.Gauge
	HostDiskWriteBytes     *metric.Gauge
	HostDiskWriteCount     *metric.Gauge
	HostDiskWriteTime      *metric.Gauge
	HostDiskIOTime         *metric.Gauge
	HostDiskWeightedIOTime *metric.Gauge
	IopsInProgress         *metric.Gauge
	HostNetRecvBytes       *metric.Gauge
	HostNetRecvPackets     *metric.Gauge
	HostNetSendBytes       *metric.Gauge
	HostNetSendPackets     *metric.Gauge

	// Uptime and build.
	Uptime         *metric.Gauge // A gauge (not a counter) so that Update can be called.
	BuildTimestamp *metric.Gauge
}
// NewRuntimeStatSampler constructs a new RuntimeStatSampler object.
//
// It creates every gauge maintained by the sampler, records the current
// physical time as the process start time, and captures the host's summed disk
// and network IO counters so that later samples can be reported relative to
// this starting point. Failures to read the build timestamp or the initial IO
// counters are logged but do not prevent construction.
func NewRuntimeStatSampler(ctx context.Context, clock *hlc.Clock) *RuntimeStatSampler {
	// Construct the build info metric. It is constant.
	// We first set the labels on the metadata.
	info := build.GetInfo()
	timestamp, err := info.Timestamp()
	if err != nil {
		// We can't panic here, tests don't have a build timestamp.
		log.Warningf(ctx, "Could not parse build timestamp: %v", err)
	}

	// Build information.
	metaBuildTimestamp := metric.Metadata{
		Name:        "build.timestamp",
		Help:        "Build information",
		Measurement: "Build Time",
		Unit:        metric.Unit_TIMESTAMP_SEC,
	}
	metaBuildTimestamp.AddLabel("tag", info.Tag)
	metaBuildTimestamp.AddLabel("go_version", info.GoVersion)

	buildTimestamp := metric.NewGauge(metaBuildTimestamp)
	buildTimestamp.Update(timestamp)

	// On failure the helpers below return zero-valued counters, so the
	// baseline simply stays at zero.
	diskCounters, err := getSummedDiskCounters(ctx)
	if err != nil {
		log.Errorf(ctx, "could not get initial disk IO counters: %v", err)
	}
	netCounters, err := getSummedNetStats(ctx)
	if err != nil {
		log.Errorf(ctx, "could not get initial net IO counters: %v", err)
	}

	rsr := &RuntimeStatSampler{
		clock:                  clock,
		startTimeNanos:         clock.PhysicalNow(),
		initialNetCounters:     netCounters,
		initialDiskCounters:    diskCounters,
		CgoCalls:               metric.NewGauge(metaCgoCalls),
		Goroutines:             metric.NewGauge(metaGoroutines),
		GoAllocBytes:           metric.NewGauge(metaGoAllocBytes),
		GoTotalBytes:           metric.NewGauge(metaGoTotalBytes),
		CgoAllocBytes:          metric.NewGauge(metaCgoAllocBytes),
		CgoTotalBytes:          metric.NewGauge(metaCgoTotalBytes),
		GcCount:                metric.NewGauge(metaGCCount),
		GcPauseNS:              metric.NewGauge(metaGCPauseNS),
		GcPausePercent:         metric.NewGaugeFloat64(metaGCPausePercent),
		CPUUserNS:              metric.NewGauge(metaCPUUserNS),
		CPUUserPercent:         metric.NewGaugeFloat64(metaCPUUserPercent),
		CPUSysNS:               metric.NewGauge(metaCPUSysNS),
		CPUSysPercent:          metric.NewGaugeFloat64(metaCPUSysPercent),
		CPUCombinedPercentNorm: metric.NewGaugeFloat64(metaCPUCombinedPercentNorm),
		RSSBytes:               metric.NewGauge(metaRSSBytes),
		HostDiskReadBytes:      metric.NewGauge(metaHostDiskReadBytes),
		HostDiskReadCount:      metric.NewGauge(metaHostDiskReadCount),
		HostDiskReadTime:       metric.NewGauge(metaHostDiskReadTime),
		HostDiskWriteBytes:     metric.NewGauge(metaHostDiskWriteBytes),
		HostDiskWriteCount:     metric.NewGauge(metaHostDiskWriteCount),
		HostDiskWriteTime:      metric.NewGauge(metaHostDiskWriteTime),
		HostDiskIOTime:         metric.NewGauge(metaHostDiskIOTime),
		HostDiskWeightedIOTime: metric.NewGauge(metaHostDiskWeightedIOTime),
		IopsInProgress:         metric.NewGauge(metaHostIopsInProgress),
		HostNetRecvBytes:       metric.NewGauge(metaHostNetRecvBytes),
		HostNetRecvPackets:     metric.NewGauge(metaHostNetRecvPackets),
		HostNetSendBytes:       metric.NewGauge(metaHostNetSendBytes),
		HostNetSendPackets:     metric.NewGauge(metaHostNetSendPackets),
		FDOpen:                 metric.NewGauge(metaFDOpen),
		FDSoftLimit:            metric.NewGauge(metaFDSoftLimit),
		Uptime:                 metric.NewGauge(metaUptime),
		BuildTimestamp:         buildTimestamp,
	}
	// Seed the "last sample" state so the first per-interval delta is taken
	// against the counters observed at construction time.
	rsr.last.disk = rsr.initialDiskCounters
	rsr.last.net = rsr.initialNetCounters
	return rsr
}
// SampleEnvironment queries the runtime system for various interesting metrics,
// storing the resulting values in the set of metric gauges maintained by
// RuntimeStatSampler. This makes runtime statistics more convenient for
// consumption by the time series and status systems.
//
// This method should be called periodically by a higher level system in order
// to keep runtime statistics current.
//
// SampleEnvironment takes GoMemStats as input (so that it can be collected
// separately, on another schedule), and returns MemStats (which augments and
// includes the GoMemStats). Failures to read an individual statistic are
// logged and leave that statistic at its zero value for this sample.
func (rsr *RuntimeStatSampler) SampleEnvironment(
	ctx context.Context, ms base.GoMemStats,
) base.MemStats {
	// Note that debug.ReadGCStats() does not suffer the same problem as
	// runtime.ReadMemStats(). The only way you can know that is by reading the
	// source.
	var gc debug.GCStats
	debug.ReadGCStats(&gc)

	numCgoCall := runtime.NumCgoCall()
	numGoroutine := runtime.NumGoroutine()

	// Retrieve Mem and CPU statistics.
	pid := os.Getpid()
	var mem gosigar.ProcMem
	if err := mem.Get(pid); err != nil {
		log.Errorf(ctx, "unable to get mem usage: %v", err)
	}
	var cpuTime gosigar.ProcTime
	if err := cpuTime.Get(pid); err != nil {
		log.Errorf(ctx, "unable to get cpu usage: %v", err)
	}
	var fds gosigar.ProcFDUsage
	if err := fds.Get(pid); err != nil {
		if _, ok := err.(gosigar.ErrNotImplemented); !ok {
			log.Errorf(ctx, "unable to get file descriptor usage: %s", err)
		} else if !rsr.fdUsageNotImplemented {
			// Only report the "not implemented" condition once.
			rsr.fdUsageNotImplemented = true
			log.Warningf(ctx, "unable to get file descriptor usage (will not try again): %s", err)
		}
	}

	// Host disk counters, exported relative to the values captured when the
	// sampler was constructed.
	diskCounters, err := getSummedDiskCounters(ctx)
	if err != nil {
		log.Warningf(ctx, "problem fetching disk stats: %s; disk stats will be empty.", err)
	} else {
		rsr.last.disk = diskCounters
		subtractDiskCounters(&diskCounters, rsr.initialDiskCounters)

		rsr.HostDiskReadBytes.Update(diskCounters.readBytes)
		rsr.HostDiskReadCount.Update(diskCounters.readCount)
		rsr.HostDiskReadTime.Update(int64(diskCounters.readTime))
		rsr.HostDiskWriteBytes.Update(diskCounters.writeBytes)
		rsr.HostDiskWriteCount.Update(diskCounters.writeCount)
		rsr.HostDiskWriteTime.Update(int64(diskCounters.writeTime))
		rsr.HostDiskIOTime.Update(int64(diskCounters.ioTime))
		rsr.HostDiskWeightedIOTime.Update(int64(diskCounters.weightedIOTime))
		rsr.IopsInProgress.Update(diskCounters.iopsInProgress)
	}

	// Host network counters. deltaNet is the change since the previous sample
	// (used in the log line below); the gauges are relative to construction
	// time.
	var deltaNet net.IOCountersStat
	netCounters, err := getSummedNetStats(ctx)
	if err != nil {
		log.Warningf(ctx, "problem fetching net stats: %s; net stats will be empty.", err)
	} else {
		deltaNet = netCounters
		subtractNetworkCounters(&deltaNet, rsr.last.net)
		rsr.last.net = netCounters
		subtractNetworkCounters(&netCounters, rsr.initialNetCounters)

		rsr.HostNetSendBytes.Update(int64(netCounters.BytesSent))
		rsr.HostNetSendPackets.Update(int64(netCounters.PacketsSent))
		rsr.HostNetRecvBytes.Update(int64(netCounters.BytesRecv))
		rsr.HostNetRecvPackets.Update(int64(netCounters.PacketsRecv))
	}

	// Time statistics can be compared to the total elapsed time to create a
	// useful percentage of total CPU usage, which would be somewhat less accurate
	// if calculated later using downsampled time series data.
	now := rsr.clock.PhysicalNow()
	dur := float64(now - rsr.last.now)
	// cpuTime.{User,Sys} are in milliseconds, convert to nanoseconds.
	utime := int64(cpuTime.User) * 1e6
	stime := int64(cpuTime.Sys) * 1e6
	// Despite the names, these are fractions of the elapsed interval; they are
	// multiplied by 100 only for the log line.
	uPerc := float64(utime-rsr.last.utime) / dur
	sPerc := float64(stime-rsr.last.stime) / dur
	combinedNormalizedPerc := (sPerc + uPerc) / float64(runtime.NumCPU())
	gcPausePercent := float64(uint64(gc.PauseTotal)-rsr.last.gcPauseTime) / dur
	rsr.last.now = now
	rsr.last.utime = utime
	rsr.last.stime = stime
	rsr.last.gcPauseTime = uint64(gc.PauseTotal)

	var cgoAllocated, cgoTotal uint
	if getCgoMemStats != nil {
		allocated, total, err := getCgoMemStats(ctx)
		if err != nil {
			log.Warningf(ctx, "problem fetching CGO memory stats: %s; CGO stats will be empty.", err)
		}
		cgoAllocated, cgoTotal = allocated, total
	}

	// Log summary of statistics to console.
	cgoRate := float64((numCgoCall-rsr.last.cgoCall)*int64(time.Second)) / dur
	// Flag Go memory stats collected more than a second ago.
	var staleMsg string
	if timeutil.Now().Sub(ms.Collected) > time.Second {
		staleMsg = "(stale)"
	}
	log.Infof(ctx, "runtime stats: %s RSS, %d goroutines, %s/%s/%s GO alloc/idle/total%s, "+
		"%s/%s CGO alloc/total, %.1f CGO/sec, %.1f/%.1f %%(u/s)time, %.1f %%gc (%dx), "+
		"%s/%s (r/w)net",
		humanize.IBytes(mem.Resident), numGoroutine,
		humanize.IBytes(ms.GoAllocated), humanize.IBytes(ms.GoIdle), humanize.IBytes(ms.GoTotal),
		staleMsg,
		humanize.IBytes(uint64(cgoAllocated)), humanize.IBytes(uint64(cgoTotal)),
		cgoRate, 100*uPerc, 100*sPerc, 100*gcPausePercent, gc.NumGC-rsr.last.gcCount,
		humanize.IBytes(deltaNet.BytesRecv), humanize.IBytes(deltaNet.BytesSent),
	)
	rsr.last.cgoCall = numCgoCall
	rsr.last.gcCount = gc.NumGC

	rsr.CgoCalls.Update(numCgoCall)
	rsr.Goroutines.Update(int64(numGoroutine))
	rsr.CgoAllocBytes.Update(int64(cgoAllocated))
	rsr.CgoTotalBytes.Update(int64(cgoTotal))
	rsr.GcCount.Update(gc.NumGC)
	rsr.GcPauseNS.Update(int64(gc.PauseTotal))
	rsr.GcPausePercent.Update(gcPausePercent)
	rsr.CPUUserNS.Update(utime)
	rsr.CPUUserPercent.Update(uPerc)
	rsr.CPUSysNS.Update(stime)
	rsr.CPUSysPercent.Update(sPerc)
	rsr.CPUCombinedPercentNorm.Update(combinedNormalizedPerc)
	rsr.FDOpen.Update(int64(fds.Open))
	rsr.FDSoftLimit.Update(int64(fds.SoftLimit))
	rsr.RSSBytes.Update(int64(mem.Resident))
	// Uptime is reported in whole seconds.
	rsr.Uptime.Update((now - rsr.startTimeNanos) / 1e9)

	return base.MemStats{
		Go:       ms,
		RSSBytes: mem.Resident,
	}
}
// SampleMemStats reads the Go runtime's memory statistics, publishes the
// allocated and total byte counts to the corresponding gauges, and returns the
// sampled values stamped with the collection time.
//
// This method should be called periodically by a higher level system in order
// to keep runtime statistics current. It is kept separate from
// SampleEnvironment() because, as of Go 1.12, runtime.ReadMemStats() "stops
// the world", which requires first waiting for any current GC run to finish.
// With a large heap, a single GC can take many seconds
// (https://github.com/golang/go/issues/19812).
func (rsr *RuntimeStatSampler) SampleMemStats(ctx context.Context) base.GoMemStats {
	var stats runtime.MemStats
	runtime.ReadMemStats(&stats)

	// NOTE: the MemStats fields do not get decremented when memory is
	// released, so subtract HeapReleased to get accurate numbers, e.g.
	// Sys - HeapReleased for the memory currently reserved.
	sample := base.GoMemStats{
		GoAllocated: stats.Alloc,
		GoIdle:      stats.HeapIdle - stats.HeapReleased,
		GoTotal:     stats.Sys - stats.HeapReleased,
	}

	rsr.GoAllocBytes.Update(int64(sample.GoAllocated))
	rsr.GoTotalBytes.Update(int64(sample.GoTotal))
	log.VErrEventf(ctx, 2, "memstats: %+v", &stats)

	sample.Collected = timeutil.Now()
	return sample
}
// GetCPUCombinedPercentNorm is part of the distsqlrun.RuntimeStats interface.
// It returns the current value of the CPUCombinedPercentNorm gauge.
func (rsr *RuntimeStatSampler) GetCPUCombinedPercentNorm() float64 {
	gauge := rsr.CPUCombinedPercentNorm
	return gauge.Value()
}
// diskStats holds the disk statistics reported by the operating system. The
// interpretation of some fields varies by platform, although as much as
// possible they are normalized to the semantics of linux's diskstats
// interface.
//
// Every field except iopsInProgress behaves like a counter: it only ever
// increases and is best interpreted as a rate.
type diskStats struct {
	// Bytes read and number of read operations.
	readBytes int64
	readCount int64
	// readTime (like writeTime) may grow by more than 1s per second when
	// access to storage is parallelized.
	readTime time.Duration

	// Bytes written, number of write operations, and time spent writing.
	writeBytes int64
	writeCount int64
	writeTime  time.Duration

	// ioTime is the amount of time during which iopsInProgress is non-zero,
	// so its increase is capped at 1s/s. Only available on linux.
	ioTime time.Duration

	// weightedIOTime is a linux-specific metric that attempts to represent
	// "an easy measure of both I/O completion time and the backlog that may
	// be accumulating."
	weightedIOTime time.Duration

	// iopsInProgress is a gauge of the number of pending IO operations.
	// Not available on macOS.
	iopsInProgress int64
}
// getSummedDiskCounters fetches the disk counters via getDiskCounters and
// returns their sum as a single diskStats. If the fetch fails, the zero
// diskStats is returned together with the error.
func getSummedDiskCounters(ctx context.Context) (diskStats, error) {
	var total diskStats
	perDisk, err := getDiskCounters(ctx)
	if err == nil {
		total = sumDiskCounters(perDisk)
	}
	return total, err
}
// getSummedNetStats fetches the per-NIC network IO counters and returns their
// sum as a single net.IOCountersStat. If the fetch fails, the zero value is
// returned together with the error.
func getSummedNetStats(ctx context.Context) (net.IOCountersStat, error) {
	var total net.IOCountersStat
	perNIC, err := net.IOCountersWithContext(ctx, true /* per NIC */)
	if err == nil {
		total = sumNetworkCounters(perNIC)
	}
	return total, err
}
// sumDiskCounters returns a new diskStats in which every field is the sum of
// that field across all entries of disksStats. An empty or nil slice yields
// the zero diskStats.
func sumDiskCounters(disksStats []diskStats) diskStats {
	var total diskStats
	for i := range disksStats {
		d := &disksStats[i]
		total.readBytes += d.readBytes
		total.readCount += d.readCount
		total.readTime += d.readTime

		total.writeBytes += d.writeBytes
		total.writeCount += d.writeCount
		total.writeTime += d.writeTime

		total.ioTime += d.ioTime
		total.weightedIOTime += d.weightedIOTime

		total.iopsInProgress += d.iopsInProgress
	}
	return total
}
// subtractDiskCounters subtracts each counter in `sub` from the matching
// counter in `from`, storing the result back into `from`. The iopsInProgress
// field is left untouched.
func subtractDiskCounters(from *diskStats, sub diskStats) {
	// Read side.
	from.readBytes -= sub.readBytes
	from.readCount -= sub.readCount
	from.readTime -= sub.readTime

	// Write side.
	from.writeBytes -= sub.writeBytes
	from.writeCount -= sub.writeCount
	from.writeTime -= sub.writeTime

	// Combined IO timings.
	from.ioTime -= sub.ioTime
	from.weightedIOTime -= sub.weightedIOTime
}
// sumNetworkCounters returns a new net.IOCountersStat whose byte and packet
// counters (sent and received) are the sums of those counters across every
// entry of netCounters. All other fields of the result are left at their zero
// values.
func sumNetworkCounters(netCounters []net.IOCountersStat) net.IOCountersStat {
	var total net.IOCountersStat
	for i := range netCounters {
		nic := &netCounters[i]
		total.BytesRecv += nic.BytesRecv
		total.BytesSent += nic.BytesSent
		total.PacketsRecv += nic.PacketsRecv
		total.PacketsSent += nic.PacketsSent
	}
	return total
}
// subtractNetworkCounters subtracts the byte and packet counters in `sub`
// from the matching counters in `from`, storing the results back into `from`.
// No other fields of `from` are modified.
func subtractNetworkCounters(from *net.IOCountersStat, sub net.IOCountersStat) {
	// Received traffic.
	from.BytesRecv -= sub.BytesRecv
	from.PacketsRecv -= sub.PacketsRecv

	// Sent traffic.
	from.BytesSent -= sub.BytesSent
	from.PacketsSent -= sub.PacketsSent
}