Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(inputs.diskio): Add field io await and util #15950

Merged
merged 4 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions plugins/inputs/diskio/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ docker run --privileged -v /:/hostfs:ro -v /run/udev:/run/udev:ro -e HOST_PROC=/
- iops_in_progress (integer, gauge)
- merged_reads (integer, counter)
- merged_writes (integer, counter)
- io_util (float64, gauge, percent)
- io_await (float64, gauge, milliseconds)
- io_svctm (float64, gauge, milliseconds)

On linux these values correspond to the values in [`/proc/diskstats`][1] and
[`/sys/block/<dev>/stat`][2].
Expand Down Expand Up @@ -123,6 +126,18 @@ efficiency. Thus two 4K reads may become one 8K read before it is
ultimately handed to the disk, and so it will be counted (and queued)
as only one I/O. These fields let you know how often this was done.

### `io_await`

The average time per I/O operation (ms)

### `io_svctm`

The service time per I/O operation, excluding wait time (ms)

### `io_util`

The percentage of time the disk was active (%)

## Sample Queries

### Calculate percent IO utilization per disk and host
Expand All @@ -147,3 +162,9 @@ diskio,name=sda1 merged_reads=0i,reads=2353i,writes=10i,write_bytes=2117632i,wri
diskio,name=centos/var_log reads=1063077i,writes=591025i,read_bytes=139325491712i,write_bytes=144233131520i,read_time=650221i,write_time=24368817i,io_time=852490i,weighted_io_time=25037394i,iops_in_progress=1i,merged_reads=0i,merged_writes=0i 1578326400000000000
diskio,name=sda write_time=49i,io_time=1317i,weighted_io_time=1404i,reads=2495i,read_time=1357i,write_bytes=2117632i,iops_in_progress=0i,merged_reads=0i,merged_writes=0i,writes=10i,read_bytes=38956544i 1578326400000000000
```

```text
diskio,name=sda io_await=0.3317307692307692,io_svctm=0.07692307692307693,io_util=0.5329780146568954 1578326400000000000
diskio,name=sda1 io_await=0.3317307692307692,io_svctm=0.07692307692307693,io_util=0.5329780146568954 1578326400000000000
diskio,name=sda2 io_await=0.3317307692307692,io_svctm=0.07692307692307693,io_util=0.5329780146568954 1578326400000000000
```
36 changes: 28 additions & 8 deletions plugins/inputs/diskio/diskio.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ import (
"fmt"
"regexp"
"strings"
"time"

"github.com/shirou/gopsutil/v3/disk"

"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/filter"
Expand All @@ -32,11 +35,13 @@ type DiskIO struct {
SkipSerialNumber bool `toml:"skip_serial_number"`
Log telegraf.Logger `toml:"-"`

ps system.PS
infoCache map[string]diskInfoCache
deviceFilter filter.Filter
warnDiskName map[string]bool
warnDiskTags map[string]bool
ps system.PS
infoCache map[string]diskInfoCache
deviceFilter filter.Filter
warnDiskName map[string]bool
warnDiskTags map[string]bool
lastIOCounterStat map[string]disk.IOCountersStat
lastCollectTime time.Time
}

func (*DiskIO) SampleConfig() string {
Expand All @@ -57,6 +62,7 @@ func (d *DiskIO) Init() error {
d.infoCache = make(map[string]diskInfoCache)
d.warnDiskName = make(map[string]bool)
d.warnDiskTags = make(map[string]bool)
d.lastIOCounterStat = make(map[string]disk.IOCountersStat)

return nil
}
Expand All @@ -73,8 +79,8 @@ func (d *DiskIO) Gather(acc telegraf.Accumulator) error {
if err != nil {
return fmt.Errorf("error getting disk io info: %w", err)
}

for _, io := range diskio {
collectTime := time.Now()
for k, io := range diskio {
match := false
if d.deviceFilter != nil && d.deviceFilter.Match(io.Name) {
match = true
Expand Down Expand Up @@ -125,9 +131,23 @@ func (d *DiskIO) Gather(acc telegraf.Accumulator) error {
"merged_reads": io.MergedReadCount,
"merged_writes": io.MergedWriteCount,
}
if lastValue, exists := d.lastIOCounterStat[k]; exists {
deltaRWCount := float64(io.ReadCount + io.WriteCount - lastValue.ReadCount - lastValue.WriteCount)
deltaRWTime := float64(io.ReadTime + io.WriteTime - lastValue.ReadTime - lastValue.WriteTime)
deltaIOTime := float64(io.IoTime - lastValue.IoTime)
if deltaRWCount > 0 {
fields["io_await"] = deltaRWTime / deltaRWCount
fields["io_svctm"] = deltaIOTime / deltaRWCount
srebhan marked this conversation as resolved.
Show resolved Hide resolved
}
itv := float64(collectTime.Sub(d.lastCollectTime).Milliseconds())
if itv > 0 {
fields["io_util"] = 100 * deltaIOTime / itv
}
}
acc.AddCounter("diskio", fields, tags)
}

d.lastCollectTime = collectTime
d.lastIOCounterStat = diskio
return nil
}

Expand Down
63 changes: 63 additions & 0 deletions plugins/inputs/diskio/diskio_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package diskio

import (
"testing"
"time"

"github.com/shirou/gopsutil/v3/disk"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -127,3 +128,65 @@ func TestDiskIO(t *testing.T) {
})
}
}

// TestDiskIOUtil runs two consecutive gathers against mocked disk counters
// and verifies that the derived fields io_util, io_svctm and io_await are
// present, and are float-valued, after the second gather.
func TestDiskIOUtil(t *testing.T) {
	// Counters served to the first gather.
	firstSample := map[string]disk.IOCountersStat{
		"sda": {
			Name:             "sda",
			SerialNumber:     "ab-123-ad",
			ReadCount:        888,
			WriteCount:       5341,
			ReadBytes:        100000,
			WriteBytes:       200000,
			ReadTime:         7123,
			WriteTime:        9087,
			IoTime:           123552,
			MergedReadCount:  11,
			MergedWriteCount: 12,
		},
	}

	// Counters served to the second gather; every counter has advanced.
	secondSample := map[string]disk.IOCountersStat{
		"sda": {
			Name:             "sda",
			SerialNumber:     "ab-123-ad",
			ReadCount:        1000,
			WriteCount:       6000,
			ReadBytes:        200000,
			WriteBytes:       300000,
			ReadTime:         8123,
			WriteTime:        9187,
			IoTime:           163552,
			MergedReadCount:  16,
			MergedWriteCount: 30,
		},
	}

	var acc testutil.Accumulator

	firstPS := &system.MockPS{}
	firstPS.On("DiskIO").Return(firstSample, nil)

	plugin := &DiskIO{
		Log:     testutil.Logger{},
		Devices: []string{"sd*"},
		ps:      firstPS,
	}
	require.NoError(t, plugin.Init())

	// First gather.
	require.NoError(t, plugin.Gather(&acc))

	// Let wall-clock time pass between the two gathers.
	time.Sleep(1 * time.Second)

	// Second gather, now served by a mock returning the advanced counters.
	secondPS := &system.MockPS{}
	secondPS.On("DiskIO").Return(secondSample, nil)
	plugin.ps = secondPS
	require.NoError(t, plugin.Gather(&acc))

	// All presence checks run before any float-type check.
	presence := []struct {
		field string
		msg   string
	}{
		{"io_util", "miss io util"},
		{"io_svctm", "miss io_svctm"},
		{"io_await", "miss io_await"},
	}
	for _, c := range presence {
		require.True(t, acc.HasField("diskio", c.field), c.msg)
	}

	floatValued := []struct {
		field string
		msg   string
	}{
		{"io_util", "io_util not have value"},
		{"io_svctm", "io_svctm not have value"},
		{"io_await", "io_await not have value"},
	}
	for _, c := range floatValued {
		require.True(t, acc.HasFloatField("diskio", c.field), c.msg)
	}
}
Loading