Skip to content

Commit

Permalink
fix(inputs.nvidia_smi): Add process info metrics (#15023)
Browse files: browse the repository at this point in the history
  • Loading branch information
powersj authored Mar 20, 2024
1 parent fcb713a commit 9080580
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 4 deletions.
70 changes: 70 additions & 0 deletions plugins/inputs/nvidia_smi/nvidia_smi_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,76 @@ func TestGatherValidXML(t *testing.T) {
"vbios_version": "94.02.71.40.72",
},
time.Unix(1689872450, 0)),
testutil.MustMetric(
"nvidia_smi_process",
map[string]string{
"name": "/usr/lib/Xorg",
"type": "G",
},
map[string]interface{}{
"pid": int64(835),
"used_memory": int64(550),
},
time.Unix(1689872450, 0)),
testutil.MustMetric(
"nvidia_smi_process",
map[string]string{
"name": "/usr/bin/gnome-shell",
"type": "G",
},
map[string]interface{}{
"pid": int64(1481),
"used_memory": int64(18),
},
time.Unix(1689872450, 0)),
testutil.MustMetric(
"nvidia_smi_process",
map[string]string{
"name": "/opt/microsoft/msedge/msedge --type=gpu-process " +
"--crashpad-handler-pid=2176 --enable-crash-reporter=, " +
"--change-stack-guard-on-fork=enable --gpu-preferences=" +
"WAAAAAAAAAAgAAAEAAAAAAAAAAAAAAAAAABgAAAAAAA4AAAAAAAAAAA" +
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAGAAAAAAAAAAYAA" +
"AAAAAAAAgAAAAAAAAACAAAAAAAAAAIAAAAAAAAAA== --shared-files " +
"--field-trial-handle=0,i,3110290512380155730," +
"7457693378709978105,262144 --variations-seed-version",
"type": "G",
},
map[string]interface{}{
"pid": int64(2214),
"used_memory": int64(79),
},
time.Unix(1689872450, 0)),
testutil.MustMetric(
"nvidia_smi_process",
map[string]string{
"name": "/usr/lib/firefox/firefox",
"type": "G",
},
map[string]interface{}{
"pid": int64(4044),
"used_memory": int64(541),
},
time.Unix(1689872450, 0)),
testutil.MustMetric(
"nvidia_smi_process",
map[string]string{
"name": "/opt/visual-studio-code/code --type=gpu-process " +
"--enable-crash-reporter=6f39585a-ecc4-42e2-b899-9456cbe56b44" +
",no_channel --user-data-dir=/home/powersj/.config/Code " +
"--gpu-preferences=WAAAAAAAAAAgAAAEAAAAAAAAAAAAAAAAAABg" +
"AAAAAAA4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" +
"ABAAAAGAAAAAAAAAAYAAAAAAAAAAgAAAAAAAAACAAAAAAAAAAIAAAAAAAAAA== " +
"--shared-files --field-trial-handle=0,i,685715063932313394," +
"4769839452661094675,262144 --disable-features=" +
"CalculateNativeWinOcclusion,SpareRendererForSitePerProcess",
"type": "G",
},
map[string]interface{}{
"pid": int64(42416),
"used_memory": int64(159),
},
time.Unix(1689872450, 0)),
},
},
{
Expand Down
12 changes: 12 additions & 0 deletions plugins/inputs/nvidia_smi/schema_v12/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,18 @@ func Parse(acc telegraf.Accumulator, buf []byte) error {

acc.AddFields("nvidia_smi_mig", fields, tags, timestamp)
}

// Emit one "nvidia_smi_process" metric for every process_info entry that
// nvidia-smi reported for the current GPU.
for _, process := range gpu.Processes.ProcessInfo {
// Tags identify the process by its reported name and its type string.
// NOTE(review): the name is whatever nvidia-smi printed; the bundled test
// data shows it can be a full command line, so tag values may be long.
tags := map[string]string{}
common.SetTagIfUsed(tags, "name", process.ProcessName)
common.SetTagIfUsed(tags, "type", process.Type)

// Fields carry the numeric values; both arrive as strings from the XML.
// NOTE(review): presumably the "int" mode converts the leading integer and
// drops a trailing unit such as " MiB" -- confirm against common.SetIfUsed.
// NOTE(review): pid is a field, not a tag, and no GPU identifier is added
// here, so two processes sharing name and type yield the same tag set at
// the same timestamp -- verify this is intended.
fields := map[string]interface{}{}
common.SetIfUsed("int", fields, "pid", process.Pid)
common.SetIfUsed("int", fields, "used_memory", process.UsedMemory)

// All process metrics of one parse share the same timestamp value.
acc.AddFields("nvidia_smi_process", fields, tags, timestamp)
}
}

return nil
Expand Down
15 changes: 11 additions & 4 deletions plugins/inputs/nvidia_smi/schema_v12/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,17 @@ type smi struct {
MinPowerLimit string `xml:"min_power_limit"`
MaxPowerLimit string `xml:"max_power_limit"`
} `xml:"power_readings"`
Processes struct{} `xml:"processes"`
ProductArchitecture string `xml:"product_architecture"`
ProductBrand string `xml:"product_brand"`
ProductName string `xml:"product_name"`
// Processes maps the <processes> element; each <process_info> child becomes
// one entry of ProcessInfo. All values are kept as raw strings exactly as
// they appear in the XML and are converted later by the parser.
// NOTE(review): the sample XML also contains <gpu_instance_id> and
// <compute_instance_id> children, which have no field here and so are not
// captured -- confirm that is intended.
Processes struct {
ProcessInfo []struct {
// Pid is the text of <pid>.
Pid string `xml:"pid"`
// Type is the text of <type> (the sample data only shows "G").
Type string `xml:"type"`
// ProcessName is the text of <process_name>.
ProcessName string `xml:"process_name"`
// UsedMemory is the text of <used_memory>, unit included (e.g. "550 MiB").
UsedMemory string `xml:"used_memory"`
} `xml:"process_info"`
} `xml:"processes"`
ProductArchitecture string `xml:"product_architecture"`
ProductBrand string `xml:"product_brand"`
ProductName string `xml:"product_name"`
RemappedRows struct {
// Manually added
Correctable string `xml:"remapped_row_corr"`
Expand Down
40 changes: 40 additions & 0 deletions plugins/inputs/nvidia_smi/testdata/rtx-3080-v12.xml
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,46 @@
</supported_mem_clock>
</supported_clocks>
<processes>
<process_info>
<gpu_instance_id>N/A</gpu_instance_id>
<compute_instance_id>N/A</compute_instance_id>
<pid>835</pid>
<type>G</type>
<process_name>/usr/lib/Xorg</process_name>
<used_memory>550 MiB</used_memory>
</process_info>
<process_info>
<gpu_instance_id>N/A</gpu_instance_id>
<compute_instance_id>N/A</compute_instance_id>
<pid>1481</pid>
<type>G</type>
<process_name>/usr/bin/gnome-shell</process_name>
<used_memory>18 MiB</used_memory>
</process_info>
<process_info>
<gpu_instance_id>N/A</gpu_instance_id>
<compute_instance_id>N/A</compute_instance_id>
<pid>2214</pid>
<type>G</type>
<process_name>/opt/microsoft/msedge/msedge --type=gpu-process --crashpad-handler-pid=2176 --enable-crash-reporter=, --change-stack-guard-on-fork=enable --gpu-preferences=WAAAAAAAAAAgAAAEAAAAAAAAAAAAAAAAAABgAAAAAAA4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAGAAAAAAAAAAYAAAAAAAAAAgAAAAAAAAACAAAAAAAAAAIAAAAAAAAAA== --shared-files --field-trial-handle=0,i,3110290512380155730,7457693378709978105,262144 --variations-seed-version</process_name>
<used_memory>79 MiB</used_memory>
</process_info>
<process_info>
<gpu_instance_id>N/A</gpu_instance_id>
<compute_instance_id>N/A</compute_instance_id>
<pid>4044</pid>
<type>G</type>
<process_name>/usr/lib/firefox/firefox</process_name>
<used_memory>541 MiB</used_memory>
</process_info>
<process_info>
<gpu_instance_id>N/A</gpu_instance_id>
<compute_instance_id>N/A</compute_instance_id>
<pid>42416</pid>
<type>G</type>
<process_name>/opt/visual-studio-code/code --type=gpu-process --enable-crash-reporter=6f39585a-ecc4-42e2-b899-9456cbe56b44,no_channel --user-data-dir=/home/powersj/.config/Code --gpu-preferences=WAAAAAAAAAAgAAAEAAAAAAAAAAAAAAAAAABgAAAAAAA4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAGAAAAAAAAAAYAAAAAAAAAAgAAAAAAAAACAAAAAAAAAAIAAAAAAAAAA== --shared-files --field-trial-handle=0,i,685715063932313394,4769839452661094675,262144 --disable-features=CalculateNativeWinOcclusion,SpareRendererForSitePerProcess</process_name>
<used_memory>159 MiB</used_memory>
</process_info>
</processes>
<accounted_processes>
</accounted_processes>
Expand Down

0 comments on commit 9080580

Please sign in to comment.