Skip to content

Commit

Permalink
Add procs running (#33614)
Browse files Browse the repository at this point in the history
* start adding thread counts

* finish adding thread metrics to process_summary

* add changelog

* fix test

* fix CI

* still fixing CI
  • Loading branch information
fearful-symmetry authored and chrisberkhout committed Jun 1, 2023
1 parent 37d1eb3 commit f1efcde
Show file tree
Hide file tree
Showing 8 changed files with 130 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ https://github.com/elastic/beats/compare/v8.2.0\...main[Check the HEAD diff]

- Add Data Granularity option to AWS module to allow for fewer API calls of longer periods and keep small intervals. {issue}33133[33133] {pull}33166[33166]
- Update README file on how to run Metricbeat on Kubernetes. {pull}33308[33308]
- Add thread count metrics (running, blocked) to the system `process_summary` metricset. {pull}33614[33614]

*Packetbeat*

Expand Down
24 changes: 24 additions & 0 deletions metricbeat/docs/fields.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -63063,6 +63063,30 @@ type: long
Number of processes for which the state couldn't be retrieved or is unknown.


type: long

--

[float]
=== threads

Counts of individual threads on a system.


*`system.process.summary.threads.running`*::
+
--
Count of currently running threads.

type: long

--

*`system.process.summary.threads.blocked`*::
+
--
Count of threads blocked by I/O.

type: long

--
Expand Down
2 changes: 1 addition & 1 deletion metricbeat/module/system/fields.go

Large diffs are not rendered by default.

10 changes: 7 additions & 3 deletions metricbeat/module/system/process_summary/_meta/data.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,14 @@
"system": {
"process": {
"summary": {
"idle": 122,
"idle": 110,
"running": 1,
"sleeping": 222,
"total": 345
"sleeping": 138,
"threads": {
"blocked": 0,
"running": 2
},
"total": 249
}
}
}
Expand Down
11 changes: 11 additions & 0 deletions metricbeat/module/system/process_summary/_meta/fields.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,14 @@
type: long
description: >
Number of processes for which the state couldn't be retrieved or is unknown.
- name: threads
title: Process Threads
type: group
description: Counts of individual threads on a system.
fields:
- name: running
type: long
description: Count of currently running threads.
- name: blocked
type: long
description: Count of threads blocked by I/O.
20 changes: 20 additions & 0 deletions metricbeat/module/system/process_summary/_meta/testdata/proc/stat
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
cpu 1958638 6202 264378 107797637 610519 48962 29230 0 0 0
cpu0 202944 915 25801 8936097 52700 4560 2877 0 0 0
cpu1 177481 161 24227 8962725 51442 4397 2880 0 0 0
cpu2 163745 223 22570 8981414 51892 3899 2866 0 0 0
cpu3 166884 667 22302 8982047 49632 3841 1990 0 0 0
cpu4 162061 1532 22361 8983333 51060 4063 2514 0 0 0
cpu5 164845 1064 23137 8982450 47903 4178 2832 0 0 0
cpu6 128551 121 16962 9024912 49464 3859 2271 0 0 0
cpu7 152482 50 19820 8998298 49329 4045 2168 0 0 0
cpu8 160705 494 21774 8988485 48981 4108 2170 0 0 0
cpu9 163737 399 23160 8970031 62795 4192 2188 0 0 0
cpu10 158907 233 20849 8991461 49155 3932 2074 0 0 0
cpu11 156288 339 21410 8996378 46160 3882 2394 0 0 0
intr 87587498 33 0 0 0 0 0 0 0 1 0 285 0 0 0 0 0 0 0 0 0 0 0 34 159 0 0 0 0 0 0 0 0 0 0 0 0 0 45137 0 226 134037 135047 117182 116322 117562 142962 104763 122559 114616 110349 105725 123676 0 0 0 0 0 163501 138328 95724 141345 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
ctxt 104885124
btime 1667843900
processes 319096
procs_running 3
procs_blocked 1
softirq 58311163 7 11951252 388 1078388 45103 0 2624 20249646 917 24982838
51 changes: 48 additions & 3 deletions metricbeat/module/system/process_summary/process_summary.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@
package process_summary

import (
"github.com/pkg/errors"
"fmt"
"io/ioutil"
"runtime"
"strconv"
"strings"

"github.com/elastic/beats/v7/libbeat/common/transform/typeconv"
"github.com/elastic/beats/v7/metricbeat/mb"
Expand Down Expand Up @@ -67,7 +71,7 @@ func (m *MetricSet) Fetch(r mb.ReporterV2) error {

procList, err := process.ListStates(m.sys)
if err != nil {
return errors.Wrap(err, "error fetching process list")
return fmt.Errorf("error fetching process list: %w", err)
}

procStates := map[string]int{}
Expand All @@ -80,7 +84,17 @@ func (m *MetricSet) Fetch(r mb.ReporterV2) error {
}

outMap := mapstr.M{}
typeconv.Convert(&outMap, procStates)
err = typeconv.Convert(&outMap, procStates)
if err != nil {
return fmt.Errorf("error formatting process stats: %w", err)
}
if runtime.GOOS == "linux" {
threads, err := threadStats(m.sys)
if err != nil {
return fmt.Errorf("error fetching thread stats: %w", err)
}
outMap["threads"] = threads
}
outMap["total"] = len(procList)
r.Event(mb.Event{
// change the name space to use . instead of _
Expand All @@ -90,3 +104,34 @@ func (m *MetricSet) Fetch(r mb.ReporterV2) error {

return nil
}

// threadStats returns a map of state counts for running threads on a system.
//
// It reads the file at the path returned by sys.ResolveHostFS("/proc/stat")
// and collects every line of the form "procs_[STATE] [COUNT]" (for example
// "procs_running 3"), storing COUNT as an int64 under the key STATE. Lines
// with fewer than two whitespace-separated fields, or whose first field does
// not start with "procs_", are ignored.
//
// An error is returned if the file cannot be read or if a matching line
// carries a count that is not a valid base-10 integer.
func threadStats(sys resolve.Resolver) (mapstr.M, error) {
	const statePrefix = "procs_"

	statPath := sys.ResolveHostFS("/proc/stat")
	procData, err := ioutil.ReadFile(statPath)
	if err != nil {
		return nil, fmt.Errorf("error reading procfs file %s: %w", statPath, err)
	}

	threadData := mapstr.M{}
	for _, line := range strings.Split(string(procData), "\n") {
		fields := strings.Fields(line)
		if len(fields) < 2 {
			continue
		}
		// Only keys that *begin* with the prefix are thread counters; a
		// substring match would also pick up unrelated keys that merely
		// contain "procs_" somewhere in the middle.
		if !strings.HasPrefix(fields[0], statePrefix) {
			continue
		}
		// Keep everything after the prefix so a multi-part state name is not
		// truncated at its first underscore.
		state := strings.TrimPrefix(fields[0], statePrefix)
		if state == "" {
			// A bare "procs_" has no state name; don't emit an empty key.
			continue
		}
		count, err := strconv.ParseInt(fields[1], 10, 64)
		if err != nil {
			return nil, fmt.Errorf("error parsing value %q for %s from %s: %w", fields[1], fields[0], statPath, err)
		}
		threadData[state] = count
	}
	return threadData, nil
}
21 changes: 18 additions & 3 deletions metricbeat/module/system/process_summary/process_summary_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent-libs/mapstr"
"github.com/elastic/elastic-agent-system-metrics/metric/system/process"
"github.com/elastic/elastic-agent-system-metrics/metric/system/resolve"
)

func TestData(t *testing.T) {
Expand All @@ -42,7 +43,8 @@ func TestData(t *testing.T) {
}

func TestFetch(t *testing.T) {
logp.DevelopmentSetup()
err := logp.DevelopmentSetup()
require.NoError(t, err)
f := mbtest.NewReportingMetricSetV2Error(t, getConfig())
events, errs := mbtest.ReportingFetchV2Error(f)

Expand All @@ -52,13 +54,14 @@ func TestFetch(t *testing.T) {
t.Logf("%s/%s event: %+v", f.Module().Name(), f.Name(),
event.StringToPrint())

_, err := event.GetValue("system.process.summary")
_, err = event.GetValue("system.process.summary")
require.NoError(t, err)

}

func TestStateNames(t *testing.T) {
logp.DevelopmentSetup()
err := logp.DevelopmentSetup()
require.NoError(t, err)
f := mbtest.NewReportingMetricSetV2Error(t, getConfig())
events, errs := mbtest.ReportingFetchV2Error(f)

Expand All @@ -81,6 +84,9 @@ func TestStateNames(t *testing.T) {
if key == "total" {
continue
}
if _, ok := val.(int); !ok {
continue
}
// Check to make sure the values we got actually exist
exists := false
for _, proc := range process.PidStates {
Expand All @@ -97,6 +103,15 @@ func TestStateNames(t *testing.T) {

}

// TestThreads runs threadStats against a test resolver rooted at
// _meta/testdata and verifies the parsed blocked/running thread counts.
func TestThreads(t *testing.T) {
	resolver := resolve.NewTestResolver("_meta/testdata")

	threadCounts, err := threadStats(resolver)
	require.NoError(t, err)

	// Ordered table so the checks run in a fixed sequence.
	expected := []struct {
		state string
		count int64
	}{
		{state: "blocked", count: 1},
		{state: "running", count: 3},
	}
	for _, want := range expected {
		require.Equal(t, want.count, threadCounts[want.state])
	}

	t.Logf("metrics: %#v", threadCounts)
}

func getConfig() map[string]interface{} {
return map[string]interface{}{
"module": "system",
Expand Down

0 comments on commit f1efcde

Please sign in to comment.