diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 1f5127cda30..42685e384ec 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -172,6 +172,7 @@ https://github.com/elastic/beats/compare/v8.2.0\...main[Check the HEAD diff] - Add Data Granularity option to AWS module to allow for for fewer API calls of longer periods and keep small intervals. {issue}33133[33133] {pull}33166[33166] - Update README file on how to run Metricbeat on Kubernetes. {pull}33308[33308] +- Add per-thread metrics to system_summary {pull}33614[33614] *Packetbeat* diff --git a/metricbeat/docs/fields.asciidoc b/metricbeat/docs/fields.asciidoc index 6251287b33c..42151251891 100644 --- a/metricbeat/docs/fields.asciidoc +++ b/metricbeat/docs/fields.asciidoc @@ -63063,6 +63063,30 @@ type: long Number of processes for which the state couldn't be retrieved or is unknown. +type: long + +-- + +[float] +=== threads + +Counts of individual threads on a system. + + +*`system.process.summary.threads.running`*:: ++ +-- +Count of currently running threads. + +type: long + +-- + +*`system.process.summary.threads.blocked`*:: ++ +-- +Count of threads blocked by I/O. + type: long -- diff --git a/metricbeat/module/system/fields.go b/metricbeat/module/system/fields.go index 76225cbdb26..13bc793b2b3 100644 --- a/metricbeat/module/system/fields.go +++ b/metricbeat/module/system/fields.go @@ -32,5 +32,5 @@ func init() { // AssetSystem returns asset data. // This is the base64 encoded zlib format compressed contents of module/system. 
func AssetSystem() string { - return "" + return "" } diff --git a/metricbeat/module/system/process_summary/_meta/data.json b/metricbeat/module/system/process_summary/_meta/data.json index d1af140066f..1f076ab493c 100644 --- a/metricbeat/module/system/process_summary/_meta/data.json +++ b/metricbeat/module/system/process_summary/_meta/data.json @@ -15,10 +15,14 @@ "system": { "process": { "summary": { - "idle": 122, + "idle": 110, "running": 1, - "sleeping": 222, - "total": 345 + "sleeping": 138, + "threads": { + "blocked": 0, + "running": 2 + }, + "total": 249 } } } diff --git a/metricbeat/module/system/process_summary/_meta/fields.yml b/metricbeat/module/system/process_summary/_meta/fields.yml index 9f9fadf8c1b..f1361aff152 100644 --- a/metricbeat/module/system/process_summary/_meta/fields.yml +++ b/metricbeat/module/system/process_summary/_meta/fields.yml @@ -49,3 +49,14 @@ type: long description: > Number of processes for which the state couldn't be retrieved or is unknown. + - name: threads + title: Process Threads + type: group + description: Counts of individual threads on a system. + fields: + - name: running + type: long + description: Count of currently running threads. + - name: blocked + type: long + description: Count of threads blocked by I/O. 
\ No newline at end of file diff --git a/metricbeat/module/system/process_summary/_meta/testdata/proc/stat b/metricbeat/module/system/process_summary/_meta/testdata/proc/stat new file mode 100644 index 00000000000..03f19e5dd46 --- /dev/null +++ b/metricbeat/module/system/process_summary/_meta/testdata/proc/stat @@ -0,0 +1,20 @@ +cpu 1958638 6202 264378 107797637 610519 48962 29230 0 0 0 +cpu0 202944 915 25801 8936097 52700 4560 2877 0 0 0 +cpu1 177481 161 24227 8962725 51442 4397 2880 0 0 0 +cpu2 163745 223 22570 8981414 51892 3899 2866 0 0 0 +cpu3 166884 667 22302 8982047 49632 3841 1990 0 0 0 +cpu4 162061 1532 22361 8983333 51060 4063 2514 0 0 0 +cpu5 164845 1064 23137 8982450 47903 4178 2832 0 0 0 +cpu6 128551 121 16962 9024912 49464 3859 2271 0 0 0 +cpu7 152482 50 19820 8998298 49329 4045 2168 0 0 0 +cpu8 160705 494 21774 8988485 48981 4108 2170 0 0 0 +cpu9 163737 399 23160 8970031 62795 4192 2188 0 0 0 +cpu10 158907 233 20849 8991461 49155 3932 2074 0 0 0 +cpu11 156288 339 21410 8996378 46160 3882 2394 0 0 0 +intr 87587498 33 0 0 0 0 0 0 0 1 0 285 0 0 0 0 0 0 0 0 0 0 0 34 159 0 0 0 0 0 0 0 0 0 0 0 0 0 45137 0 226 134037 135047 117182 116322 117562 142962 104763 122559 114616 110349 105725 123676 0 0 0 0 0 163501 138328 95724 141345 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 104885124 +btime 1667843900 +processes 319096 +procs_running 3 +procs_blocked 1 +softirq 58311163 7 11951252 388 1078388 45103 0 2624 20249646 917 
24982838 diff --git a/metricbeat/module/system/process_summary/process_summary.go b/metricbeat/module/system/process_summary/process_summary.go index 76de5443d9f..2fdf850b530 100644 --- a/metricbeat/module/system/process_summary/process_summary.go +++ b/metricbeat/module/system/process_summary/process_summary.go @@ -21,7 +21,11 @@ package process_summary import ( - "github.com/pkg/errors" + "fmt" + "io/ioutil" + "runtime" + "strconv" + "strings" "github.com/elastic/beats/v7/libbeat/common/transform/typeconv" "github.com/elastic/beats/v7/metricbeat/mb" @@ -67,7 +71,7 @@ func (m *MetricSet) Fetch(r mb.ReporterV2) error { procList, err := process.ListStates(m.sys) if err != nil { - return errors.Wrap(err, "error fetching process list") + return fmt.Errorf("error fetching process list: %w", err) } procStates := map[string]int{} @@ -80,7 +84,17 @@ func (m *MetricSet) Fetch(r mb.ReporterV2) error { } outMap := mapstr.M{} - typeconv.Convert(&outMap, procStates) + err = typeconv.Convert(&outMap, procStates) + if err != nil { + return fmt.Errorf("error formatting process stats: %w", err) + } + if runtime.GOOS == "linux" { + threads, err := threadStats(m.sys) + if err != nil { + return fmt.Errorf("error fetching thread stats: %w", err) + } + outMap["threads"] = threads + } outMap["total"] = len(procList) r.Event(mb.Event{ // change the name space to use . 
instead of _
@@ -90,3 +104,40 @@ func (m *MetricSet) Fetch(r mb.ReporterV2) error {
 
 	return nil
 }
+
+// threadStats reads the host's /proc/stat, located through sys, and returns
+// the counts found on its `procs_<state> <count>` lines keyed by state name
+// (for example "running" and "blocked").
+//
+// Lines with fewer than two fields, or whose first field does not start with
+// "procs_", are skipped. An error is returned, with a nil map, if the file
+// cannot be read or a count cannot be parsed as a base-10 int64.
+func threadStats(sys resolve.Resolver) (mapstr.M, error) {
+	statPath := sys.ResolveHostFS("/proc/stat")
+	procData, err := ioutil.ReadFile(statPath)
+	if err != nil {
+		return nil, fmt.Errorf("error reading procfs file %s: %w", statPath, err)
+	}
+
+	const statePrefix = "procs_"
+	threadData := mapstr.M{}
+	for _, line := range strings.Split(string(procData), "\n") {
+		// look for format procs_[STATE] [COUNT]; match on the prefix, not a
+		// substring, so keys that merely contain "procs_" are ignored
+		fields := strings.Fields(line)
+		if len(fields) < 2 || !strings.HasPrefix(fields[0], statePrefix) {
+			continue
+		}
+		state := strings.TrimPrefix(fields[0], statePrefix)
+		if state == "" {
+			// a bare "procs_" key names no state
+			continue
+		}
+		procsInt, err := strconv.ParseInt(fields[1], 10, 64)
+		if err != nil {
+			return nil, fmt.Errorf("error parsing value %q of %s from %s: %w", fields[1], fields[0], statPath, err)
+		}
+		threadData[state] = procsInt
+	}
+	return threadData, nil
+}
diff --git a/metricbeat/module/system/process_summary/process_summary_test.go b/metricbeat/module/system/process_summary/process_summary_test.go
index eaf03413d2c..6d42704e196 100644
--- a/metricbeat/module/system/process_summary/process_summary_test.go
+++ b/metricbeat/module/system/process_summary/process_summary_test.go
@@ -31,6 +31,7 @@ import (
 	"github.com/elastic/elastic-agent-libs/logp"
 	"github.com/elastic/elastic-agent-libs/mapstr"
 	"github.com/elastic/elastic-agent-system-metrics/metric/system/process"
+	"github.com/elastic/elastic-agent-system-metrics/metric/system/resolve"
 )
 
 func TestData(t *testing.T) {
@@ -42,7 +43,8 @@ func TestData(t *testing.T) {
 }
 
 func TestFetch(t *testing.T) {
-	logp.DevelopmentSetup()
+	err := logp.DevelopmentSetup()
+	require.NoError(t, err)
 	f := mbtest.NewReportingMetricSetV2Error(t, getConfig())
 	events, errs := mbtest.ReportingFetchV2Error(f)
 
@@ -52,13 +54,14 @@ func TestFetch(t *testing.T) {
t.Logf("%s/%s event: %+v", f.Module().Name(), f.Name(), event.StringToPrint()) - _, err := event.GetValue("system.process.summary") + _, err = event.GetValue("system.process.summary") require.NoError(t, err) } func TestStateNames(t *testing.T) { - logp.DevelopmentSetup() + err := logp.DevelopmentSetup() + require.NoError(t, err) f := mbtest.NewReportingMetricSetV2Error(t, getConfig()) events, errs := mbtest.ReportingFetchV2Error(f) @@ -81,6 +84,9 @@ func TestStateNames(t *testing.T) { if key == "total" { continue } + if _, ok := val.(int); !ok { + continue + } // Check to make sure the values we got actually exist exists := false for _, proc := range process.PidStates { @@ -97,6 +103,15 @@ func TestStateNames(t *testing.T) { } +func TestThreads(t *testing.T) { + root := resolve.NewTestResolver("_meta/testdata") + stats, err := threadStats(root) + require.NoError(t, err) + require.Equal(t, int64(1), stats["blocked"]) + require.Equal(t, int64(3), stats["running"]) + t.Logf("metrics: %#v", stats) +} + func getConfig() map[string]interface{} { return map[string]interface{}{ "module": "system",