Skip to content

Commit

Permalink
[metricbeat][system/process, system/process_summary]: mark module as …
Browse files Browse the repository at this point in the history
…healthy if metrics are partially filled (#40565)

* chore: mark module as healthy if metrics are partially filled

* chore: mark module as healthy if metrics are partially filled

* fix: use errors.As

* fix: fix lint

* Update metricbeat/mb/event.go

Co-authored-by: Mauri de Souza Meneguzzo <mauri870@gmail.com>

* fix: changelog

---------

Co-authored-by: Mauri de Souza Meneguzzo <mauri870@gmail.com>
(cherry picked from commit 1f033c9)

# Conflicts:
#	metricbeat/mb/module/wrapper.go
#	metricbeat/module/system/process_summary/process_summary.go
  • Loading branch information
VihasMakwana authored and mergify[bot] committed Aug 23, 2024
1 parent 9d353b4 commit 34548c3
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 0 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
*Metricbeat*

- Set the counter cache period for Prometheus remote_write to at least 60 seconds. {pull}38553[38553]
- Add support for the Graphite series 1.1.0+ tagging extension in the statsd module. {pull}39619[39619]
- Allow metricsets to report their status via control v2 protocol. {pull}40025[40025]
- Remove fallback to the node limit for the `kubernetes.pod.cpu.usage.limit.pct` and `kubernetes.pod.memory.usage.limit.pct` metrics calculation
- Add support for Kibana status metricset in v8 format {pull}40275[40275]
- Add new metrics for the vSphere Datastore metricset. {pull}40441[40441]
- Update metrics for the vSphere Host metricset. {pull}40429[40429]
- Mark system process metricsets as running if metrics are partially available {pull}40565[40565]
- Added back `elasticsearch.node.stats.jvm.mem.pools.*` to the `node_stats` metricset {pull}40571[40571]

*Osquerybeat*
Expand Down
15 changes: 15 additions & 0 deletions metricbeat/mb/event.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,18 @@ func tryToMapStr(v interface{}) (mapstr.M, bool) {
return nil, false
}
}

// PartialMetricsError indicates that metrics are only partially filled.
// It wraps the underlying error so that callers can detect the partial
// case with errors.As while the wrapped cause stays reachable through
// Unwrap.
//
// This will be removed once we fix the underlying problem.
// See https://github.com/elastic/beats/issues/40542 for more details.
type PartialMetricsError struct {
	// Err is the wrapped error.
	Err error
}

// Error returns the message of the wrapped error. When Err is nil (for
// example on the zero value) it returns a generic message instead of
// panicking on a nil interface method call.
func (p PartialMetricsError) Error() string {
	if p.Err == nil {
		return "metrics are only partially filled"
	}
	return p.Err.Error()
}

// Unwrap returns the wrapped error, or nil when none is set, so that
// errors.Is and errors.As can inspect the cause.
func (p PartialMetricsError) Unwrap() error {
	return p.Err
}
21 changes: 21 additions & 0 deletions metricbeat/mb/module/wrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package module

import (
"context"
"errors"
"fmt"
"math/rand"
"sync"
Expand Down Expand Up @@ -253,13 +254,33 @@ func (msw *metricSetWrapper) fetch(ctx context.Context, reporter reporter) {
err := fetcher.Fetch(reporter.V2())
if err != nil {
reporter.V2().Error(err)
<<<<<<< HEAD

Check failure on line 257 in metricbeat/mb/module/wrapper.go

View workflow job for this annotation

GitHub Actions / lint (darwin)

expected statement, found '<<' (typecheck)
=======
if errors.As(err, &mb.PartialMetricsError{}) {
// mark module as running if metrics are partially available and display the error message
msw.module.UpdateStatus(status.Running, fmt.Sprintf("Error fetching data for metricset %s.%s: %v", msw.module.Name(), msw.MetricSet.Name(), err))
} else {
// mark it as degraded for any other issue encountered
msw.module.UpdateStatus(status.Degraded, fmt.Sprintf("Error fetching data for metricset %s.%s: %v", msw.module.Name(), msw.MetricSet.Name(), err))
}
>>>>>>> 1f033c9eed ([metricbeat][system/process, system/process_summary]: mark module as healthy if metrics are partially filled (#40565))

Check failure on line 266 in metricbeat/mb/module/wrapper.go

View workflow job for this annotation

GitHub Actions / lint (darwin)

expected statement, found '>>' (typecheck)
logp.Err("Error fetching data for metricset %s.%s: %s", msw.module.Name(), msw.Name(), err)
}
case mb.ReportingMetricSetV2WithContext:
reporter.StartFetchTimer()
err := fetcher.Fetch(ctx, reporter.V2())
if err != nil {

Check failure on line 272 in metricbeat/mb/module/wrapper.go

View workflow job for this annotation

GitHub Actions / lint (darwin)

expected ')', found 'if' (typecheck)
reporter.V2().Error(err)
<<<<<<< HEAD
=======
if errors.As(err, &mb.PartialMetricsError{}) {
// mark module as running if metrics are partially available and display the error message
msw.module.UpdateStatus(status.Running, fmt.Sprintf("Error fetching data for metricset %s.%s: %v", msw.module.Name(), msw.MetricSet.Name(), err))
} else {
// mark it as degraded for any other issue encountered
msw.module.UpdateStatus(status.Degraded, fmt.Sprintf("Error fetching data for metricset %s.%s: %v", msw.module.Name(), msw.MetricSet.Name(), err))
}
>>>>>>> 1f033c9eed ([metricbeat][system/process, system/process_summary]: mark module as healthy if metrics are partially filled (#40565))

Check failure on line 283 in metricbeat/mb/module/wrapper.go

View workflow job for this annotation

GitHub Actions / lint (darwin)

expected statement, found '>>' (typecheck)
logp.Err("Error fetching data for metricset %s.%s: %s", msw.module.Name(), msw.Name(), err)
}
default:
Expand Down
2 changes: 2 additions & 0 deletions metricbeat/module/system/process/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ func (m *MetricSet) Fetch(r mb.ReporterV2) error {
procs, roots, err := m.stats.Get()
if err != nil {
return fmt.Errorf("process stats: %w", err)
} else if (err != nil && errors.Is(err, process.NonFatalErr{})) {
err = mb.PartialMetricsError{Err: err}
}

for evtI := range procs {
Expand Down
9 changes: 9 additions & 0 deletions metricbeat/module/system/process_summary/process_summary.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,18 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) {
// descriptive error must be returned.
func (m *MetricSet) Fetch(r mb.ReporterV2) error {

<<<<<<< HEAD

Check failure on line 71 in metricbeat/module/system/process_summary/process_summary.go

View workflow job for this annotation

GitHub Actions / lint (darwin)

expected statement, found '<<' (typecheck)
procList, err := process.ListStates(m.sys)
if err != nil {
return fmt.Errorf("error fetching process list: %w", err)
=======
procList, degradeErr := process.ListStates(m.sys)
if degradeErr != nil && !errors.Is(degradeErr, process.NonFatalErr{}) {
// return only if the error is fatal in nature
return fmt.Errorf("error fetching process list: %w", degradeErr)
} else if (degradeErr != nil && errors.Is(degradeErr, process.NonFatalErr{})) {
degradeErr = mb.PartialMetricsError{Err: degradeErr}
>>>>>>> 1f033c9eed ([metricbeat][system/process, system/process_summary]: mark module as healthy if metrics are partially filled (#40565))
}

procStates := map[string]int{}
Expand Down

0 comments on commit 34548c3

Please sign in to comment.