Skip to content

Commit

Permalink
fix(inputs.procstat): Do not report dead processes as running for orp…
Browse files Browse the repository at this point in the history
…han PID files (#15260)
  • Loading branch information
srebhan authored May 2, 2024
1 parent 60cf977 commit 920f92f
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 24 deletions.
32 changes: 19 additions & 13 deletions plugins/inputs/procstat/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,32 @@ import (
"strconv"
"strings"

"github.com/influxdata/telegraf/filter"
"github.com/shirou/gopsutil/v3/process"

"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/filter"
)

type Filter struct {
Name string `toml:"name"`
PidFiles []string `toml:"pid_files"`
SystemdUnits []string `toml:"systemd_units"`
SupervisorUnits []string `toml:"supervisor_units"`
WinService []string `toml:"win_services"`
CGroups []string `toml:"cgroups"`
Patterns []string `toml:"patterns"`
Users []string `toml:"users"`
Executables []string `toml:"executables"`
ProcessNames []string `toml:"process_names"`
RecursionDepth int `toml:"recursion_depth"`
Name string `toml:"name"`
PidFiles []string `toml:"pid_files"`
SystemdUnits []string `toml:"systemd_units"`
SupervisorUnits []string `toml:"supervisor_units"`
WinService []string `toml:"win_services"`
CGroups []string `toml:"cgroups"`
Patterns []string `toml:"patterns"`
Users []string `toml:"users"`
Executables []string `toml:"executables"`
ProcessNames []string `toml:"process_names"`
RecursionDepth int `toml:"recursion_depth"`
Log telegraf.Logger `toml:"-"`

filterSupervisorUnit string
filterCmds []*regexp.Regexp
filterUser filter.Filter
filterExecutable filter.Filter
filterProcessName filter.Filter
finder *processFinder
}

func (f *Filter) Init() error {
Expand Down Expand Up @@ -80,6 +84,8 @@ func (f *Filter) Init() error {
return fmt.Errorf("compiling process-names filter for %q failed: %w", f.Name, err)
}

// Setup the process finder
f.finder = newProcessFinder(f.Log)
return nil
}

Expand All @@ -89,7 +95,7 @@ func (f *Filter) ApplyFilter() ([]processGroup, error) {
var groups []processGroup
switch {
case len(f.PidFiles) > 0:
g, err := findByPidFiles(f.PidFiles)
g, err := f.finder.findByPidFiles(f.PidFiles)
if err != nil {
return nil, err
}
Expand Down
21 changes: 14 additions & 7 deletions plugins/inputs/procstat/procstat.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ func (p *Procstat) Init() error {

// New-style operations
for i := range p.Filter {
p.Filter[i].Log = p.Log
if err := p.Filter[i].Init(); err != nil {
return fmt.Errorf("initializing filter %d failed: %w", i, err)
}
Expand Down Expand Up @@ -200,17 +201,23 @@ func (p *Procstat) gatherOld(acc telegraf.Accumulator) error {
}
count += len(r.PIDs)
for _, pid := range r.PIDs {
// Check if the process is still running
proc, err := p.createProcess(pid)
if err != nil {
// No problem; process may have ended after we found it or it
// might be delivered from a non-checking source like a PID file
// of a dead process.
continue
}

// Use the cached processes as we need the existing instances
// to compute delta-metrics (e.g. cpu-usage).
proc, found := p.processes[pid]
if !found {
if cached, found := p.processes[pid]; found {
proc = cached
} else {
// We've found a process that was not recorded before so add it
// to the list of processes
proc, err = p.createProcess(pid)
if err != nil {
// No problem; process may have ended after we found it
continue
}

// Assumption: if a process has no name, it probably does not exist
if name, _ := proc.Name(); name == "" {
continue
Expand Down
22 changes: 18 additions & 4 deletions plugins/inputs/procstat/service_finders.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,23 @@ import (
"strconv"
"strings"

"github.com/influxdata/telegraf"
"github.com/shirou/gopsutil/v3/process"
)

func findByPidFiles(paths []string) ([]processGroup, error) {
// processFinder locates processes for the procstat filters and keeps the
// state needed to avoid reporting the same lookup failure repeatedly.
type processFinder struct {
// errPidFiles is keyed by PID-file path; an entry is set to true once a
// failed process lookup for that path has been logged, so the error is
// emitted only the first time it occurs for a given file.
errPidFiles map[string]bool
// log receives the lookup-failure messages.
log telegraf.Logger
}

// newProcessFinder builds a processFinder that reports through the given
// logger and starts with an empty record of already-reported PID files.
func newProcessFinder(log telegraf.Logger) *processFinder {
	finder := new(processFinder)
	finder.log = log
	// The map must be allocated up front; entries are written to it later.
	finder.errPidFiles = map[string]bool{}
	return finder
}

func (f *processFinder) findByPidFiles(paths []string) ([]processGroup, error) {
groups := make([]processGroup, 0, len(paths))
for _, path := range paths {
buf, err := os.ReadFile(path)
Expand All @@ -24,8 +37,9 @@ func findByPidFiles(paths []string) ([]processGroup, error) {
}

p, err := process.NewProcess(int32(pid))
if err != nil {
return nil, fmt.Errorf("failed to find process for PID %d of file %q: %w", pid, path, err)
if err != nil && !f.errPidFiles[path] {
f.log.Errorf("failed to find process for PID %d of file %q: %v", pid, path, err)
f.errPidFiles[path] = true
}
groups = append(groups, processGroup{
processes: []*process.Process{p},
Expand All @@ -46,7 +60,7 @@ func findByCgroups(cgroups []string) ([]processGroup, error) {

files, err := filepath.Glob(path)
if err != nil {
return nil, fmt.Errorf("failed to determin files for cgroup %q: %w", cgroup, err)
return nil, fmt.Errorf("failed to determine files for cgroup %q: %w", cgroup, err)
}

for _, fpath := range files {
Expand Down

0 comments on commit 920f92f

Please sign in to comment.