Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(inputs.procstat): Do not report dead processes as running for orphan PID files #15260

Merged
merged 1 commit into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 19 additions & 13 deletions plugins/inputs/procstat/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,32 @@ import (
"strconv"
"strings"

"github.com/influxdata/telegraf/filter"
"github.com/shirou/gopsutil/v3/process"

"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/filter"
)

type Filter struct {
Name string `toml:"name"`
PidFiles []string `toml:"pid_files"`
SystemdUnits []string `toml:"systemd_units"`
SupervisorUnits []string `toml:"supervisor_units"`
WinService []string `toml:"win_services"`
CGroups []string `toml:"cgroups"`
Patterns []string `toml:"patterns"`
Users []string `toml:"users"`
Executables []string `toml:"executables"`
ProcessNames []string `toml:"process_names"`
RecursionDepth int `toml:"recursion_depth"`
Name string `toml:"name"`
PidFiles []string `toml:"pid_files"`
SystemdUnits []string `toml:"systemd_units"`
SupervisorUnits []string `toml:"supervisor_units"`
WinService []string `toml:"win_services"`
CGroups []string `toml:"cgroups"`
Patterns []string `toml:"patterns"`
Users []string `toml:"users"`
Executables []string `toml:"executables"`
ProcessNames []string `toml:"process_names"`
RecursionDepth int `toml:"recursion_depth"`
Log telegraf.Logger `toml:"-"`

filterSupervisorUnit string
filterCmds []*regexp.Regexp
filterUser filter.Filter
filterExecutable filter.Filter
filterProcessName filter.Filter
finder *processFinder
}

func (f *Filter) Init() error {
Expand Down Expand Up @@ -80,6 +84,8 @@ func (f *Filter) Init() error {
return fmt.Errorf("compiling process-names filter for %q failed: %w", f.Name, err)
}

// Setup the process finder
f.finder = newProcessFinder(f.Log)
return nil
}

Expand All @@ -89,7 +95,7 @@ func (f *Filter) ApplyFilter() ([]processGroup, error) {
var groups []processGroup
switch {
case len(f.PidFiles) > 0:
g, err := findByPidFiles(f.PidFiles)
g, err := f.finder.findByPidFiles(f.PidFiles)
if err != nil {
return nil, err
}
Expand Down
21 changes: 14 additions & 7 deletions plugins/inputs/procstat/procstat.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ func (p *Procstat) Init() error {

// New-style operations
for i := range p.Filter {
p.Filter[i].Log = p.Log
if err := p.Filter[i].Init(); err != nil {
return fmt.Errorf("initializing filter %d failed: %w", i, err)
}
Expand Down Expand Up @@ -200,17 +201,23 @@ func (p *Procstat) gatherOld(acc telegraf.Accumulator) error {
}
count += len(r.PIDs)
for _, pid := range r.PIDs {
// Check if the process is still running
proc, err := p.createProcess(pid)
if err != nil {
// No problem; process may have ended after we found it or it
// might be delivered from a non-checking source like a PID file
// of a dead process.
continue
}

// Use the cached processes as we need the existing instances
// to compute delta-metrics (e.g. cpu-usage).
proc, found := p.processes[pid]
if !found {
if cached, found := p.processes[pid]; found {
proc = cached
} else {
// We've found a process that was not recorded before so add it
// to the list of processes
proc, err = p.createProcess(pid)
if err != nil {
// No problem; process may have ended after we found it
continue
}

// Assumption: if a process has no name, it probably does not exist
if name, _ := proc.Name(); name == "" {
continue
Expand Down
22 changes: 18 additions & 4 deletions plugins/inputs/procstat/service_finders.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,23 @@ import (
"strconv"
"strings"

"github.com/influxdata/telegraf"
"github.com/shirou/gopsutil/v3/process"
)

func findByPidFiles(paths []string) ([]processGroup, error) {
type processFinder struct {
errPidFiles map[string]bool
log telegraf.Logger
}

func newProcessFinder(log telegraf.Logger) *processFinder {
return &processFinder{
errPidFiles: make(map[string]bool),
log: log,
}
}

func (f *processFinder) findByPidFiles(paths []string) ([]processGroup, error) {
groups := make([]processGroup, 0, len(paths))
for _, path := range paths {
buf, err := os.ReadFile(path)
Expand All @@ -24,8 +37,9 @@ func findByPidFiles(paths []string) ([]processGroup, error) {
}

p, err := process.NewProcess(int32(pid))
if err != nil {
return nil, fmt.Errorf("failed to find process for PID %d of file %q: %w", pid, path, err)
if err != nil && !f.errPidFiles[path] {
f.log.Errorf("failed to find process for PID %d of file %q: %v", pid, path, err)
f.errPidFiles[path] = true
}
groups = append(groups, processGroup{
processes: []*process.Process{p},
Expand All @@ -46,7 +60,7 @@ func findByCgroups(cgroups []string) ([]processGroup, error) {

files, err := filepath.Glob(path)
if err != nil {
return nil, fmt.Errorf("failed to determin files for cgroup %q: %w", cgroup, err)
return nil, fmt.Errorf("failed to determine files for cgroup %q: %w", cgroup, err)
}

for _, fpath := range files {
Expand Down
Loading