Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(inputs.ipmi_sensor): Collect additional commands #15495

Merged
merged 4 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 38 additions & 21 deletions plugins/inputs/ipmi_sensor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,49 +44,66 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
```toml @sample.conf
# Read metrics from the bare metal servers via IPMI
[[inputs.ipmi_sensor]]
## optionally specify the path to the ipmitool executable
## Specify the path to the ipmitool executable
# path = "/usr/bin/ipmitool"
##

## Use sudo
## Setting 'use_sudo' to true will make use of sudo to run ipmitool.
## Sudo must be configured to allow the telegraf user to run ipmitool
## without a password.
# use_sudo = false
##
## optionally force session privilege level. Can be CALLBACK, USER, OPERATOR, ADMINISTRATOR
# privilege = "ADMINISTRATOR"
##
## optionally specify one or more servers via a url matching

## Servers
## Specify one or more servers via a url. If no servers are specified, local
## machine sensor stats will be queried. Uses the format:
## [username[:password]@][protocol[(address)]]
## e.g.
## root:passwd@lan(127.0.0.1)
##
## if no servers are specified, local machine sensor stats will be queried
##
## e.g. root:passwd@lan(127.0.0.1)
# servers = ["USERID:PASSW0RD@lan(192.168.1.1)"]

## Recommended: use metric 'interval' that is a multiple of 'timeout' to avoid
## gaps or overlap in pulled data
interval = "30s"
## Session privilege level
## Choose from: CALLBACK, USER, OPERATOR, ADMINISTRATOR
# privilege = "ADMINISTRATOR"

## Timeout
## Timeout for the ipmitool command to complete.
# timeout = "20s"

## Timeout for the ipmitool command to complete. Default is 20 seconds.
timeout = "20s"
## Metric schema version
## See the plugin readme for more information on schema versioning.
# metric_version = 1

## Schema Version: (Optional, defaults to version 1)
metric_version = 2
## Sensors to collect
## Choose from:
## * sdr: default, collects sensor data records
## * chassis_power_status: collects the power status of the chassis
## * dcmi_power_reading: collects the power readings from the Data Center Management Interface
# sensors = ["sdr"]

## Hex key
## Optionally provide the hex key for the IPMI connection.
# hex_key = ""

## Cache
## If ipmitool should use a cache
## for me ipmitool runs about 2 to 10 times faster with cache enabled on HP G10 servers (when using ubuntu20.04)
## the cache file may not work well for you if some sensors come up late
## Using a cache can speed up collection times depending on your device.
# use_cache = false

## Path to the ipmitools cache file (defaults to OS temp dir)
## The provided path must exist and must be writable
# cache_path = ""
```

## Sensors

By default the plugin collects data via the `sdr` command and returns those
values. However, there are additional sensor options that can be called on:

- `chassis_power_status` - returns 0 or 1 depending on the output of
`chassis power status`
- `dcmi_power_reading` - Returns the watt values from `dcmi power reading`

These sensor options are not affected by the metric version.

## Metrics

Version 1 schema:
Expand Down
137 changes: 114 additions & 23 deletions plugins/inputs/ipmi_sensor/ipmi_sensor.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/internal/choice"
"github.com/influxdata/telegraf/plugins/inputs"
)

Expand All @@ -31,21 +32,22 @@ var (
reV2ParseLine = regexp.MustCompile(`^(?P<name>[^|]*)\|[^|]+\|(?P<status_code>[^|]*)\|(?P<entity_id>[^|]*)\|(?:(?P<description>[^|]+))?`)
reV2ParseDescription = regexp.MustCompile(`^(?P<analogValue>-?[0-9.]+)\s(?P<analogUnit>.*)|(?P<status>.+)|^$`)
reV2ParseUnit = regexp.MustCompile(`^(?P<realAnalogUnit>[^,]+)(?:,\s*(?P<statusDesc>.*))?`)
dcmiPowerReading = regexp.MustCompile(`^(?P<name>[^|]*)\:(?P<value>.* Watts)?`)
)

// Ipmi stores the configuration values for the ipmi_sensor input plugin
type Ipmi struct {
Path string
Privilege string
HexKey string `toml:"hex_key"`
Servers []string
Timeout config.Duration
MetricVersion int
UseSudo bool
UseCache bool
CachePath string

Log telegraf.Logger `toml:"-"`
Path string `toml:"path"`
Privilege string `toml:"privilege"`
HexKey string `toml:"hex_key"`
Servers []string `toml:"servers"`
Sensors []string `toml:"sensors"`
Timeout config.Duration `toml:"timeout"`
MetricVersion int `toml:"metric_version"`
UseSudo bool `toml:"use_sudo"`
UseCache bool `toml:"use_cache"`
CachePath string `toml:"cache_path"`
Log telegraf.Logger `toml:"-"`
}

const cmd = "ipmitool"
Expand All @@ -66,6 +68,12 @@ func (m *Ipmi) Init() error {
if m.CachePath == "" {
m.CachePath = os.TempDir()
}
if len(m.Sensors) == 0 {
m.Sensors = []string{"sdr"}
}
if err := choice.CheckSlice(m.Sensors, []string{"sdr", "chassis_power_status", "dcmi_power_reading"}); err != nil {
return err
}

// Check parameters
if m.Path == "" {
Expand All @@ -87,32 +95,47 @@ func (m *Ipmi) Gather(acc telegraf.Accumulator) error {
wg.Add(1)
go func(a telegraf.Accumulator, s string) {
defer wg.Done()
err := m.parse(a, s)
if err != nil {
a.AddError(err)
for _, sensor := range m.Sensors {
a.AddError(m.parse(a, s, sensor))
}
}(acc, server)
}
wg.Wait()
} else {
err := m.parse(acc, "")
if err != nil {
return err
for _, sensor := range m.Sensors {
err := m.parse(acc, "", sensor)
if err != nil {
return err
}
}
}

return nil
}

func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
func (m *Ipmi) parse(acc telegraf.Accumulator, server string, sensor string) error {
var command []string
switch sensor {
case "sdr":
command = append(command, "sdr")
case "chassis_power_status":
command = append(command, "chassis", "power", "status")
case "dcmi_power_reading":
command = append(command, "dcmi", "power", "reading")
default:
return fmt.Errorf("unknown sensor type %q", sensor)
}
powersj marked this conversation as resolved.
Show resolved Hide resolved

opts := make([]string, 0)
hostname := ""
if server != "" {
conn := NewConnection(server, m.Privilege, m.HexKey)
hostname = conn.Hostname
opts = conn.options()
}
opts = append(opts, "sdr")

opts = append(opts, command...)

if m.UseCache {
cacheFile := filepath.Join(m.CachePath, server+"_ipmi_cache")
_, err := os.Stat(cacheFile)
Expand All @@ -134,7 +157,7 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
}
opts = append(opts, "-S", cacheFile)
}
if m.MetricVersion == 2 {
if m.MetricVersion == 2 && sensor == "sdr" {
opts = append(opts, "elist")
}
name := m.Path
Expand All @@ -149,10 +172,78 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
if err != nil {
return fmt.Errorf("failed to run command %q: %w - %s", strings.Join(sanitizeIPMICmd(cmd.Args), " "), err, string(out))
}
if m.MetricVersion == 2 {
return m.parseV2(acc, hostname, out, timestamp)

switch sensor {
case "sdr":
if m.MetricVersion == 2 {
return m.parseV2(acc, hostname, out, timestamp)
} else {
return m.parseV1(acc, hostname, out, timestamp)
}
case "chassis_power_status":
return m.parseChassisPowerStatus(acc, hostname, out, timestamp)
case "dcmi_power_reading":
return m.parseDCMIPowerReading(acc, hostname, out, timestamp)
}

return fmt.Errorf("unknown sensor type %q", sensor)
}

// parseChassisPowerStatus reads the output of `chassis power status` line by
// line and reports the chassis power state as an "ipmi_sensor" metric named
// "chassis_power_status": value 1 for a line containing "Chassis Power is on",
// value 0 for a line containing "Chassis Power is off". Any other line is
// ignored. The "server" tag is always set to hostname. The returned error is
// whatever the line scanner reports.
func (m *Ipmi) parseChassisPowerStatus(acc telegraf.Accumulator, hostname string, cmdOut []byte, measuredAt time.Time) error {
	lines := bufio.NewScanner(bytes.NewReader(cmdOut))
	for lines.Scan() {
		text := lines.Text()

		// Map the textual state onto the numeric value; "on" is checked first.
		var state int
		switch {
		case strings.Contains(text, "Chassis Power is on"):
			state = 1
		case strings.Contains(text, "Chassis Power is off"):
			state = 0
		default:
			// Not a power status line, nothing to report.
			continue
		}

		tags := map[string]string{"name": "chassis_power_status", "server": hostname}
		fields := map[string]interface{}{"value": state}
		acc.AddFields("ipmi_sensor", fields, tags, measuredAt)
	}

	return lines.Err()
}

// parseDCMIPowerReading scans cmdOut line by line and adds one "ipmi_sensor"
// metric for every line that matches the dcmiPowerReading expression and whose
// value splits into exactly a number and a unit. Lines that do not match or
// cannot be parsed are skipped without returning an error; the returned error
// is whatever the line scanner reports.
func (m *Ipmi) parseDCMIPowerReading(acc telegraf.Accumulator, hostname string, cmdOut []byte, measuredAt time.Time) error {
// each line will look something like
// Current Power Reading : 0.000
// NOTE(review): dcmiPowerReading only captures a value ending in " Watts", so
// the sample above presumably omits the unit - confirm against real output.
scanner := bufio.NewScanner(bytes.NewReader(cmdOut))
for scanner.Scan() {
ipmiFields := m.extractFieldsFromRegex(dcmiPowerReading, scanner.Text())
// Only lines that yield exactly two extracted entries are used
// (presumably the "name" and "value" groups - helper is defined elsewhere).
if len(ipmiFields) != 2 {
continue
}

tags := map[string]string{
"name": transform(ipmiFields["name"]),
}

// tag the server if we have one
if hostname != "" {
tags["server"] = hostname
}

fields := make(map[string]interface{})
// Expect "<number> <unit>"; anything else is skipped.
valunit := strings.Split(ipmiFields["value"], " ")
if len(valunit) != 2 {
continue
}

var err error
fields["value"], err = aToFloat(valunit[0])
if err != nil {
// Value is not numeric, skip this line.
continue
}
// NOTE(review): the next two lines look like review-page residue from the
// scraped diff view, not Go source.
srebhan marked this conversation as resolved.
Show resolved Hide resolved
// Always true at this point because len(valunit) == 2 was enforced above.
if len(valunit) > 1 {
tags["unit"] = transform(valunit[1])
}

acc.AddFields("ipmi_sensor", fields, tags, measuredAt)
}
// NOTE(review): the following return looks like a removed diff line that
// belonged to parse (it references out/timestamp, which are not defined
// in this function) - confirm against the merged file.
return m.parseV1(acc, hostname, out, timestamp)

return scanner.Err()
}

func (m *Ipmi) parseV1(acc telegraf.Accumulator, hostname string, cmdOut []byte, measuredAt time.Time) error {
Expand Down
Loading
Loading