Skip to content

Commit

Permalink
[receiver/windowsperfcounters] fix: Drop metrics with empty datapoint…
Browse files Browse the repository at this point in the history
…s. (#32384)

**Description:** When scraping Windows Performance Counters, it's
possible that some counter objects do not exist. When that is the case,
`windowsperfcounters` will still create the `Metric` object with no
datapoints in it. Some exporters throw errors when encountering this.
The fix proposed in this PR does an extra pass after all metrics have
been scraped and removes the `Metric` objects for which no datapoints
were scraped.

**Link to tracking Issue:** #4972 

**Testing:** 
- Confirmed that the `debug` exporter sees `ResourceMetrics` with no metrics
and doesn't throw.
- Confirmed that the `prometheusremotewrite` exporter no longer complains
about empty datapoints and that it skips the export when no metrics are
available.
- ~No unit tests added for now. I will add a unit test once I have
confirmation that this is the right way to remove empty datapoints~ Added
a unit test covering the changes and enabled fixture validation, which
was not previously implemented.
  • Loading branch information
alxbl committed Jul 15, 2024
1 parent e2293ba commit f34e5ee
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 52 deletions.
6 changes: 6 additions & 0 deletions .chloggen/windowsperfcounters-empty-datapoints.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
change_type: bug_fix
component: windowsperfcountersreceiver
note: Metric definitions with no matching performance counter are no longer included as metrics with zero datapoints in the scrape output.
issues: [4972]
subtext:
change_logs: [user]
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
resourceMetrics:
- resource: {}
scopeMetrics:
- metrics:
- description: percentage of time CPU is idle.
gauge:
dataPoints:
- asDouble: 0
timeUnixNano: "1646857199239674900"
name: cpu.idle
unit: '%'
# Should not be present in the scrape output.
# - name: no.counter
scope: {}
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ resourceMetrics:
- asInt: "25089622016"
timeUnixNano: "1647459021285009300"
name: \Memory\Committed Bytes
unit: '1'
scope: {}
Original file line number Diff line number Diff line change
Expand Up @@ -11,48 +11,6 @@ resourceMetrics:
value:
stringValue: "0"
timeUnixNano: "1646857199239674900"
- asDouble: 0
attributes:
- key: instance
value:
stringValue: "1"
timeUnixNano: "1646857199239674900"
- asDouble: 0
attributes:
- key: instance
value:
stringValue: "2"
timeUnixNano: "1646857199239674900"
- asDouble: 0
attributes:
- key: instance
value:
stringValue: "3"
timeUnixNano: "1646857199239674900"
- asDouble: 0
attributes:
- key: instance
value:
stringValue: "4"
timeUnixNano: "1646857199239674900"
- asDouble: 0
attributes:
- key: instance
value:
stringValue: "5"
timeUnixNano: "1646857199239674900"
- asDouble: 0
attributes:
- key: instance
value:
stringValue: "6"
timeUnixNano: "1646857199239674900"
- asDouble: 0
attributes:
- key: instance
value:
stringValue: "7"
timeUnixNano: "1646857199239674900"
name: cpu.idle
unit: '%'
- description: number of bytes committed to memory
Expand All @@ -71,12 +29,6 @@ resourceMetrics:
value:
stringValue: "1"
timeUnixNano: "1646857199239674900"
- asDouble: 0
attributes:
- key: instance
value:
stringValue: "2"
timeUnixNano: "1646857199239674900"
name: processor.time
unit: '%'
scope: {}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ resourceMetrics:
name: bytes.committed
sum:
aggregationTemporality: 2
isMonotonic: true
dataPoints:
- asDouble: 1.94461696e+10
timeUnixNano: "1646862225775600200"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,24 @@ func (s *scraper) scrape(context.Context) (pmetric.Metrics, error) {
initializeMetricDps(metric, now, val, watcher.MetricRep.Attributes)
}
}

// Drop metrics with no datapoints. This happens when configured counters don't exist on the host.
// This may result in a Metrics message with no metrics if all counters are missing.
metricSlice.RemoveIf(func(m pmetric.Metric) bool {
switch m.Type() {
case pmetric.MetricTypeGauge:
return m.Gauge().DataPoints().Len() == 0
case pmetric.MetricTypeSum:
return m.Sum().DataPoints().Len() == 0
default:
return false
}
})

if scrapeFailures != 0 && scrapeFailures != len(s.watchers) {
errs = scrapererror.NewPartialScrapeError(errs, scrapeFailures)
}

return md, errs
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ func Test_WindowsPerfCounterScraper(t *testing.T) {
"bytes.committed": {
Description: "number of bytes committed to memory",
Unit: "By",
Sum: SumMetric{},
Sum: SumMetric{Aggregation: "cumulative", Monotonic: true},
},
},
PerfCounters: []ObjectConfig{
Expand Down Expand Up @@ -156,6 +156,28 @@ func Test_WindowsPerfCounterScraper(t *testing.T) {
startMessage: "some performance counters could not be initialized",
startErr: "failed to create perf counter with path \\Invalid Object\\Invalid Counter: The specified object was not found on the computer.\r\n",
},
{
name: "MetricDefinedButNoScrapedValue",
cfg: &Config{
MetricMetaData: map[string]MetricConfig{
"cpu.idle": {
Description: "percentage of time CPU is idle.",
Unit: "%",
Gauge: GaugeMetric{},
},
"no.counter": {
Description: "there is no counter or data for this metric",
Unit: "By",
Gauge: GaugeMetric{},
},
},
PerfCounters: []ObjectConfig{
{Object: "Processor", Instances: []string{"_Total"}, Counters: []CounterConfig{{Name: "% Idle Time", MetricRep: MetricRep{Name: "cpu.idle"}}}},
},
ControllerConfig: scraperhelper.ControllerConfig{CollectionInterval: time.Minute, InitialDelay: time.Second},
},
expectedMetricPath: filepath.Join("testdata", "scraper", "metric_not_scraped.yaml"),
},
}

for _, test := range testCases {
Expand Down Expand Up @@ -192,8 +214,16 @@ func Test_WindowsPerfCounterScraper(t *testing.T) {
expectedMetrics, err := golden.ReadMetrics(test.expectedMetricPath)
require.NoError(t, err)

// TODO: Metrics comparison is failing, not verifying the result until that is fixed.
_ = pmetrictest.CompareMetrics(expectedMetrics, actualMetrics, pmetrictest.IgnoreMetricValues())
require.NoError(t, pmetrictest.CompareMetrics(expectedMetrics, actualMetrics,
// The test scrapes the actual test host, so metric values, timestamps and instance counts cannot be compared against static fixtures. ScopeMetrics order is also unpredictable.
// For multi-instance counters, only the first datapoint is compared; the remaining instances are assumed to be included.
pmetrictest.IgnoreSubsequentDataPoints("cpu.idle"),
pmetrictest.IgnoreSubsequentDataPoints("processor.time"),
pmetrictest.IgnoreScopeMetricsOrder(),
pmetrictest.IgnoreResourceMetricsOrder(),
pmetrictest.IgnoreMetricValues(),
pmetrictest.IgnoreTimestamp(),
))
})
}
}
Expand Down Expand Up @@ -473,18 +503,28 @@ func TestScrape(t *testing.T) {
metrics.Sort(func(a, b pmetric.Metric) bool {
return a.Name() < b.Name()
})

assert.Equal(t, len(test.mockPerfCounters)-len(expectedErrors), metrics.Len())

curMetricsNum := 0
for _, pc := range test.cfg.PerfCounters {

for counterIdx, counterCfg := range pc.Counters {
counterValues := test.mockPerfCounters[counterIdx].counterValues
scrapeErr := test.mockPerfCounters[counterIdx].scrapeErr

if scrapeErr != nil {
require.Empty(t, counterValues, "Invalid test case. Scrape error and counter values simultaneously.")
continue // no data for this counter.
}

metric := metrics.At(curMetricsNum)
assert.Equal(t, counterCfg.MetricRep.Name, metric.Name())
metricData := test.cfg.MetricMetaData[counterCfg.MetricRep.Name]
assert.Equal(t, metricData.Description, metric.Description())
assert.Equal(t, metricData.Unit, metric.Unit())
dps := metric.Gauge().DataPoints()

counterValues := test.mockPerfCounters[counterIdx].counterValues
assert.Equal(t, len(counterValues), dps.Len())
for dpIdx, val := range counterValues {
assert.Equal(t, val.Value, dps.At(dpIdx).DoubleValue())
Expand Down

0 comments on commit f34e5ee

Please sign in to comment.