From 263ddc89b44f49ca5527794bf5e50f31d25b57f9 Mon Sep 17 00:00:00 2001 From: hagen1778 Date: Mon, 16 Sep 2024 14:55:21 +0200 Subject: [PATCH 1/7] feat: log conversion errors in prometheusremotewrite plugin With this change, prometheusremotewrite will log the last recorded conversion error in the `Serialize` call, if any errors occurred. The error might help the user understand why some of the series were dropped during processing. At the same time, logging only the last error should prevent log pollution when too many conversion errors are taking place. See https://github.com/influxdata/telegraf/issues/15782 --- .../prometheusremotewrite.go | 27 +++++- .../prometheusremotewrite_test.go | 93 +++++++++++++++++++ 2 files changed, 117 insertions(+), 3 deletions(-) diff --git a/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go b/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go index 255f9e721be58..2c1c9f8d478d0 100644 --- a/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go +++ b/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go @@ -20,8 +20,9 @@ import ( type MetricKey uint64 type Serializer struct { - SortMetrics bool `toml:"prometheus_sort_metrics"` - StringAsLabel bool `toml:"prometheus_string_as_label"` + SortMetrics bool `toml:"prometheus_sort_metrics"` + StringAsLabel bool `toml:"prometheus_string_as_label"` + Log telegraf.Logger `toml:"-"` } func (s *Serializer) Serialize(metric telegraf.Metric) ([]byte, error) { @@ -30,6 +31,7 @@ func (s *Serializer) Serialize(metric telegraf.Metric) ([]byte, error) { func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { var buf bytes.Buffer + var lastErr error var entries = make(map[MetricKey]prompb.TimeSeries) var labels = make([]prompb.Label, 0) @@ -41,6 +43,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { metricName := prometheus.MetricName(metric.Name(), field.Key, metric.Type()) 
metricName, ok := prometheus.SanitizeMetricName(metricName) if !ok { + lastErr = fmt.Errorf("failed to parse metric name %q", metricName) continue } @@ -52,6 +55,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { case telegraf.Untyped: value, ok := prometheus.SampleValue(field.Value) if !ok { + lastErr = fmt.Errorf("failed to parse %q: bad sample value %#v", metricName, field.Value) continue } metrickey, promts = getPromTS(metricName, labels, value, metric.Time()) @@ -78,14 +82,17 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { le, ok := metric.GetTag("le") if !ok { + lastErr = fmt.Errorf("failed to parse %q: can't find `le` label", metricName) continue } bound, err := strconv.ParseFloat(le, 64) if err != nil { + lastErr = fmt.Errorf("failed to parse %q: can't parse %q value: %s", metricName, le, err) continue } count, ok := prometheus.SampleCount(field.Value) if !ok { + lastErr = fmt.Errorf("failed to parse %q: bad sample value %#v", metricName, field.Value) continue } @@ -97,6 +104,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { case strings.HasSuffix(field.Key, "_sum"): sum, ok := prometheus.SampleSum(field.Value) if !ok { + lastErr = fmt.Errorf("failed to parse %q: bad sample value %#v", metricName, field.Value) continue } @@ -104,6 +112,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { case strings.HasSuffix(field.Key, "_count"): count, ok := prometheus.SampleCount(field.Value) if !ok { + lastErr = fmt.Errorf("failed to parse %q: bad sample value %#v", metricName, field.Value) continue } @@ -119,6 +128,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { metrickey, promts = getPromTS(metricName+"_count", labels, float64(count), metric.Time()) default: + lastErr = fmt.Errorf("failed to parse %q: series %q should have `_count`, `_sum` or `_bucket` suffix", metricName, field.Key) continue 
} case telegraf.Summary: @@ -126,6 +136,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { case strings.HasSuffix(field.Key, "_sum"): sum, ok := prometheus.SampleSum(field.Value) if !ok { + lastErr = fmt.Errorf("failed to parse %q: bad sample value %#v", metricName, field.Value) continue } @@ -133,6 +144,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { case strings.HasSuffix(field.Key, "_count"): count, ok := prometheus.SampleCount(field.Value) if !ok { + lastErr = fmt.Errorf("failed to parse %q: bad sample value %#v", metricName, field.Value) continue } @@ -140,14 +152,17 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { default: quantileTag, ok := metric.GetTag("quantile") if !ok { + lastErr = fmt.Errorf("failed to parse %q: can't find `quantile` label", metricName) continue } quantile, err := strconv.ParseFloat(quantileTag, 64) if err != nil { + lastErr = fmt.Errorf("failed to parse %q: can't parse %q value: %s", metricName, quantileTag, err) continue } value, ok := prometheus.SampleValue(field.Value) if !ok { + lastErr = fmt.Errorf("failed to parse %q: bad sample value %#v", metricName, field.Value) continue } @@ -162,11 +177,12 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { } // A batch of metrics can contain multiple values for a single - // Prometheus sample. If this metric is older than the existing + // Prometheus sample. If this metric is older than the existing // sample then we can skip over it. 
m, ok := entries[metrickey] if ok { if metric.Time().Before(time.Unix(0, m.Samples[0].Timestamp*1_000_000)) { + lastErr = fmt.Errorf("metric %q has samples with timestamp %v older than already registered before", metric.Name(), metric.Time()) continue } } @@ -174,6 +190,11 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { } } + if lastErr != nil && s.Log != nil { + // log only the last recorded error, as it could be too verbose to log every one + s.Log.Errorf("some series were dropped, %d series left to send; last recorded error: %v", len(entries), lastErr) + } + var promTS = make([]prompb.TimeSeries, len(entries)) var i int for _, promts := range entries { diff --git a/plugins/serializers/prometheusremotewrite/prometheusremotewrite_test.go b/plugins/serializers/prometheusremotewrite/prometheusremotewrite_test.go index 3c3ba2bb34ad0..7dcccb977009c 100644 --- a/plugins/serializers/prometheusremotewrite/prometheusremotewrite_test.go +++ b/plugins/serializers/prometheusremotewrite/prometheusremotewrite_test.go @@ -13,6 +13,7 @@ import ( "github.com/stretchr/testify/require" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/logger" "github.com/influxdata/telegraf/plugins/serializers" "github.com/influxdata/telegraf/testutil" ) @@ -201,6 +202,98 @@ http_request_duration_seconds_bucket{le="0.5"} 129389 } } +// fakeLogger immitates telegraf.Logger but preserves +// the last recorded message. +type fakeLogger struct { + telegraf.Logger + lastMsg string +} + +// Errorf overrides telegraf.Logger method to store the message in lastMsg. +// lastMsg can be then used for testing the output +func (fl *fakeLogger) Errorf(format string, args ...interface{}) { + fl.lastMsg = fmt.Sprintf(format, args...) 
+} + +func (fl *fakeLogger) has(msg string) bool { + return strings.Contains(fl.lastMsg, msg) +} + +func TestRemoteWriteSerializeNegative(t *testing.T) { + log := &fakeLogger{Logger: logger.New("", "", "")} + s := &Serializer{Log: log} + + assert := func(msg string, err error) { + t.Helper() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if log.lastMsg == "" { + t.Fatal("expected non-empty last message") + } + if !log.has(msg) { + t.Fatalf("expected to have log message %q; got %q instead", msg, log.lastMsg) + } + // reset log message, so logger can be reused again + log.lastMsg = "" + } + + m := testutil.MustMetric("@@!!", nil, map[string]interface{}{"!!": "@@"}, time.Unix(0, 0)) + _, err := s.Serialize(m) + assert("failed to parse metric name", err) + + m = testutil.MustMetric("prometheus", nil, + map[string]interface{}{ + "http_requests_total": "asd", + }, + time.Unix(0, 0), + ) + _, err = s.Serialize(m) + assert("bad sample", err) + + m = testutil.MustMetric( + "prometheus", + map[string]string{ + "le": "0.5", + }, + map[string]interface{}{ + "http_request_duration_seconds_bucket": "asd", + }, + time.Unix(0, 0), + telegraf.Histogram, + ) + _, err = s.Serialize(m) + assert("bad sample", err) + + m = testutil.MustMetric( + "prometheus", + map[string]string{ + "code": "400", + "method": "post", + }, + map[string]interface{}{ + "http_requests_total": 3.0, + "http_requests_errors_total": "3.0", + }, + time.Unix(0, 0), + telegraf.Gauge, + ) + _, err = s.Serialize(m) + assert("bad sample", err) + + m = testutil.MustMetric( + "prometheus", + map[string]string{"quantile": "0.01a"}, + map[string]interface{}{ + "rpc_duration_seconds": 3102.0, + }, + time.Unix(0, 0), + telegraf.Summary, + ) + _, err = s.Serialize(m) + assert("failed to parse", err) +} + func TestRemoteWriteSerializeBatch(t *testing.T) { tests := []struct { name string From 1054347af4ad9233f20de7815d3d68fef88d6925 Mon Sep 17 00:00:00 2001 From: hagen1778 Date: Mon, 16 Sep 2024 15:48:06 +0200 
Subject: [PATCH 2/7] make linter happy --- .../prometheusremotewrite/prometheusremotewrite.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go b/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go index 2c1c9f8d478d0..662388cee8383 100644 --- a/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go +++ b/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go @@ -87,7 +87,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { } bound, err := strconv.ParseFloat(le, 64) if err != nil { - lastErr = fmt.Errorf("failed to parse %q: can't parse %q value: %s", metricName, le, err) + lastErr = fmt.Errorf("failed to parse %q: can't parse %q value: %w", metricName, le, err) continue } count, ok := prometheus.SampleCount(field.Value) @@ -157,7 +157,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { } quantile, err := strconv.ParseFloat(quantileTag, 64) if err != nil { - lastErr = fmt.Errorf("failed to parse %q: can't parse %q value: %s", metricName, quantileTag, err) + lastErr = fmt.Errorf("failed to parse %q: can't parse %q value: %w", metricName, quantileTag, err) continue } value, ok := prometheus.SampleValue(field.Value) From 265065b915a2753a7597139d3886f14e570fd816 Mon Sep 17 00:00:00 2001 From: hagen1778 Date: Tue, 17 Sep 2024 11:45:51 +0200 Subject: [PATCH 3/7] review fixes * rm unnecessary Logger check for nil * use CaptureLogger instead of custom logger in tests --- .../prometheusremotewrite.go | 2 +- .../prometheusremotewrite_test.go | 34 +++++-------------- 2 files changed, 10 insertions(+), 26 deletions(-) diff --git a/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go b/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go index 662388cee8383..cbbfa6a70333d 100644 --- a/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go +++ 
b/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go @@ -190,7 +190,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { } } - if lastErr != nil && s.Log != nil { + if lastErr != nil { // log only the last recorded error, as it could be too verbose to log every one s.Log.Errorf("some series were dropped, %d series left to send; last recorded error: %v", len(entries), lastErr) } diff --git a/plugins/serializers/prometheusremotewrite/prometheusremotewrite_test.go b/plugins/serializers/prometheusremotewrite/prometheusremotewrite_test.go index 7dcccb977009c..30f2b52e60624 100644 --- a/plugins/serializers/prometheusremotewrite/prometheusremotewrite_test.go +++ b/plugins/serializers/prometheusremotewrite/prometheusremotewrite_test.go @@ -13,7 +13,6 @@ import ( "github.com/stretchr/testify/require" "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/logger" "github.com/influxdata/telegraf/plugins/serializers" "github.com/influxdata/telegraf/testutil" ) @@ -202,40 +201,25 @@ http_request_duration_seconds_bucket{le="0.5"} 129389 } } -// fakeLogger immitates telegraf.Logger but preserves -// the last recorded message. -type fakeLogger struct { - telegraf.Logger - lastMsg string -} - -// Errorf overrides telegraf.Logger method to store the message in lastMsg. -// lastMsg can be then used for testing the output -func (fl *fakeLogger) Errorf(format string, args ...interface{}) { - fl.lastMsg = fmt.Sprintf(format, args...) 
-} - -func (fl *fakeLogger) has(msg string) bool { - return strings.Contains(fl.lastMsg, msg) -} - func TestRemoteWriteSerializeNegative(t *testing.T) { - log := &fakeLogger{Logger: logger.New("", "", "")} - s := &Serializer{Log: log} + clog := &testutil.CaptureLogger{} + s := &Serializer{Log: clog} assert := func(msg string, err error) { t.Helper() if err != nil { t.Fatalf("unexpected error: %v", err) } - if log.lastMsg == "" { + + lastMsg := clog.LastError() + if lastMsg == "" { t.Fatal("expected non-empty last message") } - if !log.has(msg) { - t.Fatalf("expected to have log message %q; got %q instead", msg, log.lastMsg) + if !strings.Contains(lastMsg, msg) { + t.Fatalf("expected to have log message %q; got %q instead", msg, lastMsg) } - // reset log message, so logger can be reused again - log.lastMsg = "" + // reset logger so it can be reused again + clog.Clear() } m := testutil.MustMetric("@@!!", nil, map[string]interface{}{"!!": "@@"}, time.Unix(0, 0)) From 347f84c255ccf0f3c81fd52756c4042845ab7253 Mon Sep 17 00:00:00 2001 From: hagen1778 Date: Tue, 17 Sep 2024 12:13:23 +0200 Subject: [PATCH 4/7] add logger object to all tests --- .../prometheusremotewrite/prometheusremotewrite_test.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/plugins/serializers/prometheusremotewrite/prometheusremotewrite_test.go b/plugins/serializers/prometheusremotewrite/prometheusremotewrite_test.go index 30f2b52e60624..1eac455c99f3a 100644 --- a/plugins/serializers/prometheusremotewrite/prometheusremotewrite_test.go +++ b/plugins/serializers/prometheusremotewrite/prometheusremotewrite_test.go @@ -33,7 +33,7 @@ func BenchmarkRemoteWrite(b *testing.B) { time.Unix(0, 0), ) } - s := &Serializer{} + s := &Serializer{Log: &testutil.CaptureLogger{}} for n := 0; n < b.N; n++ { //nolint:errcheck // Benchmarking so skip the error check to avoid the unnecessary operations s.SerializeBatch(batch) @@ -188,6 +188,7 @@ http_request_duration_seconds_bucket{le="0.5"} 
129389 for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { s := &Serializer{ + Log: &testutil.CaptureLogger{}, SortMetrics: true, } data, err := s.Serialize(tt.metric) @@ -756,6 +757,7 @@ rpc_duration_seconds_sum 17560473 for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { s := &Serializer{ + Log: &testutil.CaptureLogger{}, SortMetrics: true, StringAsLabel: tt.stringAsLabel, } @@ -810,7 +812,7 @@ func protoToSamples(req *prompb.WriteRequest) model.Samples { } func BenchmarkSerialize(b *testing.B) { - s := &Serializer{} + s := &Serializer{Log: &testutil.CaptureLogger{}} metrics := serializers.BenchmarkMetrics(b) b.ResetTimer() for i := 0; i < b.N; i++ { @@ -820,7 +822,7 @@ func BenchmarkSerialize(b *testing.B) { } func BenchmarkSerializeBatch(b *testing.B) { - s := &Serializer{} + s := &Serializer{Log: &testutil.CaptureLogger{}} m := serializers.BenchmarkMetrics(b) metrics := m[:] b.ResetTimer() From 9872ec67e93561df8b0ddd85798feadb8007eedc Mon Sep 17 00:00:00 2001 From: hagen1778 Date: Sun, 22 Sep 2024 08:23:45 +0200 Subject: [PATCH 5/7] * add better comments to explain why only last error is logged * log all parsing errors with `trace` level --- .../prometheusremotewrite/README.md | 1 + .../prometheusremotewrite.go | 39 +++++++++++-------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/plugins/serializers/prometheusremotewrite/README.md b/plugins/serializers/prometheusremotewrite/README.md index f44f95203fa46..4191422341df7 100644 --- a/plugins/serializers/prometheusremotewrite/README.md +++ b/plugins/serializers/prometheusremotewrite/README.md @@ -42,3 +42,4 @@ it is not included in the final metric name. Prometheus labels are produced for each tag. **Note:** String fields are ignored and do not produce Prometheus metrics. +Set **log_level** to `trace` to see parsing errors. 
\ No newline at end of file diff --git a/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go b/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go index cbbfa6a70333d..3f281eaed75a5 100644 --- a/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go +++ b/plugins/serializers/prometheusremotewrite/prometheusremotewrite.go @@ -30,9 +30,15 @@ func (s *Serializer) Serialize(metric telegraf.Metric) ([]byte, error) { } func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { - var buf bytes.Buffer var lastErr error + // traceAndKeepErr logs on Trace level every passed error. + // with each call it updates lastErr, so it can be logged later with higher level. + traceAndKeepErr := func(format string, a ...any) { + lastErr = fmt.Errorf(format, a...) + s.Log.Trace(lastErr) + } + var buf bytes.Buffer var entries = make(map[MetricKey]prompb.TimeSeries) var labels = make([]prompb.Label, 0) for _, metric := range metrics { @@ -43,7 +49,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { metricName := prometheus.MetricName(metric.Name(), field.Key, metric.Type()) metricName, ok := prometheus.SanitizeMetricName(metricName) if !ok { - lastErr = fmt.Errorf("failed to parse metric name %q", metricName) + traceAndKeepErr("failed to parse metric name %q", metricName) continue } @@ -55,7 +61,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { case telegraf.Untyped: value, ok := prometheus.SampleValue(field.Value) if !ok { - lastErr = fmt.Errorf("failed to parse %q: bad sample value %#v", metricName, field.Value) + traceAndKeepErr("failed to parse %q: bad sample value %#v", metricName, field.Value) continue } metrickey, promts = getPromTS(metricName, labels, value, metric.Time()) @@ -82,17 +88,17 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { le, ok := metric.GetTag("le") if !ok { - lastErr = fmt.Errorf("failed to parse 
%q: can't find `le` label", metricName) + traceAndKeepErr("failed to parse %q: can't find `le` label", metricName) continue } bound, err := strconv.ParseFloat(le, 64) if err != nil { - lastErr = fmt.Errorf("failed to parse %q: can't parse %q value: %w", metricName, le, err) + traceAndKeepErr("failed to parse %q: can't parse %q value: %w", metricName, le, err) continue } count, ok := prometheus.SampleCount(field.Value) if !ok { - lastErr = fmt.Errorf("failed to parse %q: bad sample value %#v", metricName, field.Value) + traceAndKeepErr("failed to parse %q: bad sample value %#v", metricName, field.Value) continue } @@ -104,7 +110,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { case strings.HasSuffix(field.Key, "_sum"): sum, ok := prometheus.SampleSum(field.Value) if !ok { - lastErr = fmt.Errorf("failed to parse %q: bad sample value %#v", metricName, field.Value) + traceAndKeepErr("failed to parse %q: bad sample value %#v", metricName, field.Value) continue } @@ -112,7 +118,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { case strings.HasSuffix(field.Key, "_count"): count, ok := prometheus.SampleCount(field.Value) if !ok { - lastErr = fmt.Errorf("failed to parse %q: bad sample value %#v", metricName, field.Value) + traceAndKeepErr("failed to parse %q: bad sample value %#v", metricName, field.Value) continue } @@ -128,7 +134,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { metrickey, promts = getPromTS(metricName+"_count", labels, float64(count), metric.Time()) default: - lastErr = fmt.Errorf("failed to parse %q: series %q should have `_count`, `_sum` or `_bucket` suffix", metricName, field.Key) + traceAndKeepErr("failed to parse %q: series %q should have `_count`, `_sum` or `_bucket` suffix", metricName, field.Key) continue } case telegraf.Summary: @@ -136,7 +142,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { case 
strings.HasSuffix(field.Key, "_sum"): sum, ok := prometheus.SampleSum(field.Value) if !ok { - lastErr = fmt.Errorf("failed to parse %q: bad sample value %#v", metricName, field.Value) + traceAndKeepErr("failed to parse %q: bad sample value %#v", metricName, field.Value) continue } @@ -144,7 +150,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { case strings.HasSuffix(field.Key, "_count"): count, ok := prometheus.SampleCount(field.Value) if !ok { - lastErr = fmt.Errorf("failed to parse %q: bad sample value %#v", metricName, field.Value) + traceAndKeepErr("failed to parse %q: bad sample value %#v", metricName, field.Value) continue } @@ -152,17 +158,17 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { default: quantileTag, ok := metric.GetTag("quantile") if !ok { - lastErr = fmt.Errorf("failed to parse %q: can't find `quantile` label", metricName) + traceAndKeepErr("failed to parse %q: can't find `quantile` label", metricName) continue } quantile, err := strconv.ParseFloat(quantileTag, 64) if err != nil { - lastErr = fmt.Errorf("failed to parse %q: can't parse %q value: %w", metricName, quantileTag, err) + traceAndKeepErr("failed to parse %q: can't parse %q value: %w", metricName, quantileTag, err) continue } value, ok := prometheus.SampleValue(field.Value) if !ok { - lastErr = fmt.Errorf("failed to parse %q: bad sample value %#v", metricName, field.Value) + traceAndKeepErr("failed to parse %q: bad sample value %#v", metricName, field.Value) continue } @@ -182,7 +188,7 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { m, ok := entries[metrickey] if ok { if metric.Time().Before(time.Unix(0, m.Samples[0].Timestamp*1_000_000)) { - lastErr = fmt.Errorf("metric %q has samples with timestamp %v older than already registered before", metric.Name(), metric.Time()) + traceAndKeepErr("metric %q has samples with timestamp %v older than already registered before", 
metric.Name(), metric.Time()) continue } } @@ -191,7 +197,8 @@ func (s *Serializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { } if lastErr != nil { - // log only the last recorded error, as it could be too verbose to log every one + // log only the last recorded error in the batch, as it could have many errors and logging each one + // could be too verbose. The following log line still provides enough info for user to act on. s.Log.Errorf("some series were dropped, %d series left to send; last recorded error: %v", len(entries), lastErr) } From 13d5afa85d599ccc7a7445c38b3b584df123ecac Mon Sep 17 00:00:00 2001 From: hagen1778 Date: Sun, 22 Sep 2024 08:33:22 +0200 Subject: [PATCH 6/7] make linter happy --- plugins/serializers/prometheusremotewrite/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/serializers/prometheusremotewrite/README.md b/plugins/serializers/prometheusremotewrite/README.md index 4191422341df7..de9eab4db024d 100644 --- a/plugins/serializers/prometheusremotewrite/README.md +++ b/plugins/serializers/prometheusremotewrite/README.md @@ -42,4 +42,4 @@ it is not included in the final metric name. Prometheus labels are produced for each tag. **Note:** String fields are ignored and do not produce Prometheus metrics. -Set **log_level** to `trace` to see parsing errors. \ No newline at end of file +Set **log_level** to `trace` to see parsing errors. 
From dcc4e5c5b975d7423da056e7fb82db54d39e8c53 Mon Sep 17 00:00:00 2001 From: Sven Rebhan <36194019+srebhan@users.noreply.github.com> Date: Mon, 30 Sep 2024 18:59:49 +0200 Subject: [PATCH 7/7] Update plugins/serializers/prometheusremotewrite/README.md --- plugins/serializers/prometheusremotewrite/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/serializers/prometheusremotewrite/README.md b/plugins/serializers/prometheusremotewrite/README.md index de9eab4db024d..882f49f314557 100644 --- a/plugins/serializers/prometheusremotewrite/README.md +++ b/plugins/serializers/prometheusremotewrite/README.md @@ -42,4 +42,4 @@ it is not included in the final metric name. Prometheus labels are produced for each tag. **Note:** String fields are ignored and do not produce Prometheus metrics. -Set **log_level** to `trace` to see parsing errors. +Set **log_level** to `trace` to see all serialization issues.