From d73235fc9104fdc10f930c36bcf122c7a5bd162c Mon Sep 17 00:00:00 2001 From: Alex Boten <223565+codeboten@users.noreply.github.com> Date: Mon, 6 May 2024 14:01:35 -0700 Subject: [PATCH] [mdatagen] use mdatagen to produce component internal telemetry (#10054) #### Description This updates mdatagen to generate internal telemetry for components based on metadata.yaml configuration. #### Testing Added tests to mdatagen and updated the batch processor to use this as well for synchronous counters and histogram --------- Signed-off-by: Alex Boten <223565+codeboten@users.noreply.github.com> --- .../codeboten_mdatagen-for-batch-metrics.yaml | 27 ++++++++++ .../internal/metadata/generated_telemetry.go | 33 ++++++++++++ .../internal/samplereceiver/metadata.yaml | 16 ++++++ cmd/mdatagen/loader.go | 11 ++++ cmd/mdatagen/loader_test.go | 26 +++++++-- cmd/mdatagen/metadata-schema.yaml | 33 ++++++++++++ cmd/mdatagen/metricdata.go | 54 +++++++++++++++++-- cmd/mdatagen/templates/telemetry.go.tmpl | 35 ++++++++++++ .../internal/metadata/generated_telemetry.go | 47 ++++++++++++++++ processor/batchprocessor/metadata.yaml | 29 ++++++++++ processor/batchprocessor/metrics.go | 44 +++------------ 11 files changed, 311 insertions(+), 44 deletions(-) create mode 100644 .chloggen/codeboten_mdatagen-for-batch-metrics.yaml diff --git a/.chloggen/codeboten_mdatagen-for-batch-metrics.yaml b/.chloggen/codeboten_mdatagen-for-batch-metrics.yaml new file mode 100644 index 00000000000..219510dfdbb --- /dev/null +++ b/.chloggen/codeboten_mdatagen-for-batch-metrics.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver) +component: mdatagen + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: add ability to use metadata.yaml to automatically generate instruments for components + +# One or more tracking issues or pull requests related to the change +issues: [10054] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | + The `telemetry` section in metadata.yaml is used to generate + instruments for components to measure telemetry about themselves. + +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/cmd/mdatagen/internal/samplereceiver/internal/metadata/generated_telemetry.go b/cmd/mdatagen/internal/samplereceiver/internal/metadata/generated_telemetry.go index 2e9683259cd..d916f8b40bb 100644 --- a/cmd/mdatagen/internal/samplereceiver/internal/metadata/generated_telemetry.go +++ b/cmd/mdatagen/internal/samplereceiver/internal/metadata/generated_telemetry.go @@ -3,6 +3,8 @@ package metadata import ( + "errors" + "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/trace" @@ -16,3 +18,34 @@ func Meter(settings component.TelemetrySettings) metric.Meter { func Tracer(settings component.TelemetrySettings) trace.Tracer { return settings.TracerProvider.Tracer("go.opentelemetry.io/collector/internal/receiver/samplereceiver") } + +// TelemetryBuilder provides an interface for components to report telemetry +// as defined in metadata and user config. +type TelemetryBuilder struct { + BatchSizeTriggerSend metric.Int64Counter + RequestDuration metric.Float64Histogram +} + +// telemetryBuilderOption applies changes to default builder. +type telemetryBuilderOption func(*TelemetryBuilder) + +// NewTelemetryBuilder provides a struct with methods to update all internal telemetry +// for a component +func NewTelemetryBuilder(settings component.TelemetrySettings, options ...telemetryBuilderOption) (*TelemetryBuilder, error) { + builder := TelemetryBuilder{} + var err, errs error + meter := Meter(settings) + builder.BatchSizeTriggerSend, err = meter.Int64Counter( + "batch_size_trigger_send", + metric.WithDescription("Number of times the batch was sent due to a size trigger"), + metric.WithUnit("1"), + ) + errs = errors.Join(errs, err) + builder.RequestDuration, err = meter.Float64Histogram( + "request_duration", + metric.WithDescription("Duration of request"), + metric.WithUnit("s"), + ) + errs = errors.Join(errs, err) + return &builder, errs +} diff --git a/cmd/mdatagen/internal/samplereceiver/metadata.yaml b/cmd/mdatagen/internal/samplereceiver/metadata.yaml index 06d544b18af..9394b54fcc8 100644 --- a/cmd/mdatagen/internal/samplereceiver/metadata.yaml +++ b/cmd/mdatagen/internal/samplereceiver/metadata.yaml @@ -149,3 +149,19 @@ metrics: monotonic: true aggregation_temporality: cumulative attributes: [ string_attr, overridden_int_attr, enum_attr, slice_attr, map_attr ] + +telemetry: + metrics: + batch_size_trigger_send: + enabled: true + description: Number of times the batch was sent due to a size trigger + unit: 1 + sum: + value_type: int + monotonic: true + request_duration: + enabled: true + description: Duration of request + unit: s + histogram: + value_type: double diff --git a/cmd/mdatagen/loader.go b/cmd/mdatagen/loader.go index 20b20d0dc19..4ed517c6613 100644 --- a/cmd/mdatagen/loader.go +++ b/cmd/mdatagen/loader.go @@ -117,6 +117,8 @@ type metric struct { Sum *sum `mapstructure:"sum,omitempty"` // Gauge stores metadata for gauge metric type Gauge *gauge `mapstructure:"gauge,omitempty"` + // Gauge stores metadata for gauge metric type + Histogram *histogram `mapstructure:"histogram,omitempty"` // Attributes is the list of attributes that the metric emits. Attributes []attributeName `mapstructure:"attributes"` @@ -135,6 +137,9 @@ func (m metric) Data() MetricData { if m.Gauge != nil { return m.Gauge } + if m.Histogram != nil { + return m.Histogram + } return nil } @@ -221,6 +226,10 @@ type tests struct { ExpectConsumerError bool `mapstructure:"expect_consumer_error"` } +type telemetry struct { + Metrics map[metricName]metric `mapstructure:"metrics"` +} + type metadata struct { // Type of the component. Type string `mapstructure:"type"` @@ -228,6 +237,8 @@ type metadata struct { Parent string `mapstructure:"parent"` // Status information for the component. Status *Status `mapstructure:"status"` + // Telemetry information for the component. + Telemetry telemetry `mapstructure:"telemetry"` // SemConvVersion is a version number of OpenTelemetry semantic conventions applied to the scraped metrics. SemConvVersion string `mapstructure:"sem_conv_version"` // ResourceAttributes that can be emitted by the component. diff --git a/cmd/mdatagen/loader_test.go b/cmd/mdatagen/loader_test.go index cf3d59e07ce..ee720d8ce54 100644 --- a/cmd/mdatagen/loader_test.go +++ b/cmd/mdatagen/loader_test.go @@ -231,6 +231,27 @@ func TestLoadMetadata(t *testing.T) { Attributes: []attributeName{"string_attr", "overridden_int_attr", "enum_attr", "slice_attr", "map_attr"}, }, }, + Telemetry: telemetry{ + Metrics: map[metricName]metric{ + "batch_size_trigger_send": { + Enabled: true, + Description: "Number of times the batch was sent due to a size trigger", + Unit: strPtr("1"), + Sum: &sum{ + MetricValueType: MetricValueType{pmetric.NumberDataPointValueTypeInt}, + Mono: Mono{Monotonic: true}, + }, + }, + "request_duration": { + Enabled: true, + Description: "Duration of request", + Unit: strPtr("s"), + Histogram: &histogram{ + MetricValueType: MetricValueType{pmetric.NumberDataPointValueTypeDouble}, + }, + }, + }, + }, ScopeName: "go.opentelemetry.io/collector/internal/receiver/samplereceiver", ShortFolderName: "sample", }, @@ -264,11 +285,6 @@ func TestLoadMetadata(t *testing.T) { name: "testdata/unknown_value_type.yaml", wantErr: "1 error(s) decoding:\n\n* error decoding 'metrics[system.cpu.time]': 1 error(s) decoding:\n\n* error decoding 'sum': 1 error(s) decoding:\n\n* error decoding 'value_type': invalid value_type: \"unknown\"", }, - { - name: "testdata/no_aggregation.yaml", - want: metadata{}, - wantErr: "1 error(s) decoding:\n\n* error decoding 'metrics[default.metric]': 1 error(s) decoding:\n\n* error decoding 'sum': missing required field: `aggregation_temporality`", - }, { name: "testdata/invalid_aggregation.yaml", want: metadata{}, diff --git a/cmd/mdatagen/metadata-schema.yaml b/cmd/mdatagen/metadata-schema.yaml index 1363346971e..b2746b2cc33 100644 --- a/cmd/mdatagen/metadata-schema.yaml +++ b/cmd/mdatagen/metadata-schema.yaml @@ -125,3 +125,36 @@ tests: ignore: top: [string] # Optional: array of strings representing functions that should be ignore via IgnoreTopFunction any: [string] # Optional: array of strings representing functions that should be ignore via IgnoreAnyFunction + + +# Optional: map of metric names with the key being the metric name and value +# being described below. +telemetry: + : + # Required: whether the metric is collected by default. + enabled: bool + # Required: metric description. + description: + # Optional: extended documentation of the metric. + extended_documentation: + # Optional: warnings that will be shown to user under specified conditions. + warnings: + # A warning that will be displayed if the metric is enabled in user config. + # Should be used for deprecated default metrics that will be removed soon. + if_enabled: + # A warning that will be displayed if `enabled` field is not set explicitly in user config. + # Should be used for metrics that will be turned from default to optional or vice versa. + if_enabled_not_set: + # A warning that will be displayed if the metrics is configured by user in any way. + # Should be used for deprecated optional metrics that will be removed soon. + if_configured: + # Required: metric unit as defined by https://ucum.org/ucum.html. + unit: + # Required: metric type with its settings. + : + # Required for sum and gauge metrics: type of number data point values. + value_type: + # Required for sum metric: whether the metric is monotonic (no negative delta values). + monotonic: bool + # Optional: array of attributes that were defined in the attributes section that are emitted by this metric. + attributes: [string] diff --git a/cmd/mdatagen/metricdata.go b/cmd/mdatagen/metricdata.go index 5bf77985cc9..aa80904e607 100644 --- a/cmd/mdatagen/metricdata.go +++ b/cmd/mdatagen/metricdata.go @@ -7,6 +7,9 @@ import ( "errors" "fmt" + "golang.org/x/text/cases" + "golang.org/x/text/language" + "go.opentelemetry.io/collector/confmap" "go.opentelemetry.io/collector/pdata/pmetric" ) @@ -14,6 +17,7 @@ import ( var ( _ MetricData = &gauge{} _ MetricData = &sum{} + _ MetricData = &histogram{} ) // MetricData is generic interface for all metric datatypes. @@ -22,6 +26,7 @@ type MetricData interface { HasMonotonic() bool HasAggregated() bool HasMetricInputType() bool + Instrument() string } // AggregationTemporality defines a metric aggregation type. @@ -140,6 +145,10 @@ func (d gauge) HasAggregated() bool { return false } +func (d gauge) Instrument() string { + return "" +} + type sum struct { AggregationTemporality `mapstructure:"aggregation_temporality"` Mono `mapstructure:",squash"` @@ -149,9 +158,6 @@ type sum struct { // Unmarshal is a custom unmarshaler for sum. Needed mostly to avoid MetricValueType.Unmarshal inheritance. func (d *sum) Unmarshal(parser *confmap.Conf) error { - if !parser.IsSet("aggregation_temporality") { - return errors.New("missing required field: `aggregation_temporality`") - } if err := d.MetricValueType.Unmarshal(parser); err != nil { return err } @@ -180,3 +186,45 @@ func (d sum) HasMonotonic() bool { func (d sum) HasAggregated() bool { return true } + +func (d sum) Instrument() string { + instrumentName := cases.Title(language.English).String(d.MetricValueType.BasicType()) + + if !d.Monotonic { + instrumentName += "UpDown" + } + instrumentName += "Counter" + return instrumentName +} + +type histogram struct { + AggregationTemporality `mapstructure:"aggregation_temporality"` + Mono `mapstructure:",squash"` + MetricValueType `mapstructure:"value_type"` + MetricInputType `mapstructure:",squash"` +} + +func (d histogram) Type() string { + return "Histogram" +} + +func (d histogram) HasMonotonic() bool { + return true +} + +func (d histogram) HasAggregated() bool { + return true +} + +func (d histogram) Instrument() string { + instrumentName := cases.Title(language.English).String(d.MetricValueType.BasicType()) + return instrumentName + d.Type() +} + +// Unmarshal is a custom unmarshaler for histogram. Needed mostly to avoid MetricValueType.Unmarshal inheritance. +func (d *histogram) Unmarshal(parser *confmap.Conf) error { + if err := d.MetricValueType.Unmarshal(parser); err != nil { + return err + } + return parser.Unmarshal(d, confmap.WithIgnoreUnused()) +} diff --git a/cmd/mdatagen/templates/telemetry.go.tmpl b/cmd/mdatagen/templates/telemetry.go.tmpl index f0b430f3653..5d72cabe316 100644 --- a/cmd/mdatagen/templates/telemetry.go.tmpl +++ b/cmd/mdatagen/templates/telemetry.go.tmpl @@ -3,6 +3,10 @@ package {{ .Package }} import ( + {{- if .Telemetry.Metrics }} + "errors" + {{- end }} + "go.opentelemetry.io/collector/component" "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/trace" @@ -15,3 +19,34 @@ func Meter(settings component.TelemetrySettings) metric.Meter { func Tracer(settings component.TelemetrySettings) trace.Tracer { return settings.TracerProvider.Tracer("{{ .ScopeName }}") } +{{- if .Telemetry.Metrics }} + +// TelemetryBuilder provides an interface for components to report telemetry +// as defined in metadata and user config. +type TelemetryBuilder struct { + {{- range $name, $metric := .Telemetry.Metrics }} + {{ $name.Render }} metric.{{ $metric.Data.Instrument }} + {{- end }} +} + +// telemetryBuilderOption applies changes to default builder. +type telemetryBuilderOption func(*TelemetryBuilder) + +// NewTelemetryBuilder provides a struct with methods to update all internal telemetry +// for a component +func NewTelemetryBuilder(settings component.TelemetrySettings, options ...telemetryBuilderOption) (*TelemetryBuilder, error) { + builder := TelemetryBuilder{} + var err, errs error + meter := Meter(settings) + {{- range $name, $metric := .Telemetry.Metrics }} + builder.{{ $name.Render }}, err = meter.{{ $metric.Data.Instrument }}( + "{{ $name }}", + metric.WithDescription("{{ $metric.Description }}"), + metric.WithUnit("{{ $metric.Unit }}"), + ) + errs = errors.Join(errs, err) + {{- end }} + return &builder, errs +} + +{{- end }} diff --git a/processor/batchprocessor/internal/metadata/generated_telemetry.go b/processor/batchprocessor/internal/metadata/generated_telemetry.go index 9a1991a4795..a35d095dd51 100644 --- a/processor/batchprocessor/internal/metadata/generated_telemetry.go +++ b/processor/batchprocessor/internal/metadata/generated_telemetry.go @@ -3,6 +3,8 @@ package metadata import ( + "errors" + "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/trace" @@ -16,3 +18,48 @@ func Meter(settings component.TelemetrySettings) metric.Meter { func Tracer(settings component.TelemetrySettings) trace.Tracer { return settings.TracerProvider.Tracer("go.opentelemetry.io/collector/processor/batchprocessor") } + +// TelemetryBuilder provides an interface for components to report telemetry +// as defined in metadata and user config. +type TelemetryBuilder struct { + ProcessorBatchBatchSendSize metric.Int64Histogram + ProcessorBatchBatchSendSizeBytes metric.Int64Histogram + ProcessorBatchBatchSizeTriggerSend metric.Int64Counter + ProcessorBatchTimeoutTriggerSend metric.Int64Counter +} + +// telemetryBuilderOption applies changes to default builder. +type telemetryBuilderOption func(*TelemetryBuilder) + +// NewTelemetryBuilder provides a struct with methods to update all internal telemetry +// for a component +func NewTelemetryBuilder(settings component.TelemetrySettings, options ...telemetryBuilderOption) (*TelemetryBuilder, error) { + builder := TelemetryBuilder{} + var err, errs error + meter := Meter(settings) + builder.ProcessorBatchBatchSendSize, err = meter.Int64Histogram( + "processor_batch_batch_send_size", + metric.WithDescription("Number of units in the batch"), + metric.WithUnit("1"), + ) + errs = errors.Join(errs, err) + builder.ProcessorBatchBatchSendSizeBytes, err = meter.Int64Histogram( + "processor_batch_batch_send_size_bytes", + metric.WithDescription("Number of bytes in batch that was sent"), + metric.WithUnit("By"), + ) + errs = errors.Join(errs, err) + builder.ProcessorBatchBatchSizeTriggerSend, err = meter.Int64Counter( + "processor_batch_batch_size_trigger_send", + metric.WithDescription("Number of times the batch was sent due to a size trigger"), + metric.WithUnit("1"), + ) + errs = errors.Join(errs, err) + builder.ProcessorBatchTimeoutTriggerSend, err = meter.Int64Counter( + "processor_batch_timeout_trigger_send", + metric.WithDescription("Number of times the batch was sent due to a timeout trigger"), + metric.WithUnit("1"), + ) + errs = errors.Join(errs, err) + return &builder, errs +} diff --git a/processor/batchprocessor/metadata.yaml b/processor/batchprocessor/metadata.yaml index d96e0f80d9b..986ada2ae27 100644 --- a/processor/batchprocessor/metadata.yaml +++ b/processor/batchprocessor/metadata.yaml @@ -7,3 +7,32 @@ status: distributions: [core, contrib] tests: + +telemetry: + metrics: + processor_batch_batch_size_trigger_send: + enabled: true + description: Number of times the batch was sent due to a size trigger + unit: 1 + sum: + value_type: int + monotonic: true + processor_batch_timeout_trigger_send: + enabled: true + description: Number of times the batch was sent due to a timeout trigger + unit: 1 + sum: + value_type: int + monotonic: true + processor_batch_batch_send_size: + enabled: true + description: Number of units in the batch + unit: 1 + histogram: + value_type: int + processor_batch_batch_send_size_bytes: + enabled: true + description: Number of bytes in batch that was sent + unit: By + histogram: + value_type: int diff --git a/processor/batchprocessor/metrics.go b/processor/batchprocessor/metrics.go index 95fe7f28e16..08d3d9749f3 100644 --- a/processor/batchprocessor/metrics.go +++ b/processor/batchprocessor/metrics.go @@ -34,10 +34,7 @@ type batchProcessorTelemetry struct { exportCtx context.Context processorAttr []attribute.KeyValue - batchSizeTriggerSend metric.Int64Counter - timeoutTriggerSend metric.Int64Counter - batchSendSize metric.Int64Histogram - batchSendSizeBytes metric.Int64Histogram + telemetryBuilder *metadata.TelemetryBuilder batchMetadataCardinality metric.Int64ObservableUpDownCounter } @@ -69,37 +66,12 @@ func (bpt *batchProcessorTelemetry) createOtelMetrics(set component.TelemetrySet meter = noopmetric.Meter{} } - bpt.batchSizeTriggerSend, err = meter.Int64Counter( - processorhelper.BuildCustomMetricName(typeStr, "batch_size_trigger_send"), - metric.WithDescription("Number of times the batch was sent due to a size trigger"), - metric.WithUnit("1"), - ) - errors = multierr.Append(errors, err) - - bpt.timeoutTriggerSend, err = meter.Int64Counter( - processorhelper.BuildCustomMetricName(typeStr, "timeout_trigger_send"), - metric.WithDescription("Number of times the batch was sent due to a timeout trigger"), - metric.WithUnit("1"), - ) - errors = multierr.Append(errors, err) - - bpt.batchSendSize, err = meter.Int64Histogram( - processorhelper.BuildCustomMetricName(typeStr, "batch_send_size"), - metric.WithDescription("Number of units in the batch"), - metric.WithUnit("1"), - ) - errors = multierr.Append(errors, err) - - bpt.batchSendSizeBytes, err = meter.Int64Histogram( - processorhelper.BuildCustomMetricName(typeStr, "batch_send_size_bytes"), - metric.WithDescription("Number of bytes in batch that was sent"), - metric.WithUnit("By"), - ) + bpt.telemetryBuilder, err = metadata.NewTelemetryBuilder(set) errors = multierr.Append(errors, err) bpt.batchMetadataCardinality, err = meter.Int64ObservableUpDownCounter( - processorhelper.BuildCustomMetricName(typeStr, "metadata_cardinality"), - metric.WithDescription("Number of distinct metadata value combinations being processed"), + processorhelper.BuildCustomMetricName(typeStr, ""), + metric.WithDescription(""), metric.WithUnit("1"), metric.WithInt64Callback(func(_ context.Context, obs metric.Int64Observer) error { obs.Observe(int64(currentMetadataCardinality())) @@ -114,13 +86,13 @@ func (bpt *batchProcessorTelemetry) createOtelMetrics(set component.TelemetrySet func (bpt *batchProcessorTelemetry) record(trigger trigger, sent, bytes int64) { switch trigger { case triggerBatchSize: - bpt.batchSizeTriggerSend.Add(bpt.exportCtx, 1, metric.WithAttributes(bpt.processorAttr...)) + bpt.telemetryBuilder.ProcessorBatchBatchSizeTriggerSend.Add(bpt.exportCtx, 1, metric.WithAttributes(bpt.processorAttr...)) case triggerTimeout: - bpt.timeoutTriggerSend.Add(bpt.exportCtx, 1, metric.WithAttributes(bpt.processorAttr...)) + bpt.telemetryBuilder.ProcessorBatchTimeoutTriggerSend.Add(bpt.exportCtx, 1, metric.WithAttributes(bpt.processorAttr...)) } - bpt.batchSendSize.Record(bpt.exportCtx, sent, metric.WithAttributes(bpt.processorAttr...)) + bpt.telemetryBuilder.ProcessorBatchBatchSendSize.Record(bpt.exportCtx, sent, metric.WithAttributes(bpt.processorAttr...)) if bpt.detailed { - bpt.batchSendSizeBytes.Record(bpt.exportCtx, bytes, metric.WithAttributes(bpt.processorAttr...)) + bpt.telemetryBuilder.ProcessorBatchBatchSendSizeBytes.Record(bpt.exportCtx, bytes, metric.WithAttributes(bpt.processorAttr...)) } }