From 1115e9d150bdfe2b779ccc4e4c1a75ff23fb93a7 Mon Sep 17 00:00:00 2001 From: Tigran Najaryan Date: Tue, 15 Nov 2022 16:02:53 -0500 Subject: [PATCH 01/40] Introduce component status reporting This is an alternate to https://github.com/open-telemetry/opentelemetry-collector/pull/6550 - Add component status concept. Components can report their status via Host.ReportComponentStatus(). Interested extensions can watch status events if they implement StatusWatcher interface. This is similar to how PipelineWatcher works today. - Deprecate Host.ReportFatalError() in favour of Host.ReportComponentStatus(). - healthcheck extension must implement StatusWatcher. - Replace all ReportFatalError() calls by ReportComponentStatus() calls in core and contrib. - StatusWatchers need to be able to tell if all current components are healthy. It is assumed that the listeners need to maintain a map of components and track the status of each component. This works only if we assume that the set of components cannot change during the lifetime of the listener. This assumption is true today but can change in the future if we introduce partial pipeline restarts where only modified/added/removed components are recreated (this will break listener's assumption and the map will become invalid). Should we instead keep track of this entire status map in the Host and broadcast the entire status to the listeners as a whole instead of (or in addition to) individual component events? 
--- component/componenttest/nop_host.go | 2 + .../componenttest/statuswatcher_extension.go | 61 +++++++++++++ .../componenttest/unhealthy_processor.go | 81 +++++++++++++++++ component/host.go | 9 ++ component/status.go | 90 +++++++++++++++++++ component/status_test.go | 11 +++ otelcol/testdata/otelcol-statuswatcher.yaml | 31 +++++++ service/extensions/extensions.go | 28 +++++- service/host.go | 8 +- service/internal/components/host_wrapper.go | 14 ++- .../internal/components/host_wrapper_test.go | 11 ++- service/internal/servicehost/host.go | 38 ++++++++ service/internal/servicehost/nop_host.go | 45 ++++++++++ 13 files changed, 421 insertions(+), 8 deletions(-) create mode 100644 component/componenttest/statuswatcher_extension.go create mode 100644 component/componenttest/unhealthy_processor.go create mode 100644 component/status.go create mode 100644 component/status_test.go create mode 100644 otelcol/testdata/otelcol-statuswatcher.yaml create mode 100644 service/internal/servicehost/host.go create mode 100644 service/internal/servicehost/nop_host.go diff --git a/component/componenttest/nop_host.go b/component/componenttest/nop_host.go index 4accfab0d8c..04c67080df9 100644 --- a/component/componenttest/nop_host.go +++ b/component/componenttest/nop_host.go @@ -17,6 +17,8 @@ func NewNopHost() component.Host { func (nh *nopHost) ReportFatalError(_ error) {} +func (hw *nopHost) ReportComponentStatus(event *component.StatusEvent) {} + func (nh *nopHost) GetFactory(_ component.Kind, _ component.Type) component.Factory { return nil } diff --git a/component/componenttest/statuswatcher_extension.go b/component/componenttest/statuswatcher_extension.go new file mode 100644 index 00000000000..ffc01a76296 --- /dev/null +++ b/component/componenttest/statuswatcher_extension.go @@ -0,0 +1,61 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package componenttest // import "go.opentelemetry.io/collector/component/componenttest" + +import ( + "context" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/config" +) + +// NewStatusWatcherExtensionCreateSettings returns a new nop settings for Create*Extension functions. +func NewStatusWatcherExtensionCreateSettings() component.ExtensionCreateSettings { + return component.ExtensionCreateSettings{ + TelemetrySettings: NewNopTelemetrySettings(), + BuildInfo: component.NewDefaultBuildInfo(), + } +} + +type statusWatcherExtensionConfig struct { + config.ExtensionSettings `mapstructure:",squash"` // squash ensures fields are correctly decoded in embedded struct +} + +// NewStatusWatcherExtensionFactory returns a component.ExtensionFactory that constructs nop extensions. +func NewStatusWatcherExtensionFactory( + onStatusChanged func(source component.StatusSource, event *component.StatusEvent), +) component.ExtensionFactory { + return component.NewExtensionFactory( + "statuswatcher", + func() component.ExtensionConfig { + return &statusWatcherExtensionConfig{ + ExtensionSettings: config.NewExtensionSettings(component.NewID("statuswatcher")), + } + }, + func(context.Context, component.ExtensionCreateSettings, component.ExtensionConfig) (component.Extension, error) { + return &statusWatcherExtension{onStatusChanged: onStatusChanged}, nil + }, + component.StabilityLevelStable) +} + +// statusWatcherExtension stores consumed traces and metrics for testing purposes. 
+type statusWatcherExtension struct { + nopComponent + onStatusChanged func(source component.StatusSource, event *component.StatusEvent) +} + +func (e statusWatcherExtension) ComponentStatusChanged(source component.StatusSource, event *component.StatusEvent) { + e.onStatusChanged(source, event) +} diff --git a/component/componenttest/unhealthy_processor.go b/component/componenttest/unhealthy_processor.go new file mode 100644 index 00000000000..2a84874fa7b --- /dev/null +++ b/component/componenttest/unhealthy_processor.go @@ -0,0 +1,81 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package componenttest // import "go.opentelemetry.io/collector/component/componenttest" + +import ( + "context" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/config" + "go.opentelemetry.io/collector/consumer" + "go.opentelemetry.io/collector/consumer/consumertest" +) + +// NewUnhealthyProcessorCreateSettings returns a new nop settings for Create*Processor functions. 
+func NewUnhealthyProcessorCreateSettings() component.ProcessorCreateSettings { + return component.ProcessorCreateSettings{ + TelemetrySettings: NewNopTelemetrySettings(), + BuildInfo: component.NewDefaultBuildInfo(), + } +} + +type unhealthyProcessorConfig struct { + config.ProcessorSettings `mapstructure:",squash"` // squash ensures fields are correctly decoded in embedded struct +} + +// NewUnhealthyProcessorFactory returns a component.ProcessorFactory that constructs nop processors. +func NewUnhealthyProcessorFactory() component.ProcessorFactory { + return component.NewProcessorFactory( + "unhealthy", + func() component.ProcessorConfig { + return &unhealthyProcessorConfig{ + ProcessorSettings: config.NewProcessorSettings(component.NewID("nop")), + } + }, + component.WithTracesProcessor(createUnhealthyTracesProcessor, component.StabilityLevelStable), + component.WithMetricsProcessor(createUnhealthyMetricsProcessor, component.StabilityLevelStable), + component.WithLogsProcessor(createUnhealthyLogsProcessor, component.StabilityLevelStable), + ) +} + +func createUnhealthyTracesProcessor(context.Context, component.ProcessorCreateSettings, component.ProcessorConfig, consumer.Traces) (component.TracesProcessor, error) { + return unhealthyProcessorInstance, nil +} + +func createUnhealthyMetricsProcessor(context.Context, component.ProcessorCreateSettings, component.ProcessorConfig, consumer.Metrics) (component.MetricsProcessor, error) { + return unhealthyProcessorInstance, nil +} + +func createUnhealthyLogsProcessor(context.Context, component.ProcessorCreateSettings, component.ProcessorConfig, consumer.Logs) (component.LogsProcessor, error) { + return unhealthyProcessorInstance, nil +} + +var unhealthyProcessorInstance = &unhealthyProcessor{ + Consumer: consumertest.NewNop(), +} + +// unhealthyProcessor stores consumed traces and metrics for testing purposes. 
+type unhealthyProcessor struct { + nopComponent + consumertest.Consumer +} + +func (unhealthyProcessor) Start(ctx context.Context, host component.Host) error { + go func() { + evt, _ := component.NewStatusEvent(component.StatusError) + host.ReportComponentStatus(evt) + }() + return nil +} diff --git a/component/host.go b/component/host.go index ea3825d743f..79e653a19a2 100644 --- a/component/host.go +++ b/component/host.go @@ -12,8 +12,17 @@ type Host interface { // // ReportFatalError should be called by the component anytime after Component.Start() ends and // before Component.Shutdown() begins. + // Deprecated: [0.65.0] Use ReportComponentStatus instead (with an event of status StatusError). ReportFatalError(err error) + // ReportComponentStatus can be used by a component to communicate its status to the Host. + // Host implementations may broadcast this information to interested parties via the + // StatusWatcher interface. + // May be called by the component any time after Component.Start is called or while + // Component.Start call is executing. + // May be called concurrently with itself. + ReportComponentStatus(event *StatusEvent) + // GetFactory of the specified kind. Returns the factory for a component type. // This allows components to create other components. For example: // func (r MyReceiver) Start(host component.Host) error { diff --git a/component/status.go b/component/status.go new file mode 100644 index 00000000000..42de901445f --- /dev/null +++ b/component/status.go @@ -0,0 +1,90 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package component + +import ( + "errors" +) + +type Status int32 + +const ( + StatusOK Status = iota + StatusError +) + +// StatusSource is a component that reports a status about itself. +// The implementation of this interface must be comparable to be useful as a map key. +type StatusSource interface { + ID() ID +} + +type StatusEvent struct { + status Status + err error +} + +func (ev *StatusEvent) Status() Status { + return ev.status +} + +// Err returns the error associated with the StatusEvent. +func (ev *StatusEvent) Err() error { + return ev.err +} + +// statusEventOption applies options to a StatusEvent. +type statusEventOption func(*StatusEvent) error + +// WithError sets the error object of the StatusEvent. It is optional +// and should only be applied to a StatusEvent with status StatusError. +func WithError(err error) statusEventOption { + return func(o *StatusEvent) error { + if o.status == StatusOK { + return errors.New("event with ComponentOK cannot have an error") + } + o.err = err + return nil + } +} + +// NewStatusEvent creates and returns a StatusEvent with default and provided +// options. Will return an error if an error is provided for a non-error event +// type (StatusOK). +// Options are applied in order; the first option that fails aborts construction. 
+func NewStatusEvent(status Status, options ...statusEventOption) (*StatusEvent, error) { + ev := StatusEvent{ + status: status, + } + + for _, opt := range options { + if err := opt(&ev); err != nil { + return nil, err + } + } + + return &ev, nil +} + +// StatusWatcher is an extra interface for Extension hosted by the OpenTelemetry +// Collector that is to be implemented by extensions interested in changes to component +// status. +type StatusWatcher interface { + // ComponentStatusChanged notifies about a change in the source component status. + // Extensions that implement this interface must be ready that the ComponentStatusChanged + // may be called before, after or concurrently with Component.Shutdown() call. + // The function may be called concurrently with itself. + ComponentStatusChanged(source StatusSource, event *StatusEvent) +} diff --git a/component/status_test.go b/component/status_test.go new file mode 100644 index 00000000000..65603b04555 --- /dev/null +++ b/component/status_test.go @@ -0,0 +1,11 @@ +package component + +import ( + "fmt" + "testing" + "unsafe" +) + +func TestStatusEventSize(t *testing.T) { + fmt.Printf("StatusEvent size=%d", unsafe.Sizeof(StatusEvent{})) +} diff --git a/otelcol/testdata/otelcol-statuswatcher.yaml b/otelcol/testdata/otelcol-statuswatcher.yaml new file mode 100644 index 00000000000..34e6ea80063 --- /dev/null +++ b/otelcol/testdata/otelcol-statuswatcher.yaml @@ -0,0 +1,31 @@ +receivers: + nop: + +processors: + nop: + unhealthy: + +exporters: + nop: + +extensions: + statuswatcher: + +service: + telemetry: + metrics: + address: localhost:8888 + extensions: [statuswatcher] + pipelines: + traces: + receivers: [nop] + processors: [nop,unhealthy,unhealthy] + exporters: [nop] + metrics: + receivers: [nop] + processors: [nop,unhealthy] + exporters: [nop] + logs: + receivers: [nop] + processors: [nop,unhealthy] + exporters: [nop] diff --git a/service/extensions/extensions.go b/service/extensions/extensions.go index 
54af9d6544c..7d036c6743e 100644 --- a/service/extensions/extensions.go +++ b/service/extensions/extensions.go @@ -15,6 +15,7 @@ import ( "go.opentelemetry.io/collector/confmap" "go.opentelemetry.io/collector/extension" "go.opentelemetry.io/collector/service/internal/components" + "go.opentelemetry.io/collector/service/internal/servicehost" "go.opentelemetry.io/collector/service/internal/zpages" ) @@ -26,13 +27,26 @@ type Extensions struct { extMap map[component.ID]extension.Extension } +type statusReportingExtension struct { + id component.ID +} + +func (s *statusReportingExtension) GetKind() component.Kind { + return component.KindExtension +} + +func (s *statusReportingExtension) ID() component.ID { + return s.id +} + // Start starts all extensions. -func (bes *Extensions) Start(ctx context.Context, host component.Host) error { +func (bes *Extensions) Start(ctx context.Context, host servicehost.Host) error { bes.telemetry.Logger.Info("Starting extensions...") for extID, ext := range bes.extMap { extLogger := components.ExtensionLogger(bes.telemetry.Logger, extID) extLogger.Info("Extension is starting...") - if err := ext.Start(ctx, components.NewHostWrapper(host, extLogger)); err != nil { + statusSource := &statusReportingExtension{extID} + if err := ext.Start(ctx, components.NewHostWrapper(host, statusSource, extLogger)); err != nil { return err } extLogger.Info("Extension started.") @@ -84,6 +98,16 @@ func (bes *Extensions) NotifyConfig(ctx context.Context, conf *confmap.Conf) err return errs } +func (bes *Extensions) NotifyComponentStatusChange(source component.StatusSource, event *component.StatusEvent) error { + var errs error + for _, ext := range bes.extMap { + if pw, ok := ext.(component.StatusWatcher); ok { + pw.ComponentStatusChanged(source, event) + } + } + return errs +} + func (bes *Extensions) GetExtensions() map[component.ID]component.Component { result := make(map[component.ID]component.Component, len(bes.extMap)) for extID, v := range bes.extMap 
{ diff --git a/service/host.go b/service/host.go index d216ae94adb..ff0d6a07aa2 100644 --- a/service/host.go +++ b/service/host.go @@ -12,9 +12,10 @@ import ( "go.opentelemetry.io/collector/receiver" "go.opentelemetry.io/collector/service/extensions" "go.opentelemetry.io/collector/service/internal/graph" + "go.opentelemetry.io/collector/service/internal/servicehost" ) -var _ component.Host = (*serviceHost)(nil) +var _ servicehost.Host = (*serviceHost)(nil) type serviceHost struct { asyncErrorChannel chan error @@ -33,10 +34,15 @@ type serviceHost struct { // ReportFatalError is used to report to the host that the receiver encountered // a fatal error (i.e.: an error that the instance can't recover from) after // its start function has already returned. +// Deprecated: [0.65.0] Replaced by ReportComponentStatus func (host *serviceHost) ReportFatalError(err error) { host.asyncErrorChannel <- err } +func (host *serviceHost) ReportComponentStatus(source component.StatusSource, event *component.StatusEvent) { + host.extensions.NotifyComponentStatusChange(source, event) +} + func (host *serviceHost) GetFactory(kind component.Kind, componentType component.Type) component.Factory { switch kind { case component.KindReceiver: diff --git a/service/internal/components/host_wrapper.go b/service/internal/components/host_wrapper.go index 2d386ddad67..cd6397164bd 100644 --- a/service/internal/components/host_wrapper.go +++ b/service/internal/components/host_wrapper.go @@ -9,17 +9,21 @@ import ( "go.uber.org/zap" "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/service/internal/servicehost" ) // hostWrapper adds behavior on top of the component.Host being passed when starting the built components. +// TODO: rename this to componentHost or hostComponentConnector to better reflect the purpose. 
type hostWrapper struct { - component.Host + servicehost.Host + component component.StatusSource *zap.Logger } -func NewHostWrapper(host component.Host, logger *zap.Logger) component.Host { +func NewHostWrapper(host servicehost.Host, component component.StatusSource, logger *zap.Logger) component.Host { return &hostWrapper{ host, + component, logger, } } @@ -30,6 +34,12 @@ func (hw *hostWrapper) ReportFatalError(err error) { hw.Host.ReportFatalError(err) } +var emptyComponentID = component.ID{} + +func (hw *hostWrapper) ReportComponentStatus(event *component.StatusEvent) { + hw.Host.ReportComponentStatus(hw.component, event) +} + // RegisterZPages is used by zpages extension to register handles from service. // When the wrapper is passed to the extension it won't be successful when casting // the interface, for the time being expose the interface here. diff --git a/service/internal/components/host_wrapper_test.go b/service/internal/components/host_wrapper_test.go index 62b7a744681..282701e6a21 100644 --- a/service/internal/components/host_wrapper_test.go +++ b/service/internal/components/host_wrapper_test.go @@ -7,12 +7,17 @@ import ( "errors" "testing" + "github.com/stretchr/testify/assert" "go.uber.org/zap" - "go.opentelemetry.io/collector/component/componenttest" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/service/internal/servicehost" ) -func Test_newHostWrapper(_ *testing.T) { - hw := NewHostWrapper(componenttest.NewNopHost(), zap.NewNop()) +func Test_newHostWrapper(t *testing.T) { + hw := NewHostWrapper(servicehost.NewNopHost(), nil, zap.NewNop()) hw.ReportFatalError(errors.New("test error")) + ev, err := component.NewStatusEvent(component.StatusOK) + assert.NoError(t, err) + hw.ReportComponentStatus(ev) } diff --git a/service/internal/servicehost/host.go b/service/internal/servicehost/host.go new file mode 100644 index 00000000000..754f483e49c --- /dev/null +++ b/service/internal/servicehost/host.go @@ -0,0 +1,38 @@ +// 
Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package servicehost + +import ( + "go.opentelemetry.io/collector/component" +) + +// Host mirrors component.Host interface, with one important difference: servicehost.Host +// is not associated with a component and thus ReportComponentStatus() requires the source +// component to be explicitly specified. +type Host interface { + // ReportComponentStatus is used to communicate the status of a source component to the Host. + // The Host implementations will broadcast this information to interested parties via + // StatusWatcher interface. + ReportComponentStatus(source component.StatusSource, event *component.StatusEvent) + + // See component.Host for the documentation of the rest of the functions. + + // Deprecated: [0.65.0] Replaced by ReportComponentStatus. 
+ ReportFatalError(err error) + + GetFactory(kind component.Kind, componentType component.Type) component.Factory + GetExtensions() map[component.ID]component.Extension + GetExporters() map[component.DataType]map[component.ID]component.Exporter +} diff --git a/service/internal/servicehost/nop_host.go b/service/internal/servicehost/nop_host.go new file mode 100644 index 00000000000..7a5717624fb --- /dev/null +++ b/service/internal/servicehost/nop_host.go @@ -0,0 +1,45 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package servicehost + +import ( + "go.opentelemetry.io/collector/component" +) + +// nopHost is a no-op implementation of Host for test purposes. +type nopHost struct{} + +func (n nopHost) ReportFatalError(err error) { +} + +func (n nopHost) ReportComponentStatus(source component.StatusSource, event *component.StatusEvent) { +} + +func (n nopHost) GetFactory(kind component.Kind, componentType component.Type) component.Factory { + return nil +} + +func (n nopHost) GetExtensions() map[component.ID]component.Extension { + return nil +} + +func (n nopHost) GetExporters() map[component.DataType]map[component.ID]component.Exporter { + return nil +} + +// NewNopHost returns a new instance of nopHost with proper defaults for most tests. 
+func NewNopHost() Host { + return &nopHost{} +} From 897ebf838670c87ec12ef74bb7473558746a13ad Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Mon, 31 Jul 2023 17:11:49 -0700 Subject: [PATCH 02/40] Get things working after rebase This picks up where https://github.com/open-telemetry/opentelemetry-collector/pull/6560 left off. The first step is to get the code introduced in that PR working with the collector as it is today. There were significant changes to how pipelines are built and the component package was split into separate packages based on type (extension, processor, etc). This commit makes the necessary changes to get everything working, likely not in the most ideal way, but it's a start that we can iterate on. --- .../extensiontest}/statuswatcher_extension.go | 30 ++++----- otelcol/collector_test.go | 65 +++++++++++++++++++ otelcol/testdata/otelcol-statuswatcher.yaml | 2 +- .../processortest}/unhealthy_processor.go | 40 +++++------- service/host.go | 2 +- service/internal/graph/graph.go | 60 +++++++++++++++-- service/internal/graph/graph_test.go | 17 ++--- service/internal/servicehost/host.go | 4 +- service/internal/servicehost/nop_host.go | 4 +- service/service.go | 3 +- 10 files changed, 169 insertions(+), 58 deletions(-) rename {component/componenttest => extension/extensiontest}/statuswatcher_extension.go (65%) rename {component/componenttest => processor/processortest}/unhealthy_processor.go (52%) diff --git a/component/componenttest/statuswatcher_extension.go b/extension/extensiontest/statuswatcher_extension.go similarity index 65% rename from component/componenttest/statuswatcher_extension.go rename to extension/extensiontest/statuswatcher_extension.go index ffc01a76296..ee0280bf7c1 100644 --- a/component/componenttest/statuswatcher_extension.go +++ b/extension/extensiontest/statuswatcher_extension.go @@ -12,39 +12,34 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package componenttest // import "go.opentelemetry.io/collector/component/componenttest" +package extensiontest // import "go.opentelemetry.io/collector/extension/extensiontest" import ( "context" "go.opentelemetry.io/collector/component" - "go.opentelemetry.io/collector/config" + "go.opentelemetry.io/collector/component/componenttest" + "go.opentelemetry.io/collector/extension" ) // NewStatusWatcherExtensionCreateSettings returns a new nop settings for Create*Extension functions. -func NewStatusWatcherExtensionCreateSettings() component.ExtensionCreateSettings { - return component.ExtensionCreateSettings{ - TelemetrySettings: NewNopTelemetrySettings(), +func NewStatusWatcherExtensionCreateSettings() extension.CreateSettings { + return extension.CreateSettings{ + TelemetrySettings: componenttest.NewNopTelemetrySettings(), BuildInfo: component.NewDefaultBuildInfo(), } } -type statusWatcherExtensionConfig struct { - config.ExtensionSettings `mapstructure:",squash"` // squash ensures fields are correctly decoded in embedded struct -} - // NewStatusWatcherExtensionFactory returns a component.ExtensionFactory that constructs nop extensions. 
func NewStatusWatcherExtensionFactory( onStatusChanged func(source component.StatusSource, event *component.StatusEvent), -) component.ExtensionFactory { - return component.NewExtensionFactory( +) extension.Factory { + return extension.NewFactory( "statuswatcher", - func() component.ExtensionConfig { - return &statusWatcherExtensionConfig{ - ExtensionSettings: config.NewExtensionSettings(component.NewID("statuswatcher")), - } + func() component.Config { + return &struct{}{} }, - func(context.Context, component.ExtensionCreateSettings, component.ExtensionConfig) (component.Extension, error) { + func(context.Context, extension.CreateSettings, component.Config) (component.Component, error) { return &statusWatcherExtension{onStatusChanged: onStatusChanged}, nil }, component.StabilityLevelStable) @@ -52,7 +47,8 @@ func NewStatusWatcherExtensionFactory( // statusWatcherExtension stores consumed traces and metrics for testing purposes. type statusWatcherExtension struct { - nopComponent + component.StartFunc + component.ShutdownFunc onStatusChanged func(source component.StatusSource, event *component.StatusEvent) } diff --git a/otelcol/collector_test.go b/otelcol/collector_test.go index 1a0ad8d0607..c84eb820ef6 100644 --- a/otelcol/collector_test.go +++ b/otelcol/collector_test.go @@ -19,6 +19,8 @@ import ( "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/confmap" "go.opentelemetry.io/collector/confmap/converter/expandconverter" + "go.opentelemetry.io/collector/extension/extensiontest" + "go.opentelemetry.io/collector/processor/processortest" ) func TestStateString(t *testing.T) { @@ -151,6 +153,69 @@ func TestCollectorReportError(t *testing.T) { assert.Equal(t, StateClosed, col.GetState()) } +func TestComponentStatusWatcher(t *testing.T) { + factories, err := nopFactories() + assert.NoError(t, err) + + // Use a processor factory that creates "unhealthy" processor: one that + // always reports StatusError after successful Start. 
+ unhealthyProcessorFactory := processortest.NewUnhealthyProcessorFactory() + factories.Processors[unhealthyProcessorFactory.Type()] = unhealthyProcessorFactory + + // Keep track of all status changes in a map. + changedComponents := map[component.StatusSource]component.Status{} + var mux sync.Mutex + onStatusChanged := func(source component.StatusSource, event *component.StatusEvent) { + mux.Lock() + defer mux.Unlock() + changedComponents[source] = event.Status() + } + + // Add a "statuswatcher" extension that will receive notifications when processor + // status changes. + factory := extensiontest.NewStatusWatcherExtensionFactory(onStatusChanged) + factories.Extensions[factory.Type()] = factory + + // Read config from file. This config uses 3 "unhealthy" processors. + validProvider, err := NewConfigProvider(newDefaultConfigProviderSettings([]string{filepath.Join("testdata", "otelcol-statuswatcher.yaml")})) + require.NoError(t, err) + + // Create a collector + col, err := NewCollector(CollectorSettings{ + BuildInfo: component.NewDefaultBuildInfo(), + Factories: factories, + ConfigProvider: validProvider, + }) + require.NoError(t, err) + + // Start the newly created collector. + wg := startCollector(context.Background(), t, col) + + // The "unhealthy" processors will now begin to asynchronously report StatusError. + // We expect to see these reports. + assert.Eventually(t, func() bool { + mux.Lock() + defer mux.Unlock() + + for k, v := range changedComponents { + // All processors must report a status change with the same ID + assert.EqualValues(t, component.NewID(unhealthyProcessorFactory.Type()), k.ID()) + // And all must be in StatusError + assert.EqualValues(t, component.StatusError, v) + } + // We have 3 processors with exactly the same ID in otelcol-statuswatcher.yaml + // We must have exactly 3 items in our map. 
This ensures that the "source" argument + // passed to status change func is unique per instance of source component despite + // components having the same IDs (having same ID for different component instances + // is a normal situation for processors). + return len(changedComponents) == 3 + }, time.Second, time.Millisecond*10) + + col.Shutdown() + wg.Wait() + assert.Equal(t, StateClosed, col.GetState()) +} + func TestCollectorSendSignal(t *testing.T) { factories, err := nopFactories() require.NoError(t, err) diff --git a/otelcol/testdata/otelcol-statuswatcher.yaml b/otelcol/testdata/otelcol-statuswatcher.yaml index 34e6ea80063..2dcc322d341 100644 --- a/otelcol/testdata/otelcol-statuswatcher.yaml +++ b/otelcol/testdata/otelcol-statuswatcher.yaml @@ -19,7 +19,7 @@ service: pipelines: traces: receivers: [nop] - processors: [nop,unhealthy,unhealthy] + processors: [nop,unhealthy] exporters: [nop] metrics: receivers: [nop] diff --git a/component/componenttest/unhealthy_processor.go b/processor/processortest/unhealthy_processor.go similarity index 52% rename from component/componenttest/unhealthy_processor.go rename to processor/processortest/unhealthy_processor.go index 2a84874fa7b..64a52d1d570 100644 --- a/component/componenttest/unhealthy_processor.go +++ b/processor/processortest/unhealthy_processor.go @@ -12,53 +12,48 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package componenttest // import "go.opentelemetry.io/collector/component/componenttest" +package processortest // import "go.opentelemetry.io/collector/processor/processortest" import ( "context" "go.opentelemetry.io/collector/component" - "go.opentelemetry.io/collector/config" + "go.opentelemetry.io/collector/component/componenttest" "go.opentelemetry.io/collector/consumer" "go.opentelemetry.io/collector/consumer/consumertest" + "go.opentelemetry.io/collector/processor" ) // NewUnhealthyProcessorCreateSettings returns a new nop settings for Create*Processor functions. -func NewUnhealthyProcessorCreateSettings() component.ProcessorCreateSettings { - return component.ProcessorCreateSettings{ - TelemetrySettings: NewNopTelemetrySettings(), +func NewUnhealthyProcessorCreateSettings() processor.CreateSettings { + return processor.CreateSettings{ + TelemetrySettings: componenttest.NewNopTelemetrySettings(), BuildInfo: component.NewDefaultBuildInfo(), } } -type unhealthyProcessorConfig struct { - config.ProcessorSettings `mapstructure:",squash"` // squash ensures fields are correctly decoded in embedded struct -} - // NewUnhealthyProcessorFactory returns a component.ProcessorFactory that constructs nop processors. 
-func NewUnhealthyProcessorFactory() component.ProcessorFactory { - return component.NewProcessorFactory( +func NewUnhealthyProcessorFactory() processor.Factory { + return processor.NewFactory( "unhealthy", - func() component.ProcessorConfig { - return &unhealthyProcessorConfig{ - ProcessorSettings: config.NewProcessorSettings(component.NewID("nop")), - } + func() component.Config { + return &struct{}{} }, - component.WithTracesProcessor(createUnhealthyTracesProcessor, component.StabilityLevelStable), - component.WithMetricsProcessor(createUnhealthyMetricsProcessor, component.StabilityLevelStable), - component.WithLogsProcessor(createUnhealthyLogsProcessor, component.StabilityLevelStable), + processor.WithTraces(createUnhealthyTracesProcessor, component.StabilityLevelStable), + processor.WithMetrics(createUnhealthyMetricsProcessor, component.StabilityLevelStable), + processor.WithLogs(createUnhealthyLogsProcessor, component.StabilityLevelStable), ) } -func createUnhealthyTracesProcessor(context.Context, component.ProcessorCreateSettings, component.ProcessorConfig, consumer.Traces) (component.TracesProcessor, error) { +func createUnhealthyTracesProcessor(context.Context, processor.CreateSettings, component.Config, consumer.Traces) (processor.Traces, error) { return unhealthyProcessorInstance, nil } -func createUnhealthyMetricsProcessor(context.Context, component.ProcessorCreateSettings, component.ProcessorConfig, consumer.Metrics) (component.MetricsProcessor, error) { +func createUnhealthyMetricsProcessor(context.Context, processor.CreateSettings, component.Config, consumer.Metrics) (processor.Metrics, error) { return unhealthyProcessorInstance, nil } -func createUnhealthyLogsProcessor(context.Context, component.ProcessorCreateSettings, component.ProcessorConfig, consumer.Logs) (component.LogsProcessor, error) { +func createUnhealthyLogsProcessor(context.Context, processor.CreateSettings, component.Config, consumer.Logs) (processor.Logs, error) { return 
unhealthyProcessorInstance, nil } @@ -68,7 +63,8 @@ var unhealthyProcessorInstance = &unhealthyProcessor{ // unhealthyProcessor stores consumed traces and metrics for testing purposes. type unhealthyProcessor struct { - nopComponent + component.StartFunc + component.ShutdownFunc consumertest.Consumer } diff --git a/service/host.go b/service/host.go index ff0d6a07aa2..1f2ef006bf3 100644 --- a/service/host.go +++ b/service/host.go @@ -40,7 +40,7 @@ func (host *serviceHost) ReportFatalError(err error) { } func (host *serviceHost) ReportComponentStatus(source component.StatusSource, event *component.StatusEvent) { - host.extensions.NotifyComponentStatusChange(source, event) + host.serviceExtensions.NotifyComponentStatusChange(source, event) } func (host *serviceHost) GetFactory(kind component.Kind, componentType component.Type) component.Factory { diff --git a/service/internal/graph/graph.go b/service/internal/graph/graph.go index 5b8a404d66f..5f6338ec458 100644 --- a/service/internal/graph/graph.go +++ b/service/internal/graph/graph.go @@ -10,6 +10,7 @@ import ( "strings" "go.uber.org/multierr" + "go.uber.org/zap" "gonum.org/v1/gonum/graph" "gonum.org/v1/gonum/graph/simple" "gonum.org/v1/gonum/graph/topo" @@ -22,6 +23,8 @@ import ( "go.opentelemetry.io/collector/processor" "go.opentelemetry.io/collector/receiver" "go.opentelemetry.io/collector/service/internal/capabilityconsumer" + "go.opentelemetry.io/collector/service/internal/components" + "go.opentelemetry.io/collector/service/internal/servicehost" "go.opentelemetry.io/collector/service/pipelines" ) @@ -45,12 +48,16 @@ type Graph struct { // Keep track of how nodes relate to pipelines, so we can declare edges in the graph. 
pipelines map[component.ID]*pipelineNodes + + // Keep track of status source per node + statusSources map[int64]*statusReportingComponent } func Build(ctx context.Context, set Settings) (*Graph, error) { pipelines := &Graph{ componentGraph: simple.NewDirectedGraph(), pipelines: make(map[component.ID]*pipelineNodes, len(set.PipelineConfigs)), + statusSources: make(map[int64]*statusReportingComponent), } for pipelineID := range set.PipelineConfigs { pipelines.pipelines[pipelineID] = &pipelineNodes{ @@ -84,12 +91,21 @@ func (g *Graph) createNodes(set Settings) error { } rcvrNode := g.createReceiver(pipelineID.Type(), recvID) pipe.receivers[rcvrNode.ID()] = rcvrNode + g.statusSources[rcvrNode.ID()] = &statusReportingComponent{ + id: recvID, + kind: component.KindReceiver, + } } pipe.capabilitiesNode = newCapabilitiesNode(pipelineID) for _, procID := range pipelineCfg.Processors { - pipe.processors = append(pipe.processors, g.createProcessor(pipelineID, procID)) + procNode := g.createProcessor(pipelineID, procID) + pipe.processors = append(pipe.processors, procNode) + g.statusSources[procNode.ID()] = &statusReportingComponent{ + id: procID, + kind: component.KindProcessor, + } } pipe.fanOutNode = newFanOutNode(pipelineID) @@ -102,6 +118,10 @@ func (g *Graph) createNodes(set Settings) error { } expNode := g.createExporter(pipelineID.Type(), exprID) pipe.exporters[expNode.ID()] = expNode + g.statusSources[expNode.ID()] = &statusReportingComponent{ + id: expNode.componentID, + kind: component.KindExporter, + } } } @@ -158,6 +178,10 @@ func (g *Graph) createNodes(set Settings) error { connNode := g.createConnector(eID, rID, connID) g.pipelines[eID].exporters[connNode.ID()] = connNode g.pipelines[rID].receivers[connNode.ID()] = connNode + g.statusSources[connNode.ID()] = &statusReportingComponent{ + id: connNode.componentID, + kind: component.KindConnector, + } } } } @@ -316,7 +340,20 @@ type pipelineNodes struct { exporters map[int64]graph.Node } -func (g *Graph) 
StartAll(ctx context.Context, host component.Host) error { +type statusReportingComponent struct { + kind component.Kind + id component.ID +} + +func (s *statusReportingComponent) GetKind() component.Kind { + return s.kind +} + +func (s *statusReportingComponent) ID() component.ID { + return s.id +} + +func (g *Graph) StartAll(ctx context.Context, host servicehost.Host) error { nodes, err := topo.Sort(g.componentGraph) if err != nil { return err @@ -326,12 +363,27 @@ func (g *Graph) StartAll(ctx context.Context, host component.Host) error { // are started before upstream components. This ensures that each // component's consumer is ready to consume. for i := len(nodes) - 1; i >= 0; i-- { - comp, ok := nodes[i].(component.Component) + node := nodes[i] + comp, ok := node.(component.Component) + if !ok { // Skip capabilities/fanout nodes continue } - if compErr := comp.Start(ctx, host); compErr != nil { + + statusSource, ok := g.statusSources[node.ID()] + + if !ok { + // TODO: this should not happen. I'm not sure this code path will remain, but if it does + // we should ensure that we have a valid nop value for statusSource. 
+ } + + // note: there is no longer a per-component logger, hence the zap.NewNop() + // we should be able to remove the logger from components.NewHostWrapper as we deprecate + // and remove host.ReportFatalError + hostWrapper := components.NewHostWrapper(host, statusSource, zap.NewNop()) + + if compErr := comp.Start(ctx, hostWrapper); compErr != nil { return compErr } } diff --git a/service/internal/graph/graph_test.go b/service/internal/graph/graph_test.go index 1279ff145d5..9357f2a3d49 100644 --- a/service/internal/graph/graph_test.go +++ b/service/internal/graph/graph_test.go @@ -27,6 +27,7 @@ import ( "go.opentelemetry.io/collector/processor/processortest" "go.opentelemetry.io/collector/receiver" "go.opentelemetry.io/collector/receiver/receivertest" + "go.opentelemetry.io/collector/service/internal/servicehost" "go.opentelemetry.io/collector/service/internal/testcomponents" "go.opentelemetry.io/collector/service/pipelines" ) @@ -146,7 +147,7 @@ func TestGraphStartStop(t *testing.T) { pg.componentGraph.SetEdge(simple.Edge{F: f, T: t}) } - require.NoError(t, pg.StartAll(ctx, componenttest.NewNopHost())) + require.NoError(t, pg.StartAll(ctx, servicehost.NewNopHost())) for _, edge := range tt.edges { assert.Greater(t, ctx.order[edge[0]], ctx.order[edge[1]]) } @@ -173,7 +174,7 @@ func TestGraphStartStopCycle(t *testing.T) { pg.componentGraph.SetEdge(simple.Edge{F: c1, T: e1}) pg.componentGraph.SetEdge(simple.Edge{F: c1, T: p1}) // loop back - err := pg.StartAll(context.Background(), componenttest.NewNopHost()) + err := pg.StartAll(context.Background(), servicehost.NewNopHost()) assert.Error(t, err) assert.Contains(t, err.Error(), `topo: no topological ordering: cyclic components`) @@ -194,7 +195,7 @@ func TestGraphStartStopComponentError(t *testing.T) { shutdownErr: errors.New("bar"), }, }) - assert.EqualError(t, pg.StartAll(context.Background(), componenttest.NewNopHost()), "foo") + assert.EqualError(t, pg.StartAll(context.Background(), servicehost.NewNopHost()), 
"foo") assert.EqualError(t, pg.ShutdownAll(context.Background()), "bar") } @@ -667,7 +668,7 @@ func TestConnectorPipelinesGraph(t *testing.T) { assert.Equal(t, len(test.pipelineConfigs), len(pg.pipelines)) - assert.NoError(t, pg.StartAll(context.Background(), componenttest.NewNopHost())) + assert.NoError(t, pg.StartAll(context.Background(), servicehost.NewNopHost())) mutatingPipelines := make(map[component.ID]bool, len(test.pipelineConfigs)) @@ -2027,7 +2028,7 @@ func TestGraphFailToStartAndShutdown(t *testing.T) { } pipelines, err := Build(context.Background(), set) assert.NoError(t, err) - assert.Error(t, pipelines.StartAll(context.Background(), componenttest.NewNopHost())) + assert.Error(t, pipelines.StartAll(context.Background(), servicehost.NewNopHost())) assert.Error(t, pipelines.ShutdownAll(context.Background())) }) @@ -2041,7 +2042,7 @@ func TestGraphFailToStartAndShutdown(t *testing.T) { } pipelines, err := Build(context.Background(), set) assert.NoError(t, err) - assert.Error(t, pipelines.StartAll(context.Background(), componenttest.NewNopHost())) + assert.Error(t, pipelines.StartAll(context.Background(), servicehost.NewNopHost())) assert.Error(t, pipelines.ShutdownAll(context.Background())) }) @@ -2055,7 +2056,7 @@ func TestGraphFailToStartAndShutdown(t *testing.T) { } pipelines, err := Build(context.Background(), set) assert.NoError(t, err) - assert.Error(t, pipelines.StartAll(context.Background(), componenttest.NewNopHost())) + assert.Error(t, pipelines.StartAll(context.Background(), servicehost.NewNopHost())) assert.Error(t, pipelines.ShutdownAll(context.Background())) }) @@ -2075,7 +2076,7 @@ func TestGraphFailToStartAndShutdown(t *testing.T) { } pipelines, err := Build(context.Background(), set) assert.NoError(t, err) - assert.Error(t, pipelines.StartAll(context.Background(), componenttest.NewNopHost())) + assert.Error(t, pipelines.StartAll(context.Background(), servicehost.NewNopHost())) assert.Error(t, pipelines.ShutdownAll(context.Background())) 
}) } diff --git a/service/internal/servicehost/host.go b/service/internal/servicehost/host.go index 754f483e49c..fe8181d24e3 100644 --- a/service/internal/servicehost/host.go +++ b/service/internal/servicehost/host.go @@ -33,6 +33,6 @@ type Host interface { ReportFatalError(err error) GetFactory(kind component.Kind, componentType component.Type) component.Factory - GetExtensions() map[component.ID]component.Extension - GetExporters() map[component.DataType]map[component.ID]component.Exporter + GetExtensions() map[component.ID]component.Component + GetExporters() map[component.DataType]map[component.ID]component.Component } diff --git a/service/internal/servicehost/nop_host.go b/service/internal/servicehost/nop_host.go index 7a5717624fb..62c265f2a3e 100644 --- a/service/internal/servicehost/nop_host.go +++ b/service/internal/servicehost/nop_host.go @@ -31,11 +31,11 @@ func (n nopHost) GetFactory(kind component.Kind, componentType component.Type) c return nil } -func (n nopHost) GetExtensions() map[component.ID]component.Extension { +func (n nopHost) GetExtensions() map[component.ID]component.Component { return nil } -func (n nopHost) GetExporters() map[component.DataType]map[component.ID]component.Exporter { +func (n nopHost) GetExporters() map[component.DataType]map[component.ID]component.Component { return nil } diff --git a/service/service.go b/service/service.go index ed9540ec948..c17a029bed1 100644 --- a/service/service.go +++ b/service/service.go @@ -29,6 +29,7 @@ import ( "go.opentelemetry.io/collector/service/extensions" "go.opentelemetry.io/collector/service/internal/graph" "go.opentelemetry.io/collector/service/internal/proctelemetry" + "go.opentelemetry.io/collector/service/internal/servicehost" "go.opentelemetry.io/collector/service/telemetry" ) @@ -234,7 +235,7 @@ func (srv *Service) Logger() *zap.Logger { return srv.telemetrySettings.Logger } -func getBallastSize(host component.Host) uint64 { +func getBallastSize(host servicehost.Host) uint64 { for _, 
ext := range host.GetExtensions() { if bExt, ok := ext.(interface{ GetBallastSize() uint64 }); ok { return bExt.GetBallastSize() From a98e35b46baa0f7becfc6b7d88b046dd50889589 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Wed, 2 Aug 2023 08:38:58 -0700 Subject: [PATCH 03/40] Add changelog --- .chloggen/component-status.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100755 .chloggen/component-status.yaml diff --git a/.chloggen/component-status.yaml b/.chloggen/component-status.yaml new file mode 100755 index 00000000000..f920bb8c897 --- /dev/null +++ b/.chloggen/component-status.yaml @@ -0,0 +1,16 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver) +component: core + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Adds the ability for components to report status and for extensions to subscribe to status events by implementing an optional StatusWatcher interface. + +# One or more tracking issues or pull requests related to the change +issues: [7682] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. 
+subtext: From 1e77eb57046b7ae9aa4b469a60bfd662a04ce787 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Wed, 2 Aug 2023 14:02:38 -0700 Subject: [PATCH 04/40] Lint --- component/componenttest/nop_host.go | 2 +- component/status.go | 25 +++++------------ component/status_test.go | 28 ++++++++++++++++--- .../extensiontest/statuswatcher_extension.go | 13 +-------- .../processortest/unhealthy_processor.go | 17 ++--------- service/host.go | 3 +- service/internal/components/host_wrapper.go | 4 +-- service/internal/graph/graph.go | 11 +------- service/internal/servicehost/host.go | 15 ++-------- service/internal/servicehost/nop_host.go | 25 +++++------------ 10 files changed, 50 insertions(+), 93 deletions(-) diff --git a/component/componenttest/nop_host.go b/component/componenttest/nop_host.go index 04c67080df9..649e06884b0 100644 --- a/component/componenttest/nop_host.go +++ b/component/componenttest/nop_host.go @@ -17,7 +17,7 @@ func NewNopHost() component.Host { func (nh *nopHost) ReportFatalError(_ error) {} -func (hw *nopHost) ReportComponentStatus(event *component.StatusEvent) {} +func (nh *nopHost) ReportComponentStatus(_ *component.StatusEvent) {} func (nh *nopHost) GetFactory(_ component.Kind, _ component.Type) component.Factory { return nil diff --git a/component/status.go b/component/status.go index 42de901445f..d797a16b3e7 100644 --- a/component/status.go +++ b/component/status.go @@ -1,18 +1,7 @@ -// Copyright The OpenTelemetry Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 -package component +package component // import "go.opentelemetry.io/collector/component" import ( "errors" @@ -45,12 +34,12 @@ func (ev *StatusEvent) Err() error { return ev.err } -// statusEventOption applies options to a StatusEvent. -type statusEventOption func(*StatusEvent) error +// StatusEventOption applies options to a StatusEvent. +type StatusEventOption func(*StatusEvent) error // WithError sets the error object of the Event. It is optional // and should only be applied to an Event of type ComponentError. -func WithError(err error) statusEventOption { +func WithError(err error) StatusEventOption { return func(o *StatusEvent) error { if o.status == StatusOK { return errors.New("event with ComponentOK cannot have an error") @@ -64,7 +53,7 @@ func WithError(err error) statusEventOption { // options. Will return an error if an error is provided for a non-error event // type (status.ComponentOK). // If the timestamp is not provided will set it to time.Now(). 
-func NewStatusEvent(status Status, options ...statusEventOption) (*StatusEvent, error) { +func NewStatusEvent(status Status, options ...StatusEventOption) (*StatusEvent, error) { ev := StatusEvent{ status: status, } diff --git a/component/status_test.go b/component/status_test.go index 65603b04555..1faed3e3328 100644 --- a/component/status_test.go +++ b/component/status_test.go @@ -1,11 +1,31 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 package component import ( - "fmt" + "errors" "testing" - "unsafe" + + "github.com/stretchr/testify/require" ) -func TestStatusEventSize(t *testing.T) { - fmt.Printf("StatusEvent size=%d", unsafe.Sizeof(StatusEvent{})) +func TestStatusEventOK(t *testing.T) { + event, err := NewStatusEvent(StatusOK) + require.NoError(t, err) + require.Equal(t, StatusOK, event.Status()) + require.Nil(t, event.Err()) +} + +func TestStatusEventOKWithError(t *testing.T) { + event, err := NewStatusEvent(StatusOK, WithError(errors.New("an error"))) + require.Error(t, err) + require.Nil(t, event) +} + +func TestStatusEventError(t *testing.T) { + eventErr := errors.New("an error") + event, err := NewStatusEvent(StatusError, WithError(eventErr)) + require.NoError(t, err) + require.Equal(t, StatusError, event.Status()) + require.Equal(t, eventErr, event.Err()) } diff --git a/extension/extensiontest/statuswatcher_extension.go b/extension/extensiontest/statuswatcher_extension.go index ee0280bf7c1..cba925f4435 100644 --- a/extension/extensiontest/statuswatcher_extension.go +++ b/extension/extensiontest/statuswatcher_extension.go @@ -1,16 +1,5 @@ // Copyright The OpenTelemetry Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// SPDX-License-Identifier: Apache-2.0 package extensiontest // import "go.opentelemetry.io/collector/extension/extensiontest" diff --git a/processor/processortest/unhealthy_processor.go b/processor/processortest/unhealthy_processor.go index 64a52d1d570..3bf6e86e790 100644 --- a/processor/processortest/unhealthy_processor.go +++ b/processor/processortest/unhealthy_processor.go @@ -1,18 +1,7 @@ // Copyright The OpenTelemetry Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
+// SPDX-License-Identifier: Apache-2.0 -package processortest // import "go.opentelemetry.io/collector/component/componenttest" +package processortest // import "go.opentelemetry.io/collector/processor/processortest" import ( "context" @@ -68,7 +57,7 @@ type unhealthyProcessor struct { consumertest.Consumer } -func (unhealthyProcessor) Start(ctx context.Context, host component.Host) error { +func (unhealthyProcessor) Start(_ context.Context, host component.Host) error { go func() { evt, _ := component.NewStatusEvent(component.StatusError) host.ReportComponentStatus(evt) diff --git a/service/host.go b/service/host.go index 1f2ef006bf3..1a3d9bba261 100644 --- a/service/host.go +++ b/service/host.go @@ -40,7 +40,8 @@ func (host *serviceHost) ReportFatalError(err error) { } func (host *serviceHost) ReportComponentStatus(source component.StatusSource, event *component.StatusEvent) { - host.serviceExtensions.NotifyComponentStatusChange(source, event) + // TODO: What should we do if there is an error notifying here? + host.serviceExtensions.NotifyComponentStatusChange(source, event) //nolint:errcheck } func (host *serviceHost) GetFactory(kind component.Kind, componentType component.Type) component.Factory { diff --git a/service/internal/components/host_wrapper.go b/service/internal/components/host_wrapper.go index cd6397164bd..245be162a65 100644 --- a/service/internal/components/host_wrapper.go +++ b/service/internal/components/host_wrapper.go @@ -31,11 +31,9 @@ func NewHostWrapper(host servicehost.Host, component component.StatusSource, log func (hw *hostWrapper) ReportFatalError(err error) { // The logger from the built component already identifies the component. 
hw.Logger.Error("Component fatal error", zap.Error(err)) - hw.Host.ReportFatalError(err) + hw.Host.ReportFatalError(err) // nolint:staticcheck } -var emptyComponentID = component.ID{} - func (hw *hostWrapper) ReportComponentStatus(event *component.StatusEvent) { hw.Host.ReportComponentStatus(hw.component, event) } diff --git a/service/internal/graph/graph.go b/service/internal/graph/graph.go index 5f6338ec458..41e990295fc 100644 --- a/service/internal/graph/graph.go +++ b/service/internal/graph/graph.go @@ -371,16 +371,7 @@ func (g *Graph) StartAll(ctx context.Context, host servicehost.Host) error { continue } - statusSource, ok := g.statusSources[node.ID()] - - if !ok { - // TODO: this should not happen. I'm not sure this code path will remain, but if it does - // we should ensure that we have a valid nop value for statusSource. - } - - // note: there is no longer a per-component logger, hence the zap.NewNop() - // we should be able to remove the logger from components.NewHostWrapper as we deprecate - // and remove host.ReportFatalError + statusSource := g.statusSources[node.ID()] hostWrapper := components.NewHostWrapper(host, statusSource, zap.NewNop()) if compErr := comp.Start(ctx, hostWrapper); compErr != nil { diff --git a/service/internal/servicehost/host.go b/service/internal/servicehost/host.go index fe8181d24e3..74406735620 100644 --- a/service/internal/servicehost/host.go +++ b/service/internal/servicehost/host.go @@ -1,18 +1,9 @@ // Copyright The OpenTelemetry Authors // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 -package servicehost +package servicehost // import "go.opentelemetry.io/collector/service/internal/servicehost" import ( "go.opentelemetry.io/collector/component" diff --git a/service/internal/servicehost/nop_host.go b/service/internal/servicehost/nop_host.go index 62c265f2a3e..926ea122a5a 100644 --- a/service/internal/servicehost/nop_host.go +++ b/service/internal/servicehost/nop_host.go @@ -1,18 +1,7 @@ -// Copyright The OpenTelemetry Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package servicehost +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package servicehost // import "go.opentelemetry.io/collector/service/internal/servicehost" import ( "go.opentelemetry.io/collector/component" @@ -21,13 +10,13 @@ import ( // nopHost mocks a receiver.ReceiverHost for test purposes. 
type nopHost struct{} -func (n nopHost) ReportFatalError(err error) { +func (n nopHost) ReportFatalError(_ error) { } -func (n nopHost) ReportComponentStatus(source component.StatusSource, event *component.StatusEvent) { +func (n nopHost) ReportComponentStatus(_ component.StatusSource, _ *component.StatusEvent) { } -func (n nopHost) GetFactory(kind component.Kind, componentType component.Type) component.Factory { +func (n nopHost) GetFactory(_ component.Kind, _ component.Type) component.Factory { return nil } From 8cf3ece7406fbaed647015f07dbc3d84eab12b40 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Wed, 2 Aug 2023 16:58:35 -0700 Subject: [PATCH 05/40] Implement and use component.GlobalID in place of component.StatusSource This is an implementation based on the thread starting with this comment: https://github.com/open-telemetry/opentelemetry-collector/pull/6560#discussion_r1023337517 --- component/component.go | 7 +++ component/status.go | 8 +--- .../extensiontest/statuswatcher_extension.go | 6 +-- otelcol/collector_test.go | 6 +-- service/extensions/extensions.go | 21 +++------ service/host.go | 2 +- service/internal/components/host_wrapper.go | 4 +- service/internal/graph/graph.go | 46 +++++++------------ service/internal/servicehost/host.go | 2 +- service/internal/servicehost/nop_host.go | 2 +- 10 files changed, 42 insertions(+), 62 deletions(-) diff --git a/component/component.go b/component/component.go index 9a6a95d798a..d58a7ff0954 100644 --- a/component/component.go +++ b/component/component.go @@ -175,3 +175,10 @@ type CreateDefaultConfigFunc func() Config func (f CreateDefaultConfigFunc) CreateDefaultConfig() Config { return f() } + +// GlobalID uniquely identifies a component +type GlobalID struct { + ID ID + Kind Kind + PipelineID ID // Not empty only if the Kind is Processor +} diff --git a/component/status.go b/component/status.go index d797a16b3e7..dcf001ec6ce 100644 --- a/component/status.go +++ b/component/status.go @@ -14,12 +14,6 @@ const ( 
StatusError ) -// StatusSource component that reports a status about itself. -// The implementation of this interface must be comparable to be useful as a map key. -type StatusSource interface { - ID() ID -} - type StatusEvent struct { status Status err error @@ -75,5 +69,5 @@ type StatusWatcher interface { // Extensions that implement this interface must be ready that the ComponentStatusChanged // may be called before, after or concurrently with Component.Shutdown() call. // The function may be called concurrently with itself. - ComponentStatusChanged(source StatusSource, event *StatusEvent) + ComponentStatusChanged(source *GlobalID, event *StatusEvent) } diff --git a/extension/extensiontest/statuswatcher_extension.go b/extension/extensiontest/statuswatcher_extension.go index cba925f4435..aaa1b358beb 100644 --- a/extension/extensiontest/statuswatcher_extension.go +++ b/extension/extensiontest/statuswatcher_extension.go @@ -21,7 +21,7 @@ func NewStatusWatcherExtensionCreateSettings() extension.CreateSettings { // NewStatusWatcherExtensionFactory returns a component.ExtensionFactory that constructs nop extensions. 
func NewStatusWatcherExtensionFactory( - onStatusChanged func(source component.StatusSource, event *component.StatusEvent), + onStatusChanged func(source *component.GlobalID, event *component.StatusEvent), ) extension.Factory { return extension.NewFactory( "statuswatcher", @@ -38,9 +38,9 @@ func NewStatusWatcherExtensionFactory( type statusWatcherExtension struct { component.StartFunc component.ShutdownFunc - onStatusChanged func(source component.StatusSource, event *component.StatusEvent) + onStatusChanged func(source *component.GlobalID, event *component.StatusEvent) } -func (e statusWatcherExtension) ComponentStatusChanged(source component.StatusSource, event *component.StatusEvent) { +func (e statusWatcherExtension) ComponentStatusChanged(source *component.GlobalID, event *component.StatusEvent) { e.onStatusChanged(source, event) } diff --git a/otelcol/collector_test.go b/otelcol/collector_test.go index c84eb820ef6..a9b31626976 100644 --- a/otelcol/collector_test.go +++ b/otelcol/collector_test.go @@ -163,9 +163,9 @@ func TestComponentStatusWatcher(t *testing.T) { factories.Processors[unhealthyProcessorFactory.Type()] = unhealthyProcessorFactory // Keep track of all status changes in a map. 
- changedComponents := map[component.StatusSource]component.Status{} + changedComponents := map[*component.GlobalID]component.Status{} var mux sync.Mutex - onStatusChanged := func(source component.StatusSource, event *component.StatusEvent) { + onStatusChanged := func(source *component.GlobalID, event *component.StatusEvent) { mux.Lock() defer mux.Unlock() changedComponents[source] = event.Status() @@ -199,7 +199,7 @@ func TestComponentStatusWatcher(t *testing.T) { for k, v := range changedComponents { // All processors must report a status change with the same ID - assert.EqualValues(t, component.NewID(unhealthyProcessorFactory.Type()), k.ID()) + assert.EqualValues(t, component.NewID(unhealthyProcessorFactory.Type()), k.ID) // And all must be in StatusError assert.EqualValues(t, component.StatusError, v) } diff --git a/service/extensions/extensions.go b/service/extensions/extensions.go index 7d036c6743e..e27fe5248ac 100644 --- a/service/extensions/extensions.go +++ b/service/extensions/extensions.go @@ -27,26 +27,17 @@ type Extensions struct { extMap map[component.ID]extension.Extension } -type statusReportingExtension struct { - id component.ID -} - -func (s *statusReportingExtension) GetKind() component.Kind { - return component.KindExtension -} - -func (s *statusReportingExtension) ID() component.ID { - return s.id -} - // Start starts all extensions. 
func (bes *Extensions) Start(ctx context.Context, host servicehost.Host) error { bes.telemetry.Logger.Info("Starting extensions...") for extID, ext := range bes.extMap { extLogger := components.ExtensionLogger(bes.telemetry.Logger, extID) extLogger.Info("Extension is starting...") - statusSource := &statusReportingExtension{extID} - if err := ext.Start(ctx, components.NewHostWrapper(host, statusSource, extLogger)); err != nil { + globalID := &component.GlobalID{ + ID: extID, + Kind: component.KindExtension, + } + if err := ext.Start(ctx, components.NewHostWrapper(host, globalID, extLogger)); err != nil { return err } extLogger.Info("Extension started.") @@ -98,7 +89,7 @@ func (bes *Extensions) NotifyConfig(ctx context.Context, conf *confmap.Conf) err return errs } -func (bes *Extensions) NotifyComponentStatusChange(source component.StatusSource, event *component.StatusEvent) error { +func (bes *Extensions) NotifyComponentStatusChange(source *component.GlobalID, event *component.StatusEvent) error { var errs error for _, ext := range bes.extMap { if pw, ok := ext.(component.StatusWatcher); ok { diff --git a/service/host.go b/service/host.go index 1a3d9bba261..dbe188f191d 100644 --- a/service/host.go +++ b/service/host.go @@ -39,7 +39,7 @@ func (host *serviceHost) ReportFatalError(err error) { host.asyncErrorChannel <- err } -func (host *serviceHost) ReportComponentStatus(source component.StatusSource, event *component.StatusEvent) { +func (host *serviceHost) ReportComponentStatus(source *component.GlobalID, event *component.StatusEvent) { // TODO: What should we do if there is an error notifying here? 
host.serviceExtensions.NotifyComponentStatusChange(source, event) //nolint:errcheck } diff --git a/service/internal/components/host_wrapper.go b/service/internal/components/host_wrapper.go index 245be162a65..a020aa5d96e 100644 --- a/service/internal/components/host_wrapper.go +++ b/service/internal/components/host_wrapper.go @@ -16,11 +16,11 @@ import ( // TODO: rename this to componentHost or hostComponentConnector to better reflect the purpose. type hostWrapper struct { servicehost.Host - component component.StatusSource + component *component.GlobalID *zap.Logger } -func NewHostWrapper(host servicehost.Host, component component.StatusSource, logger *zap.Logger) component.Host { +func NewHostWrapper(host servicehost.Host, component *component.GlobalID, logger *zap.Logger) component.Host { return &hostWrapper{ host, component, diff --git a/service/internal/graph/graph.go b/service/internal/graph/graph.go index 41e990295fc..036e5938243 100644 --- a/service/internal/graph/graph.go +++ b/service/internal/graph/graph.go @@ -50,14 +50,14 @@ type Graph struct { pipelines map[component.ID]*pipelineNodes // Keep track of status source per node - statusSources map[int64]*statusReportingComponent + globalIDs map[int64]*component.GlobalID } func Build(ctx context.Context, set Settings) (*Graph, error) { pipelines := &Graph{ componentGraph: simple.NewDirectedGraph(), pipelines: make(map[component.ID]*pipelineNodes, len(set.PipelineConfigs)), - statusSources: make(map[int64]*statusReportingComponent), + globalIDs: make(map[int64]*component.GlobalID), } for pipelineID := range set.PipelineConfigs { pipelines.pipelines[pipelineID] = &pipelineNodes{ @@ -91,9 +91,9 @@ func (g *Graph) createNodes(set Settings) error { } rcvrNode := g.createReceiver(pipelineID.Type(), recvID) pipe.receivers[rcvrNode.ID()] = rcvrNode - g.statusSources[rcvrNode.ID()] = &statusReportingComponent{ - id: recvID, - kind: component.KindReceiver, + g.globalIDs[rcvrNode.ID()] = &component.GlobalID{ + ID: 
recvID, + Kind: component.KindReceiver, } } @@ -102,9 +102,10 @@ func (g *Graph) createNodes(set Settings) error { for _, procID := range pipelineCfg.Processors { procNode := g.createProcessor(pipelineID, procID) pipe.processors = append(pipe.processors, procNode) - g.statusSources[procNode.ID()] = &statusReportingComponent{ - id: procID, - kind: component.KindProcessor, + g.globalIDs[procNode.ID()] = &component.GlobalID{ + ID: procID, + Kind: component.KindProcessor, + PipelineID: pipelineID, } } @@ -118,9 +119,9 @@ func (g *Graph) createNodes(set Settings) error { } expNode := g.createExporter(pipelineID.Type(), exprID) pipe.exporters[expNode.ID()] = expNode - g.statusSources[expNode.ID()] = &statusReportingComponent{ - id: expNode.componentID, - kind: component.KindExporter, + g.globalIDs[expNode.ID()] = &component.GlobalID{ + ID: expNode.componentID, + Kind: component.KindExporter, } } } @@ -178,9 +179,9 @@ func (g *Graph) createNodes(set Settings) error { connNode := g.createConnector(eID, rID, connID) g.pipelines[eID].exporters[connNode.ID()] = connNode g.pipelines[rID].receivers[connNode.ID()] = connNode - g.statusSources[connNode.ID()] = &statusReportingComponent{ - id: connNode.componentID, - kind: component.KindConnector, + g.globalIDs[connNode.ID()] = &component.GlobalID{ + ID: connNode.componentID, + Kind: component.KindConnector, } } } @@ -340,19 +341,6 @@ type pipelineNodes struct { exporters map[int64]graph.Node } -type statusReportingComponent struct { - kind component.Kind - id component.ID -} - -func (s *statusReportingComponent) GetKind() component.Kind { - return s.kind -} - -func (s *statusReportingComponent) ID() component.ID { - return s.id -} - func (g *Graph) StartAll(ctx context.Context, host servicehost.Host) error { nodes, err := topo.Sort(g.componentGraph) if err != nil { @@ -371,8 +359,8 @@ func (g *Graph) StartAll(ctx context.Context, host servicehost.Host) error { continue } - statusSource := g.statusSources[node.ID()] - hostWrapper 
:= components.NewHostWrapper(host, statusSource, zap.NewNop()) + globalID := g.globalIDs[node.ID()] + hostWrapper := components.NewHostWrapper(host, globalID, zap.NewNop()) if compErr := comp.Start(ctx, hostWrapper); compErr != nil { return compErr diff --git a/service/internal/servicehost/host.go b/service/internal/servicehost/host.go index 74406735620..78100565208 100644 --- a/service/internal/servicehost/host.go +++ b/service/internal/servicehost/host.go @@ -16,7 +16,7 @@ type Host interface { // ReportComponentStatus is used to communicate the status of a source component to the Host. // The Host implementations will broadcast this information to interested parties via // StatusWatcher interface. - ReportComponentStatus(source component.StatusSource, event *component.StatusEvent) + ReportComponentStatus(source *component.GlobalID, event *component.StatusEvent) // See component.Host for the documentation of the rest of the functions. diff --git a/service/internal/servicehost/nop_host.go b/service/internal/servicehost/nop_host.go index 926ea122a5a..9fd802e4e4c 100644 --- a/service/internal/servicehost/nop_host.go +++ b/service/internal/servicehost/nop_host.go @@ -13,7 +13,7 @@ type nopHost struct{} func (n nopHost) ReportFatalError(_ error) { } -func (n nopHost) ReportComponentStatus(_ component.StatusSource, _ *component.StatusEvent) { +func (n nopHost) ReportComponentStatus(_ *component.GlobalID, _ *component.StatusEvent) { } func (n nopHost) GetFactory(_ component.Kind, _ component.Type) component.Factory { From 35e703f7caaa68c8b28cb71901144179085a1775 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Tue, 15 Aug 2023 16:48:18 -0700 Subject: [PATCH 06/40] Replace GlobalID with InstanceID --- component/component.go | 10 +++--- component/status.go | 2 +- .../extensiontest/statuswatcher_extension.go | 6 ++-- otelcol/collector_test.go | 4 +-- service/extensions/extensions.go | 10 +++--- service/host.go | 2 +- service/internal/components/host_wrapper.go | 4 +-- 
service/internal/graph/graph.go | 32 ++++++++++++------- service/internal/servicehost/host.go | 2 +- service/internal/servicehost/nop_host.go | 2 +- 10 files changed, 42 insertions(+), 32 deletions(-) diff --git a/component/component.go b/component/component.go index d58a7ff0954..3b3fe3fc7cf 100644 --- a/component/component.go +++ b/component/component.go @@ -176,9 +176,9 @@ func (f CreateDefaultConfigFunc) CreateDefaultConfig() Config { return f() } -// GlobalID uniquely identifies a component -type GlobalID struct { - ID ID - Kind Kind - PipelineID ID // Not empty only if the Kind is Processor +// InstanceID uniquely identifies a component instance +type InstanceID struct { + ID ID + Kind Kind + PipelineIDs map[ID]struct{} } diff --git a/component/status.go b/component/status.go index dcf001ec6ce..91aed56236b 100644 --- a/component/status.go +++ b/component/status.go @@ -69,5 +69,5 @@ type StatusWatcher interface { // Extensions that implement this interface must be ready that the ComponentStatusChanged // may be called before, after or concurrently with Component.Shutdown() call. // The function may be called concurrently with itself. - ComponentStatusChanged(source *GlobalID, event *StatusEvent) + ComponentStatusChanged(source *InstanceID, event *StatusEvent) } diff --git a/extension/extensiontest/statuswatcher_extension.go b/extension/extensiontest/statuswatcher_extension.go index aaa1b358beb..92ea355839a 100644 --- a/extension/extensiontest/statuswatcher_extension.go +++ b/extension/extensiontest/statuswatcher_extension.go @@ -21,7 +21,7 @@ func NewStatusWatcherExtensionCreateSettings() extension.CreateSettings { // NewStatusWatcherExtensionFactory returns a component.ExtensionFactory that constructs nop extensions. 
func NewStatusWatcherExtensionFactory( - onStatusChanged func(source *component.GlobalID, event *component.StatusEvent), + onStatusChanged func(source *component.InstanceID, event *component.StatusEvent), ) extension.Factory { return extension.NewFactory( "statuswatcher", @@ -38,9 +38,9 @@ func NewStatusWatcherExtensionFactory( type statusWatcherExtension struct { component.StartFunc component.ShutdownFunc - onStatusChanged func(source *component.GlobalID, event *component.StatusEvent) + onStatusChanged func(source *component.InstanceID, event *component.StatusEvent) } -func (e statusWatcherExtension) ComponentStatusChanged(source *component.GlobalID, event *component.StatusEvent) { +func (e statusWatcherExtension) ComponentStatusChanged(source *component.InstanceID, event *component.StatusEvent) { e.onStatusChanged(source, event) } diff --git a/otelcol/collector_test.go b/otelcol/collector_test.go index a9b31626976..17174cd10d6 100644 --- a/otelcol/collector_test.go +++ b/otelcol/collector_test.go @@ -163,9 +163,9 @@ func TestComponentStatusWatcher(t *testing.T) { factories.Processors[unhealthyProcessorFactory.Type()] = unhealthyProcessorFactory // Keep track of all status changes in a map. 
- changedComponents := map[*component.GlobalID]component.Status{} + changedComponents := map[*component.InstanceID]component.Status{} var mux sync.Mutex - onStatusChanged := func(source *component.GlobalID, event *component.StatusEvent) { + onStatusChanged := func(source *component.InstanceID, event *component.StatusEvent) { mux.Lock() defer mux.Unlock() changedComponents[source] = event.Status() diff --git a/service/extensions/extensions.go b/service/extensions/extensions.go index e27fe5248ac..14bc9ee823b 100644 --- a/service/extensions/extensions.go +++ b/service/extensions/extensions.go @@ -33,11 +33,11 @@ func (bes *Extensions) Start(ctx context.Context, host servicehost.Host) error { for extID, ext := range bes.extMap { extLogger := components.ExtensionLogger(bes.telemetry.Logger, extID) extLogger.Info("Extension is starting...") - globalID := &component.GlobalID{ + instanceID := &component.InstanceID{ ID: extID, Kind: component.KindExtension, } - if err := ext.Start(ctx, components.NewHostWrapper(host, globalID, extLogger)); err != nil { + if err := ext.Start(ctx, components.NewHostWrapper(host, instanceID, extLogger)); err != nil { return err } extLogger.Info("Extension started.") @@ -89,11 +89,11 @@ func (bes *Extensions) NotifyConfig(ctx context.Context, conf *confmap.Conf) err return errs } -func (bes *Extensions) NotifyComponentStatusChange(source *component.GlobalID, event *component.StatusEvent) error { +func (bes *Extensions) NotifyComponentStatusChange(source *component.InstanceID, event *component.StatusEvent) error { var errs error for _, ext := range bes.extMap { - if pw, ok := ext.(component.StatusWatcher); ok { - pw.ComponentStatusChanged(source, event) + if sw, ok := ext.(component.StatusWatcher); ok { + sw.ComponentStatusChanged(source, event) } } return errs diff --git a/service/host.go b/service/host.go index dbe188f191d..c11ce0c3521 100644 --- a/service/host.go +++ b/service/host.go @@ -39,7 +39,7 @@ func (host *serviceHost) 
ReportFatalError(err error) { host.asyncErrorChannel <- err } -func (host *serviceHost) ReportComponentStatus(source *component.GlobalID, event *component.StatusEvent) { +func (host *serviceHost) ReportComponentStatus(source *component.InstanceID, event *component.StatusEvent) { // TODO: What should we do if there is an error notifying here? host.serviceExtensions.NotifyComponentStatusChange(source, event) //nolint:errcheck } diff --git a/service/internal/components/host_wrapper.go b/service/internal/components/host_wrapper.go index a020aa5d96e..056664673af 100644 --- a/service/internal/components/host_wrapper.go +++ b/service/internal/components/host_wrapper.go @@ -16,11 +16,11 @@ import ( // TODO: rename this to componentHost or hostComponentConnector to better reflect the purpose. type hostWrapper struct { servicehost.Host - component *component.GlobalID + component *component.InstanceID *zap.Logger } -func NewHostWrapper(host servicehost.Host, component *component.GlobalID, logger *zap.Logger) component.Host { +func NewHostWrapper(host servicehost.Host, component *component.InstanceID, logger *zap.Logger) component.Host { return &hostWrapper{ host, component, diff --git a/service/internal/graph/graph.go b/service/internal/graph/graph.go index 036e5938243..71bad2b5acf 100644 --- a/service/internal/graph/graph.go +++ b/service/internal/graph/graph.go @@ -50,14 +50,14 @@ type Graph struct { pipelines map[component.ID]*pipelineNodes // Keep track of status source per node - globalIDs map[int64]*component.GlobalID + instanceIDs map[int64]*component.InstanceID } func Build(ctx context.Context, set Settings) (*Graph, error) { pipelines := &Graph{ componentGraph: simple.NewDirectedGraph(), pipelines: make(map[component.ID]*pipelineNodes, len(set.PipelineConfigs)), - globalIDs: make(map[int64]*component.GlobalID), + instanceIDs: make(map[int64]*component.InstanceID), } for pipelineID := range set.PipelineConfigs { pipelines.pipelines[pipelineID] = &pipelineNodes{ @@ 
-91,7 +91,7 @@ func (g *Graph) createNodes(set Settings) error { } rcvrNode := g.createReceiver(pipelineID.Type(), recvID) pipe.receivers[rcvrNode.ID()] = rcvrNode - g.globalIDs[rcvrNode.ID()] = &component.GlobalID{ + g.instanceIDs[rcvrNode.ID()] = &component.InstanceID{ ID: recvID, Kind: component.KindReceiver, } @@ -102,10 +102,12 @@ func (g *Graph) createNodes(set Settings) error { for _, procID := range pipelineCfg.Processors { procNode := g.createProcessor(pipelineID, procID) pipe.processors = append(pipe.processors, procNode) - g.globalIDs[procNode.ID()] = &component.GlobalID{ - ID: procID, - Kind: component.KindProcessor, - PipelineID: pipelineID, + g.instanceIDs[procNode.ID()] = &component.InstanceID{ + ID: procID, + Kind: component.KindProcessor, + PipelineIDs: map[component.ID]struct{}{ + pipelineID: {}, + }, } } @@ -119,9 +121,12 @@ func (g *Graph) createNodes(set Settings) error { } expNode := g.createExporter(pipelineID.Type(), exprID) pipe.exporters[expNode.ID()] = expNode - g.globalIDs[expNode.ID()] = &component.GlobalID{ + g.instanceIDs[expNode.ID()] = &component.InstanceID{ ID: expNode.componentID, Kind: component.KindExporter, + PipelineIDs: map[component.ID]struct{}{ + pipelineID: {}, + }, } } } @@ -177,11 +182,16 @@ func (g *Graph) createNodes(set Settings) error { continue } connNode := g.createConnector(eID, rID, connID) + g.pipelines[eID].exporters[connNode.ID()] = connNode g.pipelines[rID].receivers[connNode.ID()] = connNode - g.globalIDs[connNode.ID()] = &component.GlobalID{ + g.instanceIDs[connNode.ID()] = &component.InstanceID{ ID: connNode.componentID, Kind: component.KindConnector, + PipelineIDs: map[component.ID]struct{}{ + eID: {}, + rID: {}, + }, } } } @@ -359,8 +369,8 @@ func (g *Graph) StartAll(ctx context.Context, host servicehost.Host) error { continue } - globalID := g.globalIDs[node.ID()] - hostWrapper := components.NewHostWrapper(host, globalID, zap.NewNop()) + instanceID := g.instanceIDs[node.ID()] + hostWrapper := 
components.NewHostWrapper(host, instanceID, zap.NewNop()) if compErr := comp.Start(ctx, hostWrapper); compErr != nil { return compErr diff --git a/service/internal/servicehost/host.go b/service/internal/servicehost/host.go index 78100565208..ffbfeb23054 100644 --- a/service/internal/servicehost/host.go +++ b/service/internal/servicehost/host.go @@ -16,7 +16,7 @@ type Host interface { // ReportComponentStatus is used to communicate the status of a source component to the Host. // The Host implementations will broadcast this information to interested parties via // StatusWatcher interface. - ReportComponentStatus(source *component.GlobalID, event *component.StatusEvent) + ReportComponentStatus(source *component.InstanceID, event *component.StatusEvent) // See component.Host for the documentation of the rest of the functions. diff --git a/service/internal/servicehost/nop_host.go b/service/internal/servicehost/nop_host.go index 9fd802e4e4c..16f5871ef30 100644 --- a/service/internal/servicehost/nop_host.go +++ b/service/internal/servicehost/nop_host.go @@ -13,7 +13,7 @@ type nopHost struct{} func (n nopHost) ReportFatalError(_ error) { } -func (n nopHost) ReportComponentStatus(_ *component.GlobalID, _ *component.StatusEvent) { +func (n nopHost) ReportComponentStatus(_ *component.InstanceID, _ *component.StatusEvent) { } func (n nopHost) GetFactory(_ component.Kind, _ component.Type) component.Factory { From 124d20e0cfddb66fa744c935b6942243d190b765 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Mon, 21 Aug 2023 18:27:20 -0700 Subject: [PATCH 07/40] Status implemented as a finite state machine --- component/componenttest/nop_host.go | 2 +- component/host.go | 2 +- component/status.go | 80 +++++++++- component/status_test.go | 78 ++++++++-- otelcol/collector_test.go | 6 +- .../processortest/unhealthy_processor.go | 3 +- service/internal/components/host_wrapper.go | 21 ++- .../internal/components/host_wrapper_test.go | 7 +- service/internal/status/status.go | 111 
++++++++++++++ service/internal/status/status_test.go | 143 ++++++++++++++++++ 10 files changed, 414 insertions(+), 39 deletions(-) create mode 100644 service/internal/status/status.go create mode 100644 service/internal/status/status_test.go diff --git a/component/componenttest/nop_host.go b/component/componenttest/nop_host.go index 649e06884b0..3518d584ac0 100644 --- a/component/componenttest/nop_host.go +++ b/component/componenttest/nop_host.go @@ -17,7 +17,7 @@ func NewNopHost() component.Host { func (nh *nopHost) ReportFatalError(_ error) {} -func (nh *nopHost) ReportComponentStatus(_ *component.StatusEvent) {} +func (nh *nopHost) ReportComponentStatus(_ component.Status, _ ...component.StatusEventOption) {} func (nh *nopHost) GetFactory(_ component.Kind, _ component.Type) component.Factory { return nil diff --git a/component/host.go b/component/host.go index 79e653a19a2..3939ce57a3f 100644 --- a/component/host.go +++ b/component/host.go @@ -21,7 +21,7 @@ type Host interface { // May be called by the component any time after Component.Start is called or while // Component.Start call is executing. // May be called concurrently with itself. - ReportComponentStatus(event *StatusEvent) + ReportComponentStatus(status Status, options ...StatusEventOption) // GetFactory of the specified kind. Returns the factory for a component type. // This allows components to create other components. 
For example: diff --git a/component/status.go b/component/status.go index 91aed56236b..cb56812ad64 100644 --- a/component/status.go +++ b/component/status.go @@ -5,44 +5,104 @@ package component // import "go.opentelemetry.io/collector/component" import ( "errors" + "fmt" + "time" ) type Status int32 +// Enumeration of possible component statuses const ( - StatusOK Status = iota - StatusError + StatusStarting Status = iota + StatusOK + StatusRecoverableError + StatusPermanentError + StatusFatalError + StatusStopping + StatusStopped ) +// String returns a string representation of a Status +func (s Status) String() string { + switch s { + case StatusStarting: + return "StatusStarting" + case StatusOK: + return "StatusOK" + case StatusRecoverableError: + return "StatusRecoverableError" + case StatusPermanentError: + return "StatusPermanentError" + case StatusFatalError: + return "StatusFatalError" + case StatusStopping: + return "StatusStopping" + case StatusStopped: + return "StatusStopped" + } + return "StatusUnknown" +} + +// errorStatuses is a set of statuses that can have associated errors +var errorStatuses = map[Status]struct{}{ + StatusRecoverableError: {}, + StatusPermanentError: {}, + StatusFatalError: {}, +} + +// StatusEvent contains a status and timestamp, and can contain an error type StatusEvent struct { - status Status - err error + status Status + err error + timestamp time.Time } +// Status returns the Status (enum) associated with the StatusEvent func (ev *StatusEvent) Status() Status { return ev.status } -// Err returns the error associated with the ComponentEvent. +// Err returns the error associated with the StatusEvent. func (ev *StatusEvent) Err() error { return ev.err } +// Timestamp returns the timestamp associated with the StatusEvent +func (ev *StatusEvent) Timestamp() time.Time { + return ev.timestamp +} + // StatusEventOption applies options to a StatusEvent. 
type StatusEventOption func(*StatusEvent) error -// WithError sets the error object of the Event. It is optional +// errStatusEventInvalidArgument indicates an invalid option was specified when creating a status +// event. This will happen when using WithError for a non-error status. +var errStatusEventInvalidArgument = errors.New("status event argument error") + +// WithError sets the error object of the StatusEvent. It is optional // and should only be applied to an Event of type ComponentError. func WithError(err error) StatusEventOption { return func(o *StatusEvent) error { - if o.status == StatusOK { - return errors.New("event with ComponentOK cannot have an error") + if _, ok := errorStatuses[o.status]; !ok { + return fmt.Errorf( + "event with %s cannot have an error: %w", + o.status, + errStatusEventInvalidArgument, + ) } o.err = err return nil } } +// WithTimestamp is optional, when used it sets the timestamp of the StatusEvent. +func WithTimestamp(t time.Time) StatusEventOption { + return func(o *StatusEvent) error { + o.timestamp = t + return nil + } +} + // NewStatusEvent creates and returns a StatusEvent with default and provided // options. Will return an error if an error is provided for a non-error event // type (status.ComponentOK). 
@@ -58,6 +118,10 @@ func NewStatusEvent(status Status, options ...StatusEventOption) (*StatusEvent, } } + if ev.timestamp.IsZero() { + ev.timestamp = time.Now() + } + return &ev, nil } diff --git a/component/status_test.go b/component/status_test.go index 1faed3e3328..44870d3c9dc 100644 --- a/component/status_test.go +++ b/component/status_test.go @@ -3,29 +3,77 @@ package component import ( - "errors" + "fmt" "testing" + "time" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -func TestStatusEventOK(t *testing.T) { - event, err := NewStatusEvent(StatusOK) - require.NoError(t, err) - require.Equal(t, StatusOK, event.Status()) - require.Nil(t, event.Err()) +func TestStatusEventWithoutError(t *testing.T) { + statuses := []Status{ + StatusStarting, + StatusOK, + StatusRecoverableError, + StatusPermanentError, + StatusFatalError, + StatusStopping, + StatusStopped, + } + + for _, status := range statuses { + t.Run(fmt.Sprintf("%s without error", status), func(t *testing.T) { + ev, err := NewStatusEvent(status) + require.NoError(t, err) + require.Equal(t, status, ev.Status()) + require.Nil(t, ev.Err()) + require.False(t, ev.Timestamp().IsZero()) + }) + } +} + +func TestStatusEventWithError(t *testing.T) { + statuses := []Status{ + StatusRecoverableError, + StatusRecoverableError, + StatusFatalError, + } + + for _, status := range statuses { + t.Run(fmt.Sprintf("error status: %s with error", status), func(t *testing.T) { + ev, err := NewStatusEvent(status, WithError(assert.AnError)) + require.NoError(t, err) + require.Equal(t, status, ev.Status()) + require.Equal(t, assert.AnError, ev.Err()) + require.False(t, ev.Timestamp().IsZero()) + }) + } } -func TestStatusEventOKWithError(t *testing.T) { - event, err := NewStatusEvent(StatusOK, WithError(errors.New("an error"))) - require.Error(t, err) - require.Nil(t, event) +func TestNonErrorStatusWithError(t *testing.T) { + statuses := []Status{ + StatusStarting, + StatusOK, + StatusStopping, + 
StatusStopped, + } + + for _, status := range statuses { + t.Run(fmt.Sprintf("non error status: %s with error", status), func(t *testing.T) { + ev, err := NewStatusEvent(status, WithError(assert.AnError)) + require.Error(t, err) + require.ErrorIs(t, err, errStatusEventInvalidArgument) + require.Nil(t, ev) + }) + } } -func TestStatusEventError(t *testing.T) { - eventErr := errors.New("an error") - event, err := NewStatusEvent(StatusError, WithError(eventErr)) +func TestStatusEventWithTimestamp(t *testing.T) { + ts := time.Now() + ev, err := NewStatusEvent(StatusOK, WithTimestamp(ts)) require.NoError(t, err) - require.Equal(t, StatusError, event.Status()) - require.Equal(t, eventErr, event.Err()) + require.Equal(t, StatusOK, ev.Status()) + require.Nil(t, ev.Err()) + require.Equal(t, ts, ev.Timestamp()) } diff --git a/otelcol/collector_test.go b/otelcol/collector_test.go index 17174cd10d6..968e138219b 100644 --- a/otelcol/collector_test.go +++ b/otelcol/collector_test.go @@ -166,6 +166,10 @@ func TestComponentStatusWatcher(t *testing.T) { changedComponents := map[*component.InstanceID]component.Status{} var mux sync.Mutex onStatusChanged := func(source *component.InstanceID, event *component.StatusEvent) { + // skip the startup notifications + if event.Status() == component.StatusStarting { + return + } mux.Lock() defer mux.Unlock() changedComponents[source] = event.Status() @@ -201,7 +205,7 @@ func TestComponentStatusWatcher(t *testing.T) { // All processors must report a status change with the same ID assert.EqualValues(t, component.NewID(unhealthyProcessorFactory.Type()), k.ID) // And all must be in StatusError - assert.EqualValues(t, component.StatusError, v) + assert.EqualValues(t, component.StatusRecoverableError, v) } // We have 3 processors with exactly the same ID in otelcol-statuswatcher.yaml // We must have exactly 3 items in our map. 
This ensures that the "source" argument diff --git a/processor/processortest/unhealthy_processor.go b/processor/processortest/unhealthy_processor.go index 3bf6e86e790..9b6daab115f 100644 --- a/processor/processortest/unhealthy_processor.go +++ b/processor/processortest/unhealthy_processor.go @@ -59,8 +59,7 @@ type unhealthyProcessor struct { func (unhealthyProcessor) Start(_ context.Context, host component.Host) error { go func() { - evt, _ := component.NewStatusEvent(component.StatusError) - host.ReportComponentStatus(evt) + host.ReportComponentStatus(component.StatusRecoverableError) }() return nil } diff --git a/service/internal/components/host_wrapper.go b/service/internal/components/host_wrapper.go index 056664673af..89aae9c89e9 100644 --- a/service/internal/components/host_wrapper.go +++ b/service/internal/components/host_wrapper.go @@ -10,20 +10,22 @@ import ( "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/service/internal/servicehost" + "go.opentelemetry.io/collector/service/internal/status" ) // hostWrapper adds behavior on top of the component.Host being passed when starting the built components. -// TODO: rename this to componentHost or hostComponentConnector to better reflect the purpose. 
type hostWrapper struct { servicehost.Host - component *component.InstanceID + component *component.InstanceID + statusNotifier status.Notifier *zap.Logger } -func NewHostWrapper(host servicehost.Host, component *component.InstanceID, logger *zap.Logger) component.Host { +func NewHostWrapper(host servicehost.Host, instanceID *component.InstanceID, logger *zap.Logger) component.Host { return &hostWrapper{ host, - component, + instanceID, + status.NewNotifier(host, instanceID), logger, } } @@ -34,8 +36,15 @@ func (hw *hostWrapper) ReportFatalError(err error) { hw.Host.ReportFatalError(err) // nolint:staticcheck } -func (hw *hostWrapper) ReportComponentStatus(event *component.StatusEvent) { - hw.Host.ReportComponentStatus(hw.component, event) +func (hw *hostWrapper) ReportComponentStatus(status component.Status, options ...component.StatusEventOption) { + // The following can return an error. The two cases that would result in an error would be: + // - An invalid state transition + // - Invalid arguments (basically providing a component.WithError option to a non-error status) + // The latter is a programming error and should be corrected. The former, is something that is + // likely to happen, but not something the programmer should be concerned about. An example would be + // reporting StatusRecoverableError multiple times, which, could happen while recovering, however, + // only the first invocation would result in a successful status transition. + _ = hw.statusNotifier.Event(status, options...) } // RegisterZPages is used by zpages extension to register handles from service. 
diff --git a/service/internal/components/host_wrapper_test.go b/service/internal/components/host_wrapper_test.go index 282701e6a21..194c62c4a2e 100644 --- a/service/internal/components/host_wrapper_test.go +++ b/service/internal/components/host_wrapper_test.go @@ -7,17 +7,14 @@ import ( "errors" "testing" - "github.com/stretchr/testify/assert" "go.uber.org/zap" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/service/internal/servicehost" ) -func Test_newHostWrapper(t *testing.T) { +func Test_newHostWrapper(_ *testing.T) { hw := NewHostWrapper(servicehost.NewNopHost(), nil, zap.NewNop()) hw.ReportFatalError(errors.New("test error")) - ev, err := component.NewStatusEvent(component.StatusOK) - assert.NoError(t, err) - hw.ReportComponentStatus(ev) + hw.ReportComponentStatus(component.StatusOK) } diff --git a/service/internal/status/status.go b/service/internal/status/status.go new file mode 100644 index 00000000000..ac0c6736fd9 --- /dev/null +++ b/service/internal/status/status.go @@ -0,0 +1,111 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package status // import "go.opentelemetry.io/collector/service/internal/status" + +import ( + "errors" + "fmt" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/service/internal/servicehost" +) + +// onTransitionFunc receives a component.StatusEvent on a successful state transition +type onTransitionFunc func(*component.StatusEvent) + +var errInvalidStateTransition = errors.New("invalid state transition") + +// fsm is a finite state machine that models transitions for component status +type fsm struct { + current *component.StatusEvent + transitions map[component.Status]map[component.Status]struct{} + onTransition onTransitionFunc +} + +// Event will attempt to execute a state transition. If successful, it calls the onTransitionFunc +// with a StatusEvent representing the new state. 
Returns an error if the arguments result in an +// invalid status, or if the state transition is not valid. +func (m *fsm) Event(status component.Status, options ...component.StatusEventOption) error { + if _, ok := m.transitions[m.current.Status()][status]; !ok { + return fmt.Errorf( + "cannot transition from %s to %s: %w", + m.current.Status(), + status, + errInvalidStateTransition, + ) + } + + ev, err := component.NewStatusEvent(status, options...) + if err != nil { + return err + } + + m.current = ev + m.onTransition(ev) + + return nil +} + +// newStatusFSM creates a state machine with all valid transitions for component.Status. +// It sets the initial state to component.StatusStarting and triggers the onTransitionFunc +// for the initial state. +func newStatusFSM(onTransition onTransitionFunc) *fsm { + starting, _ := component.NewStatusEvent(component.StatusStarting) + m := &fsm{ + current: starting, + onTransition: onTransition, + transitions: map[component.Status]map[component.Status]struct{}{ + component.StatusStarting: { + component.StatusOK: {}, + component.StatusRecoverableError: {}, + component.StatusPermanentError: {}, + component.StatusFatalError: {}, + component.StatusStopping: {}, + component.StatusStopped: {}, + }, + component.StatusOK: { + component.StatusRecoverableError: {}, + component.StatusPermanentError: {}, + component.StatusFatalError: {}, + component.StatusStopping: {}, + component.StatusStopped: {}, + }, + component.StatusRecoverableError: { + component.StatusOK: {}, + component.StatusPermanentError: {}, + component.StatusFatalError: {}, + component.StatusStopping: {}, + component.StatusStopped: {}, + }, + component.StatusPermanentError: {}, + component.StatusFatalError: {}, + component.StatusStopping: { + component.StatusRecoverableError: {}, + component.StatusPermanentError: {}, + component.StatusFatalError: {}, + component.StatusStopped: {}, + }, + component.StatusStopped: {}, + }, + } + + // fire initial starting event + 
m.onTransition(starting) + return m +} + +// A Notifier emits status events +type Notifier interface { + Event(status component.Status, options ...component.StatusEventOption) error +} + +// NewNotifier returns a status.Notifier that reports component status through the given +// servicehost. The underlying implementation is a finite state machine. +func NewNotifier(host servicehost.Host, instanceID *component.InstanceID) Notifier { + return newStatusFSM( + func(ev *component.StatusEvent) { + host.ReportComponentStatus(instanceID, ev) + }, + ) +} diff --git a/service/internal/status/status_test.go b/service/internal/status/status_test.go new file mode 100644 index 00000000000..b5ba41bf793 --- /dev/null +++ b/service/internal/status/status_test.go @@ -0,0 +1,143 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package status + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/service/internal/servicehost" +) + +func TestStatusFSM(t *testing.T) { + for _, tc := range []struct { + name string + reportedStatuses []component.Status + expectedStatuses []component.Status + expectedErrorCount int + }{ + { + name: "successful startup and shutdown", + reportedStatuses: []component.Status{ + component.StatusOK, + component.StatusStopping, + component.StatusStopped, + }, + expectedStatuses: []component.Status{ + component.StatusStarting, + component.StatusOK, + component.StatusStopping, + component.StatusStopped, + }, + }, + { + name: "component recovered", + reportedStatuses: []component.Status{ + component.StatusRecoverableError, + component.StatusOK, + component.StatusStopping, + component.StatusStopped, + }, + expectedStatuses: []component.Status{ + component.StatusStarting, + component.StatusRecoverableError, + component.StatusOK, + component.StatusStopping, + component.StatusStopped, + }, + }, + { + name: "repeated events are errors", + 
reportedStatuses: []component.Status{ + component.StatusOK, + component.StatusRecoverableError, + component.StatusRecoverableError, + component.StatusRecoverableError, + component.StatusOK, + component.StatusStopping, + component.StatusStopped, + }, + expectedStatuses: []component.Status{ + component.StatusStarting, + component.StatusOK, + component.StatusRecoverableError, + component.StatusOK, + component.StatusStopping, + component.StatusStopped, + }, + expectedErrorCount: 2, + }, + { + name: "PermanentError is terminal", + reportedStatuses: []component.Status{ + component.StatusOK, + component.StatusPermanentError, + component.StatusOK, + }, + expectedStatuses: []component.Status{ + component.StatusStarting, + component.StatusOK, + component.StatusPermanentError, + }, + expectedErrorCount: 1, + }, + { + name: "FatalError is terminal", + reportedStatuses: []component.Status{ + component.StatusOK, + component.StatusFatalError, + component.StatusOK, + }, + expectedStatuses: []component.Status{ + component.StatusStarting, + component.StatusOK, + component.StatusFatalError, + }, + expectedErrorCount: 1, + }, + { + name: "Stopped is terminal", + reportedStatuses: []component.Status{ + component.StatusOK, + component.StatusStopping, + component.StatusStopped, + component.StatusOK, + }, + expectedStatuses: []component.Status{ + component.StatusStarting, + component.StatusOK, + component.StatusStopping, + component.StatusStopped, + }, + expectedErrorCount: 1, + }, + } { + t.Run(tc.name, func(t *testing.T) { + var receivedStatuses []component.Status + fsm := newStatusFSM( + func(ev *component.StatusEvent) { + receivedStatuses = append(receivedStatuses, ev.Status()) + }, + ) + + errorCount := 0 + for _, status := range tc.reportedStatuses { + if err := fsm.Event(status); err != nil { + errorCount++ + require.ErrorIs(t, err, errInvalidStateTransition) + } + } + + require.Equal(t, tc.expectedErrorCount, errorCount) + require.Equal(t, tc.expectedStatuses, receivedStatuses) + 
}) + } +} + +func TestNewNotifier(t *testing.T) { + notifier := NewNotifier(servicehost.NewNopHost(), &component.InstanceID{}) + require.NoError(t, notifier.Event(component.StatusOK)) +} From dd19267be88b1705d70f1e55e0a841244f62e690 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Wed, 23 Aug 2023 17:20:30 -0700 Subject: [PATCH 08/40] Add ReportFatalError behavior to ReportComponentStatus --- service/host.go | 3 +++ service/service_test.go | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/service/host.go b/service/host.go index c11ce0c3521..ced03e39e9c 100644 --- a/service/host.go +++ b/service/host.go @@ -42,6 +42,9 @@ func (host *serviceHost) ReportFatalError(err error) { func (host *serviceHost) ReportComponentStatus(source *component.InstanceID, event *component.StatusEvent) { // TODO: What should we do if there is an error notifying here? host.serviceExtensions.NotifyComponentStatusChange(source, event) //nolint:errcheck + if event.Status() == component.StatusFatalError { + host.asyncErrorChannel <- event.Err() + } } func (host *serviceHost) GetFactory(kind component.Kind, componentType component.Type) component.Factory { diff --git a/service/service_test.go b/service/service_test.go index 16c5fd6f82f..8565743db67 100644 --- a/service/service_test.go +++ b/service/service_test.go @@ -414,6 +414,29 @@ func TestServiceTelemetryLogger(t *testing.T) { assert.NotNil(t, srv.telemetrySettings.Logger) } +func TestServiceFatalError(t *testing.T) { + set := newNopSettings() + set.AsyncErrorChannel = make(chan error) + + srv, err := New(context.Background(), set, newNopConfig()) + require.NoError(t, err) + + assert.NoError(t, srv.Start(context.Background())) + t.Cleanup(func() { + assert.NoError(t, srv.Shutdown(context.Background())) + }) + + go func() { + ev, _ := component.NewStatusEvent(component.StatusFatalError, component.WithError(assert.AnError)) + srv.host.ReportComponentStatus(&component.InstanceID{}, ev) + }() + + err = 
<-srv.host.asyncErrorChannel + + require.Error(t, err) + require.ErrorIs(t, err, assert.AnError) +} + func assertResourceLabels(t *testing.T, res pcommon.Resource, expectedLabels map[string]labelValue) { for key, labelValue := range expectedLabels { lookupKey, ok := prometheusToOtelConv[key] From 0d954fd9de4f4e0fbc64663e693a69ef1d806f17 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Thu, 24 Aug 2023 15:30:35 -0700 Subject: [PATCH 09/40] Improved testing; cleanup --- component/componenttest/nop_host_test.go | 2 + component/host.go | 2 +- component/status.go | 6 +-- component/status_test.go | 2 +- .../statuswatcher_extension_test.go | 38 ++++++++++++++ .../processortest/unhealthy_processor_test.go | 49 +++++++++++++++++++ service/host.go | 4 +- service/internal/components/host_wrapper.go | 9 +++- service/internal/graph/graph.go | 5 +- service/internal/servicehost/nop_host_test.go | 27 ++++++++++ service/internal/status/status_test.go | 11 +++++ 11 files changed, 145 insertions(+), 10 deletions(-) create mode 100644 extension/extensiontest/statuswatcher_extension_test.go create mode 100644 processor/processortest/unhealthy_processor_test.go create mode 100644 service/internal/servicehost/nop_host_test.go diff --git a/component/componenttest/nop_host_test.go b/component/componenttest/nop_host_test.go index 1bcb92d1744..f09d8880b78 100644 --- a/component/componenttest/nop_host_test.go +++ b/component/componenttest/nop_host_test.go @@ -18,7 +18,9 @@ func TestNewNopHost(t *testing.T) { require.NotNil(t, nh) require.IsType(t, &nopHost{}, nh) + nh.ReportComponentStatus(component.StatusOK) nh.ReportFatalError(errors.New("TestError")) + assert.Nil(t, nh.GetExporters()) // nolint: staticcheck assert.Nil(t, nh.GetExtensions()) assert.Nil(t, nh.GetFactory(component.KindReceiver, "test")) diff --git a/component/host.go b/component/host.go index 3939ce57a3f..95f6b587a4c 100644 --- a/component/host.go +++ b/component/host.go @@ -12,7 +12,7 @@ type Host interface { // // 
ReportFatalError should be called by the component anytime after Component.Start() ends and // before Component.Shutdown() begins. - // Deprecated: [0.65.0] Use ReportComponentStatus instead (with an event of type status.ComponentError) + // Deprecated: [0.65.0] Use ReportComponentStatus instead (with an event component.StatusFatalError) ReportFatalError(err error) // ReportComponentStatus can be used by a component to communicate its status to the Host. diff --git a/component/status.go b/component/status.go index cb56812ad64..7e7fcd9b467 100644 --- a/component/status.go +++ b/component/status.go @@ -75,9 +75,9 @@ func (ev *StatusEvent) Timestamp() time.Time { // StatusEventOption applies options to a StatusEvent. type StatusEventOption func(*StatusEvent) error -// errStatusEventInvalidArgument indicates an invalid option was specified when creating a status +// ErrStatusEventInvalidArgument indicates an invalid option was specified when creating a status // event. This will happen when using WithError for a non-error status. -var errStatusEventInvalidArgument = errors.New("status event argument error") +var ErrStatusEventInvalidArgument = errors.New("status event argument error") // WithError sets the error object of the StatusEvent. It is optional // and should only be applied to an Event of type ComponentError. 
@@ -87,7 +87,7 @@ func WithError(err error) StatusEventOption { return fmt.Errorf( "event with %s cannot have an error: %w", o.status, - errStatusEventInvalidArgument, + ErrStatusEventInvalidArgument, ) } o.err = err diff --git a/component/status_test.go b/component/status_test.go index 44870d3c9dc..2a4d0728822 100644 --- a/component/status_test.go +++ b/component/status_test.go @@ -63,7 +63,7 @@ func TestNonErrorStatusWithError(t *testing.T) { t.Run(fmt.Sprintf("non error status: %s with error", status), func(t *testing.T) { ev, err := NewStatusEvent(status, WithError(assert.AnError)) require.Error(t, err) - require.ErrorIs(t, err, errStatusEventInvalidArgument) + require.ErrorIs(t, err, ErrStatusEventInvalidArgument) require.Nil(t, ev) }) } diff --git a/extension/extensiontest/statuswatcher_extension_test.go b/extension/extensiontest/statuswatcher_extension_test.go new file mode 100644 index 00000000000..16163fbc650 --- /dev/null +++ b/extension/extensiontest/statuswatcher_extension_test.go @@ -0,0 +1,38 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package extensiontest + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/component/componenttest" +) + +func TestStatusWatcherExtension(t *testing.T) { + statusChanged := false + factory := NewStatusWatcherExtensionFactory( + func(*component.InstanceID, *component.StatusEvent) { + statusChanged = true + }, + ) + require.NotNil(t, factory) + assert.Equal(t, component.Type("statuswatcher"), factory.Type()) + cfg := factory.CreateDefaultConfig() + assert.Equal(t, &struct{}{}, cfg) + + ext, err := factory.CreateExtension(context.Background(), NewStatusWatcherExtensionCreateSettings(), cfg) + require.NoError(t, err) + assert.NoError(t, ext.Start(context.Background(), componenttest.NewNopHost())) + assert.False(t, statusChanged) + + 
ext.(component.StatusWatcher).ComponentStatusChanged(&component.InstanceID{}, &component.StatusEvent{}) + + assert.True(t, statusChanged) + assert.NoError(t, ext.Shutdown(context.Background())) +} diff --git a/processor/processortest/unhealthy_processor_test.go b/processor/processortest/unhealthy_processor_test.go new file mode 100644 index 00000000000..adc80322f22 --- /dev/null +++ b/processor/processortest/unhealthy_processor_test.go @@ -0,0 +1,49 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package processortest + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/component/componenttest" + "go.opentelemetry.io/collector/consumer" + "go.opentelemetry.io/collector/consumer/consumertest" + "go.opentelemetry.io/collector/pdata/plog" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.opentelemetry.io/collector/pdata/ptrace" +) + +func TestNewUnhealthyProcessorFactory(t *testing.T) { + factory := NewUnhealthyProcessorFactory() + require.NotNil(t, factory) + assert.Equal(t, component.Type("unhealthy"), factory.Type()) + cfg := factory.CreateDefaultConfig() + assert.Equal(t, &struct{}{}, cfg) + + traces, err := factory.CreateTracesProcessor(context.Background(), NewUnhealthyProcessorCreateSettings(), cfg, consumertest.NewNop()) + require.NoError(t, err) + assert.Equal(t, consumer.Capabilities{MutatesData: false}, traces.Capabilities()) + assert.NoError(t, traces.Start(context.Background(), componenttest.NewNopHost())) + assert.NoError(t, traces.ConsumeTraces(context.Background(), ptrace.NewTraces())) + assert.NoError(t, traces.Shutdown(context.Background())) + + metrics, err := factory.CreateMetricsProcessor(context.Background(), NewUnhealthyProcessorCreateSettings(), cfg, consumertest.NewNop()) + require.NoError(t, err) + assert.Equal(t, consumer.Capabilities{MutatesData: false}, 
metrics.Capabilities()) + assert.NoError(t, metrics.Start(context.Background(), componenttest.NewNopHost())) + assert.NoError(t, metrics.ConsumeMetrics(context.Background(), pmetric.NewMetrics())) + assert.NoError(t, metrics.Shutdown(context.Background())) + + logs, err := factory.CreateLogsProcessor(context.Background(), NewUnhealthyProcessorCreateSettings(), cfg, consumertest.NewNop()) + require.NoError(t, err) + assert.Equal(t, consumer.Capabilities{MutatesData: false}, logs.Capabilities()) + assert.NoError(t, logs.Start(context.Background(), componenttest.NewNopHost())) + assert.NoError(t, logs.ConsumeLogs(context.Background(), plog.NewLogs())) + assert.NoError(t, logs.Shutdown(context.Background())) +} diff --git a/service/host.go b/service/host.go index ced03e39e9c..7c4bcea91b2 100644 --- a/service/host.go +++ b/service/host.go @@ -34,13 +34,13 @@ type serviceHost struct { // ReportFatalError is used to report to the host that the receiver encountered // a fatal error (i.e.: an error that the instance can't recover from) after // its start function has already returned. -// Deprecated: [0.65.0] Replaced by ReportComponentStatus +// Deprecated: [x.x.x] Replaced by ReportComponentStatus func (host *serviceHost) ReportFatalError(err error) { host.asyncErrorChannel <- err } func (host *serviceHost) ReportComponentStatus(source *component.InstanceID, event *component.StatusEvent) { - // TODO: What should we do if there is an error notifying here? + // TODO: What should we do if there is an error returned by a StatusWatcher? 
host.serviceExtensions.NotifyComponentStatusChange(source, event) //nolint:errcheck if event.Status() == component.StatusFatalError { host.asyncErrorChannel <- event.Err() diff --git a/service/internal/components/host_wrapper.go b/service/internal/components/host_wrapper.go index 89aae9c89e9..dd58c8e5e90 100644 --- a/service/internal/components/host_wrapper.go +++ b/service/internal/components/host_wrapper.go @@ -4,6 +4,7 @@ package components // import "go.opentelemetry.io/collector/service/internal/components" import ( + "errors" "net/http" "go.uber.org/zap" @@ -37,14 +38,18 @@ func (hw *hostWrapper) ReportFatalError(err error) { } func (hw *hostWrapper) ReportComponentStatus(status component.Status, options ...component.StatusEventOption) { - // The following can return an error. The two cases that would result in an error would be: + // The following can return an error for one of two reasons: // - An invalid state transition // - Invalid arguments (basically providing a component.WithError option to a non-error status) // The latter is a programming error and should be corrected. The former, is something that is // likely to happen, but not something the programmer should be concerned about. An example would be // reporting StatusRecoverableError multiple times, which, could happen while recovering, however, // only the first invocation would result in a successful status transition. - _ = hw.statusNotifier.Event(status, options...) + err := hw.statusNotifier.Event(status, options...) + + if err != nil && errors.Is(err, component.ErrStatusEventInvalidArgument) { + hw.Logger.Error("Component status error", zap.Error(err)) + } } // RegisterZPages is used by zpages extension to register handles from service. 
diff --git a/service/internal/graph/graph.go b/service/internal/graph/graph.go index 71bad2b5acf..a510170632c 100644 --- a/service/internal/graph/graph.go +++ b/service/internal/graph/graph.go @@ -51,6 +51,8 @@ type Graph struct { // Keep track of status source per node instanceIDs map[int64]*component.InstanceID + + logger *zap.Logger } func Build(ctx context.Context, set Settings) (*Graph, error) { @@ -58,6 +60,7 @@ func Build(ctx context.Context, set Settings) (*Graph, error) { componentGraph: simple.NewDirectedGraph(), pipelines: make(map[component.ID]*pipelineNodes, len(set.PipelineConfigs)), instanceIDs: make(map[int64]*component.InstanceID), + logger: set.Telemetry.Logger, } for pipelineID := range set.PipelineConfigs { pipelines.pipelines[pipelineID] = &pipelineNodes{ @@ -370,7 +373,7 @@ func (g *Graph) StartAll(ctx context.Context, host servicehost.Host) error { } instanceID := g.instanceIDs[node.ID()] - hostWrapper := components.NewHostWrapper(host, instanceID, zap.NewNop()) + hostWrapper := components.NewHostWrapper(host, instanceID, g.logger) if compErr := comp.Start(ctx, hostWrapper); compErr != nil { return compErr diff --git a/service/internal/servicehost/nop_host_test.go b/service/internal/servicehost/nop_host_test.go new file mode 100644 index 00000000000..5d7d9ba5816 --- /dev/null +++ b/service/internal/servicehost/nop_host_test.go @@ -0,0 +1,27 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package servicehost + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.opentelemetry.io/collector/component" +) + +func TestNewNopHost(t *testing.T) { + nh := NewNopHost() + require.NotNil(t, nh) + require.IsType(t, &nopHost{}, nh) + + nh.ReportComponentStatus(&component.InstanceID{}, &component.StatusEvent{}) + nh.ReportFatalError(errors.New("TestError")) + + assert.Nil(t, nh.GetExporters()) // nolint: staticcheck + assert.Nil(t, nh.GetExtensions()) 
+ assert.Nil(t, nh.GetFactory(component.KindReceiver, "test")) +} diff --git a/service/internal/status/status_test.go b/service/internal/status/status_test.go index b5ba41bf793..b7992811b8c 100644 --- a/service/internal/status/status_test.go +++ b/service/internal/status/status_test.go @@ -6,6 +6,7 @@ package status import ( "testing" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/component" @@ -137,6 +138,16 @@ func TestStatusFSM(t *testing.T) { } } +func TestStatusEventError(t *testing.T) { + fsm := newStatusFSM(func(*component.StatusEvent) {}) + + // the combination of StatusOK with an error is invalid + err := fsm.Event(component.StatusOK, component.WithError(assert.AnError)) + + require.Error(t, err) + require.ErrorIs(t, err, component.ErrStatusEventInvalidArgument) +} + func TestNewNotifier(t *testing.T) { notifier := NewNotifier(servicehost.NewNopHost(), &component.InstanceID{}) require.NoError(t, notifier.Event(component.StatusOK)) From b45540e44bd7ad7d114baa983241f7995d72ca5d Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Wed, 30 Aug 2023 16:28:31 -0700 Subject: [PATCH 10/40] Move ReportComponentStatus from Host to TelemetrySettings --- component/componenttest/nop_host_test.go | 1 - component/componenttest/nop_telemetry.go | 11 +++--- component/host.go | 8 ----- component/status.go | 2 ++ component/telemetry.go | 6 +++- .../processortest/unhealthy_processor.go | 31 ++++++++++------- service/extensions/extensions.go | 24 +++++++------ service/extensions/extensions_test.go | 6 ++-- service/host.go | 24 +++++++------ service/internal/components/host_wrapper.go | 26 ++------------ .../internal/components/host_wrapper_test.go | 7 ++-- service/internal/graph/graph.go | 29 +++++++++------- service/internal/graph/graph_test.go | 26 +++++++------- service/internal/servicehost/host.go | 29 ---------------- service/internal/servicehost/nop_host.go | 34 ------------------- 
service/internal/servicehost/nop_host_test.go | 27 --------------- .../internal/servicetelemetry/nop_settings.go | 26 ++++++++++++++ .../servicetelemetry/nop_settings_test.go | 31 +++++++++++++++++ service/internal/servicetelemetry/settings.go | 25 ++++++++++++++ service/internal/status/status.go | 11 +++--- service/internal/status/status_test.go | 10 ++++-- service/service.go | 11 +++--- service/service_test.go | 31 +++++++++-------- service/telemetry.go | 4 +-- service/telemetry_test.go | 3 +- 25 files changed, 217 insertions(+), 226 deletions(-) delete mode 100644 service/internal/servicehost/host.go delete mode 100644 service/internal/servicehost/nop_host.go delete mode 100644 service/internal/servicehost/nop_host_test.go create mode 100644 service/internal/servicetelemetry/nop_settings.go create mode 100644 service/internal/servicetelemetry/nop_settings_test.go create mode 100644 service/internal/servicetelemetry/settings.go diff --git a/component/componenttest/nop_host_test.go b/component/componenttest/nop_host_test.go index f09d8880b78..f6ca2a0b5f8 100644 --- a/component/componenttest/nop_host_test.go +++ b/component/componenttest/nop_host_test.go @@ -18,7 +18,6 @@ func TestNewNopHost(t *testing.T) { require.NotNil(t, nh) require.IsType(t, &nopHost{}, nh) - nh.ReportComponentStatus(component.StatusOK) nh.ReportFatalError(errors.New("TestError")) assert.Nil(t, nh.GetExporters()) // nolint: staticcheck diff --git a/component/componenttest/nop_telemetry.go b/component/componenttest/nop_telemetry.go index 438f9ec761a..9d04e1b360d 100644 --- a/component/componenttest/nop_telemetry.go +++ b/component/componenttest/nop_telemetry.go @@ -16,10 +16,11 @@ import ( // NewNopTelemetrySettings returns a new nop telemetry settings for Create* functions. 
func NewNopTelemetrySettings() component.TelemetrySettings { return component.TelemetrySettings{ - Logger: zap.NewNop(), - TracerProvider: trace.NewNoopTracerProvider(), - MeterProvider: noop.NewMeterProvider(), - MetricsLevel: configtelemetry.LevelNone, - Resource: pcommon.NewResource(), + Logger: zap.NewNop(), + TracerProvider: trace.NewNoopTracerProvider(), + MeterProvider: noop.NewMeterProvider(), + MetricsLevel: configtelemetry.LevelNone, + Resource: pcommon.NewResource(), + ReportComponentStatus: func(component.Status, ...component.StatusEventOption) {}, } } diff --git a/component/host.go b/component/host.go index 95f6b587a4c..44526b52d46 100644 --- a/component/host.go +++ b/component/host.go @@ -15,14 +15,6 @@ type Host interface { // Deprecated: [0.65.0] Use ReportComponentStatus instead (with an event component.StatusFatalError) ReportFatalError(err error) - // ReportComponentStatus can be used by a component to communicate its status to the Host. - // The Host implementations may broadcast this information to interested parties via - // StatusWatcher interface. - // May be called by the component any time after Component.Start is called or while - // Component.Start call is executing. - // May be called concurrently with itself. - ReportComponentStatus(status Status, options ...StatusEventOption) - // GetFactory of the specified kind. Returns the factory for a component type. // This allows components to create other components. For example: // func (r MyReceiver) Start(host component.Host) error { diff --git a/component/status.go b/component/status.go index 7e7fcd9b467..3ae5b7af8d2 100644 --- a/component/status.go +++ b/component/status.go @@ -135,3 +135,5 @@ type StatusWatcher interface { // The function may be called concurrently with itself. 
ComponentStatusChanged(source *InstanceID, event *StatusEvent) } + +type ReportStatusFunc func(Status, ...StatusEventOption) diff --git a/component/telemetry.go b/component/telemetry.go index 9617e456319..1a2c04624ba 100644 --- a/component/telemetry.go +++ b/component/telemetry.go @@ -12,7 +12,7 @@ import ( "go.opentelemetry.io/collector/pdata/pcommon" ) -type TelemetrySettings struct { +type TelemetrySettingsBase[T any] struct { // Logger that the factory can use during creation and can pass to the created // component to be used later as well. Logger *zap.Logger @@ -29,4 +29,8 @@ type TelemetrySettings struct { // Resource contains the resource attributes for the collector's telemetry. Resource pcommon.Resource + + ReportComponentStatus T } + +type TelemetrySettings TelemetrySettingsBase[ReportStatusFunc] diff --git a/processor/processortest/unhealthy_processor.go b/processor/processortest/unhealthy_processor.go index 9b6daab115f..002cfe2e8b5 100644 --- a/processor/processortest/unhealthy_processor.go +++ b/processor/processortest/unhealthy_processor.go @@ -34,32 +34,37 @@ func NewUnhealthyProcessorFactory() processor.Factory { ) } -func createUnhealthyTracesProcessor(context.Context, processor.CreateSettings, component.Config, consumer.Traces) (processor.Traces, error) { - return unhealthyProcessorInstance, nil +func createUnhealthyTracesProcessor(_ context.Context, set processor.CreateSettings, _ component.Config, _ consumer.Traces) (processor.Traces, error) { + return &unhealthyProcessor{ + Consumer: consumertest.NewNop(), + telemetry: set.TelemetrySettings, + }, nil } -func createUnhealthyMetricsProcessor(context.Context, processor.CreateSettings, component.Config, consumer.Metrics) (processor.Metrics, error) { - return unhealthyProcessorInstance, nil +func createUnhealthyMetricsProcessor(_ context.Context, set processor.CreateSettings, _ component.Config, _ consumer.Metrics) (processor.Metrics, error) { + return &unhealthyProcessor{ + Consumer: 
consumertest.NewNop(), + telemetry: set.TelemetrySettings, + }, nil } -func createUnhealthyLogsProcessor(context.Context, processor.CreateSettings, component.Config, consumer.Logs) (processor.Logs, error) { - return unhealthyProcessorInstance, nil +func createUnhealthyLogsProcessor(_ context.Context, set processor.CreateSettings, _ component.Config, _ consumer.Logs) (processor.Logs, error) { + return &unhealthyProcessor{ + Consumer: consumertest.NewNop(), + telemetry: set.TelemetrySettings, + }, nil } -var unhealthyProcessorInstance = &unhealthyProcessor{ - Consumer: consumertest.NewNop(), -} - -// unhealthyProcessor stores consumed traces and metrics for testing purposes. type unhealthyProcessor struct { component.StartFunc component.ShutdownFunc consumertest.Consumer + telemetry component.TelemetrySettings } -func (unhealthyProcessor) Start(_ context.Context, host component.Host) error { +func (p unhealthyProcessor) Start(_ context.Context, host component.Host) error { go func() { - host.ReportComponentStatus(component.StatusRecoverableError) + p.telemetry.ReportComponentStatus(component.StatusRecoverableError) }() return nil } diff --git a/service/extensions/extensions.go b/service/extensions/extensions.go index 14bc9ee823b..89ec6a94d89 100644 --- a/service/extensions/extensions.go +++ b/service/extensions/extensions.go @@ -15,7 +15,7 @@ import ( "go.opentelemetry.io/collector/confmap" "go.opentelemetry.io/collector/extension" "go.opentelemetry.io/collector/service/internal/components" - "go.opentelemetry.io/collector/service/internal/servicehost" + "go.opentelemetry.io/collector/service/internal/servicetelemetry" "go.opentelemetry.io/collector/service/internal/zpages" ) @@ -23,21 +23,17 @@ const zExtensionName = "zextensionname" // Extensions is a map of extensions created from extension configs. 
type Extensions struct { - telemetry component.TelemetrySettings + telemetry servicetelemetry.Settings extMap map[component.ID]extension.Extension } // Start starts all extensions. -func (bes *Extensions) Start(ctx context.Context, host servicehost.Host) error { +func (bes *Extensions) Start(ctx context.Context, host component.Host) error { bes.telemetry.Logger.Info("Starting extensions...") for extID, ext := range bes.extMap { extLogger := components.ExtensionLogger(bes.telemetry.Logger, extID) extLogger.Info("Extension is starting...") - instanceID := &component.InstanceID{ - ID: extID, - Kind: component.KindExtension, - } - if err := ext.Start(ctx, components.NewHostWrapper(host, instanceID, extLogger)); err != nil { + if err := ext.Start(ctx, components.NewHostWrapper(host, extLogger)); err != nil { return err } extLogger.Info("Extension started.") @@ -135,7 +131,7 @@ func (bes *Extensions) HandleZPages(w http.ResponseWriter, r *http.Request) { // Settings holds configuration for building Extensions. type Settings struct { - Telemetry component.TelemetrySettings + Telemetry servicetelemetry.Settings BuildInfo component.BuildInfo // Extensions builder for extensions. 
@@ -149,9 +145,17 @@ func New(ctx context.Context, set Settings, cfg Config) (*Extensions, error) { extMap: make(map[component.ID]extension.Extension), } for _, extID := range cfg { + + instanceID := &component.InstanceID{ + ID: extID, + Kind: component.KindExtension, + } + + telSet := set.Telemetry.ToComponentTelemetrySettings(instanceID) + extSet := extension.CreateSettings{ ID: extID, - TelemetrySettings: set.Telemetry, + TelemetrySettings: telSet, BuildInfo: set.BuildInfo, } extSet.TelemetrySettings.Logger = components.ExtensionLogger(set.Telemetry.Logger, extID) diff --git a/service/extensions/extensions_test.go b/service/extensions/extensions_test.go index cbb3dd5238d..dc4c4f82b95 100644 --- a/service/extensions/extensions_test.go +++ b/service/extensions/extensions_test.go @@ -12,10 +12,10 @@ import ( "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/component" - "go.opentelemetry.io/collector/component/componenttest" "go.opentelemetry.io/collector/confmap" "go.opentelemetry.io/collector/extension" "go.opentelemetry.io/collector/extension/extensiontest" + "go.opentelemetry.io/collector/service/internal/servicetelemetry" ) func TestBuildExtensions(t *testing.T) { @@ -81,7 +81,7 @@ func TestBuildExtensions(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { _, err := New(context.Background(), Settings{ - Telemetry: componenttest.NewNopTelemetrySettings(), + Telemetry: servicetelemetry.NewNopSettings(), BuildInfo: component.NewDefaultBuildInfo(), Extensions: extension.NewBuilder(tt.extensionsConfigs, tt.factories), }, tt.config) @@ -167,7 +167,7 @@ func TestNotifyConfig(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { extensions, err := New(context.Background(), Settings{ - Telemetry: componenttest.NewNopTelemetrySettings(), + Telemetry: servicetelemetry.NewNopSettings(), BuildInfo: component.NewDefaultBuildInfo(), Extensions: extension.NewBuilder(tt.extensionsConfigs, tt.factories), }, 
tt.serviceExtensions) diff --git a/service/host.go b/service/host.go index 7c4bcea91b2..8abe167570d 100644 --- a/service/host.go +++ b/service/host.go @@ -12,10 +12,9 @@ import ( "go.opentelemetry.io/collector/receiver" "go.opentelemetry.io/collector/service/extensions" "go.opentelemetry.io/collector/service/internal/graph" - "go.opentelemetry.io/collector/service/internal/servicehost" ) -var _ servicehost.Host = (*serviceHost)(nil) +var _ component.Host = (*serviceHost)(nil) type serviceHost struct { asyncErrorChannel chan error @@ -38,15 +37,6 @@ type serviceHost struct { func (host *serviceHost) ReportFatalError(err error) { host.asyncErrorChannel <- err } - -func (host *serviceHost) ReportComponentStatus(source *component.InstanceID, event *component.StatusEvent) { - // TODO: What should we do if there is an error returned by a StatusWatcher? - host.serviceExtensions.NotifyComponentStatusChange(source, event) //nolint:errcheck - if event.Status() == component.StatusFatalError { - host.asyncErrorChannel <- event.Err() - } -} - func (host *serviceHost) GetFactory(kind component.Kind, componentType component.Type) component.Factory { switch kind { case component.KindReceiver: @@ -76,3 +66,15 @@ func (host *serviceHost) GetExtensions() map[component.ID]component.Component { func (host *serviceHost) GetExporters() map[component.DataType]map[component.ID]component.Component { return host.pipelines.GetExporters() } + +func (host *serviceHost) reportComponentStatus(source *component.InstanceID, event *component.StatusEvent) { + // TODO: What should we do if there is an error returned by a StatusWatcher? 
+ if host.serviceExtensions == nil { + // TODO: remove this temporary workaround + return + } + host.serviceExtensions.NotifyComponentStatusChange(source, event) //nolint:errcheck + if event.Status() == component.StatusFatalError { + host.asyncErrorChannel <- event.Err() + } +} diff --git a/service/internal/components/host_wrapper.go b/service/internal/components/host_wrapper.go index dd58c8e5e90..07ff2a741d7 100644 --- a/service/internal/components/host_wrapper.go +++ b/service/internal/components/host_wrapper.go @@ -4,29 +4,22 @@ package components // import "go.opentelemetry.io/collector/service/internal/components" import ( - "errors" "net/http" "go.uber.org/zap" "go.opentelemetry.io/collector/component" - "go.opentelemetry.io/collector/service/internal/servicehost" - "go.opentelemetry.io/collector/service/internal/status" ) // hostWrapper adds behavior on top of the component.Host being passed when starting the built components. type hostWrapper struct { - servicehost.Host - component *component.InstanceID - statusNotifier status.Notifier + component.Host *zap.Logger } -func NewHostWrapper(host servicehost.Host, instanceID *component.InstanceID, logger *zap.Logger) component.Host { +func NewHostWrapper(host component.Host, logger *zap.Logger) component.Host { return &hostWrapper{ host, - instanceID, - status.NewNotifier(host, instanceID), logger, } } @@ -37,21 +30,6 @@ func (hw *hostWrapper) ReportFatalError(err error) { hw.Host.ReportFatalError(err) // nolint:staticcheck } -func (hw *hostWrapper) ReportComponentStatus(status component.Status, options ...component.StatusEventOption) { - // The following can return an error for one of two reasons: - // - An invalid state transition - // - Invalid arguments (basically providing a component.WithError option to a non-error status) - // The latter is a programming error and should be corrected. The former, is something that is - // likely to happen, but not something the programmer should be concerned about. 
An example would be - // reporting StatusRecoverableError multiple times, which, could happen while recovering, however, - // only the first invocation would result in a successful status transition. - err := hw.statusNotifier.Event(status, options...) - - if err != nil && errors.Is(err, component.ErrStatusEventInvalidArgument) { - hw.Logger.Error("Component status error", zap.Error(err)) - } -} - // RegisterZPages is used by zpages extension to register handles from service. // When the wrapper is passed to the extension it won't be successful when casting // the interface, for the time being expose the interface here. diff --git a/service/internal/components/host_wrapper_test.go b/service/internal/components/host_wrapper_test.go index 194c62c4a2e..25567810fff 100644 --- a/service/internal/components/host_wrapper_test.go +++ b/service/internal/components/host_wrapper_test.go @@ -7,14 +7,11 @@ import ( "errors" "testing" + "go.opentelemetry.io/collector/component/componenttest" "go.uber.org/zap" - - "go.opentelemetry.io/collector/component" - "go.opentelemetry.io/collector/service/internal/servicehost" ) func Test_newHostWrapper(_ *testing.T) { - hw := NewHostWrapper(servicehost.NewNopHost(), nil, zap.NewNop()) + hw := NewHostWrapper(componenttest.NewNopHost(), zap.NewNop()) hw.ReportFatalError(errors.New("test error")) - hw.ReportComponentStatus(component.StatusOK) } diff --git a/service/internal/graph/graph.go b/service/internal/graph/graph.go index a510170632c..8ccee72df94 100644 --- a/service/internal/graph/graph.go +++ b/service/internal/graph/graph.go @@ -23,14 +23,13 @@ import ( "go.opentelemetry.io/collector/processor" "go.opentelemetry.io/collector/receiver" "go.opentelemetry.io/collector/service/internal/capabilityconsumer" - "go.opentelemetry.io/collector/service/internal/components" - "go.opentelemetry.io/collector/service/internal/servicehost" + "go.opentelemetry.io/collector/service/internal/servicetelemetry" 
"go.opentelemetry.io/collector/service/pipelines" ) // Settings holds configuration for building builtPipelines. type Settings struct { - Telemetry component.TelemetrySettings + Telemetry servicetelemetry.Settings BuildInfo component.BuildInfo ReceiverBuilder *receiver.Builder @@ -265,15 +264,22 @@ func (g *Graph) buildComponents(ctx context.Context, set Settings) error { for i := len(nodes) - 1; i >= 0; i-- { node := nodes[i] + + // skipped for capabilitiesNodes and fanoutNodes as they are not assigned componentIDs. + var telemetrySettings component.TelemetrySettings + if instanceID, ok := g.instanceIDs[node.ID()]; ok { + telemetrySettings = set.Telemetry.ToComponentTelemetrySettings(instanceID) + } + switch n := node.(type) { case *receiverNode: - err = n.buildComponent(ctx, set.Telemetry, set.BuildInfo, set.ReceiverBuilder, g.nextConsumers(n.ID())) + err = n.buildComponent(ctx, telemetrySettings, set.BuildInfo, set.ReceiverBuilder, g.nextConsumers(n.ID())) case *processorNode: - err = n.buildComponent(ctx, set.Telemetry, set.BuildInfo, set.ProcessorBuilder, g.nextConsumers(n.ID())[0]) + err = n.buildComponent(ctx, telemetrySettings, set.BuildInfo, set.ProcessorBuilder, g.nextConsumers(n.ID())[0]) case *exporterNode: - err = n.buildComponent(ctx, set.Telemetry, set.BuildInfo, set.ExporterBuilder) + err = n.buildComponent(ctx, telemetrySettings, set.BuildInfo, set.ExporterBuilder) case *connectorNode: - err = n.buildComponent(ctx, set.Telemetry, set.BuildInfo, set.ConnectorBuilder, g.nextConsumers(n.ID())) + err = n.buildComponent(ctx, telemetrySettings, set.BuildInfo, set.ConnectorBuilder, g.nextConsumers(n.ID())) case *capabilitiesNode: capability := consumer.Capabilities{MutatesData: false} for _, proc := range g.pipelines[n.pipelineID].processors { @@ -354,7 +360,7 @@ type pipelineNodes struct { exporters map[int64]graph.Node } -func (g *Graph) StartAll(ctx context.Context, host servicehost.Host) error { +func (g *Graph) StartAll(ctx context.Context, host 
component.Host) error { nodes, err := topo.Sort(g.componentGraph) if err != nil { return err @@ -372,10 +378,8 @@ func (g *Graph) StartAll(ctx context.Context, host servicehost.Host) error { continue } - instanceID := g.instanceIDs[node.ID()] - hostWrapper := components.NewHostWrapper(host, instanceID, g.logger) - - if compErr := comp.Start(ctx, hostWrapper); compErr != nil { + // TODO: automatically handle status here + if compErr := comp.Start(ctx, host); compErr != nil { return compErr } } @@ -399,6 +403,7 @@ func (g *Graph) ShutdownAll(ctx context.Context) error { // Skip capabilities/fanout nodes continue } + // TODO: automatically handle status here errs = multierr.Append(errs, comp.Shutdown(ctx)) } return errs diff --git a/service/internal/graph/graph_test.go b/service/internal/graph/graph_test.go index 9357f2a3d49..d1efbd698dd 100644 --- a/service/internal/graph/graph_test.go +++ b/service/internal/graph/graph_test.go @@ -27,7 +27,7 @@ import ( "go.opentelemetry.io/collector/processor/processortest" "go.opentelemetry.io/collector/receiver" "go.opentelemetry.io/collector/receiver/receivertest" - "go.opentelemetry.io/collector/service/internal/servicehost" + "go.opentelemetry.io/collector/service/internal/servicetelemetry" "go.opentelemetry.io/collector/service/internal/testcomponents" "go.opentelemetry.io/collector/service/pipelines" ) @@ -147,7 +147,7 @@ func TestGraphStartStop(t *testing.T) { pg.componentGraph.SetEdge(simple.Edge{F: f, T: t}) } - require.NoError(t, pg.StartAll(ctx, servicehost.NewNopHost())) + require.NoError(t, pg.StartAll(ctx, componenttest.NewNopHost())) for _, edge := range tt.edges { assert.Greater(t, ctx.order[edge[0]], ctx.order[edge[1]]) } @@ -174,7 +174,7 @@ func TestGraphStartStopCycle(t *testing.T) { pg.componentGraph.SetEdge(simple.Edge{F: c1, T: e1}) pg.componentGraph.SetEdge(simple.Edge{F: c1, T: p1}) // loop back - err := pg.StartAll(context.Background(), servicehost.NewNopHost()) + err := pg.StartAll(context.Background(), 
componenttest.NewNopHost()) assert.Error(t, err) assert.Contains(t, err.Error(), `topo: no topological ordering: cyclic components`) @@ -195,7 +195,7 @@ func TestGraphStartStopComponentError(t *testing.T) { shutdownErr: errors.New("bar"), }, }) - assert.EqualError(t, pg.StartAll(context.Background(), servicehost.NewNopHost()), "foo") + assert.EqualError(t, pg.StartAll(context.Background(), componenttest.NewNopHost()), "foo") assert.EqualError(t, pg.ShutdownAll(context.Background()), "bar") } @@ -619,7 +619,7 @@ func TestConnectorPipelinesGraph(t *testing.T) { t.Run(test.name, func(t *testing.T) { // Build the pipeline set := Settings{ - Telemetry: componenttest.NewNopTelemetrySettings(), + Telemetry: servicetelemetry.NewNopSettings(), BuildInfo: component.NewDefaultBuildInfo(), ReceiverBuilder: receiver.NewBuilder( map[component.ID]component.Config{ @@ -668,7 +668,7 @@ func TestConnectorPipelinesGraph(t *testing.T) { assert.Equal(t, len(test.pipelineConfigs), len(pg.pipelines)) - assert.NoError(t, pg.StartAll(context.Background(), servicehost.NewNopHost())) + assert.NoError(t, pg.StartAll(context.Background(), componenttest.NewNopHost())) mutatingPipelines := make(map[component.ID]bool, len(test.pipelineConfigs)) @@ -885,7 +885,7 @@ func TestConnectorRouter(t *testing.T) { ctx := context.Background() set := Settings{ - Telemetry: componenttest.NewNopTelemetrySettings(), + Telemetry: servicetelemetry.NewNopSettings(), BuildInfo: component.NewDefaultBuildInfo(), ReceiverBuilder: receiver.NewBuilder( map[component.ID]component.Config{ @@ -1929,7 +1929,7 @@ func TestGraphBuildErrors(t *testing.T) { t.Run(test.name, func(t *testing.T) { set := Settings{ BuildInfo: component.NewDefaultBuildInfo(), - Telemetry: componenttest.NewNopTelemetrySettings(), + Telemetry: servicetelemetry.NewNopSettings(), ReceiverBuilder: receiver.NewBuilder( test.receiverCfgs, map[component.Type]receiver.Factory{ @@ -1976,7 +1976,7 @@ func TestGraphFailToStartAndShutdown(t *testing.T) { 
nopConnectorFactory := connectortest.NewNopFactory() set := Settings{ - Telemetry: componenttest.NewNopTelemetrySettings(), + Telemetry: servicetelemetry.NewNopSettings(), BuildInfo: component.NewDefaultBuildInfo(), ReceiverBuilder: receiver.NewBuilder( map[component.ID]component.Config{ @@ -2028,7 +2028,7 @@ func TestGraphFailToStartAndShutdown(t *testing.T) { } pipelines, err := Build(context.Background(), set) assert.NoError(t, err) - assert.Error(t, pipelines.StartAll(context.Background(), servicehost.NewNopHost())) + assert.Error(t, pipelines.StartAll(context.Background(), componenttest.NewNopHost())) assert.Error(t, pipelines.ShutdownAll(context.Background())) }) @@ -2042,7 +2042,7 @@ func TestGraphFailToStartAndShutdown(t *testing.T) { } pipelines, err := Build(context.Background(), set) assert.NoError(t, err) - assert.Error(t, pipelines.StartAll(context.Background(), servicehost.NewNopHost())) + assert.Error(t, pipelines.StartAll(context.Background(), componenttest.NewNopHost())) assert.Error(t, pipelines.ShutdownAll(context.Background())) }) @@ -2056,7 +2056,7 @@ func TestGraphFailToStartAndShutdown(t *testing.T) { } pipelines, err := Build(context.Background(), set) assert.NoError(t, err) - assert.Error(t, pipelines.StartAll(context.Background(), servicehost.NewNopHost())) + assert.Error(t, pipelines.StartAll(context.Background(), componenttest.NewNopHost())) assert.Error(t, pipelines.ShutdownAll(context.Background())) }) @@ -2076,7 +2076,7 @@ func TestGraphFailToStartAndShutdown(t *testing.T) { } pipelines, err := Build(context.Background(), set) assert.NoError(t, err) - assert.Error(t, pipelines.StartAll(context.Background(), servicehost.NewNopHost())) + assert.Error(t, pipelines.StartAll(context.Background(), componenttest.NewNopHost())) assert.Error(t, pipelines.ShutdownAll(context.Background())) }) } diff --git a/service/internal/servicehost/host.go b/service/internal/servicehost/host.go deleted file mode 100644 index ffbfeb23054..00000000000 --- 
a/service/internal/servicehost/host.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright The OpenTelemetry Authors -// -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package servicehost // import "go.opentelemetry.io/collector/service/internal/servicehost" - -import ( - "go.opentelemetry.io/collector/component" -) - -// Host mirrors component.Host interface, with one important difference: servicehost.Host -// is not associated with a component and thus ReportComponentStatus() requires the source -// component to be explicitly specified. -type Host interface { - // ReportComponentStatus is used to communicate the status of a source component to the Host. - // The Host implementations will broadcast this information to interested parties via - // StatusWatcher interface. - ReportComponentStatus(source *component.InstanceID, event *component.StatusEvent) - - // See component.Host for the documentation of the rest of the functions. - - // Deprecated: [0.65.0] Replaced by ReportComponentStatus. - ReportFatalError(err error) - - GetFactory(kind component.Kind, componentType component.Type) component.Factory - GetExtensions() map[component.ID]component.Component - GetExporters() map[component.DataType]map[component.ID]component.Component -} diff --git a/service/internal/servicehost/nop_host.go b/service/internal/servicehost/nop_host.go deleted file mode 100644 index 16f5871ef30..00000000000 --- a/service/internal/servicehost/nop_host.go +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package servicehost // import "go.opentelemetry.io/collector/service/internal/servicehost" - -import ( - "go.opentelemetry.io/collector/component" -) - -// nopHost mocks a receiver.ReceiverHost for test purposes. 
-type nopHost struct{} - -func (n nopHost) ReportFatalError(_ error) { -} - -func (n nopHost) ReportComponentStatus(_ *component.InstanceID, _ *component.StatusEvent) { -} - -func (n nopHost) GetFactory(_ component.Kind, _ component.Type) component.Factory { - return nil -} - -func (n nopHost) GetExtensions() map[component.ID]component.Component { - return nil -} - -func (n nopHost) GetExporters() map[component.DataType]map[component.ID]component.Component { - return nil -} - -// NewNopHost returns a new instance of nopHost with proper defaults for most tests. -func NewNopHost() Host { - return &nopHost{} -} diff --git a/service/internal/servicehost/nop_host_test.go b/service/internal/servicehost/nop_host_test.go deleted file mode 100644 index 5d7d9ba5816..00000000000 --- a/service/internal/servicehost/nop_host_test.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package servicehost - -import ( - "errors" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "go.opentelemetry.io/collector/component" -) - -func TestNewNopHost(t *testing.T) { - nh := NewNopHost() - require.NotNil(t, nh) - require.IsType(t, &nopHost{}, nh) - - nh.ReportComponentStatus(&component.InstanceID{}, &component.StatusEvent{}) - nh.ReportFatalError(errors.New("TestError")) - - assert.Nil(t, nh.GetExporters()) // nolint: staticcheck - assert.Nil(t, nh.GetExtensions()) - assert.Nil(t, nh.GetFactory(component.KindReceiver, "test")) -} diff --git a/service/internal/servicetelemetry/nop_settings.go b/service/internal/servicetelemetry/nop_settings.go new file mode 100644 index 00000000000..d61a46ffa23 --- /dev/null +++ b/service/internal/servicetelemetry/nop_settings.go @@ -0,0 +1,26 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package servicetelemetry // import "go.opentelemetry.io/collector/service/internal/servicetelemetry" + +import ( + 
"go.opentelemetry.io/otel/metric/noop" + "go.opentelemetry.io/otel/trace" + "go.uber.org/zap" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/config/configtelemetry" + "go.opentelemetry.io/collector/pdata/pcommon" +) + +// NewNopSettings returns a new nop settings for Create* functions. +func NewNopSettings() Settings { + return Settings{ + Logger: zap.NewNop(), + TracerProvider: trace.NewNoopTracerProvider(), + MeterProvider: noop.NewMeterProvider(), + MetricsLevel: configtelemetry.LevelNone, + Resource: pcommon.NewResource(), + ReportComponentStatus: func(*component.InstanceID, *component.StatusEvent) {}, + } +} diff --git a/service/internal/servicetelemetry/nop_settings_test.go b/service/internal/servicetelemetry/nop_settings_test.go new file mode 100644 index 00000000000..92846e74f29 --- /dev/null +++ b/service/internal/servicetelemetry/nop_settings_test.go @@ -0,0 +1,31 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package servicetelemetry + +import ( + "testing" + + "github.com/stretchr/testify/require" + "go.uber.org/zap" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/config/configtelemetry" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/otel/metric/noop" + "go.opentelemetry.io/otel/trace" +) + +func TestNewNopSettings(t *testing.T) { + set := NewNopSettings() + + require.NotNil(t, set) + require.IsType(t, Settings{}, set) + require.Equal(t, zap.NewNop(), set.Logger) + require.Equal(t, trace.NewNoopTracerProvider(), set.TracerProvider) + require.Equal(t, noop.NewMeterProvider(), set.MeterProvider) + require.Equal(t, configtelemetry.LevelNone, set.MetricsLevel) + require.Equal(t, pcommon.NewResource(), set.Resource) + + set.ReportComponentStatus(&component.InstanceID{}, &component.StatusEvent{}) +} diff --git a/service/internal/servicetelemetry/settings.go b/service/internal/servicetelemetry/settings.go new file mode 100644 index 
00000000000..d9ea04fcc7d --- /dev/null +++ b/service/internal/servicetelemetry/settings.go @@ -0,0 +1,25 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package servicetelemetry // import "go.opentelemetry.io/collector/internal/servicetelemetry" + +import ( + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/service/internal/status" +) + +type Settings component.TelemetrySettingsBase[status.ReportStatusFunc] + +func (s Settings) ToComponentTelemetrySettings(instanceID *component.InstanceID) component.TelemetrySettings { + notifier := status.NewNotifier(instanceID, s.ReportComponentStatus) + return component.TelemetrySettings{ + Logger: s.Logger, + TracerProvider: s.TracerProvider, + MeterProvider: s.MeterProvider, + MetricsLevel: s.MetricsLevel, + Resource: s.Resource, + ReportComponentStatus: func(status component.Status, options ...component.StatusEventOption) { + notifier.Event(status, options...) + }, + } +} diff --git a/service/internal/status/status.go b/service/internal/status/status.go index ac0c6736fd9..8423fdaefe2 100644 --- a/service/internal/status/status.go +++ b/service/internal/status/status.go @@ -8,7 +8,6 @@ import ( "fmt" "go.opentelemetry.io/collector/component" - "go.opentelemetry.io/collector/service/internal/servicehost" ) // onTransitionFunc receives a component.StatusEvent on a successful state transition @@ -100,12 +99,14 @@ type Notifier interface { Event(status component.Status, options ...component.StatusEventOption) error } -// NewNotifier returns a status.Notifier that reports component status through the given -// servicehost. The underlying implementation is a finite state machine. 
-func NewNotifier(host servicehost.Host, instanceID *component.InstanceID) Notifier { +// NewNotifier returns a status.Notifier that reports component status for the given +// component instance via an underlying finite state machine +func NewNotifier(instanceID *component.InstanceID, fn func(*component.InstanceID, *component.StatusEvent)) Notifier { return newStatusFSM( func(ev *component.StatusEvent) { - host.ReportComponentStatus(instanceID, ev) + fn(instanceID, ev) }, ) } + +type ReportStatusFunc func(*component.InstanceID, *component.StatusEvent) diff --git a/service/internal/status/status_test.go b/service/internal/status/status_test.go index b7992811b8c..fada5cd3c32 100644 --- a/service/internal/status/status_test.go +++ b/service/internal/status/status_test.go @@ -10,7 +10,6 @@ import ( "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/component" - "go.opentelemetry.io/collector/service/internal/servicehost" ) func TestStatusFSM(t *testing.T) { @@ -149,6 +148,13 @@ func TestStatusEventError(t *testing.T) { } func TestNewNotifier(t *testing.T) { - notifier := NewNotifier(servicehost.NewNopHost(), &component.InstanceID{}) + fnCalled := false + + statusFunc := func(*component.InstanceID, *component.StatusEvent) { + fnCalled = true + } + + notifier := NewNotifier(&component.InstanceID{}, statusFunc) require.NoError(t, notifier.Event(component.StatusOK)) + require.True(t, fnCalled) } diff --git a/service/service.go b/service/service.go index c17a029bed1..d7c1e3ee5d7 100644 --- a/service/service.go +++ b/service/service.go @@ -29,7 +29,7 @@ import ( "go.opentelemetry.io/collector/service/extensions" "go.opentelemetry.io/collector/service/internal/graph" "go.opentelemetry.io/collector/service/internal/proctelemetry" - "go.opentelemetry.io/collector/service/internal/servicehost" + "go.opentelemetry.io/collector/service/internal/servicetelemetry" "go.opentelemetry.io/collector/service/telemetry" ) @@ -70,7 +70,7 @@ type Settings struct { type 
Service struct { buildInfo component.BuildInfo telemetry *telemetry.Telemetry - telemetrySettings component.TelemetrySettings + telemetrySettings servicetelemetry.Settings host *serviceHost telemetryInitializer *telemetryInitializer collectorConf *confmap.Conf @@ -105,14 +105,15 @@ func New(ctx context.Context, set Settings, cfg Config) (*Service, error) { res := buildResource(set.BuildInfo, cfg.Telemetry) pcommonRes := pdataFromSdk(res) - srv.telemetrySettings = component.TelemetrySettings{ + srv.telemetrySettings = servicetelemetry.Settings{ Logger: srv.telemetry.Logger(), TracerProvider: srv.telemetry.TracerProvider(), MeterProvider: noop.NewMeterProvider(), MetricsLevel: cfg.Telemetry.Metrics.Level, // Construct telemetry attributes from build info and config's resource attributes. - Resource: pcommonRes, + Resource: pcommonRes, + ReportComponentStatus: srv.host.reportComponentStatus, } if err = srv.telemetryInitializer.init(res, srv.telemetrySettings, cfg.Telemetry, set.AsyncErrorChannel); err != nil { @@ -235,7 +236,7 @@ func (srv *Service) Logger() *zap.Logger { return srv.telemetrySettings.Logger } -func getBallastSize(host servicehost.Host) uint64 { +func getBallastSize(host component.Host) uint64 { for _, ext := range host.GetExtensions() { if bExt, ok := ext.(interface{ GetBallastSize() uint64 }); ok { return bExt.GetBallastSize() diff --git a/service/service_test.go b/service/service_test.go index 8565743db67..faa004875a5 100644 --- a/service/service_test.go +++ b/service/service_test.go @@ -415,26 +415,27 @@ func TestServiceTelemetryLogger(t *testing.T) { } func TestServiceFatalError(t *testing.T) { - set := newNopSettings() - set.AsyncErrorChannel = make(chan error) + //TODO: restore this test + // set := newNopSettings() + // set.AsyncErrorChannel = make(chan error) - srv, err := New(context.Background(), set, newNopConfig()) - require.NoError(t, err) + // srv, err := New(context.Background(), set, newNopConfig()) + // require.NoError(t, err) - 
assert.NoError(t, srv.Start(context.Background())) - t.Cleanup(func() { - assert.NoError(t, srv.Shutdown(context.Background())) - }) + // assert.NoError(t, srv.Start(context.Background())) + // t.Cleanup(func() { + // assert.NoError(t, srv.Shutdown(context.Background())) + // }) - go func() { - ev, _ := component.NewStatusEvent(component.StatusFatalError, component.WithError(assert.AnError)) - srv.host.ReportComponentStatus(&component.InstanceID{}, ev) - }() + // go func() { + // ev, _ := component.NewStatusEvent(component.StatusFatalError, component.WithError(assert.AnError)) + // srv.host.ReportComponentStatus(&component.InstanceID{}, ev) + // }() - err = <-srv.host.asyncErrorChannel + // err = <-srv.host.asyncErrorChannel - require.Error(t, err) - require.ErrorIs(t, err, assert.AnError) + // require.Error(t, err) + // require.ErrorIs(t, err, assert.AnError) } func assertResourceLabels(t *testing.T, res pcommon.Resource, expectedLabels map[string]labelValue) { diff --git a/service/telemetry.go b/service/telemetry.go index 70fe22bc637..9ba57604dd3 100644 --- a/service/telemetry.go +++ b/service/telemetry.go @@ -29,10 +29,10 @@ import ( "go.uber.org/multierr" "go.uber.org/zap" - "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/config/configtelemetry" "go.opentelemetry.io/collector/internal/obsreportconfig" "go.opentelemetry.io/collector/service/internal/proctelemetry" + "go.opentelemetry.io/collector/service/internal/servicetelemetry" "go.opentelemetry.io/collector/service/telemetry" ) @@ -71,7 +71,7 @@ func newColTelemetry(useOtel bool, disableHighCardinality bool, extendedConfig b } } -func (tel *telemetryInitializer) init(res *resource.Resource, settings component.TelemetrySettings, cfg telemetry.Config, asyncErrorChannel chan error) error { +func (tel *telemetryInitializer) init(res *resource.Resource, settings servicetelemetry.Settings, cfg telemetry.Config, asyncErrorChannel chan error) error { if cfg.Metrics.Level == 
configtelemetry.LevelNone || (cfg.Metrics.Address == "" && len(cfg.Metrics.Readers) == 0) { settings.Logger.Info( "Skipping telemetry setup.", diff --git a/service/telemetry_test.go b/service/telemetry_test.go index a4144e27890..414d2578f7c 100644 --- a/service/telemetry_test.go +++ b/service/telemetry_test.go @@ -23,6 +23,7 @@ import ( "go.opentelemetry.io/collector/internal/testutil" semconv "go.opentelemetry.io/collector/semconv/v1.18.0" "go.opentelemetry.io/collector/service/internal/proctelemetry" + "go.opentelemetry.io/collector/service/internal/servicetelemetry" "go.opentelemetry.io/collector/service/telemetry" ) @@ -272,7 +273,7 @@ func TestTelemetryInit(t *testing.T) { } otelRes := buildResource(buildInfo, *tc.cfg) res := pdataFromSdk(otelRes) - settings := component.TelemetrySettings{ + settings := servicetelemetry.Settings{ Logger: zap.NewNop(), Resource: res, } From 2f0f54e1ae285ca144d7b216f112d612812457df Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Thu, 7 Sep 2023 19:18:20 -0700 Subject: [PATCH 11/40] Share state machines between component and service versions of ReportComponentStatus --- component/status.go | 7 ++- component/telemetry.go | 2 +- .../processortest/unhealthy_processor.go | 1 + service/host.go | 6 +- .../internal/servicetelemetry/nop_settings.go | 2 +- .../servicetelemetry/nop_settings_test.go | 2 +- service/internal/servicetelemetry/settings.go | 19 +++--- service/internal/status/status.go | 62 ++++++++++++------- service/internal/status/status_test.go | 60 +++++++++++++++--- service/service.go | 3 +- 10 files changed, 107 insertions(+), 57 deletions(-) diff --git a/component/status.go b/component/status.go index 3ae5b7af8d2..b98cadd612a 100644 --- a/component/status.go +++ b/component/status.go @@ -13,7 +13,8 @@ type Status int32 // Enumeration of possible component statuses const ( - StatusStarting Status = iota + StatusNone Status = iota + StatusStarting StatusOK StatusRecoverableError StatusPermanentError @@ -40,7 +41,7 @@
func (s Status) String() string { case StatusStopped: return "StatusStopped" } - return "StatusUnknown" + return "StatusNone" } // errorStatuses is a set of statuses that can have associated errors @@ -136,4 +137,4 @@ type StatusWatcher interface { ComponentStatusChanged(source *InstanceID, event *StatusEvent) } -type ReportStatusFunc func(Status, ...StatusEventOption) +type StatusFunc func(Status, ...StatusEventOption) diff --git a/component/telemetry.go b/component/telemetry.go index 1a2c04624ba..fa5546a3373 100644 --- a/component/telemetry.go +++ b/component/telemetry.go @@ -33,4 +33,4 @@ type TelemetrySettingsBase[T any] struct { ReportComponentStatus T } -type TelemetrySettings TelemetrySettingsBase[ReportStatusFunc] +type TelemetrySettings TelemetrySettingsBase[StatusFunc] diff --git a/processor/processortest/unhealthy_processor.go b/processor/processortest/unhealthy_processor.go index 002cfe2e8b5..a27a487d8e8 100644 --- a/processor/processortest/unhealthy_processor.go +++ b/processor/processortest/unhealthy_processor.go @@ -63,6 +63,7 @@ type unhealthyProcessor struct { } func (p unhealthyProcessor) Start(_ context.Context, host component.Host) error { + p.telemetry.ReportComponentStatus(component.StatusStarting) go func() { p.telemetry.ReportComponentStatus(component.StatusRecoverableError) }() diff --git a/service/host.go b/service/host.go index 8abe167570d..c0a44371c35 100644 --- a/service/host.go +++ b/service/host.go @@ -67,12 +67,8 @@ func (host *serviceHost) GetExporters() map[component.DataType]map[component.ID] return host.pipelines.GetExporters() } -func (host *serviceHost) reportComponentStatus(source *component.InstanceID, event *component.StatusEvent) { +func (host *serviceHost) notifyComponentStatusChange(source *component.InstanceID, event *component.StatusEvent) { // TODO: What should we do if there is an error returned by a StatusWatcher? 
- if host.serviceExtensions == nil { - // TODO: remove this temporary workaround - return - } host.serviceExtensions.NotifyComponentStatusChange(source, event) //nolint:errcheck if event.Status() == component.StatusFatalError { host.asyncErrorChannel <- event.Err() diff --git a/service/internal/servicetelemetry/nop_settings.go b/service/internal/servicetelemetry/nop_settings.go index d61a46ffa23..258da4b982f 100644 --- a/service/internal/servicetelemetry/nop_settings.go +++ b/service/internal/servicetelemetry/nop_settings.go @@ -21,6 +21,6 @@ func NewNopSettings() Settings { MeterProvider: noop.NewMeterProvider(), MetricsLevel: configtelemetry.LevelNone, Resource: pcommon.NewResource(), - ReportComponentStatus: func(*component.InstanceID, *component.StatusEvent) {}, + ReportComponentStatus: func(*component.InstanceID, component.Status, ...component.StatusEventOption) {}, } } diff --git a/service/internal/servicetelemetry/nop_settings_test.go b/service/internal/servicetelemetry/nop_settings_test.go index 92846e74f29..4329b7d2ec4 100644 --- a/service/internal/servicetelemetry/nop_settings_test.go +++ b/service/internal/servicetelemetry/nop_settings_test.go @@ -27,5 +27,5 @@ func TestNewNopSettings(t *testing.T) { require.Equal(t, configtelemetry.LevelNone, set.MetricsLevel) require.Equal(t, pcommon.NewResource(), set.Resource) - set.ReportComponentStatus(&component.InstanceID{}, &component.StatusEvent{}) + set.ReportComponentStatus(&component.InstanceID{}, component.StatusStarting) } diff --git a/service/internal/servicetelemetry/settings.go b/service/internal/servicetelemetry/settings.go index d9ea04fcc7d..43e18d972ca 100644 --- a/service/internal/servicetelemetry/settings.go +++ b/service/internal/servicetelemetry/settings.go @@ -8,18 +8,15 @@ import ( "go.opentelemetry.io/collector/service/internal/status" ) -type Settings component.TelemetrySettingsBase[status.ReportStatusFunc] +type Settings component.TelemetrySettingsBase[status.ServiceStatusFunc] -func (s 
Settings) ToComponentTelemetrySettings(instanceID *component.InstanceID) component.TelemetrySettings { - notifier := status.NewNotifier(instanceID, s.ReportComponentStatus) +func (s Settings) ToComponentTelemetrySettings(id *component.InstanceID) component.TelemetrySettings { return component.TelemetrySettings{ - Logger: s.Logger, - TracerProvider: s.TracerProvider, - MeterProvider: s.MeterProvider, - MetricsLevel: s.MetricsLevel, - Resource: s.Resource, - ReportComponentStatus: func(status component.Status, options ...component.StatusEventOption) { - notifier.Event(status, options...) - }, + Logger: s.Logger, + TracerProvider: s.TracerProvider, + MeterProvider: s.MeterProvider, + MetricsLevel: s.MetricsLevel, + Resource: s.Resource, + ReportComponentStatus: status.NewComponentStatusFunc(id, s.ReportComponentStatus), } } diff --git a/service/internal/status/status.go b/service/internal/status/status.go index 8423fdaefe2..9cf221c5398 100644 --- a/service/internal/status/status.go +++ b/service/internal/status/status.go @@ -6,6 +6,7 @@ package status // import "go.opentelemetry.io/collector/service/internal/status" import ( "errors" "fmt" + "sync" "go.opentelemetry.io/collector/component" ) @@ -46,15 +47,17 @@ func (m *fsm) Event(status component.Status, options ...component.StatusEventOpt return nil } -// newStatusFSM creates a state machine with all valid transitions for component.Status. -// It sets the initial state to component.StatusStarting and triggers the onTransitionFunc -// for the initial state. -func newStatusFSM(onTransition onTransitionFunc) *fsm { - starting, _ := component.NewStatusEvent(component.StatusStarting) - m := &fsm{ - current: starting, +// newFSM creates a state machine with all valid transitions for component.Status. +// The initial state is set to component.StatusNone. 
+func newFSM(onTransition onTransitionFunc) *fsm { + initial, _ := component.NewStatusEvent(component.StatusNone) + return &fsm{ + current: initial, onTransition: onTransition, transitions: map[component.Status]map[component.Status]struct{}{ + component.StatusNone: { + component.StatusStarting: {}, + }, component.StatusStarting: { component.StatusOK: {}, component.StatusRecoverableError: {}, @@ -88,25 +91,36 @@ func newStatusFSM(onTransition onTransitionFunc) *fsm { component.StatusStopped: {}, }, } - - // fire initial starting event - m.onTransition(starting) - return m } -// A Notifier emits status events -type Notifier interface { - Event(status component.Status, options ...component.StatusEventOption) error -} +type NotifyStatusFunc func(*component.InstanceID, *component.StatusEvent) +type ServiceStatusFunc func(id *component.InstanceID, status component.Status, opts ...component.StatusEventOption) -// NewNotifier returns a status.Notifier that reports component status for the given -// component instance via an underlying finite state machine -func NewNotifier(instanceID *component.InstanceID, fn func(*component.InstanceID, *component.StatusEvent)) Notifier { - return newStatusFSM( - func(ev *component.StatusEvent) { - fn(instanceID, ev) - }, - ) +// NewServiceStatusFunc returns a function to be used as ReportComponentStatus for +// servicetelemetry.Settings, which differs from component.TelemetrySettings in that +// the service version does not correspond to a specific component, and thus needs +// the a component.InstanceID as a parameter. 
+func NewServiceStatusFunc(notifyStatusChange NotifyStatusFunc) ServiceStatusFunc { + var fsmMap sync.Map + return func(id *component.InstanceID, status component.Status, opts ...component.StatusEventOption) { + f, ok := fsmMap.Load(id) + if !ok { + f = newFSM(func(ev *component.StatusEvent) { + notifyStatusChange(id, ev) + }) + if val, loaded := fsmMap.LoadOrStore(id, f); loaded { + f = val + } + } + _ = f.(*fsm).Event(status, opts...) + } } -type ReportStatusFunc func(*component.InstanceID, *component.StatusEvent) +// NewComponentStatusFunc returns a function to be used as ReportComponentStatus for +// component.TelemetrySettings, which differs from servicetelemetry.Settings in that +// the component version is tied to specific component instance. +func NewComponentStatusFunc(id *component.InstanceID, srvStatus ServiceStatusFunc) component.StatusFunc { + return func(status component.Status, opts ...component.StatusEventOption) { + srvStatus(id, status, opts...) + } +} diff --git a/service/internal/status/status_test.go b/service/internal/status/status_test.go index fada5cd3c32..e11f2cf5f13 100644 --- a/service/internal/status/status_test.go +++ b/service/internal/status/status_test.go @@ -22,6 +22,7 @@ func TestStatusFSM(t *testing.T) { { name: "successful startup and shutdown", reportedStatuses: []component.Status{ + component.StatusStarting, component.StatusOK, component.StatusStopping, component.StatusStopped, @@ -36,6 +37,7 @@ func TestStatusFSM(t *testing.T) { { name: "component recovered", reportedStatuses: []component.Status{ + component.StatusStarting, component.StatusRecoverableError, component.StatusOK, component.StatusStopping, @@ -52,6 +54,7 @@ func TestStatusFSM(t *testing.T) { { name: "repeated events are errors", reportedStatuses: []component.Status{ + component.StatusStarting, component.StatusOK, component.StatusRecoverableError, component.StatusRecoverableError, @@ -73,6 +76,7 @@ func TestStatusFSM(t *testing.T) { { name: "PermanentError is 
terminal", reportedStatuses: []component.Status{ + component.StatusStarting, component.StatusOK, component.StatusPermanentError, component.StatusOK, @@ -87,6 +91,7 @@ func TestStatusFSM(t *testing.T) { { name: "FatalError is terminal", reportedStatuses: []component.Status{ + component.StatusStarting, component.StatusOK, component.StatusFatalError, component.StatusOK, @@ -101,6 +106,7 @@ func TestStatusFSM(t *testing.T) { { name: "Stopped is terminal", reportedStatuses: []component.Status{ + component.StatusStarting, component.StatusOK, component.StatusStopping, component.StatusStopped, @@ -117,7 +123,7 @@ func TestStatusFSM(t *testing.T) { } { t.Run(tc.name, func(t *testing.T) { var receivedStatuses []component.Status - fsm := newStatusFSM( + fsm := newFSM( func(ev *component.StatusEvent) { receivedStatuses = append(receivedStatuses, ev.Status()) }, @@ -138,8 +144,8 @@ func TestStatusFSM(t *testing.T) { } func TestStatusEventError(t *testing.T) { - fsm := newStatusFSM(func(*component.StatusEvent) {}) - + fsm := newFSM(func(*component.StatusEvent) {}) + fsm.Event(component.StatusStarting) // the combination of StatusOK with an error is invalid err := fsm.Event(component.StatusOK, component.WithError(assert.AnError)) @@ -147,14 +153,48 @@ func TestStatusEventError(t *testing.T) { require.ErrorIs(t, err, component.ErrStatusEventInvalidArgument) } -func TestNewNotifier(t *testing.T) { - fnCalled := false +func TestStatusFuncs(t *testing.T) { + id1 := &component.InstanceID{} + id2 := &component.InstanceID{} + + actualStatuses := make(map[*component.InstanceID][]component.Status) + statusFunc := func(id *component.InstanceID, ev *component.StatusEvent) { + actualStatuses[id] = append(actualStatuses[id], ev.Status()) + } + + statuses1 := []component.Status{ + component.StatusStarting, + component.StatusOK, + component.StatusStopping, + component.StatusStopped, + } + + statuses2 := []component.Status{ + component.StatusStarting, + component.StatusOK, + 
component.StatusRecoverableError, + component.StatusOK, + component.StatusStopping, + component.StatusStopped, + } + + expectedStatuses := map[*component.InstanceID][]component.Status{ + id1: statuses1, + id2: statuses2, + } + + serviceStatusFn := NewServiceStatusFunc(statusFunc) + + comp1Func := NewComponentStatusFunc(id1, serviceStatusFn) + comp2Func := NewComponentStatusFunc(id2, serviceStatusFn) + + for _, st := range statuses1 { + comp1Func(st) + } - statusFunc := func(*component.InstanceID, *component.StatusEvent) { - fnCalled = true + for _, st := range statuses2 { + comp2Func(st) } - notifier := NewNotifier(&component.InstanceID{}, statusFunc) - require.NoError(t, notifier.Event(component.StatusOK)) - require.True(t, fnCalled) + require.Equal(t, expectedStatuses, actualStatuses) } diff --git a/service/service.go b/service/service.go index d7c1e3ee5d7..a7187350369 100644 --- a/service/service.go +++ b/service/service.go @@ -30,6 +30,7 @@ import ( "go.opentelemetry.io/collector/service/internal/graph" "go.opentelemetry.io/collector/service/internal/proctelemetry" "go.opentelemetry.io/collector/service/internal/servicetelemetry" + "go.opentelemetry.io/collector/service/internal/status" "go.opentelemetry.io/collector/service/telemetry" ) @@ -113,7 +114,7 @@ func New(ctx context.Context, set Settings, cfg Config) (*Service, error) { // Construct telemetry attributes from build info and config's resource attributes. 
Resource: pcommonRes, - ReportComponentStatus: srv.host.reportComponentStatus, + ReportComponentStatus: status.NewServiceStatusFunc(srv.host.notifyComponentStatusChange), } if err = srv.telemetryInitializer.init(res, srv.telemetrySettings, cfg.Telemetry, set.AsyncErrorChannel); err != nil { From e690b515d0acadf12a8681639898b16b45f3d39d Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Fri, 8 Sep 2023 18:05:49 -0700 Subject: [PATCH 12/40] Automatically report status during startup/shutdown --- service/internal/graph/graph.go | 29 ++++- service/internal/graph/graph_test.go | 184 +++++++++++++++++++++++++-- 2 files changed, 198 insertions(+), 15 deletions(-) diff --git a/service/internal/graph/graph.go b/service/internal/graph/graph.go index 8ccee72df94..b05eac1146f 100644 --- a/service/internal/graph/graph.go +++ b/service/internal/graph/graph.go @@ -10,7 +10,6 @@ import ( "strings" "go.uber.org/multierr" - "go.uber.org/zap" "gonum.org/v1/gonum/graph" "gonum.org/v1/gonum/graph/simple" "gonum.org/v1/gonum/graph/topo" @@ -51,7 +50,7 @@ type Graph struct { // Keep track of status source per node instanceIDs map[int64]*component.InstanceID - logger *zap.Logger + telemetry servicetelemetry.Settings } func Build(ctx context.Context, set Settings) (*Graph, error) { @@ -59,7 +58,7 @@ func Build(ctx context.Context, set Settings) (*Graph, error) { componentGraph: simple.NewDirectedGraph(), pipelines: make(map[component.ID]*pipelineNodes, len(set.PipelineConfigs)), instanceIDs: make(map[int64]*component.InstanceID), - logger: set.Telemetry.Logger, + telemetry: set.Telemetry, } for pipelineID := range set.PipelineConfigs { pipelines.pipelines[pipelineID] = &pipelineNodes{ @@ -378,10 +377,15 @@ func (g *Graph) StartAll(ctx context.Context, host component.Host) error { continue } - // TODO: automatically handle status here + instanceID := g.instanceIDs[node.ID()] + g.telemetry.ReportComponentStatus(instanceID, component.StatusStarting) + if compErr := comp.Start(ctx, host); 
compErr != nil { + g.telemetry.ReportComponentStatus(instanceID, component.StatusPermanentError, component.WithError(compErr)) return compErr } + + g.telemetry.ReportComponentStatus(instanceID, component.StatusOK) } return nil } @@ -398,13 +402,24 @@ func (g *Graph) ShutdownAll(ctx context.Context) error { // before the consumer is stopped. var errs error for i := 0; i < len(nodes); i++ { - comp, ok := nodes[i].(component.Component) + node := nodes[i] + comp, ok := node.(component.Component) + if !ok { // Skip capabilities/fanout nodes continue } - // TODO: automatically handle status here - errs = multierr.Append(errs, comp.Shutdown(ctx)) + + instanceID := g.instanceIDs[node.ID()] + g.telemetry.ReportComponentStatus(instanceID, component.StatusStopping) + + if compErr := comp.Shutdown(ctx); compErr != nil { + errs = multierr.Append(errs, compErr) + g.telemetry.ReportComponentStatus(instanceID, component.StatusPermanentError, component.WithError(compErr)) + continue + } + + g.telemetry.ReportComponentStatus(instanceID, component.StatusStopped) } return errs } diff --git a/service/internal/graph/graph_test.go b/service/internal/graph/graph_test.go index d1efbd698dd..ebeecd6f084 100644 --- a/service/internal/graph/graph_test.go +++ b/service/internal/graph/graph_test.go @@ -9,6 +9,7 @@ import ( "fmt" "sync" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -28,6 +29,7 @@ import ( "go.opentelemetry.io/collector/receiver" "go.opentelemetry.io/collector/receiver/receivertest" "go.opentelemetry.io/collector/service/internal/servicetelemetry" + "go.opentelemetry.io/collector/service/internal/status" "go.opentelemetry.io/collector/service/internal/testcomponents" "go.opentelemetry.io/collector/service/pipelines" ) @@ -142,8 +144,13 @@ func TestGraphStartStop(t *testing.T) { } pg := &Graph{componentGraph: simple.NewDirectedGraph()} + pg.telemetry = servicetelemetry.NewNopSettings() + pg.instanceIDs = 
make(map[int64]*component.InstanceID) + for _, edge := range tt.edges { f, t := &testNode{id: edge[0]}, &testNode{id: edge[1]} + pg.instanceIDs[f.ID()] = &component.InstanceID{} + pg.instanceIDs[t.ID()] = &component.InstanceID{} pg.componentGraph.SetEdge(simple.Edge{F: f, T: t}) } @@ -169,6 +176,13 @@ func TestGraphStartStopCycle(t *testing.T) { c1 := &testNode{id: component.NewIDWithName("c", "1")} e1 := &testNode{id: component.NewIDWithName("e", "1")} + pg.instanceIDs = map[int64]*component.InstanceID{ + r1.ID(): &component.InstanceID{}, + p1.ID(): &component.InstanceID{}, + c1.ID(): &component.InstanceID{}, + e1.ID(): &component.InstanceID{}, + } + pg.componentGraph.SetEdge(simple.Edge{F: r1, T: p1}) pg.componentGraph.SetEdge(simple.Edge{F: p1, T: c1}) pg.componentGraph.SetEdge(simple.Edge{F: c1, T: e1}) @@ -185,15 +199,22 @@ func TestGraphStartStopCycle(t *testing.T) { func TestGraphStartStopComponentError(t *testing.T) { pg := &Graph{componentGraph: simple.NewDirectedGraph()} + pg.telemetry = servicetelemetry.NewNopSettings() + r1 := &testNode{ + id: component.NewIDWithName("r", "1"), + startErr: errors.New("foo"), + } + e1 := &testNode{ + id: component.NewIDWithName("e", "1"), + shutdownErr: errors.New("bar"), + } + pg.instanceIDs = map[int64]*component.InstanceID{ + r1.ID(): &component.InstanceID{}, + e1.ID(): &component.InstanceID{}, + } pg.componentGraph.SetEdge(simple.Edge{ - F: &testNode{ - id: component.NewIDWithName("r", "1"), - startErr: errors.New("foo"), - }, - T: &testNode{ - id: component.NewIDWithName("e", "1"), - shutdownErr: errors.New("bar"), - }, + F: r1, + T: e1, }) assert.EqualError(t, pg.StartAll(context.Background(), componenttest.NewNopHost()), "foo") assert.EqualError(t, pg.ShutdownAll(context.Background()), "bar") @@ -2083,6 +2104,153 @@ func TestGraphFailToStartAndShutdown(t *testing.T) { } } +func TestStatusReportedOnStartupShutdown(t *testing.T) { + + rNoErr := &testNode{id: component.NewIDWithName("r-no-err", "1")} + rStErr := 
&testNode{id: component.NewIDWithName("r-st-err", "1"), startErr: assert.AnError} + rSdErr := &testNode{id: component.NewIDWithName("r-sd-err", "1"), shutdownErr: assert.AnError} + + eNoErr := &testNode{id: component.NewIDWithName("e-no-err", "1")} + eStErr := &testNode{id: component.NewIDWithName("e-st-err", "1"), startErr: assert.AnError} + eSdErr := &testNode{id: component.NewIDWithName("e-sd-err", "1"), shutdownErr: assert.AnError} + + instanceIDs := map[*testNode]*component.InstanceID{ + rNoErr: {ID: rNoErr.id}, + rStErr: {ID: rStErr.id}, + rSdErr: {ID: rSdErr.id}, + eNoErr: {ID: eNoErr.id}, + eStErr: {ID: eStErr.id}, + eSdErr: {ID: eSdErr.id}, + } + + newStatusEvent := func(status component.Status, opts ...component.StatusEventOption) *component.StatusEvent { + ev, _ := component.NewStatusEvent(status, opts...) + return ev + } + + now := time.Now() + + for _, tc := range []struct { + name string + edge [2]*testNode + expectedStatuses map[*component.InstanceID][]*component.StatusEvent + startupErr error + shutdownErr error + }{ + { + name: "succesful startup/shutdown", + edge: [2]*testNode{rNoErr, eNoErr}, + expectedStatuses: map[*component.InstanceID][]*component.StatusEvent{ + instanceIDs[rNoErr]: { + newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), + newStatusEvent(component.StatusOK, component.WithTimestamp(now)), + newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), + newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), + }, + instanceIDs[eNoErr]: { + newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), + newStatusEvent(component.StatusOK, component.WithTimestamp(now)), + newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), + newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), + }, + }, + }, + { + name: "early startup error", + edge: [2]*testNode{rNoErr, eStErr}, + expectedStatuses: map[*component.InstanceID][]*component.StatusEvent{ + 
instanceIDs[eStErr]: { + newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), + newStatusEvent(component.StatusPermanentError, component.WithTimestamp(now), component.WithError(assert.AnError)), + }, + }, + startupErr: assert.AnError, + }, + { + name: "late startup error", + edge: [2]*testNode{rStErr, eNoErr}, + expectedStatuses: map[*component.InstanceID][]*component.StatusEvent{ + instanceIDs[rStErr]: { + newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), + newStatusEvent(component.StatusPermanentError, component.WithTimestamp(now), component.WithError(assert.AnError)), + }, + instanceIDs[eNoErr]: { + newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), + newStatusEvent(component.StatusOK, component.WithTimestamp(now)), + newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), + newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), + }, + }, + startupErr: assert.AnError, + }, + { + name: "early shutdown error", + edge: [2]*testNode{rSdErr, eNoErr}, + expectedStatuses: map[*component.InstanceID][]*component.StatusEvent{ + instanceIDs[rSdErr]: { + newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), + newStatusEvent(component.StatusOK, component.WithTimestamp(now)), + newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), + newStatusEvent(component.StatusPermanentError, component.WithTimestamp(now), component.WithError(assert.AnError)), + }, + instanceIDs[eNoErr]: { + newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), + newStatusEvent(component.StatusOK, component.WithTimestamp(now)), + newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), + newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), + }, + }, + shutdownErr: assert.AnError, + }, + { + name: "late shutdown error", + edge: [2]*testNode{rNoErr, eSdErr}, + expectedStatuses: map[*component.InstanceID][]*component.StatusEvent{ + 
instanceIDs[rNoErr]: { + newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), + newStatusEvent(component.StatusOK, component.WithTimestamp(now)), + newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), + newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), + }, + instanceIDs[eSdErr]: { + newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), + newStatusEvent(component.StatusOK, component.WithTimestamp(now)), + newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), + newStatusEvent(component.StatusPermanentError, component.WithTimestamp(now), component.WithError(assert.AnError)), + }, + }, + shutdownErr: assert.AnError, + }, + } { + t.Run(tc.name, func(t *testing.T) { + pg := &Graph{componentGraph: simple.NewDirectedGraph()} + pg.telemetry = servicetelemetry.NewNopSettings() + + actualStatuses := make(map[*component.InstanceID][]*component.StatusEvent) + pg.telemetry.ReportComponentStatus = status.NewServiceStatusFunc(func(id *component.InstanceID, ev *component.StatusEvent) { + //copy event to normalize timestamp + opts := []component.StatusEventOption{component.WithTimestamp(now)} + if ev.Err() != nil { + opts = append(opts, component.WithError(ev.Err())) + } + evCopy, _ := component.NewStatusEvent(ev.Status(), opts...) 
+ actualStatuses[id] = append(actualStatuses[id], evCopy) + }) + + e0, e1 := tc.edge[0], tc.edge[1] + pg.instanceIDs = map[int64]*component.InstanceID{ + e0.ID(): instanceIDs[e0], + e1.ID(): instanceIDs[e1], + } + pg.componentGraph.SetEdge(simple.Edge{F: e0, T: e1}) + + assert.Equal(t, tc.startupErr, pg.StartAll(context.Background(), componenttest.NewNopHost())) + assert.Equal(t, tc.shutdownErr, pg.ShutdownAll(context.Background())) + assert.Equal(t, tc.expectedStatuses, actualStatuses) + }) + } +} + func (g *Graph) getReceivers() map[component.DataType]map[component.ID]component.Component { receiversMap := make(map[component.DataType]map[component.ID]component.Component) receiversMap[component.DataTypeTraces] = make(map[component.ID]component.Component) From 9bba9bea0b3fc32826d2e55c4c3bb6ae3f1dd722 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Sat, 9 Sep 2023 20:54:21 -0700 Subject: [PATCH 13/40] StatusFunc improvements --- component/componenttest/nop_telemetry.go | 14 ++-- component/status.go | 2 +- component/telemetry.go | 11 ++++ otelcol/collector_test.go | 3 +- .../processortest/unhealthy_processor.go | 3 +- service/internal/graph/graph.go | 12 ++-- service/internal/graph/graph_test.go | 21 +++--- .../internal/servicetelemetry/nop_settings.go | 14 ++-- .../servicetelemetry/nop_settings_test.go | 3 +- service/internal/status/status.go | 65 ++++++++++++++----- service/internal/status/status_test.go | 61 +++++++++++++++-- service/service.go | 8 ++- 12 files changed, 157 insertions(+), 60 deletions(-) diff --git a/component/componenttest/nop_telemetry.go b/component/componenttest/nop_telemetry.go index 9d04e1b360d..1aeb60a2290 100644 --- a/component/componenttest/nop_telemetry.go +++ b/component/componenttest/nop_telemetry.go @@ -16,11 +16,13 @@ import ( // NewNopTelemetrySettings returns a new nop telemetry settings for Create* functions. 
func NewNopTelemetrySettings() component.TelemetrySettings { return component.TelemetrySettings{ - Logger: zap.NewNop(), - TracerProvider: trace.NewNoopTracerProvider(), - MeterProvider: noop.NewMeterProvider(), - MetricsLevel: configtelemetry.LevelNone, - Resource: pcommon.NewResource(), - ReportComponentStatus: func(component.Status, ...component.StatusEventOption) {}, + Logger: zap.NewNop(), + TracerProvider: trace.NewNoopTracerProvider(), + MeterProvider: noop.NewMeterProvider(), + MetricsLevel: configtelemetry.LevelNone, + Resource: pcommon.NewResource(), + ReportComponentStatus: func(component.Status, ...component.StatusEventOption) error { + return nil + }, } } diff --git a/component/status.go b/component/status.go index b98cadd612a..f6399a0dea5 100644 --- a/component/status.go +++ b/component/status.go @@ -137,4 +137,4 @@ type StatusWatcher interface { ComponentStatusChanged(source *InstanceID, event *StatusEvent) } -type StatusFunc func(Status, ...StatusEventOption) +type StatusFunc func(Status, ...StatusEventOption) error diff --git a/component/telemetry.go b/component/telemetry.go index fa5546a3373..eb700a19851 100644 --- a/component/telemetry.go +++ b/component/telemetry.go @@ -30,6 +30,17 @@ type TelemetrySettingsBase[T any] struct { // Resource contains the resource attributes for the collector's telemetry. Resource pcommon.Resource + // ReportComponentStatus allows a component to report runtime changes in status. The service + // will automatically report status for a component during startup and shutdown. Components can + // use this method to report status after start and before shutdown. ReportComponentStatus + // will only return errors if the API used incorrectly. 
The three scenarios where an error will + // be returned are: + // + // - An illegal state transition + // - Using the WithError() option with a non-error status + // - Calling this method before component startup + // + // If the API is being used properly, these errors are safe to ignore. ReportComponentStatus T } diff --git a/otelcol/collector_test.go b/otelcol/collector_test.go index 968e138219b..c8a0b10accd 100644 --- a/otelcol/collector_test.go +++ b/otelcol/collector_test.go @@ -166,8 +166,7 @@ func TestComponentStatusWatcher(t *testing.T) { changedComponents := map[*component.InstanceID]component.Status{} var mux sync.Mutex onStatusChanged := func(source *component.InstanceID, event *component.StatusEvent) { - // skip the startup notifications - if event.Status() == component.StatusStarting { + if event.Status() != component.StatusRecoverableError { return } mux.Lock() diff --git a/processor/processortest/unhealthy_processor.go b/processor/processortest/unhealthy_processor.go index a27a487d8e8..979bfbdd0fd 100644 --- a/processor/processortest/unhealthy_processor.go +++ b/processor/processortest/unhealthy_processor.go @@ -63,9 +63,8 @@ type unhealthyProcessor struct { } func (p unhealthyProcessor) Start(_ context.Context, host component.Host) error { - p.telemetry.ReportComponentStatus(component.StatusStarting) go func() { - p.telemetry.ReportComponentStatus(component.StatusRecoverableError) + _ = p.telemetry.ReportComponentStatus(component.StatusRecoverableError) }() return nil } diff --git a/service/internal/graph/graph.go b/service/internal/graph/graph.go index b05eac1146f..ddbcd64697e 100644 --- a/service/internal/graph/graph.go +++ b/service/internal/graph/graph.go @@ -378,14 +378,14 @@ func (g *Graph) StartAll(ctx context.Context, host component.Host) error { } instanceID := g.instanceIDs[node.ID()] - g.telemetry.ReportComponentStatus(instanceID, component.StatusStarting) + _ = g.telemetry.ReportComponentStatus(instanceID, component.StatusStarting) if 
compErr := comp.Start(ctx, host); compErr != nil { - g.telemetry.ReportComponentStatus(instanceID, component.StatusPermanentError, component.WithError(compErr)) + _ = g.telemetry.ReportComponentStatus(instanceID, component.StatusPermanentError, component.WithError(compErr)) return compErr } - g.telemetry.ReportComponentStatus(instanceID, component.StatusOK) + _ = g.telemetry.ReportComponentStatus(instanceID, component.StatusOK) } return nil } @@ -411,15 +411,15 @@ func (g *Graph) ShutdownAll(ctx context.Context) error { } instanceID := g.instanceIDs[node.ID()] - g.telemetry.ReportComponentStatus(instanceID, component.StatusStopping) + _ = g.telemetry.ReportComponentStatus(instanceID, component.StatusStopping) if compErr := comp.Shutdown(ctx); compErr != nil { errs = multierr.Append(errs, compErr) - g.telemetry.ReportComponentStatus(instanceID, component.StatusPermanentError, component.WithError(compErr)) + _ = g.telemetry.ReportComponentStatus(instanceID, component.StatusPermanentError, component.WithError(compErr)) continue } - g.telemetry.ReportComponentStatus(instanceID, component.StatusStopped) + _ = g.telemetry.ReportComponentStatus(instanceID, component.StatusStopped) } return errs } diff --git a/service/internal/graph/graph_test.go b/service/internal/graph/graph_test.go index ebeecd6f084..ad7eaf045b7 100644 --- a/service/internal/graph/graph_test.go +++ b/service/internal/graph/graph_test.go @@ -177,10 +177,10 @@ func TestGraphStartStopCycle(t *testing.T) { e1 := &testNode{id: component.NewIDWithName("e", "1")} pg.instanceIDs = map[int64]*component.InstanceID{ - r1.ID(): &component.InstanceID{}, - p1.ID(): &component.InstanceID{}, - c1.ID(): &component.InstanceID{}, - e1.ID(): &component.InstanceID{}, + r1.ID(): {}, + p1.ID(): {}, + c1.ID(): {}, + e1.ID(): {}, } pg.componentGraph.SetEdge(simple.Edge{F: r1, T: p1}) @@ -209,8 +209,8 @@ func TestGraphStartStopComponentError(t *testing.T) { shutdownErr: errors.New("bar"), } pg.instanceIDs = 
map[int64]*component.InstanceID{ - r1.ID(): &component.InstanceID{}, - e1.ID(): &component.InstanceID{}, + r1.ID(): {}, + e1.ID(): {}, } pg.componentGraph.SetEdge(simple.Edge{ F: r1, @@ -2138,7 +2138,7 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { shutdownErr error }{ { - name: "succesful startup/shutdown", + name: "successful startup/shutdown", edge: [2]*testNode{rNoErr, eNoErr}, expectedStatuses: map[*component.InstanceID][]*component.StatusEvent{ instanceIDs[rNoErr]: { @@ -2227,8 +2227,8 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { pg.telemetry = servicetelemetry.NewNopSettings() actualStatuses := make(map[*component.InstanceID][]*component.StatusEvent) - pg.telemetry.ReportComponentStatus = status.NewServiceStatusFunc(func(id *component.InstanceID, ev *component.StatusEvent) { - //copy event to normalize timestamp + init, statusFunc := status.NewServiceStatusFunc(func(id *component.InstanceID, ev *component.StatusEvent) { + // copy event to normalize timestamp opts := []component.StatusEventOption{component.WithTimestamp(now)} if ev.Err() != nil { opts = append(opts, component.WithError(ev.Err())) @@ -2237,6 +2237,9 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { actualStatuses[id] = append(actualStatuses[id], evCopy) }) + pg.telemetry.ReportComponentStatus = statusFunc + init() + e0, e1 := tc.edge[0], tc.edge[1] pg.instanceIDs = map[int64]*component.InstanceID{ e0.ID(): instanceIDs[e0], diff --git a/service/internal/servicetelemetry/nop_settings.go b/service/internal/servicetelemetry/nop_settings.go index 258da4b982f..8d596570759 100644 --- a/service/internal/servicetelemetry/nop_settings.go +++ b/service/internal/servicetelemetry/nop_settings.go @@ -16,11 +16,13 @@ import ( // NewNopSettings returns a new nop settings for Create* functions. 
func NewNopSettings() Settings { return Settings{ - Logger: zap.NewNop(), - TracerProvider: trace.NewNoopTracerProvider(), - MeterProvider: noop.NewMeterProvider(), - MetricsLevel: configtelemetry.LevelNone, - Resource: pcommon.NewResource(), - ReportComponentStatus: func(*component.InstanceID, component.Status, ...component.StatusEventOption) {}, + Logger: zap.NewNop(), + TracerProvider: trace.NewNoopTracerProvider(), + MeterProvider: noop.NewMeterProvider(), + MetricsLevel: configtelemetry.LevelNone, + Resource: pcommon.NewResource(), + ReportComponentStatus: func(*component.InstanceID, component.Status, ...component.StatusEventOption) error { + return nil + }, } } diff --git a/service/internal/servicetelemetry/nop_settings_test.go b/service/internal/servicetelemetry/nop_settings_test.go index 4329b7d2ec4..d2463f5d1f0 100644 --- a/service/internal/servicetelemetry/nop_settings_test.go +++ b/service/internal/servicetelemetry/nop_settings_test.go @@ -26,6 +26,5 @@ func TestNewNopSettings(t *testing.T) { require.Equal(t, noop.NewMeterProvider(), set.MeterProvider) require.Equal(t, configtelemetry.LevelNone, set.MetricsLevel) require.Equal(t, pcommon.NewResource(), set.Resource) - - set.ReportComponentStatus(&component.InstanceID{}, component.StatusStarting) + require.NoError(t, set.ReportComponentStatus(&component.InstanceID{}, component.StatusStarting)) } diff --git a/service/internal/status/status.go b/service/internal/status/status.go index 9cf221c5398..38d91f5ea39 100644 --- a/service/internal/status/status.go +++ b/service/internal/status/status.go @@ -23,10 +23,10 @@ type fsm struct { onTransition onTransitionFunc } -// Event will attempt to execute a state transition. If successful, it calls the onTransitionFunc +// Transition will attempt to execute a state transition. If successful, it calls the onTransitionFunc // with a StatusEvent representing the new state. 
Returns an error if the arguments result in an // invalid status, or if the state transition is not valid. -func (m *fsm) Event(status component.Status, options ...component.StatusEventOption) error { +func (m *fsm) Transition(status component.Status, options ...component.StatusEventOption) error { if _, ok := m.transitions[m.current.Status()][status]; !ok { return fmt.Errorf( "cannot transition from %s to %s: %w", @@ -93,34 +93,65 @@ func newFSM(onTransition onTransitionFunc) *fsm { } } +type InitFunc func() +type readyFunc func() bool + +func initAndReadyFuncs() (InitFunc, readyFunc) { + mu := sync.RWMutex{} + isReady := false + + init := func() { + mu.Lock() + defer mu.Unlock() + isReady = true + } + + ready := func() bool { + mu.RLock() + defer mu.RUnlock() + return isReady + } + + return init, ready +} + type NotifyStatusFunc func(*component.InstanceID, *component.StatusEvent) -type ServiceStatusFunc func(id *component.InstanceID, status component.Status, opts ...component.StatusEventOption) +type ServiceStatusFunc func(id *component.InstanceID, status component.Status, opts ...component.StatusEventOption) error + +var errStatusNotReady = errors.New("report component status is not ready until service start") // NewServiceStatusFunc returns a function to be used as ReportComponentStatus for // servicetelemetry.Settings, which differs from component.TelemetrySettings in that // the service version does not correspond to a specific component, and thus needs // the a component.InstanceID as a parameter. 
-func NewServiceStatusFunc(notifyStatusChange NotifyStatusFunc) ServiceStatusFunc { - var fsmMap sync.Map - return func(id *component.InstanceID, status component.Status, opts ...component.StatusEventOption) { - f, ok := fsmMap.Load(id) - if !ok { - f = newFSM(func(ev *component.StatusEvent) { - notifyStatusChange(id, ev) - }) - if val, loaded := fsmMap.LoadOrStore(id, f); loaded { - f = val +func NewServiceStatusFunc(notifyStatusChange NotifyStatusFunc) (InitFunc, ServiceStatusFunc) { + init, isReady := initAndReadyFuncs() + mu := sync.Mutex{} + fsmMap := make(map[*component.InstanceID]*fsm) + return init, + func(id *component.InstanceID, status component.Status, opts ...component.StatusEventOption) error { + if !isReady() { + return errStatusNotReady } + mu.Lock() + defer mu.Unlock() + fsm, ok := fsmMap[id] + if !ok { + fsm = newFSM(func(ev *component.StatusEvent) { + notifyStatusChange(id, ev) + }) + fsmMap[id] = fsm + } + return fsm.Transition(status, opts...) } - _ = f.(*fsm).Event(status, opts...) - } + } // NewComponentStatusFunc returns a function to be used as ReportComponentStatus for // component.TelemetrySettings, which differs from servicetelemetry.Settings in that // the component version is tied to specific component instance. func NewComponentStatusFunc(id *component.InstanceID, srvStatus ServiceStatusFunc) component.StatusFunc { - return func(status component.Status, opts ...component.StatusEventOption) { - srvStatus(id, status, opts...) + return func(status component.Status, opts ...component.StatusEventOption) error { + return srvStatus(id, status, opts...) 
} } diff --git a/service/internal/status/status_test.go b/service/internal/status/status_test.go index e11f2cf5f13..884ac056161 100644 --- a/service/internal/status/status_test.go +++ b/service/internal/status/status_test.go @@ -4,6 +4,7 @@ package status import ( + "sync" "testing" "github.com/stretchr/testify/assert" @@ -131,7 +132,7 @@ func TestStatusFSM(t *testing.T) { errorCount := 0 for _, status := range tc.reportedStatuses { - if err := fsm.Event(status); err != nil { + if err := fsm.Transition(status); err != nil { errorCount++ require.ErrorIs(t, err, errInvalidStateTransition) } @@ -145,9 +146,11 @@ func TestStatusFSM(t *testing.T) { func TestStatusEventError(t *testing.T) { fsm := newFSM(func(*component.StatusEvent) {}) - fsm.Event(component.StatusStarting) + err := fsm.Transition(component.StatusStarting) + require.NoError(t, err) + // the combination of StatusOK with an error is invalid - err := fsm.Event(component.StatusOK, component.WithError(assert.AnError)) + err = fsm.Transition(component.StatusOK, component.WithError(assert.AnError)) require.Error(t, err) require.ErrorIs(t, err, component.ErrStatusEventInvalidArgument) @@ -183,18 +186,62 @@ func TestStatusFuncs(t *testing.T) { id2: statuses2, } - serviceStatusFn := NewServiceStatusFunc(statusFunc) - + init, serviceStatusFn := NewServiceStatusFunc(statusFunc) comp1Func := NewComponentStatusFunc(id1, serviceStatusFn) comp2Func := NewComponentStatusFunc(id2, serviceStatusFn) + init() for _, st := range statuses1 { - comp1Func(st) + require.NoError(t, comp1Func(st)) } for _, st := range statuses2 { - comp2Func(st) + require.NoError(t, comp2Func(st)) } require.Equal(t, expectedStatuses, actualStatuses) } + +func TestStatusFuncsConcurrent(t *testing.T) { + ids := []*component.InstanceID{{}, {}, {}, {}} + count := 0 + statusFunc := func(id *component.InstanceID, ev *component.StatusEvent) { + count++ + } + init, serviceStatusFn := NewServiceStatusFunc(statusFunc) + init() + + wg := sync.WaitGroup{} + 
wg.Add(len(ids)) + + for _, id := range ids { + id := id + go func() { + compFn := NewComponentStatusFunc(id, serviceStatusFn) + _ = compFn(component.StatusStarting) + for i := 0; i < 1000; i++ { + _ = compFn(component.StatusRecoverableError) + _ = compFn(component.StatusOK) + } + wg.Done() + }() + } + + wg.Wait() + require.Equal(t, 8004, count) +} + +func TestStatusFuncReady(t *testing.T) { + statusFunc := func(*component.InstanceID, *component.StatusEvent) {} + init, serviceStatusFn := NewServiceStatusFunc(statusFunc) + id := &component.InstanceID{} + + err := serviceStatusFn(id, component.StatusStarting) + require.Error(t, err) + require.ErrorIs(t, err, errStatusNotReady) + + init() + + err = serviceStatusFn(id, component.StatusStarting) + require.NoError(t, err) +} diff --git a/service/service.go b/service/service.go index a7187350369..bc3339df673 100644 --- a/service/service.go +++ b/service/service.go @@ -75,6 +75,7 @@ type Service struct { host *serviceHost telemetryInitializer *telemetryInitializer collectorConf *confmap.Conf + statusInit status.InitFunc } func New(ctx context.Context, set Settings, cfg Config) (*Service, error) { @@ -113,8 +114,7 @@ func New(ctx context.Context, set Settings, cfg Config) (*Service, error) { MetricsLevel: cfg.Telemetry.Metrics.Level, // Construct telemetry attributes from build info and config's resource attributes. 
- Resource: pcommonRes, - ReportComponentStatus: status.NewServiceStatusFunc(srv.host.notifyComponentStatusChange), + Resource: pcommonRes, } if err = srv.telemetryInitializer.init(res, srv.telemetrySettings, cfg.Telemetry, set.AsyncErrorChannel); err != nil { @@ -122,6 +122,8 @@ func New(ctx context.Context, set Settings, cfg Config) (*Service, error) { } srv.telemetrySettings.MeterProvider = srv.telemetryInitializer.mp srv.telemetrySettings.TracerProvider = srv.telemetryInitializer.tp + srv.statusInit, srv.telemetrySettings.ReportComponentStatus = + status.NewServiceStatusFunc(srv.host.notifyComponentStatusChange) // process the configuration and initialize the pipeline if err = srv.initExtensionsAndPipeline(ctx, set, cfg); err != nil { @@ -143,6 +145,8 @@ func (srv *Service) Start(ctx context.Context) error { zap.Int("NumCPU", runtime.NumCPU()), ) + srv.statusInit() + if err := srv.host.serviceExtensions.Start(ctx, srv.host); err != nil { return fmt.Errorf("failed to start extensions: %w", err) } From 7d08aa582e08b094d1552b523bb17b830e5030ad Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Sun, 10 Sep 2023 16:08:04 -0700 Subject: [PATCH 14/40] Cleanup --- component/componenttest/nop_host.go | 2 -- component/componenttest/nop_host_test.go | 1 - component/host.go | 2 +- component/status.go | 1 + component/telemetry.go | 2 ++ otelcol/collector_test.go | 6 ++-- .../processortest/unhealthy_processor.go | 2 +- service/extensions/extensions.go | 6 +--- service/internal/components/host_wrapper.go | 2 +- .../internal/components/host_wrapper_test.go | 3 +- .../servicetelemetry/nop_settings_test.go | 4 +-- service/internal/servicetelemetry/settings.go | 7 +++- .../servicetelemetry/settings_test.go | 34 +++++++++++++++++++ service/internal/status/status.go | 20 ++++++++--- service/internal/status/status_test.go | 6 ++-- service/service.go | 1 + service/service_test.go | 31 ++++++++--------- 17 files changed, 88 insertions(+), 42 deletions(-) create mode 100644 
service/internal/servicetelemetry/settings_test.go diff --git a/component/componenttest/nop_host.go b/component/componenttest/nop_host.go index 3518d584ac0..4accfab0d8c 100644 --- a/component/componenttest/nop_host.go +++ b/component/componenttest/nop_host.go @@ -17,8 +17,6 @@ func NewNopHost() component.Host { func (nh *nopHost) ReportFatalError(_ error) {} -func (nh *nopHost) ReportComponentStatus(_ component.Status, _ ...component.StatusEventOption) {} - func (nh *nopHost) GetFactory(_ component.Kind, _ component.Type) component.Factory { return nil } diff --git a/component/componenttest/nop_host_test.go b/component/componenttest/nop_host_test.go index f6ca2a0b5f8..1bcb92d1744 100644 --- a/component/componenttest/nop_host_test.go +++ b/component/componenttest/nop_host_test.go @@ -19,7 +19,6 @@ func TestNewNopHost(t *testing.T) { require.IsType(t, &nopHost{}, nh) nh.ReportFatalError(errors.New("TestError")) - assert.Nil(t, nh.GetExporters()) // nolint: staticcheck assert.Nil(t, nh.GetExtensions()) assert.Nil(t, nh.GetFactory(component.KindReceiver, "test")) diff --git a/component/host.go b/component/host.go index 44526b52d46..9b963727d1f 100644 --- a/component/host.go +++ b/component/host.go @@ -12,7 +12,7 @@ type Host interface { // // ReportFatalError should be called by the component anytime after Component.Start() ends and // before Component.Shutdown() begins. - // Deprecated: [0.65.0] Use ReportComponentStatus instead (with an event component.StatusFatalError) + // Deprecated: [x.x.x] Use ReportComponentStatus instead (with an event component.StatusFatalError) ReportFatalError(err error) // GetFactory of the specified kind. Returns the factory for a component type. 
diff --git a/component/status.go b/component/status.go index f6399a0dea5..e09ecbf90f1 100644 --- a/component/status.go +++ b/component/status.go @@ -137,4 +137,5 @@ type StatusWatcher interface { ComponentStatusChanged(source *InstanceID, event *StatusEvent) } +// StatusFunc is the expected type of ReportComponentStatus for component.TelemetrySettings type StatusFunc func(Status, ...StatusEventOption) error diff --git a/component/telemetry.go b/component/telemetry.go index eb700a19851..7c47bf2af2e 100644 --- a/component/telemetry.go +++ b/component/telemetry.go @@ -44,4 +44,6 @@ type TelemetrySettingsBase[T any] struct { ReportComponentStatus T } +// TelemetrySettings and servicetelemetry.Settings differ in the method signature for +// ReportComponentStatus type TelemetrySettings TelemetrySettingsBase[StatusFunc] diff --git a/otelcol/collector_test.go b/otelcol/collector_test.go index c8a0b10accd..77a3712943c 100644 --- a/otelcol/collector_test.go +++ b/otelcol/collector_test.go @@ -158,7 +158,7 @@ func TestComponentStatusWatcher(t *testing.T) { assert.NoError(t, err) // Use a processor factory that creates "unhealthy" processor: one that - // always reports StatusError after successful Start. + // always reports StatusRecoverableError after successful Start. unhealthyProcessorFactory := processortest.NewUnhealthyProcessorFactory() factories.Processors[unhealthyProcessorFactory.Type()] = unhealthyProcessorFactory @@ -194,7 +194,7 @@ func TestComponentStatusWatcher(t *testing.T) { // Start the newly created collector. wg := startCollector(context.Background(), t, col) - // The "unhealthy" processors will now begin to asynchronously report StatusError. + // The "unhealthy" processors will now begin to asynchronously report StatusRecoverableError. // We expect to see these reports. 
assert.Eventually(t, func() bool { mux.Lock() @@ -203,7 +203,7 @@ func TestComponentStatusWatcher(t *testing.T) { for k, v := range changedComponents { // All processors must report a status change with the same ID assert.EqualValues(t, component.NewID(unhealthyProcessorFactory.Type()), k.ID) - // And all must be in StatusError + // And all must be in StatusRecoverableError assert.EqualValues(t, component.StatusRecoverableError, v) } // We have 3 processors with exactly the same ID in otelcol-statuswatcher.yaml diff --git a/processor/processortest/unhealthy_processor.go b/processor/processortest/unhealthy_processor.go index 979bfbdd0fd..0ae30e50ec8 100644 --- a/processor/processortest/unhealthy_processor.go +++ b/processor/processortest/unhealthy_processor.go @@ -62,7 +62,7 @@ type unhealthyProcessor struct { telemetry component.TelemetrySettings } -func (p unhealthyProcessor) Start(_ context.Context, host component.Host) error { +func (p unhealthyProcessor) Start(_ context.Context, _ component.Host) error { go func() { _ = p.telemetry.ReportComponentStatus(component.StatusRecoverableError) }() diff --git a/service/extensions/extensions.go b/service/extensions/extensions.go index 89ec6a94d89..8388f4c6334 100644 --- a/service/extensions/extensions.go +++ b/service/extensions/extensions.go @@ -145,17 +145,13 @@ func New(ctx context.Context, set Settings, cfg Config) (*Extensions, error) { extMap: make(map[component.ID]extension.Extension), } for _, extID := range cfg { - instanceID := &component.InstanceID{ ID: extID, Kind: component.KindExtension, } - - telSet := set.Telemetry.ToComponentTelemetrySettings(instanceID) - extSet := extension.CreateSettings{ ID: extID, - TelemetrySettings: telSet, + TelemetrySettings: set.Telemetry.ToComponentTelemetrySettings(instanceID), BuildInfo: set.BuildInfo, } extSet.TelemetrySettings.Logger = components.ExtensionLogger(set.Telemetry.Logger, extID) diff --git a/service/internal/components/host_wrapper.go 
b/service/internal/components/host_wrapper.go index 07ff2a741d7..2d386ddad67 100644 --- a/service/internal/components/host_wrapper.go +++ b/service/internal/components/host_wrapper.go @@ -27,7 +27,7 @@ func NewHostWrapper(host component.Host, logger *zap.Logger) component.Host { func (hw *hostWrapper) ReportFatalError(err error) { // The logger from the built component already identifies the component. hw.Logger.Error("Component fatal error", zap.Error(err)) - hw.Host.ReportFatalError(err) // nolint:staticcheck + hw.Host.ReportFatalError(err) } // RegisterZPages is used by zpages extension to register handles from service. diff --git a/service/internal/components/host_wrapper_test.go b/service/internal/components/host_wrapper_test.go index 25567810fff..62b7a744681 100644 --- a/service/internal/components/host_wrapper_test.go +++ b/service/internal/components/host_wrapper_test.go @@ -7,8 +7,9 @@ import ( "errors" "testing" - "go.opentelemetry.io/collector/component/componenttest" "go.uber.org/zap" + + "go.opentelemetry.io/collector/component/componenttest" ) func Test_newHostWrapper(_ *testing.T) { diff --git a/service/internal/servicetelemetry/nop_settings_test.go b/service/internal/servicetelemetry/nop_settings_test.go index d2463f5d1f0..90cdec4a9b6 100644 --- a/service/internal/servicetelemetry/nop_settings_test.go +++ b/service/internal/servicetelemetry/nop_settings_test.go @@ -7,13 +7,13 @@ import ( "testing" "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/metric/noop" + "go.opentelemetry.io/otel/trace" "go.uber.org/zap" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/config/configtelemetry" "go.opentelemetry.io/collector/pdata/pcommon" - "go.opentelemetry.io/otel/metric/noop" - "go.opentelemetry.io/otel/trace" ) func TestNewNopSettings(t *testing.T) { diff --git a/service/internal/servicetelemetry/settings.go b/service/internal/servicetelemetry/settings.go index 43e18d972ca..c665bf27bb5 100644 --- 
a/service/internal/servicetelemetry/settings.go +++ b/service/internal/servicetelemetry/settings.go @@ -1,15 +1,20 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package servicetelemetry // import "go.opentelemetry.io/collector/internal/servicetelemetry" +package servicetelemetry // import "go.opentelemetry.io/collector/service/internal/servicetelemetry" import ( "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/service/internal/status" ) +// Settings mirrors component.TelemetrySettings except for the method signature of +// ReportComponentStatus. The service level Settings is not bound to a specific component, and +// therefore takes a component.InstanceID as an argument. type Settings component.TelemetrySettingsBase[status.ServiceStatusFunc] +// ToComponentTelemetrySettings returns a TelemetrySettings for a specific component derived from +// this service level Settings object. func (s Settings) ToComponentTelemetrySettings(id *component.InstanceID) component.TelemetrySettings { return component.TelemetrySettings{ Logger: s.Logger, diff --git a/service/internal/servicetelemetry/settings_test.go b/service/internal/servicetelemetry/settings_test.go new file mode 100644 index 00000000000..d5b3ee6a1e1 --- /dev/null +++ b/service/internal/servicetelemetry/settings_test.go @@ -0,0 +1,34 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package servicetelemetry + +import ( + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/metric/noop" + "go.opentelemetry.io/otel/trace" + "go.uber.org/zap" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/config/configtelemetry" + "go.opentelemetry.io/collector/pdata/pcommon" +) + +func TestSettings(t *testing.T) { + set := Settings{ + Logger: zap.NewNop(), + TracerProvider: trace.NewNoopTracerProvider(), + MeterProvider: noop.NewMeterProvider(), + MetricsLevel: configtelemetry.LevelNone, 
+ Resource: pcommon.NewResource(), + ReportComponentStatus: func(*component.InstanceID, component.Status, ...component.StatusEventOption) error { + return nil + }, + } + require.NoError(t, set.ReportComponentStatus(&component.InstanceID{}, component.StatusOK)) + + compSet := set.ToComponentTelemetrySettings(&component.InstanceID{}) + require.NoError(t, compSet.ReportComponentStatus(component.StatusOK)) +} diff --git a/service/internal/status/status.go b/service/internal/status/status.go index 38d91f5ea39..908e847e527 100644 --- a/service/internal/status/status.go +++ b/service/internal/status/status.go @@ -14,6 +14,7 @@ import ( // onTransitionFunc receives a component.StatusEvent on a successful state transition type onTransitionFunc func(*component.StatusEvent) +// errInvalidStateTransition is returned for invalid state transitions var errInvalidStateTransition = errors.New("invalid state transition") // fsm is a finite state machine that models transitions for component status @@ -23,10 +24,10 @@ type fsm struct { onTransition onTransitionFunc } -// Transition will attempt to execute a state transition. If successful, it calls the onTransitionFunc -// with a StatusEvent representing the new state. Returns an error if the arguments result in an -// invalid status, or if the state transition is not valid. -func (m *fsm) Transition(status component.Status, options ...component.StatusEventOption) error { +// transition will attempt to execute a state transition. If it's successful, it calls the +// onTransitionFunc with a StatusEvent representing the new state. Returns an error if the arguments +// result in an invalid status, or if the state transition is not valid. 
+func (m *fsm) transition(status component.Status, options ...component.StatusEventOption) error { if _, ok := m.transitions[m.current.Status()][status]; !ok { return fmt.Errorf( "cannot transition from %s to %s: %w", @@ -93,9 +94,13 @@ func newFSM(onTransition onTransitionFunc) *fsm { } } +// InitFunc can be used to toggle a ready flag to true type InitFunc func() + +// readyFunc can be used to check the value of a ready flag type readyFunc func() bool +// initAndReadyFuncs returns a pair of functions to set and check a boolean ready flag func initAndReadyFuncs() (InitFunc, readyFunc) { mu := sync.RWMutex{} isReady := false @@ -115,9 +120,13 @@ func initAndReadyFuncs() (InitFunc, readyFunc) { return init, ready } +// NotifyStatusFunc is the receiver of status events after successful state transitions type NotifyStatusFunc func(*component.InstanceID, *component.StatusEvent) + +// ServiceStatusFunc is the expected type of ReportComponentStatus for servicetelemetry.Settings type ServiceStatusFunc func(id *component.InstanceID, status component.Status, opts ...component.StatusEventOption) error +// errStatusNotReady is returned when trying to report status before service start var errStatusNotReady = errors.New("report component status is not ready until service start") // NewServiceStatusFunc returns a function to be used as ReportComponentStatus for @@ -126,6 +135,7 @@ var errStatusNotReady = errors.New("report component status is not ready until s // the a component.InstanceID as a parameter. func NewServiceStatusFunc(notifyStatusChange NotifyStatusFunc) (InitFunc, ServiceStatusFunc) { init, isReady := initAndReadyFuncs() + // mu synchronizes access to the fsmMap and the underlying fsm during a state transition mu := sync.Mutex{} fsmMap := make(map[*component.InstanceID]*fsm) return init, @@ -142,7 +152,7 @@ func NewServiceStatusFunc(notifyStatusChange NotifyStatusFunc) (InitFunc, Servic }) fsmMap[id] = fsm } - return fsm.Transition(status, opts...) 
+ return fsm.transition(status, opts...) } } diff --git a/service/internal/status/status_test.go b/service/internal/status/status_test.go index 884ac056161..a51890e2d9c 100644 --- a/service/internal/status/status_test.go +++ b/service/internal/status/status_test.go @@ -132,7 +132,7 @@ func TestStatusFSM(t *testing.T) { errorCount := 0 for _, status := range tc.reportedStatuses { - if err := fsm.Transition(status); err != nil { + if err := fsm.transition(status); err != nil { errorCount++ require.ErrorIs(t, err, errInvalidStateTransition) } @@ -146,11 +146,11 @@ func TestStatusFSM(t *testing.T) { func TestStatusEventError(t *testing.T) { fsm := newFSM(func(*component.StatusEvent) {}) - err := fsm.Transition(component.StatusStarting) + err := fsm.transition(component.StatusStarting) require.NoError(t, err) // the combination of StatusOK with an error is invalid - err = fsm.Transition(component.StatusOK, component.WithError(assert.AnError)) + err = fsm.transition(component.StatusOK, component.WithError(assert.AnError)) require.Error(t, err) require.ErrorIs(t, err, component.ErrStatusEventInvalidArgument) diff --git a/service/service.go b/service/service.go index bc3339df673..3ef1f6197b1 100644 --- a/service/service.go +++ b/service/service.go @@ -145,6 +145,7 @@ func (srv *Service) Start(ctx context.Context) error { zap.Int("NumCPU", runtime.NumCPU()), ) + // enable status reporting srv.statusInit() if err := srv.host.serviceExtensions.Start(ctx, srv.host); err != nil { diff --git a/service/service_test.go b/service/service_test.go index faa004875a5..af2765df287 100644 --- a/service/service_test.go +++ b/service/service_test.go @@ -415,27 +415,26 @@ func TestServiceTelemetryLogger(t *testing.T) { } func TestServiceFatalError(t *testing.T) { - //TODO: restore this test - // set := newNopSettings() - // set.AsyncErrorChannel = make(chan error) + set := newNopSettings() + set.AsyncErrorChannel = make(chan error) - // srv, err := New(context.Background(), set, 
newNopConfig()) - // require.NoError(t, err) + srv, err := New(context.Background(), set, newNopConfig()) + require.NoError(t, err) - // assert.NoError(t, srv.Start(context.Background())) - // t.Cleanup(func() { - // assert.NoError(t, srv.Shutdown(context.Background())) - // }) + assert.NoError(t, srv.Start(context.Background())) + t.Cleanup(func() { + assert.NoError(t, srv.Shutdown(context.Background())) + }) - // go func() { - // ev, _ := component.NewStatusEvent(component.StatusFatalError, component.WithError(assert.AnError)) - // srv.host.ReportComponentStatus(&component.InstanceID{}, ev) - // }() + go func() { + ev, _ := component.NewStatusEvent(component.StatusFatalError, component.WithError(assert.AnError)) + srv.host.notifyComponentStatusChange(&component.InstanceID{}, ev) + }() - // err = <-srv.host.asyncErrorChannel + err = <-srv.host.asyncErrorChannel - // require.Error(t, err) - // require.ErrorIs(t, err, assert.AnError) + require.Error(t, err) + require.ErrorIs(t, err, assert.AnError) } func assertResourceLabels(t *testing.T, res pcommon.Resource, expectedLabels map[string]labelValue) { From 7f35ad5fdc9605b1b39e87ad4c083b625e18c3a6 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Mon, 11 Sep 2023 09:53:47 -0700 Subject: [PATCH 15/40] Do not automatically report StatusOK during startup This needs to be left to the component to report, as there is a race condition for components that startup asynchronous work from their Start methods. 
--- otelcol/collector_test.go | 23 ++++++++++++++++++----- service/internal/graph/graph.go | 2 -- service/internal/graph/graph_test.go | 7 ------- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/otelcol/collector_test.go b/otelcol/collector_test.go index 77a3712943c..e9fad20f7d0 100644 --- a/otelcol/collector_test.go +++ b/otelcol/collector_test.go @@ -163,15 +163,15 @@ func TestComponentStatusWatcher(t *testing.T) { factories.Processors[unhealthyProcessorFactory.Type()] = unhealthyProcessorFactory // Keep track of all status changes in a map. - changedComponents := map[*component.InstanceID]component.Status{} + changedComponents := map[*component.InstanceID][]component.Status{} var mux sync.Mutex onStatusChanged := func(source *component.InstanceID, event *component.StatusEvent) { - if event.Status() != component.StatusRecoverableError { + if source.ID.Type() != unhealthyProcessorFactory.Type() { return } mux.Lock() defer mux.Unlock() - changedComponents[source] = event.Status() + changedComponents[source] = append(changedComponents[source], event.Status()) } // Add a "statuswatcher" extension that will receive notifications when processor @@ -194,6 +194,12 @@ func TestComponentStatusWatcher(t *testing.T) { // Start the newly created collector. wg := startCollector(context.Background(), t, col) + // An unhealthy processor asynchronously reports a recoverable error. + expectedStatuses := []component.Status{ + component.StatusStarting, + component.StatusRecoverableError, + } + // The "unhealthy" processors will now begin to asynchronously report StatusRecoverableError. // We expect to see these reports. 
assert.Eventually(t, func() bool { @@ -203,8 +209,8 @@ func TestComponentStatusWatcher(t *testing.T) { for k, v := range changedComponents { // All processors must report a status change with the same ID assert.EqualValues(t, component.NewID(unhealthyProcessorFactory.Type()), k.ID) - // And all must be in StatusRecoverableError - assert.EqualValues(t, component.StatusRecoverableError, v) + // And all must have the expected statuses + assert.Equal(t, expectedStatuses, v) } // We have 3 processors with exactly the same ID in otelcol-statuswatcher.yaml // We must have exactly 3 items in our map. This ensures that the "source" argument @@ -216,6 +222,13 @@ func TestComponentStatusWatcher(t *testing.T) { col.Shutdown() wg.Wait() + + // Check for additional statuses after Shutdown. + expectedStatuses = append(expectedStatuses, component.StatusStopping, component.StatusStopped) + for _, v := range changedComponents { + assert.Equal(t, expectedStatuses, v) + } + assert.Equal(t, StateClosed, col.GetState()) } diff --git a/service/internal/graph/graph.go b/service/internal/graph/graph.go index ddbcd64697e..c54ce95f011 100644 --- a/service/internal/graph/graph.go +++ b/service/internal/graph/graph.go @@ -384,8 +384,6 @@ func (g *Graph) StartAll(ctx context.Context, host component.Host) error { _ = g.telemetry.ReportComponentStatus(instanceID, component.StatusPermanentError, component.WithError(compErr)) return compErr } - - _ = g.telemetry.ReportComponentStatus(instanceID, component.StatusOK) } return nil } diff --git a/service/internal/graph/graph_test.go b/service/internal/graph/graph_test.go index ad7eaf045b7..8456496bb0e 100644 --- a/service/internal/graph/graph_test.go +++ b/service/internal/graph/graph_test.go @@ -2143,13 +2143,11 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { expectedStatuses: map[*component.InstanceID][]*component.StatusEvent{ instanceIDs[rNoErr]: { newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - 
newStatusEvent(component.StatusOK, component.WithTimestamp(now)), newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), }, instanceIDs[eNoErr]: { newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - newStatusEvent(component.StatusOK, component.WithTimestamp(now)), newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), }, @@ -2176,7 +2174,6 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { }, instanceIDs[eNoErr]: { newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - newStatusEvent(component.StatusOK, component.WithTimestamp(now)), newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), }, @@ -2189,13 +2186,11 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { expectedStatuses: map[*component.InstanceID][]*component.StatusEvent{ instanceIDs[rSdErr]: { newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - newStatusEvent(component.StatusOK, component.WithTimestamp(now)), newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), newStatusEvent(component.StatusPermanentError, component.WithTimestamp(now), component.WithError(assert.AnError)), }, instanceIDs[eNoErr]: { newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - newStatusEvent(component.StatusOK, component.WithTimestamp(now)), newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), }, @@ -2208,13 +2203,11 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { expectedStatuses: map[*component.InstanceID][]*component.StatusEvent{ instanceIDs[rNoErr]: { newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - 
newStatusEvent(component.StatusOK, component.WithTimestamp(now)), newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), }, instanceIDs[eSdErr]: { newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - newStatusEvent(component.StatusOK, component.WithTimestamp(now)), newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), newStatusEvent(component.StatusPermanentError, component.WithTimestamp(now), component.WithError(assert.AnError)), }, From 26e59f13d1335fceaaccca0a00f23996816da08a Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Wed, 13 Sep 2023 18:26:11 -0700 Subject: [PATCH 16/40] Refactor instanceID creation --- service/internal/graph/graph.go | 72 ++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/service/internal/graph/graph.go b/service/internal/graph/graph.go index c54ce95f011..e5acf7f32ba 100644 --- a/service/internal/graph/graph.go +++ b/service/internal/graph/graph.go @@ -90,12 +90,8 @@ func (g *Graph) createNodes(set Settings) error { connectorsAsReceiver[recvID] = append(connectorsAsReceiver[recvID], pipelineID) continue } - rcvrNode := g.createReceiver(pipelineID.Type(), recvID) + rcvrNode := g.createReceiver(pipelineID, recvID) pipe.receivers[rcvrNode.ID()] = rcvrNode - g.instanceIDs[rcvrNode.ID()] = &component.InstanceID{ - ID: recvID, - Kind: component.KindReceiver, - } } pipe.capabilitiesNode = newCapabilitiesNode(pipelineID) @@ -103,13 +99,6 @@ func (g *Graph) createNodes(set Settings) error { for _, procID := range pipelineCfg.Processors { procNode := g.createProcessor(pipelineID, procID) pipe.processors = append(pipe.processors, procNode) - g.instanceIDs[procNode.ID()] = &component.InstanceID{ - ID: procID, - Kind: component.KindProcessor, - PipelineIDs: map[component.ID]struct{}{ - pipelineID: {}, - }, - } } pipe.fanOutNode = newFanOutNode(pipelineID) @@ -120,15 +109,8 @@ 
func (g *Graph) createNodes(set Settings) error { connectorsAsExporter[exprID] = append(connectorsAsExporter[exprID], pipelineID) continue } - expNode := g.createExporter(pipelineID.Type(), exprID) + expNode := g.createExporter(pipelineID, exprID) pipe.exporters[expNode.ID()] = expNode - g.instanceIDs[expNode.ID()] = &component.InstanceID{ - ID: expNode.componentID, - Kind: component.KindExporter, - PipelineIDs: map[component.ID]struct{}{ - pipelineID: {}, - }, - } } } @@ -186,50 +168,76 @@ func (g *Graph) createNodes(set Settings) error { g.pipelines[eID].exporters[connNode.ID()] = connNode g.pipelines[rID].receivers[connNode.ID()] = connNode - g.instanceIDs[connNode.ID()] = &component.InstanceID{ - ID: connNode.componentID, - Kind: component.KindConnector, - PipelineIDs: map[component.ID]struct{}{ - eID: {}, - rID: {}, - }, - } } } } return nil } -func (g *Graph) createReceiver(pipelineType component.DataType, recvID component.ID) *receiverNode { - rcvrNode := newReceiverNode(pipelineType, recvID) +func (g *Graph) createReceiver(pipelineID, recvID component.ID) *receiverNode { + rcvrNode := newReceiverNode(pipelineID.Type(), recvID) if node := g.componentGraph.Node(rcvrNode.ID()); node != nil { + g.instanceIDs[node.ID()].PipelineIDs[pipelineID] = struct{}{} return node.(*receiverNode) } g.componentGraph.AddNode(rcvrNode) + g.instanceIDs[rcvrNode.ID()] = &component.InstanceID{ + ID: recvID, + Kind: component.KindReceiver, + PipelineIDs: map[component.ID]struct{}{ + pipelineID: {}, + }, + } return rcvrNode } func (g *Graph) createProcessor(pipelineID, procID component.ID) *processorNode { procNode := newProcessorNode(pipelineID, procID) g.componentGraph.AddNode(procNode) + g.instanceIDs[procNode.ID()] = &component.InstanceID{ + ID: procID, + Kind: component.KindProcessor, + PipelineIDs: map[component.ID]struct{}{ + pipelineID: {}, + }, + } return procNode } -func (g *Graph) createExporter(pipelineType component.DataType, exprID component.ID) *exporterNode { - 
expNode := newExporterNode(pipelineType, exprID) +func (g *Graph) createExporter(pipelineID, exprID component.ID) *exporterNode { + expNode := newExporterNode(pipelineID.Type(), exprID) if node := g.componentGraph.Node(expNode.ID()); node != nil { + g.instanceIDs[expNode.ID()].PipelineIDs[pipelineID] = struct{}{} return node.(*exporterNode) } g.componentGraph.AddNode(expNode) + g.instanceIDs[expNode.ID()] = &component.InstanceID{ + ID: expNode.componentID, + Kind: component.KindExporter, + PipelineIDs: map[component.ID]struct{}{ + pipelineID: {}, + }, + } return expNode } func (g *Graph) createConnector(exprPipelineID, rcvrPipelineID, connID component.ID) *connectorNode { connNode := newConnectorNode(exprPipelineID.Type(), rcvrPipelineID.Type(), connID) if node := g.componentGraph.Node(connNode.ID()); node != nil { + instanceID := g.instanceIDs[connNode.ID()] + instanceID.PipelineIDs[exprPipelineID] = struct{}{} + instanceID.PipelineIDs[rcvrPipelineID] = struct{}{} return node.(*connectorNode) } g.componentGraph.AddNode(connNode) + g.instanceIDs[connNode.ID()] = &component.InstanceID{ + ID: connNode.componentID, + Kind: component.KindConnector, + PipelineIDs: map[component.ID]struct{}{ + exprPipelineID: {}, + rcvrPipelineID: {}, + }, + } return connNode } From 67676a47345c90b455a948281b977ae88eef286a Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Thu, 14 Sep 2023 14:35:25 -0700 Subject: [PATCH 17/40] More accurate comments --- component/host.go | 3 ++- component/status.go | 3 ++- extension/extensiontest/statuswatcher_extension.go | 5 +++-- service/host.go | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/component/host.go b/component/host.go index 9b963727d1f..98f4fd79d3c 100644 --- a/component/host.go +++ b/component/host.go @@ -12,7 +12,8 @@ type Host interface { // // ReportFatalError should be called by the component anytime after Component.Start() ends and // before Component.Shutdown() begins. 
- // Deprecated: [x.x.x] Use ReportComponentStatus instead (with an event component.StatusFatalError) + // Deprecated: [x.x.x] Use TelemetrySettings.ReportComponentStatus instead (with an event + // component.StatusFatalError) ReportFatalError(err error) // GetFactory of the specified kind. Returns the factory for a component type. diff --git a/component/status.go b/component/status.go index e09ecbf90f1..ce3f5d3f0dc 100644 --- a/component/status.go +++ b/component/status.go @@ -81,7 +81,8 @@ type StatusEventOption func(*StatusEvent) error var ErrStatusEventInvalidArgument = errors.New("status event argument error") // WithError sets the error object of the StatusEvent. It is optional -// and should only be applied to an Event of type ComponentError. +// and should only be applied to an event with an error status (e.g. StatusRecoverableError, +// StatusPermanentError, or StatusFatalError). func WithError(err error) StatusEventOption { return func(o *StatusEvent) error { if _, ok := errorStatuses[o.status]; !ok { diff --git a/extension/extensiontest/statuswatcher_extension.go b/extension/extensiontest/statuswatcher_extension.go index 92ea355839a..e501353dcc6 100644 --- a/extension/extensiontest/statuswatcher_extension.go +++ b/extension/extensiontest/statuswatcher_extension.go @@ -19,7 +19,7 @@ func NewStatusWatcherExtensionCreateSettings() extension.CreateSettings { } } -// NewStatusWatcherExtensionFactory returns a component.ExtensionFactory that constructs nop extensions. +// NewStatusWatcherExtensionFactory returns a component.ExtensionFactory to construct a status watcher extension. func NewStatusWatcherExtensionFactory( onStatusChanged func(source *component.InstanceID, event *component.StatusEvent), ) extension.Factory { @@ -34,7 +34,8 @@ func NewStatusWatcherExtensionFactory( component.StabilityLevelStable) } -// statusWatcherExtension stores consumed traces and metrics for testing purposes. 
+// statusWatcherExtension receives status events reported via component status reporting for testing +// purposes. type statusWatcherExtension struct { component.StartFunc component.ShutdownFunc diff --git a/service/host.go b/service/host.go index c0a44371c35..945ec864a36 100644 --- a/service/host.go +++ b/service/host.go @@ -33,7 +33,7 @@ type serviceHost struct { // ReportFatalError is used to report to the host that the receiver encountered // a fatal error (i.e.: an error that the instance can't recover from) after // its start function has already returned. -// Deprecated: [x.x.x] Replaced by ReportComponentStatus +// Deprecated: [x.x.x] Replaced by servicetelemetry.Settings.ReportComponentStatus func (host *serviceHost) ReportFatalError(err error) { host.asyncErrorChannel <- err } From e0a891e2320629231234ca8f19b8251bea35b9c6 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Thu, 14 Sep 2023 15:05:14 -0700 Subject: [PATCH 18/40] Fix state transitions to StatusStopped --- service/internal/status/status.go | 3 --- service/internal/status/status_test.go | 37 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/service/internal/status/status.go b/service/internal/status/status.go index 908e847e527..b497e3d8db0 100644 --- a/service/internal/status/status.go +++ b/service/internal/status/status.go @@ -65,21 +65,18 @@ func newFSM(onTransition onTransitionFunc) *fsm { component.StatusPermanentError: {}, component.StatusFatalError: {}, component.StatusStopping: {}, - component.StatusStopped: {}, }, component.StatusOK: { component.StatusRecoverableError: {}, component.StatusPermanentError: {}, component.StatusFatalError: {}, component.StatusStopping: {}, - component.StatusStopped: {}, }, component.StatusRecoverableError: { component.StatusOK: {}, component.StatusPermanentError: {}, component.StatusFatalError: {}, component.StatusStopping: {}, - component.StatusStopped: {}, }, component.StatusPermanentError: {}, 
component.StatusFatalError: {}, diff --git a/service/internal/status/status_test.go b/service/internal/status/status_test.go index a51890e2d9c..0c7fb425092 100644 --- a/service/internal/status/status_test.go +++ b/service/internal/status/status_test.go @@ -4,6 +4,7 @@ package status import ( + "fmt" "sync" "testing" @@ -144,6 +145,42 @@ func TestStatusFSM(t *testing.T) { } } +func TestValidSeqsToStopped(t *testing.T) { + statuses := []component.Status{ + component.StatusStarting, + component.StatusOK, + component.StatusRecoverableError, + component.StatusPermanentError, + component.StatusFatalError, + } + + for _, status := range statuses { + name := fmt.Sprintf("transition from: %s to: %s invalid", status, component.StatusStopped) + t.Run(name, func(t *testing.T) { + fsm := newFSM(func(*component.StatusEvent) {}) + if status != component.StatusStarting { + require.NoError(t, fsm.transition(component.StatusStarting)) + } + require.NoError(t, fsm.transition(status)) + // skipping to stopped is not allowed + err := fsm.transition(component.StatusStopped) + require.Error(t, err) + require.ErrorIs(t, err, errInvalidStateTransition) + + // stopping -> stopped is allowed for non-fatal, non-permanent errors + err = fsm.transition(component.StatusStopping) + if status == component.StatusPermanentError || status == component.StatusFatalError { + require.Error(t, err) + require.ErrorIs(t, err, errInvalidStateTransition) + } else { + require.NoError(t, err) + require.NoError(t, fsm.transition(component.StatusStopped)) + } + }) + } + +} + func TestStatusEventError(t *testing.T) { fsm := newFSM(func(*component.StatusEvent) {}) err := fsm.transition(component.StatusStarting) From a0c106c21e2b03f2ec55fec0f3851fbbce26631c Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Thu, 14 Sep 2023 18:01:33 -0700 Subject: [PATCH 19/40] Remove functional options; replace with per-error type event constructors --- component/componenttest/nop_telemetry.go | 2 +- component/status.go | 83 
++++++------------- component/status_test.go | 50 +++-------- .../processortest/unhealthy_processor.go | 2 +- service/internal/graph/graph.go | 10 +-- service/internal/graph/graph_test.go | 80 +++++++++--------- .../internal/servicetelemetry/nop_settings.go | 2 +- .../servicetelemetry/nop_settings_test.go | 2 +- .../servicetelemetry/settings_test.go | 6 +- service/internal/status/status.go | 26 ++---- service/internal/status/status_test.go | 59 +++++-------- service/service_test.go | 2 +- 12 files changed, 123 insertions(+), 201 deletions(-) diff --git a/component/componenttest/nop_telemetry.go b/component/componenttest/nop_telemetry.go index 1aeb60a2290..a14abfb8978 100644 --- a/component/componenttest/nop_telemetry.go +++ b/component/componenttest/nop_telemetry.go @@ -21,7 +21,7 @@ func NewNopTelemetrySettings() component.TelemetrySettings { MeterProvider: noop.NewMeterProvider(), MetricsLevel: configtelemetry.LevelNone, Resource: pcommon.NewResource(), - ReportComponentStatus: func(component.Status, ...component.StatusEventOption) error { + ReportComponentStatus: func(*component.StatusEvent) error { return nil }, } diff --git a/component/status.go b/component/status.go index ce3f5d3f0dc..3d9cbdae3ed 100644 --- a/component/status.go +++ b/component/status.go @@ -4,8 +4,6 @@ package component // import "go.opentelemetry.io/collector/component" import ( - "errors" - "fmt" "time" ) @@ -44,13 +42,6 @@ func (s Status) String() string { return "StatusNone" } -// errorStatuses is a set of statuses that can have associated errors -var errorStatuses = map[Status]struct{}{ - StatusRecoverableError: {}, - StatusPermanentError: {}, - StatusFatalError: {}, -} - // StatusEvent contains a status and timestamp, and can contain an error type StatusEvent struct { status Status @@ -73,58 +64,38 @@ func (ev *StatusEvent) Timestamp() time.Time { return ev.timestamp } -// StatusEventOption applies options to a StatusEvent. 
-type StatusEventOption func(*StatusEvent) error - -// ErrStatusEventInvalidArgument indicates an invalid option was specified when creating a status -// event. This will happen when using WithError for a non-error status. -var ErrStatusEventInvalidArgument = errors.New("status event argument error") - -// WithError sets the error object of the StatusEvent. It is optional -// and should only be applied to an event with an error status (e.g. StatusRecoverableError, -// StatusPermanentError, or StatusFatalError). -func WithError(err error) StatusEventOption { - return func(o *StatusEvent) error { - if _, ok := errorStatuses[o.status]; !ok { - return fmt.Errorf( - "event with %s cannot have an error: %w", - o.status, - ErrStatusEventInvalidArgument, - ) - } - o.err = err - return nil +// NewStatusEvent creates and returns a StatusEvent with the specified status and sets the timestamp +// time.Now(). To provide set an error on the event for an error status use one of the dedicated +// constructors (e.g. NewRecoverableErrorEvent, NewPermanentErrorEvent, NewFatalErrorEvent) +func NewStatusEvent(status Status) *StatusEvent { + return &StatusEvent{ + status: status, + timestamp: time.Now(), } } -// WithTimestamp is optional, when used it sets the timestamp of the StatusEvent. -func WithTimestamp(t time.Time) StatusEventOption { - return func(o *StatusEvent) error { - o.timestamp = t - return nil - } +// NewRecoverableErrorEvent creates and returns a StatusEvent with StatusRecoverableError, the +// specified error, and a timestamp set to time.Now(). +func NewRecoverableErrorEvent(err error) *StatusEvent { + ev := NewStatusEvent(StatusRecoverableError) + ev.err = err + return ev } -// NewStatusEvent creates and returns a StatusEvent with default and provided -// options. Will return an error if an error is provided for a non-error event -// type (status.ComponentOK). -// If the timestamp is not provided will set it to time.Now(). 
-func NewStatusEvent(status Status, options ...StatusEventOption) (*StatusEvent, error) { - ev := StatusEvent{ - status: status, - } - - for _, opt := range options { - if err := opt(&ev); err != nil { - return nil, err - } - } - - if ev.timestamp.IsZero() { - ev.timestamp = time.Now() - } +// NewPermanentErrorEvent creates and returns a StatusEvent with StatusPermanentError, the +// specified error, and a timestamp set to time.Now(). +func NewPermanentErrorEvent(err error) *StatusEvent { + ev := NewStatusEvent(StatusPermanentError) + ev.err = err + return ev +} - return &ev, nil +// NewFatalErrorEvent creates and returns a StatusEvent with StatusFatalError, the +// specified error, and a timestamp set to time.Now(). +func NewFatalErrorEvent(err error) *StatusEvent { + ev := NewStatusEvent(StatusFatalError) + ev.err = err + return ev } // StatusWatcher is an extra interface for Extension hosted by the OpenTelemetry @@ -139,4 +110,4 @@ type StatusWatcher interface { } // StatusFunc is the expected type of ReportComponentStatus for compoment.TelemetrySettings -type StatusFunc func(Status, ...StatusEventOption) error +type StatusFunc func(*StatusEvent) error diff --git a/component/status_test.go b/component/status_test.go index 2a4d0728822..db207e6ae26 100644 --- a/component/status_test.go +++ b/component/status_test.go @@ -5,13 +5,12 @@ package component import ( "fmt" "testing" - "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -func TestStatusEventWithoutError(t *testing.T) { +func TestNewStatusEvent(t *testing.T) { statuses := []Status{ StatusStarting, StatusOK, @@ -24,8 +23,7 @@ func TestStatusEventWithoutError(t *testing.T) { for _, status := range statuses { t.Run(fmt.Sprintf("%s without error", status), func(t *testing.T) { - ev, err := NewStatusEvent(status) - require.NoError(t, err) + ev := NewStatusEvent(status) require.Equal(t, status, ev.Status()) require.Nil(t, ev.Err()) require.False(t, ev.Timestamp().IsZero()) @@ 
-33,47 +31,19 @@ func TestStatusEventWithoutError(t *testing.T) { } } -func TestStatusEventWithError(t *testing.T) { - statuses := []Status{ - StatusRecoverableError, - StatusRecoverableError, - StatusFatalError, +func TestStatusEventsWithError(t *testing.T) { + statusConstructorMap := map[Status]func(error) *StatusEvent{ + StatusRecoverableError: NewRecoverableErrorEvent, + StatusPermanentError: NewPermanentErrorEvent, + StatusFatalError: NewFatalErrorEvent, } - for _, status := range statuses { - t.Run(fmt.Sprintf("error status: %s with error", status), func(t *testing.T) { - ev, err := NewStatusEvent(status, WithError(assert.AnError)) - require.NoError(t, err) + for status, newEvent := range statusConstructorMap { + t.Run(fmt.Sprintf("error status constructor for: %s", status), func(t *testing.T) { + ev := newEvent(assert.AnError) require.Equal(t, status, ev.Status()) require.Equal(t, assert.AnError, ev.Err()) require.False(t, ev.Timestamp().IsZero()) }) } } - -func TestNonErrorStatusWithError(t *testing.T) { - statuses := []Status{ - StatusStarting, - StatusOK, - StatusStopping, - StatusStopped, - } - - for _, status := range statuses { - t.Run(fmt.Sprintf("non error status: %s with error", status), func(t *testing.T) { - ev, err := NewStatusEvent(status, WithError(assert.AnError)) - require.Error(t, err) - require.ErrorIs(t, err, ErrStatusEventInvalidArgument) - require.Nil(t, ev) - }) - } -} - -func TestStatusEventWithTimestamp(t *testing.T) { - ts := time.Now() - ev, err := NewStatusEvent(StatusOK, WithTimestamp(ts)) - require.NoError(t, err) - require.Equal(t, StatusOK, ev.Status()) - require.Nil(t, ev.Err()) - require.Equal(t, ts, ev.Timestamp()) -} diff --git a/processor/processortest/unhealthy_processor.go b/processor/processortest/unhealthy_processor.go index 0ae30e50ec8..eeb2e1b8d87 100644 --- a/processor/processortest/unhealthy_processor.go +++ b/processor/processortest/unhealthy_processor.go @@ -64,7 +64,7 @@ type unhealthyProcessor struct { func (p 
unhealthyProcessor) Start(_ context.Context, _ component.Host) error { go func() { - _ = p.telemetry.ReportComponentStatus(component.StatusRecoverableError) + _ = p.telemetry.ReportComponentStatus(component.NewStatusEvent(component.StatusRecoverableError)) }() return nil } diff --git a/service/internal/graph/graph.go b/service/internal/graph/graph.go index e5acf7f32ba..c367289b7ad 100644 --- a/service/internal/graph/graph.go +++ b/service/internal/graph/graph.go @@ -386,10 +386,10 @@ func (g *Graph) StartAll(ctx context.Context, host component.Host) error { } instanceID := g.instanceIDs[node.ID()] - _ = g.telemetry.ReportComponentStatus(instanceID, component.StatusStarting) + _ = g.telemetry.ReportComponentStatus(instanceID, component.NewStatusEvent(component.StatusStarting)) if compErr := comp.Start(ctx, host); compErr != nil { - _ = g.telemetry.ReportComponentStatus(instanceID, component.StatusPermanentError, component.WithError(compErr)) + _ = g.telemetry.ReportComponentStatus(instanceID, component.NewPermanentErrorEvent(compErr)) return compErr } } @@ -417,15 +417,15 @@ func (g *Graph) ShutdownAll(ctx context.Context) error { } instanceID := g.instanceIDs[node.ID()] - _ = g.telemetry.ReportComponentStatus(instanceID, component.StatusStopping) + _ = g.telemetry.ReportComponentStatus(instanceID, component.NewStatusEvent(component.StatusStopping)) if compErr := comp.Shutdown(ctx); compErr != nil { errs = multierr.Append(errs, compErr) - _ = g.telemetry.ReportComponentStatus(instanceID, component.StatusPermanentError, component.WithError(compErr)) + _ = g.telemetry.ReportComponentStatus(instanceID, component.NewPermanentErrorEvent(compErr)) continue } - _ = g.telemetry.ReportComponentStatus(instanceID, component.StatusStopped) + _ = g.telemetry.ReportComponentStatus(instanceID, component.NewStatusEvent(component.StatusStopped)) } return errs } diff --git a/service/internal/graph/graph_test.go b/service/internal/graph/graph_test.go index 8456496bb0e..5257344f9ca 
100644 --- a/service/internal/graph/graph_test.go +++ b/service/internal/graph/graph_test.go @@ -9,7 +9,6 @@ import ( "fmt" "sync" "testing" - "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -2123,12 +2122,21 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { eSdErr: {ID: eSdErr.id}, } - newStatusEvent := func(status component.Status, opts ...component.StatusEventOption) *component.StatusEvent { - ev, _ := component.NewStatusEvent(status, opts...) - return ev - } + // compare two maps of status events ignoring timestamp + assertEqualStatuses := func(t *testing.T, evMap1, evMap2 map[*component.InstanceID][]*component.StatusEvent) { + assert.Equal(t, len(evMap1), len(evMap2)) + for id, evts1 := range evMap1 { + evts2 := evMap2[id] + assert.Equal(t, len(evts1), len(evts2)) + for i := 0; i < len(evts1); i++ { + ev1 := evts1[i] + ev2 := evts2[i] + assert.Equal(t, ev1.Status(), ev2.Status()) + assert.Equal(t, ev1.Err(), ev2.Err()) + } + } - now := time.Now() + } for _, tc := range []struct { name string @@ -2142,14 +2150,14 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { edge: [2]*testNode{rNoErr, eNoErr}, expectedStatuses: map[*component.InstanceID][]*component.StatusEvent{ instanceIDs[rNoErr]: { - newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), - newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), + component.NewStatusEvent(component.StatusStarting), + component.NewStatusEvent(component.StatusStopping), + component.NewStatusEvent(component.StatusStopped), }, instanceIDs[eNoErr]: { - newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), - newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), + component.NewStatusEvent(component.StatusStarting), + 
component.NewStatusEvent(component.StatusStopping), + component.NewStatusEvent(component.StatusStopped), }, }, }, @@ -2158,8 +2166,8 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { edge: [2]*testNode{rNoErr, eStErr}, expectedStatuses: map[*component.InstanceID][]*component.StatusEvent{ instanceIDs[eStErr]: { - newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - newStatusEvent(component.StatusPermanentError, component.WithTimestamp(now), component.WithError(assert.AnError)), + component.NewStatusEvent(component.StatusStarting), + component.NewPermanentErrorEvent(assert.AnError), }, }, startupErr: assert.AnError, @@ -2169,13 +2177,13 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { edge: [2]*testNode{rStErr, eNoErr}, expectedStatuses: map[*component.InstanceID][]*component.StatusEvent{ instanceIDs[rStErr]: { - newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - newStatusEvent(component.StatusPermanentError, component.WithTimestamp(now), component.WithError(assert.AnError)), + component.NewStatusEvent(component.StatusStarting), + component.NewPermanentErrorEvent(assert.AnError), }, instanceIDs[eNoErr]: { - newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), - newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), + component.NewStatusEvent(component.StatusStarting), + component.NewStatusEvent(component.StatusStopping), + component.NewStatusEvent(component.StatusStopped), }, }, startupErr: assert.AnError, @@ -2185,14 +2193,14 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { edge: [2]*testNode{rSdErr, eNoErr}, expectedStatuses: map[*component.InstanceID][]*component.StatusEvent{ instanceIDs[rSdErr]: { - newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), - 
newStatusEvent(component.StatusPermanentError, component.WithTimestamp(now), component.WithError(assert.AnError)), + component.NewStatusEvent(component.StatusStarting), + component.NewStatusEvent(component.StatusStopping), + component.NewPermanentErrorEvent(assert.AnError), }, instanceIDs[eNoErr]: { - newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), - newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), + component.NewStatusEvent(component.StatusStarting), + component.NewStatusEvent(component.StatusStopping), + component.NewStatusEvent(component.StatusStopped), }, }, shutdownErr: assert.AnError, @@ -2202,14 +2210,14 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { edge: [2]*testNode{rNoErr, eSdErr}, expectedStatuses: map[*component.InstanceID][]*component.StatusEvent{ instanceIDs[rNoErr]: { - newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), - newStatusEvent(component.StatusStopped, component.WithTimestamp(now)), + component.NewStatusEvent(component.StatusStarting), + component.NewStatusEvent(component.StatusStopping), + component.NewStatusEvent(component.StatusStopped), }, instanceIDs[eSdErr]: { - newStatusEvent(component.StatusStarting, component.WithTimestamp(now)), - newStatusEvent(component.StatusStopping, component.WithTimestamp(now)), - newStatusEvent(component.StatusPermanentError, component.WithTimestamp(now), component.WithError(assert.AnError)), + component.NewStatusEvent(component.StatusStarting), + component.NewStatusEvent(component.StatusStopping), + component.NewPermanentErrorEvent(assert.AnError), }, }, shutdownErr: assert.AnError, @@ -2221,13 +2229,7 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { actualStatuses := make(map[*component.InstanceID][]*component.StatusEvent) init, statusFunc := 
status.NewServiceStatusFunc(func(id *component.InstanceID, ev *component.StatusEvent) { - // copy event to normalize timestamp - opts := []component.StatusEventOption{component.WithTimestamp(now)} - if ev.Err() != nil { - opts = append(opts, component.WithError(ev.Err())) - } - evCopy, _ := component.NewStatusEvent(ev.Status(), opts...) - actualStatuses[id] = append(actualStatuses[id], evCopy) + actualStatuses[id] = append(actualStatuses[id], ev) }) pg.telemetry.ReportComponentStatus = statusFunc @@ -2242,7 +2244,7 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { assert.Equal(t, tc.startupErr, pg.StartAll(context.Background(), componenttest.NewNopHost())) assert.Equal(t, tc.shutdownErr, pg.ShutdownAll(context.Background())) - assert.Equal(t, tc.expectedStatuses, actualStatuses) + assertEqualStatuses(t, tc.expectedStatuses, actualStatuses) }) } } diff --git a/service/internal/servicetelemetry/nop_settings.go b/service/internal/servicetelemetry/nop_settings.go index 8d596570759..f8d0b0c1ce5 100644 --- a/service/internal/servicetelemetry/nop_settings.go +++ b/service/internal/servicetelemetry/nop_settings.go @@ -21,7 +21,7 @@ func NewNopSettings() Settings { MeterProvider: noop.NewMeterProvider(), MetricsLevel: configtelemetry.LevelNone, Resource: pcommon.NewResource(), - ReportComponentStatus: func(*component.InstanceID, component.Status, ...component.StatusEventOption) error { + ReportComponentStatus: func(*component.InstanceID, *component.StatusEvent) error { return nil }, } diff --git a/service/internal/servicetelemetry/nop_settings_test.go b/service/internal/servicetelemetry/nop_settings_test.go index 90cdec4a9b6..e6411d1bd66 100644 --- a/service/internal/servicetelemetry/nop_settings_test.go +++ b/service/internal/servicetelemetry/nop_settings_test.go @@ -26,5 +26,5 @@ func TestNewNopSettings(t *testing.T) { require.Equal(t, noop.NewMeterProvider(), set.MeterProvider) require.Equal(t, configtelemetry.LevelNone, set.MetricsLevel) require.Equal(t, 
pcommon.NewResource(), set.Resource) - require.NoError(t, set.ReportComponentStatus(&component.InstanceID{}, component.StatusStarting)) + require.NoError(t, set.ReportComponentStatus(&component.InstanceID{}, component.NewStatusEvent(component.StatusStarting))) } diff --git a/service/internal/servicetelemetry/settings_test.go b/service/internal/servicetelemetry/settings_test.go index d5b3ee6a1e1..d57f8ac769b 100644 --- a/service/internal/servicetelemetry/settings_test.go +++ b/service/internal/servicetelemetry/settings_test.go @@ -23,12 +23,12 @@ func TestSettings(t *testing.T) { MeterProvider: noop.NewMeterProvider(), MetricsLevel: configtelemetry.LevelNone, Resource: pcommon.NewResource(), - ReportComponentStatus: func(*component.InstanceID, component.Status, ...component.StatusEventOption) error { + ReportComponentStatus: func(*component.InstanceID, *component.StatusEvent) error { return nil }, } - require.NoError(t, set.ReportComponentStatus(&component.InstanceID{}, component.StatusOK)) + require.NoError(t, set.ReportComponentStatus(&component.InstanceID{}, component.NewStatusEvent(component.StatusOK))) compSet := set.ToComponentTelemetrySettings(&component.InstanceID{}) - require.NoError(t, compSet.ReportComponentStatus(component.StatusOK)) + require.NoError(t, compSet.ReportComponentStatus(component.NewStatusEvent(component.StatusOK))) } diff --git a/service/internal/status/status.go b/service/internal/status/status.go index b497e3d8db0..bbccc6939ae 100644 --- a/service/internal/status/status.go +++ b/service/internal/status/status.go @@ -27,33 +27,25 @@ type fsm struct { // transition will attempt to execute a state transition. If it's successful, it calls the // onTransitionFunc with a StatusEvent representing the new state. Returns an error if the arguments // result in an invalid status, or if the state transition is not valid. 
-func (m *fsm) transition(status component.Status, options ...component.StatusEventOption) error { - if _, ok := m.transitions[m.current.Status()][status]; !ok { +func (m *fsm) transition(ev *component.StatusEvent) error { + if _, ok := m.transitions[m.current.Status()][ev.Status()]; !ok { return fmt.Errorf( "cannot transition from %s to %s: %w", m.current.Status(), - status, + ev.Status(), errInvalidStateTransition, ) } - - ev, err := component.NewStatusEvent(status, options...) - if err != nil { - return err - } - m.current = ev m.onTransition(ev) - return nil } // newFSM creates a state machine with all valid transitions for component.Status. // The initial state is set to component.StatusNone. func newFSM(onTransition onTransitionFunc) *fsm { - initial, _ := component.NewStatusEvent(component.StatusNone) return &fsm{ - current: initial, + current: component.NewStatusEvent(component.StatusNone), onTransition: onTransition, transitions: map[component.Status]map[component.Status]struct{}{ component.StatusNone: { @@ -121,7 +113,7 @@ func initAndReadyFuncs() (InitFunc, readyFunc) { type NotifyStatusFunc func(*component.InstanceID, *component.StatusEvent) // ServiceStatusFunc is the expected type of ReportComponentStatus for servicetelemetry.Settings -type ServiceStatusFunc func(id *component.InstanceID, status component.Status, opts ...component.StatusEventOption) error +type ServiceStatusFunc func(*component.InstanceID, *component.StatusEvent) error // errStatusNotReady is returned when trying to report status before service start var errStatusNotReady = errors.New("report component status is not ready until service start") @@ -136,7 +128,7 @@ func NewServiceStatusFunc(notifyStatusChange NotifyStatusFunc) (InitFunc, Servic mu := sync.Mutex{} fsmMap := make(map[*component.InstanceID]*fsm) return init, - func(id *component.InstanceID, status component.Status, opts ...component.StatusEventOption) error { + func(id *component.InstanceID, ev *component.StatusEvent) 
error { if !isReady() { return errStatusNotReady } @@ -149,7 +141,7 @@ func NewServiceStatusFunc(notifyStatusChange NotifyStatusFunc) (InitFunc, Servic }) fsmMap[id] = fsm } - return fsm.transition(status, opts...) + return fsm.transition(ev) } } @@ -158,7 +150,7 @@ func NewServiceStatusFunc(notifyStatusChange NotifyStatusFunc) (InitFunc, Servic // component.TelemetrySettings, which differs from servicetelemetry.Settings in that // the component version is tied to specific component instance. func NewComponentStatusFunc(id *component.InstanceID, srvStatus ServiceStatusFunc) component.StatusFunc { - return func(status component.Status, opts ...component.StatusEventOption) error { - return srvStatus(id, status, opts...) + return func(ev *component.StatusEvent) error { + return srvStatus(id, ev) } } diff --git a/service/internal/status/status_test.go b/service/internal/status/status_test.go index 0c7fb425092..87a1d3231d9 100644 --- a/service/internal/status/status_test.go +++ b/service/internal/status/status_test.go @@ -8,7 +8,6 @@ import ( "sync" "testing" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/component" @@ -133,7 +132,7 @@ func TestStatusFSM(t *testing.T) { errorCount := 0 for _, status := range tc.reportedStatuses { - if err := fsm.transition(status); err != nil { + if err := fsm.transition(component.NewStatusEvent(status)); err != nil { errorCount++ require.ErrorIs(t, err, errInvalidStateTransition) } @@ -146,53 +145,41 @@ func TestStatusFSM(t *testing.T) { } func TestValidSeqsToStopped(t *testing.T) { - statuses := []component.Status{ - component.StatusStarting, - component.StatusOK, - component.StatusRecoverableError, - component.StatusPermanentError, - component.StatusFatalError, + events := []*component.StatusEvent{ + component.NewStatusEvent(component.StatusStarting), + component.NewStatusEvent(component.StatusOK), + component.NewStatusEvent(component.StatusRecoverableError), + 
component.NewStatusEvent(component.StatusPermanentError), + component.NewStatusEvent(component.StatusFatalError), } - for _, status := range statuses { - name := fmt.Sprintf("transition from: %s to: %s invalid", status, component.StatusStopped) + for _, ev := range events { + name := fmt.Sprintf("transition from: %s to: %s invalid", ev.Status(), component.StatusStopped) t.Run(name, func(t *testing.T) { fsm := newFSM(func(*component.StatusEvent) {}) - if status != component.StatusStarting { - require.NoError(t, fsm.transition(component.StatusStarting)) + if ev.Status() != component.StatusStarting { + require.NoError(t, fsm.transition(component.NewStatusEvent(component.StatusStarting))) } - require.NoError(t, fsm.transition(status)) + require.NoError(t, fsm.transition(ev)) // skipping to stopped is not allowed - err := fsm.transition(component.StatusStopped) + err := fsm.transition(component.NewStatusEvent(component.StatusStopped)) require.Error(t, err) require.ErrorIs(t, err, errInvalidStateTransition) // stopping -> stopped is allowed for non-fatal, non-permanent errors - err = fsm.transition(component.StatusStopping) - if status == component.StatusPermanentError || status == component.StatusFatalError { + err = fsm.transition(component.NewStatusEvent(component.StatusStopping)) + if ev.Status() == component.StatusPermanentError || ev.Status() == component.StatusFatalError { require.Error(t, err) require.ErrorIs(t, err, errInvalidStateTransition) } else { require.NoError(t, err) - require.NoError(t, fsm.transition(component.StatusStopped)) + require.NoError(t, fsm.transition(component.NewStatusEvent(component.StatusStopped))) } }) } } -func TestStatusEventError(t *testing.T) { - fsm := newFSM(func(*component.StatusEvent) {}) - err := fsm.transition(component.StatusStarting) - require.NoError(t, err) - - // the combination of StatusOK with an error is invalid - err = fsm.transition(component.StatusOK, component.WithError(assert.AnError)) - - require.Error(t, err) - 
require.ErrorIs(t, err, component.ErrStatusEventInvalidArgument) -} - func TestStatusFuncs(t *testing.T) { id1 := &component.InstanceID{} id2 := &component.InstanceID{} @@ -229,11 +216,11 @@ func TestStatusFuncs(t *testing.T) { init() for _, st := range statuses1 { - require.NoError(t, comp1Func(st)) + require.NoError(t, comp1Func(component.NewStatusEvent(st))) } for _, st := range statuses2 { - require.NoError(t, comp2Func(st)) + require.NoError(t, comp2Func(component.NewStatusEvent(st))) } require.Equal(t, expectedStatuses, actualStatuses) @@ -255,10 +242,10 @@ func TestStatusFuncsConcurrent(t *testing.T) { id := id go func() { compFn := NewComponentStatusFunc(id, serviceStatusFn) - _ = compFn(component.StatusStarting) + _ = compFn(component.NewStatusEvent(component.StatusStarting)) for i := 0; i < 1000; i++ { - _ = compFn(component.StatusRecoverableError) - _ = compFn(component.StatusOK) + _ = compFn(component.NewStatusEvent(component.StatusRecoverableError)) + _ = compFn(component.NewStatusEvent(component.StatusOK)) } wg.Done() }() @@ -273,12 +260,12 @@ func TestStatusFuncReady(t *testing.T) { init, serviceStatusFn := NewServiceStatusFunc(statusFunc) id := &component.InstanceID{} - err := serviceStatusFn(id, component.StatusStarting) + err := serviceStatusFn(id, component.NewStatusEvent(component.StatusStarting)) require.Error(t, err) require.ErrorIs(t, err, errStatusNotReady) init() - err = serviceStatusFn(id, component.StatusStarting) + err = serviceStatusFn(id, component.NewStatusEvent(component.StatusStarting)) require.NoError(t, err) } diff --git a/service/service_test.go b/service/service_test.go index af2765df287..f8c1d0e4988 100644 --- a/service/service_test.go +++ b/service/service_test.go @@ -427,7 +427,7 @@ func TestServiceFatalError(t *testing.T) { }) go func() { - ev, _ := component.NewStatusEvent(component.StatusFatalError, component.WithError(assert.AnError)) + ev := component.NewFatalErrorEvent(assert.AnError) 
srv.host.notifyComponentStatusChange(&component.InstanceID{}, ev) }() From ec257c6ac8c209fd65b43ca298aea2dcf3758aee Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Thu, 14 Sep 2023 18:08:03 -0700 Subject: [PATCH 20/40] Do not return errors from StatusWatchers Watchers should handle errors themselves. --- service/extensions/extensions.go | 4 +--- service/host.go | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/service/extensions/extensions.go b/service/extensions/extensions.go index 8388f4c6334..e05196d65d7 100644 --- a/service/extensions/extensions.go +++ b/service/extensions/extensions.go @@ -85,14 +85,12 @@ func (bes *Extensions) NotifyConfig(ctx context.Context, conf *confmap.Conf) err return errs } -func (bes *Extensions) NotifyComponentStatusChange(source *component.InstanceID, event *component.StatusEvent) error { - var errs error +func (bes *Extensions) NotifyComponentStatusChange(source *component.InstanceID, event *component.StatusEvent) { for _, ext := range bes.extMap { if sw, ok := ext.(component.StatusWatcher); ok { sw.ComponentStatusChanged(source, event) } } - return errs } func (bes *Extensions) GetExtensions() map[component.ID]component.Component { diff --git a/service/host.go b/service/host.go index 945ec864a36..3396d70f841 100644 --- a/service/host.go +++ b/service/host.go @@ -68,8 +68,7 @@ func (host *serviceHost) GetExporters() map[component.DataType]map[component.ID] } func (host *serviceHost) notifyComponentStatusChange(source *component.InstanceID, event *component.StatusEvent) { - // TODO: What should we do if there is an error returned by a StatusWatcher? 
- host.serviceExtensions.NotifyComponentStatusChange(source, event) //nolint:errcheck + host.serviceExtensions.NotifyComponentStatusChange(source, event) if event.Status() == component.StatusFatalError { host.asyncErrorChannel <- event.Err() } From 27545462e4b8e00e9efb6ef0a7de24c4a232d6fb Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Thu, 14 Sep 2023 18:14:29 -0700 Subject: [PATCH 21/40] Rename servicetelemetry.Settings to servicetelemetry.TelemetrySettings --- service/extensions/extensions.go | 4 ++-- service/extensions/extensions_test.go | 4 ++-- service/internal/graph/graph.go | 4 ++-- service/internal/graph/graph_test.go | 14 +++++++------- .../{nop_settings.go => nop_telemetry_settings.go} | 6 +++--- ...ings_test.go => nop_telemetry_settings_test.go} | 4 ++-- .../{settings.go => telemetry_settings.go} | 8 ++++---- ...settings_test.go => telemetry_settings_test.go} | 2 +- service/service.go | 4 ++-- service/telemetry.go | 2 +- service/telemetry_test.go | 2 +- 11 files changed, 27 insertions(+), 27 deletions(-) rename service/internal/servicetelemetry/{nop_settings.go => nop_telemetry_settings.go} (82%) rename service/internal/servicetelemetry/{nop_settings_test.go => nop_telemetry_settings_test.go} (91%) rename service/internal/servicetelemetry/{settings.go => telemetry_settings.go} (67%) rename service/internal/servicetelemetry/{settings_test.go => telemetry_settings_test.go} (97%) diff --git a/service/extensions/extensions.go b/service/extensions/extensions.go index e05196d65d7..3a1cf997d88 100644 --- a/service/extensions/extensions.go +++ b/service/extensions/extensions.go @@ -23,7 +23,7 @@ const zExtensionName = "zextensionname" // Extensions is a map of extensions created from extension configs. 
type Extensions struct { - telemetry servicetelemetry.Settings + telemetry servicetelemetry.TelemetrySettings extMap map[component.ID]extension.Extension } @@ -129,7 +129,7 @@ func (bes *Extensions) HandleZPages(w http.ResponseWriter, r *http.Request) { // Settings holds configuration for building Extensions. type Settings struct { - Telemetry servicetelemetry.Settings + Telemetry servicetelemetry.TelemetrySettings BuildInfo component.BuildInfo // Extensions builder for extensions. diff --git a/service/extensions/extensions_test.go b/service/extensions/extensions_test.go index dc4c4f82b95..f973d8cba68 100644 --- a/service/extensions/extensions_test.go +++ b/service/extensions/extensions_test.go @@ -81,7 +81,7 @@ func TestBuildExtensions(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { _, err := New(context.Background(), Settings{ - Telemetry: servicetelemetry.NewNopSettings(), + Telemetry: servicetelemetry.NewNopTelemetrySettings(), BuildInfo: component.NewDefaultBuildInfo(), Extensions: extension.NewBuilder(tt.extensionsConfigs, tt.factories), }, tt.config) @@ -167,7 +167,7 @@ func TestNotifyConfig(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { extensions, err := New(context.Background(), Settings{ - Telemetry: servicetelemetry.NewNopSettings(), + Telemetry: servicetelemetry.NewNopTelemetrySettings(), BuildInfo: component.NewDefaultBuildInfo(), Extensions: extension.NewBuilder(tt.extensionsConfigs, tt.factories), }, tt.serviceExtensions) diff --git a/service/internal/graph/graph.go b/service/internal/graph/graph.go index c367289b7ad..902bc3a5afd 100644 --- a/service/internal/graph/graph.go +++ b/service/internal/graph/graph.go @@ -28,7 +28,7 @@ import ( // Settings holds configuration for building builtPipelines. 
type Settings struct { - Telemetry servicetelemetry.Settings + Telemetry servicetelemetry.TelemetrySettings BuildInfo component.BuildInfo ReceiverBuilder *receiver.Builder @@ -50,7 +50,7 @@ type Graph struct { // Keep track of status source per node instanceIDs map[int64]*component.InstanceID - telemetry servicetelemetry.Settings + telemetry servicetelemetry.TelemetrySettings } func Build(ctx context.Context, set Settings) (*Graph, error) { diff --git a/service/internal/graph/graph_test.go b/service/internal/graph/graph_test.go index 5257344f9ca..4e6bc0256eb 100644 --- a/service/internal/graph/graph_test.go +++ b/service/internal/graph/graph_test.go @@ -143,7 +143,7 @@ func TestGraphStartStop(t *testing.T) { } pg := &Graph{componentGraph: simple.NewDirectedGraph()} - pg.telemetry = servicetelemetry.NewNopSettings() + pg.telemetry = servicetelemetry.NewNopTelemetrySettings() pg.instanceIDs = make(map[int64]*component.InstanceID) for _, edge := range tt.edges { @@ -198,7 +198,7 @@ func TestGraphStartStopCycle(t *testing.T) { func TestGraphStartStopComponentError(t *testing.T) { pg := &Graph{componentGraph: simple.NewDirectedGraph()} - pg.telemetry = servicetelemetry.NewNopSettings() + pg.telemetry = servicetelemetry.NewNopTelemetrySettings() r1 := &testNode{ id: component.NewIDWithName("r", "1"), startErr: errors.New("foo"), @@ -639,7 +639,7 @@ func TestConnectorPipelinesGraph(t *testing.T) { t.Run(test.name, func(t *testing.T) { // Build the pipeline set := Settings{ - Telemetry: servicetelemetry.NewNopSettings(), + Telemetry: servicetelemetry.NewNopTelemetrySettings(), BuildInfo: component.NewDefaultBuildInfo(), ReceiverBuilder: receiver.NewBuilder( map[component.ID]component.Config{ @@ -905,7 +905,7 @@ func TestConnectorRouter(t *testing.T) { ctx := context.Background() set := Settings{ - Telemetry: servicetelemetry.NewNopSettings(), + Telemetry: servicetelemetry.NewNopTelemetrySettings(), BuildInfo: component.NewDefaultBuildInfo(), ReceiverBuilder: 
receiver.NewBuilder( map[component.ID]component.Config{ @@ -1949,7 +1949,7 @@ func TestGraphBuildErrors(t *testing.T) { t.Run(test.name, func(t *testing.T) { set := Settings{ BuildInfo: component.NewDefaultBuildInfo(), - Telemetry: servicetelemetry.NewNopSettings(), + Telemetry: servicetelemetry.NewNopTelemetrySettings(), ReceiverBuilder: receiver.NewBuilder( test.receiverCfgs, map[component.Type]receiver.Factory{ @@ -1996,7 +1996,7 @@ func TestGraphFailToStartAndShutdown(t *testing.T) { nopConnectorFactory := connectortest.NewNopFactory() set := Settings{ - Telemetry: servicetelemetry.NewNopSettings(), + Telemetry: servicetelemetry.NewNopTelemetrySettings(), BuildInfo: component.NewDefaultBuildInfo(), ReceiverBuilder: receiver.NewBuilder( map[component.ID]component.Config{ @@ -2225,7 +2225,7 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { } { t.Run(tc.name, func(t *testing.T) { pg := &Graph{componentGraph: simple.NewDirectedGraph()} - pg.telemetry = servicetelemetry.NewNopSettings() + pg.telemetry = servicetelemetry.NewNopTelemetrySettings() actualStatuses := make(map[*component.InstanceID][]*component.StatusEvent) init, statusFunc := status.NewServiceStatusFunc(func(id *component.InstanceID, ev *component.StatusEvent) { diff --git a/service/internal/servicetelemetry/nop_settings.go b/service/internal/servicetelemetry/nop_telemetry_settings.go similarity index 82% rename from service/internal/servicetelemetry/nop_settings.go rename to service/internal/servicetelemetry/nop_telemetry_settings.go index f8d0b0c1ce5..e0ee305346d 100644 --- a/service/internal/servicetelemetry/nop_settings.go +++ b/service/internal/servicetelemetry/nop_telemetry_settings.go @@ -13,9 +13,9 @@ import ( "go.opentelemetry.io/collector/pdata/pcommon" ) -// NewNopSettings returns a new nop settings for Create* functions. -func NewNopSettings() Settings { - return Settings{ +// NewNopTelemetrySettings returns a new nop settings for Create* functions. 
+func NewNopTelemetrySettings() TelemetrySettings { + return TelemetrySettings{ Logger: zap.NewNop(), TracerProvider: trace.NewNoopTracerProvider(), MeterProvider: noop.NewMeterProvider(), diff --git a/service/internal/servicetelemetry/nop_settings_test.go b/service/internal/servicetelemetry/nop_telemetry_settings_test.go similarity index 91% rename from service/internal/servicetelemetry/nop_settings_test.go rename to service/internal/servicetelemetry/nop_telemetry_settings_test.go index e6411d1bd66..dd5014c7e0f 100644 --- a/service/internal/servicetelemetry/nop_settings_test.go +++ b/service/internal/servicetelemetry/nop_telemetry_settings_test.go @@ -17,10 +17,10 @@ import ( ) func TestNewNopSettings(t *testing.T) { - set := NewNopSettings() + set := NewNopTelemetrySettings() require.NotNil(t, set) - require.IsType(t, Settings{}, set) + require.IsType(t, TelemetrySettings{}, set) require.Equal(t, zap.NewNop(), set.Logger) require.Equal(t, trace.NewNoopTracerProvider(), set.TracerProvider) require.Equal(t, noop.NewMeterProvider(), set.MeterProvider) diff --git a/service/internal/servicetelemetry/settings.go b/service/internal/servicetelemetry/telemetry_settings.go similarity index 67% rename from service/internal/servicetelemetry/settings.go rename to service/internal/servicetelemetry/telemetry_settings.go index c665bf27bb5..00062764d93 100644 --- a/service/internal/servicetelemetry/settings.go +++ b/service/internal/servicetelemetry/telemetry_settings.go @@ -8,14 +8,14 @@ import ( "go.opentelemetry.io/collector/service/internal/status" ) -// Settings mirrors component.TelemetrySettings except for the method signature of -// ReportComponentStatus. The service level Settings is not bound a specific component, and +// TelemetrySettings mirrors component.TelemetrySettings except for the method signature of +// ReportComponentStatus. The service level TelemetrySettings is not bound a specific component, and // therefore takes a component.InstanceID as an argument. 
-type Settings component.TelemetrySettingsBase[status.ServiceStatusFunc] +type TelemetrySettings component.TelemetrySettingsBase[status.ServiceStatusFunc] // ToComponentTelemetrySettings returns a TelemetrySettings for a specific component derived from // this service level Settings object. -func (s Settings) ToComponentTelemetrySettings(id *component.InstanceID) component.TelemetrySettings { +func (s TelemetrySettings) ToComponentTelemetrySettings(id *component.InstanceID) component.TelemetrySettings { return component.TelemetrySettings{ Logger: s.Logger, TracerProvider: s.TracerProvider, diff --git a/service/internal/servicetelemetry/settings_test.go b/service/internal/servicetelemetry/telemetry_settings_test.go similarity index 97% rename from service/internal/servicetelemetry/settings_test.go rename to service/internal/servicetelemetry/telemetry_settings_test.go index d57f8ac769b..17300404d2f 100644 --- a/service/internal/servicetelemetry/settings_test.go +++ b/service/internal/servicetelemetry/telemetry_settings_test.go @@ -17,7 +17,7 @@ import ( ) func TestSettings(t *testing.T) { - set := Settings{ + set := TelemetrySettings{ Logger: zap.NewNop(), TracerProvider: trace.NewNoopTracerProvider(), MeterProvider: noop.NewMeterProvider(), diff --git a/service/service.go b/service/service.go index 3ef1f6197b1..477ed2266ef 100644 --- a/service/service.go +++ b/service/service.go @@ -71,7 +71,7 @@ type Settings struct { type Service struct { buildInfo component.BuildInfo telemetry *telemetry.Telemetry - telemetrySettings servicetelemetry.Settings + telemetrySettings servicetelemetry.TelemetrySettings host *serviceHost telemetryInitializer *telemetryInitializer collectorConf *confmap.Conf @@ -107,7 +107,7 @@ func New(ctx context.Context, set Settings, cfg Config) (*Service, error) { res := buildResource(set.BuildInfo, cfg.Telemetry) pcommonRes := pdataFromSdk(res) - srv.telemetrySettings = servicetelemetry.Settings{ + srv.telemetrySettings = 
servicetelemetry.TelemetrySettings{ Logger: srv.telemetry.Logger(), TracerProvider: srv.telemetry.TracerProvider(), MeterProvider: noop.NewMeterProvider(), diff --git a/service/telemetry.go b/service/telemetry.go index 9ba57604dd3..bec39c8adf7 100644 --- a/service/telemetry.go +++ b/service/telemetry.go @@ -71,7 +71,7 @@ func newColTelemetry(useOtel bool, disableHighCardinality bool, extendedConfig b } } -func (tel *telemetryInitializer) init(res *resource.Resource, settings servicetelemetry.Settings, cfg telemetry.Config, asyncErrorChannel chan error) error { +func (tel *telemetryInitializer) init(res *resource.Resource, settings servicetelemetry.TelemetrySettings, cfg telemetry.Config, asyncErrorChannel chan error) error { if cfg.Metrics.Level == configtelemetry.LevelNone || (cfg.Metrics.Address == "" && len(cfg.Metrics.Readers) == 0) { settings.Logger.Info( "Skipping telemetry setup.", diff --git a/service/telemetry_test.go b/service/telemetry_test.go index 414d2578f7c..e22c7c88fd4 100644 --- a/service/telemetry_test.go +++ b/service/telemetry_test.go @@ -273,7 +273,7 @@ func TestTelemetryInit(t *testing.T) { } otelRes := buildResource(buildInfo, *tc.cfg) res := pdataFromSdk(otelRes) - settings := servicetelemetry.Settings{ + settings := servicetelemetry.TelemetrySettings{ Logger: zap.NewNop(), Resource: res, } From f1c678f4ad860e40246f5c7767ef47ee91b924e5 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Fri, 15 Sep 2023 09:37:01 -0700 Subject: [PATCH 22/40] Fix typo in component/status.go Co-authored-by: Pablo Baeyens --- component/status.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/component/status.go b/component/status.go index 3d9cbdae3ed..d1e7f768986 100644 --- a/component/status.go +++ b/component/status.go @@ -109,5 +109,5 @@ type StatusWatcher interface { ComponentStatusChanged(source *InstanceID, event *StatusEvent) } -// StatusFunc is the expected type of ReportComponentStatus for compoment.TelemetrySettings +// StatusFunc is 
the expected type of ReportComponentStatus for component.TelemetrySettings type StatusFunc func(*StatusEvent) error From ed88afdd82e4fc7a5f460741efa3a00e4a87f7c5 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Fri, 15 Sep 2023 16:29:23 -0700 Subject: [PATCH 23/40] Handle ReportComponentStatus for SharedComponents --- internal/sharedcomponent/sharedcomponent.go | 23 +++- .../sharedcomponent/sharedcomponent_test.go | 119 +++++++++++++++++- receiver/otlpreceiver/factory.go | 30 +++-- receiver/otlpreceiver/otlp.go | 8 +- 4 files changed, 161 insertions(+), 19 deletions(-) diff --git a/internal/sharedcomponent/sharedcomponent.go b/internal/sharedcomponent/sharedcomponent.go index 2d1c74f355e..2c974f1b428 100644 --- a/internal/sharedcomponent/sharedcomponent.go +++ b/internal/sharedcomponent/sharedcomponent.go @@ -27,19 +27,37 @@ func NewSharedComponents[K comparable, V component.Component]() *SharedComponent // GetOrAdd returns the already created instance if exists, otherwise creates a new instance // and adds it to the map of references. -func (scs *SharedComponents[K, V]) GetOrAdd(key K, create func() (V, error)) (*SharedComponent[V], error) { +func (scs *SharedComponents[K, V]) GetOrAdd(key K, create func() (V, error), telemetrySettings *component.TelemetrySettings) (*SharedComponent[V], error) { if c, ok := scs.comps[key]; ok { + // If we haven't already seen this telemetry settings, this shared component represents + // another instance. Wrap ReportComponentStatus to report for all instances this shared + // component represents. 
+ if _, ok := c.seenSettings[telemetrySettings]; !ok { + c.seenSettings[telemetrySettings] = struct{}{} + prev := c.telemetry.ReportComponentStatus + c.telemetry.ReportComponentStatus = func(ev *component.StatusEvent) error { + if err := telemetrySettings.ReportComponentStatus(ev); err != nil { + return err + } + return prev(ev) + } + } return c, nil } comp, err := create() if err != nil { return nil, err } + newComp := &SharedComponent[V]{ component: comp, removeFunc: func() { delete(scs.comps, key) }, + telemetry: telemetrySettings, + seenSettings: map[*component.TelemetrySettings]struct{}{ + telemetrySettings: {}, + }, } scs.comps[key] = newComp return newComp, nil @@ -53,6 +71,9 @@ type SharedComponent[V component.Component] struct { startOnce sync.Once stopOnce sync.Once removeFunc func() + + telemetry *component.TelemetrySettings + seenSettings map[*component.TelemetrySettings]struct{} } // Unwrap returns the original component. diff --git a/internal/sharedcomponent/sharedcomponent_test.go b/internal/sharedcomponent/sharedcomponent_test.go index 112f1d79d07..0ab3d9e07c8 100644 --- a/internal/sharedcomponent/sharedcomponent_test.go +++ b/internal/sharedcomponent/sharedcomponent_test.go @@ -20,6 +20,7 @@ var id = component.NewID("test") type baseComponent struct { component.StartFunc component.ShutdownFunc + telemetry *component.TelemetrySettings } func TestNewSharedComponents(t *testing.T) { @@ -31,7 +32,11 @@ func TestNewSharedComponentsCreateError(t *testing.T) { comps := NewSharedComponents[component.ID, *baseComponent]() assert.Len(t, comps.comps, 0) myErr := errors.New("my error") - _, err := comps.GetOrAdd(id, func() (*baseComponent, error) { return nil, myErr }) + _, err := comps.GetOrAdd( + id, + func() (*baseComponent, error) { return nil, myErr }, + newNopTelemetrySettings(), + ) assert.ErrorIs(t, err, myErr) assert.Len(t, comps.comps, 0) } @@ -40,18 +45,31 @@ func TestSharedComponentsGetOrAdd(t *testing.T) { nop := &baseComponent{} comps := 
NewSharedComponents[component.ID, *baseComponent]() - got, err := comps.GetOrAdd(id, func() (*baseComponent, error) { return nop, nil }) + got, err := comps.GetOrAdd( + id, + func() (*baseComponent, error) { return nop, nil }, + newNopTelemetrySettings(), + ) require.NoError(t, err) assert.Len(t, comps.comps, 1) assert.Same(t, nop, got.Unwrap()) - gotSecond, err := comps.GetOrAdd(id, func() (*baseComponent, error) { panic("should not be called") }) + gotSecond, err := comps.GetOrAdd( + id, + func() (*baseComponent, error) { panic("should not be called") }, + newNopTelemetrySettings(), + ) + require.NoError(t, err) assert.Same(t, got, gotSecond) // Shutdown nop will remove assert.NoError(t, got.Shutdown(context.Background())) assert.Len(t, comps.comps, 0) - gotThird, err := comps.GetOrAdd(id, func() (*baseComponent, error) { return nop, nil }) + gotThird, err := comps.GetOrAdd( + id, + func() (*baseComponent, error) { return nop, nil }, + newNopTelemetrySettings(), + ) require.NoError(t, err) assert.NotSame(t, got, gotThird) } @@ -71,7 +89,11 @@ func TestSharedComponent(t *testing.T) { }} comps := NewSharedComponents[component.ID, *baseComponent]() - got, err := comps.GetOrAdd(id, func() (*baseComponent, error) { return comp, nil }) + got, err := comps.GetOrAdd( + id, + func() (*baseComponent, error) { return comp, nil }, + newNopTelemetrySettings(), + ) require.NoError(t, err) assert.Equal(t, wantErr, got.Start(context.Background(), componenttest.NewNopHost())) assert.Equal(t, 1, calledStart) @@ -84,3 +106,90 @@ func TestSharedComponent(t *testing.T) { assert.NoError(t, got.Shutdown(context.Background())) assert.Equal(t, 1, calledStop) } + +func TestSharedComponentsReportStatus(t *testing.T) { + reportedStatuses := make(map[*component.InstanceID][]component.Status) + newStatusFunc := func() func(*component.StatusEvent) error { + instanceID := &component.InstanceID{} + return func(ev *component.StatusEvent) error { + // Use an event with component.StatusNone to 
simulate an error. + if ev.Status() == component.StatusNone { + return assert.AnError + } + reportedStatuses[instanceID] = append(reportedStatuses[instanceID], ev.Status()) + return nil + } + } + + comp := &baseComponent{} + comps := NewSharedComponents[component.ID, *baseComponent]() + var telemetrySettings *component.TelemetrySettings + + // make a shared component that represents three instances + for i := 0; i < 3; i++ { + telemetrySettings = newNopTelemetrySettings() + telemetrySettings.ReportComponentStatus = newStatusFunc() + // The initial settings for the shared component need to match the ones passed to the first + // invocation of GetOrAdd so that underlying telemetry settings reference can be used to + // wrap ReportComponentStatus for subsequently added "instances". + if i == 0 { + comp.telemetry = telemetrySettings + } + got, err := comps.GetOrAdd( + id, + func() (*baseComponent, error) { return comp, nil }, + telemetrySettings, + ) + require.NoError(t, err) + assert.Len(t, comps.comps, 1) + assert.Same(t, comp, got.Unwrap()) + } + + // make sure we don't try to represent a fourth instance if we reuse a telemetrySettings + _, _ = comps.GetOrAdd( + id, + func() (*baseComponent, error) { return comp, nil }, + telemetrySettings, + ) + + err := comp.telemetry.ReportComponentStatus(component.NewStatusEvent(component.StatusStarting)) + require.NoError(t, err) + + // ok + err = comp.telemetry.ReportComponentStatus(component.NewStatusEvent(component.StatusOK)) + require.NoError(t, err) + + // simulate an error + err = comp.telemetry.ReportComponentStatus(component.NewStatusEvent(component.StatusNone)) + require.Error(t, err) + require.ErrorIs(t, err, assert.AnError) + + // stopping + err = comp.telemetry.ReportComponentStatus(component.NewStatusEvent(component.StatusStopping)) + require.NoError(t, err) + + // stopped + err = comp.telemetry.ReportComponentStatus(component.NewStatusEvent(component.StatusStopped)) + require.NoError(t, err) + + // The shared 
component represents 3 component instances. Reporting status for the shared + // component should report status for each of the instances it represents. + expectedStatuses := []component.Status{ + component.StatusStarting, + component.StatusOK, + component.StatusStopping, + component.StatusStopped, + } + + require.Equal(t, 3, len(reportedStatuses)) + + for _, actualStatuses := range reportedStatuses { + require.Equal(t, expectedStatuses, actualStatuses) + } +} + +// newNopTelemetrySettings streamlines getting a pointer to a NopTelemetrySettings +func newNopTelemetrySettings() *component.TelemetrySettings { + set := componenttest.NewNopTelemetrySettings() + return &set +} diff --git a/receiver/otlpreceiver/factory.go b/receiver/otlpreceiver/factory.go index 11cf3dc6668..cce8b363cd4 100644 --- a/receiver/otlpreceiver/factory.go +++ b/receiver/otlpreceiver/factory.go @@ -68,9 +68,13 @@ func createTraces( nextConsumer consumer.Traces, ) (receiver.Traces, error) { oCfg := cfg.(*Config) - r, err := receivers.GetOrAdd(oCfg, func() (*otlpReceiver, error) { - return newOtlpReceiver(oCfg, set) - }) + r, err := receivers.GetOrAdd( + oCfg, + func() (*otlpReceiver, error) { + return newOtlpReceiver(oCfg, &set) + }, + &set.TelemetrySettings, + ) if err != nil { return nil, err } @@ -89,9 +93,13 @@ func createMetrics( consumer consumer.Metrics, ) (receiver.Metrics, error) { oCfg := cfg.(*Config) - r, err := receivers.GetOrAdd(oCfg, func() (*otlpReceiver, error) { - return newOtlpReceiver(oCfg, set) - }) + r, err := receivers.GetOrAdd( + oCfg, + func() (*otlpReceiver, error) { + return newOtlpReceiver(oCfg, &set) + }, + &set.TelemetrySettings, + ) if err != nil { return nil, err } @@ -110,9 +118,13 @@ func createLog( consumer consumer.Logs, ) (receiver.Logs, error) { oCfg := cfg.(*Config) - r, err := receivers.GetOrAdd(oCfg, func() (*otlpReceiver, error) { - return newOtlpReceiver(oCfg, set) - }) + r, err := receivers.GetOrAdd( + oCfg, + func() (*otlpReceiver, error) { + return 
newOtlpReceiver(oCfg, &set) + }, + &set.TelemetrySettings, + ) if err != nil { return nil, err } diff --git a/receiver/otlpreceiver/otlp.go b/receiver/otlpreceiver/otlp.go index faf9a68fe04..c07fea243cb 100644 --- a/receiver/otlpreceiver/otlp.go +++ b/receiver/otlpreceiver/otlp.go @@ -43,13 +43,13 @@ type otlpReceiver struct { obsrepGRPC *receiverhelper.ObsReport obsrepHTTP *receiverhelper.ObsReport - settings receiver.CreateSettings + settings *receiver.CreateSettings } // newOtlpReceiver just creates the OpenTelemetry receiver services. It is the caller's // responsibility to invoke the respective Start*Reception methods as well // as the various Stop*Reception methods to end it. -func newOtlpReceiver(cfg *Config, set receiver.CreateSettings) (*otlpReceiver, error) { +func newOtlpReceiver(cfg *Config, set *receiver.CreateSettings) (*otlpReceiver, error) { r := &otlpReceiver{ cfg: cfg, settings: set, @@ -62,7 +62,7 @@ func newOtlpReceiver(cfg *Config, set receiver.CreateSettings) (*otlpReceiver, e r.obsrepGRPC, err = receiverhelper.NewObsReport(receiverhelper.ObsReportSettings{ ReceiverID: set.ID, Transport: "grpc", - ReceiverCreateSettings: set, + ReceiverCreateSettings: *set, }) if err != nil { return nil, err @@ -70,7 +70,7 @@ func newOtlpReceiver(cfg *Config, set receiver.CreateSettings) (*otlpReceiver, e r.obsrepHTTP, err = receiverhelper.NewObsReport(receiverhelper.ObsReportSettings{ ReceiverID: set.ID, Transport: "http", - ReceiverCreateSettings: set, + ReceiverCreateSettings: *set, }) if err != nil { return nil, err From 22f2c40d1688b1a8d36aaccce5f4a4b4ff8420b7 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Fri, 15 Sep 2023 19:09:11 -0700 Subject: [PATCH 24/40] Implement AggregateStatus to compute an effective status from a map of statuses --- component/status.go | 46 +++++++++++++++++++++ component/status_test.go | 88 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) diff --git a/component/status.go b/component/status.go index 
d1e7f768986..ff4bd397a1d 100644 --- a/component/status.go +++ b/component/status.go @@ -111,3 +111,49 @@ type StatusWatcher interface { // StatusFunc is the expected type of ReportComponentStatus for component.TelemetrySettings type StatusFunc func(*StatusEvent) error + +// AggregateStatus will derive a status for the given input using the following rules in order: +// 1. If any instance encounters a fatal error, the component is in a Fatal Error state. +// 2. If any instance is in a Permanent Error state, the component status is Permanent Error. +// 3. If any instance is Stopping, the component is in a Stopping state. +// 4. If any instance is Stopped, but no instances are Stopping, we must be in the process of Stopping the component. +// 5. If all instances are Stopped, the component is Stopped. +// 6. If any instance is in a Recoverable Error state, the component status is Recoverable Error. +// 7. If any instance is Starting, the component status is Starting. +// 8. None of the above were true, so the component is OK. (In other words, all instances are OK.) 
+ +func AggregateStatus(eventMap map[*InstanceID]*StatusEvent) Status { + seen := make(map[Status]struct{}) + for _, ev := range eventMap { + seen[ev.Status()] = struct{}{} + } + + if _, isFatal := seen[StatusFatalError]; isFatal { + return StatusFatalError + } + + if _, isPermanent := seen[StatusPermanentError]; isPermanent { + return StatusPermanentError + } + + if _, isStopping := seen[StatusStopping]; isStopping { + return StatusStopping + } + + if _, isStopped := seen[StatusStopped]; isStopped { + if len(seen) == 1 { + return StatusStopped + } + return StatusStopping + } + + if _, isRecoverable := seen[StatusRecoverableError]; isRecoverable { + return StatusRecoverableError + } + + if _, isStarting := seen[StatusStarting]; isStarting { + return StatusStarting + } + + return StatusOK +} diff --git a/component/status_test.go b/component/status_test.go index db207e6ae26..63c5d725c86 100644 --- a/component/status_test.go +++ b/component/status_test.go @@ -47,3 +47,91 @@ func TestStatusEventsWithError(t *testing.T) { }) } } + +func TestAggregateStatus(t *testing.T) { + for _, tc := range []struct { + name string + statusMap map[*InstanceID]*StatusEvent + expectedStatus Status + }{ + { + name: "aggregate status with fatal is FatalError", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStarting), + {}: NewStatusEvent(StatusOK), + {}: NewStatusEvent(StatusFatalError), + {}: NewStatusEvent(StatusRecoverableError), + }, + expectedStatus: StatusFatalError, + }, + { + name: "aggregate status with permanent is PermanentError", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStarting), + {}: NewStatusEvent(StatusOK), + {}: NewStatusEvent(StatusPermanentError), + {}: NewStatusEvent(StatusRecoverableError), + }, + expectedStatus: StatusPermanentError, + }, + { + name: "aggregate status with stopping is Stopping", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStarting), + {}: NewStatusEvent(StatusOK), + {}: 
NewStatusEvent(StatusRecoverableError), + {}: NewStatusEvent(StatusStopping), + }, + expectedStatus: StatusStopping, + }, + { + name: "aggregate status with stopped and non-stopped is Stopping", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStarting), + {}: NewStatusEvent(StatusOK), + {}: NewStatusEvent(StatusRecoverableError), + {}: NewStatusEvent(StatusStopped), + }, + expectedStatus: StatusStopping, + }, + { + name: "aggregate status with all stopped is Stopped", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStopped), + {}: NewStatusEvent(StatusStopped), + {}: NewStatusEvent(StatusStopped), + }, + expectedStatus: StatusStopped, + }, + { + name: "aggregate status with recoverable is RecoverableError", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStarting), + {}: NewStatusEvent(StatusOK), + {}: NewStatusEvent(StatusRecoverableError), + }, + expectedStatus: StatusRecoverableError, + }, + { + name: "aggregate status with starting is Starting", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStarting), + {}: NewStatusEvent(StatusOK), + }, + expectedStatus: StatusStarting, + }, + { + name: "aggregate status with all ok is OK", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusOK), + {}: NewStatusEvent(StatusOK), + {}: NewStatusEvent(StatusOK), + }, + expectedStatus: StatusOK, + }, + } { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expectedStatus, AggregateStatus(tc.statusMap)) + }) + } +} From 362e1abb3d4d61a8c5ef218fd2b340dee527f088 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Sun, 17 Sep 2023 10:41:02 -0700 Subject: [PATCH 25/40] Add additional utility methods for component status and events --- component/status.go | 62 +++++++++++++++++++++++--------- component/status_test.go | 78 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 16 deletions(-) diff --git a/component/status.go b/component/status.go index 
ff4bd397a1d..b4f25e389f3 100644 --- a/component/status.go +++ b/component/status.go @@ -113,21 +113,25 @@ type StatusWatcher interface { type StatusFunc func(*StatusEvent) error // AggregateStatus will derive a status for the given input using the following rules in order: -// 1. If any instance encounters a fatal error, the component is in a Fatal Error state. -// 2. If any instance is in a Permanent Error state, the component status is Permanent Error. -// 3. If any instance is Stopping, the component is in a Stopping state. -// 4. If any instance is Stopped, but no instances are Stopping, we must be in the process of Stopping the component. -// 5. If all instances are Stopped, the component is Stopped. -// 6. If any instance is in a Recoverable Error state, the component status is Recoverable Error. -// 7. If any instance is Starting, the component status is Starting. -// 8. None of the above were true, so the component is OK. (In other words, all instances are OK.) - -func AggregateStatus(eventMap map[*InstanceID]*StatusEvent) Status { +// 1. If all instances have the same status, there is nothing to aggregate, return it. +// 2. If any instance encounters a fatal error, the component is in a Fatal Error state. +// 3. If any instance is in a Permanent Error state, the component status is Permanent Error. +// 4. If any instance is Stopping, the component is in a Stopping state. +// 5. An instance is Stopped, but not all instances are Stopping, we must be in the process of Stopping the component. +// 6. If any instance is in a Recoverable Error state, the component status is Recoverable Error. +// 7. By process of elimination, the only remaining state is starting. 
+func AggregateStatus[K comparable](eventMap map[K]*StatusEvent) Status { seen := make(map[Status]struct{}) for _, ev := range eventMap { seen[ev.Status()] = struct{}{} } + if len(seen) == 1 { + for st := range seen { + return st + } + } + if _, isFatal := seen[StatusFatalError]; isFatal { return StatusFatalError } @@ -141,9 +145,6 @@ func AggregateStatus(eventMap map[*InstanceID]*StatusEvent) Status { } if _, isStopped := seen[StatusStopped]; isStopped { - if len(seen) == 1 { - return StatusStopped - } return StatusStopping } @@ -151,9 +152,38 @@ func AggregateStatus(eventMap map[*InstanceID]*StatusEvent) Status { return StatusRecoverableError } - if _, isStarting := seen[StatusStarting]; isStarting { - return StatusStarting + return StatusStarting +} + +// StatusIsError returns true for error statuses (e.g. StatusRecoverableError, +// StatusPermanentError, or StatusFatalError) +func StatusIsError(status Status) bool { + return status == StatusRecoverableError || + status == StatusPermanentError || + status == StatusFatalError +} + +// LastStatusEvent returns the key and last StatusEvent by timestamp from the map provided. +// Results will be nil for an empty map. +func LastStatusEvent[K comparable](eventMap map[K]*StatusEvent) (lastKey K, lastEvent *StatusEvent) { + for key, event := range eventMap { + if lastEvent == nil || lastEvent.timestamp.Before(event.timestamp) { + lastKey = key + lastEvent = event + } } + return +} - return StatusOK +// LastErrorEvent returns the key and last StatusEvent with an Error status from the provided +// map. Results will be nil if there is not an error event in the map. 
+func LastErrorEvent[K comparable](eventMap map[K]*StatusEvent) (lastKey K, lastEvent *StatusEvent) { + for key, event := range eventMap { + if StatusIsError(event.Status()) && + (lastEvent == nil || lastEvent.timestamp.Before(event.timestamp)) { + lastKey = key + lastEvent = event + } + } + return } diff --git a/component/status_test.go b/component/status_test.go index 63c5d725c86..8ff6b5fdf15 100644 --- a/component/status_test.go +++ b/component/status_test.go @@ -135,3 +135,81 @@ func TestAggregateStatus(t *testing.T) { }) } } + +func TestStatusIsError(t *testing.T) { + for _, tc := range []struct { + status Status + isError bool + }{ + { + status: StatusStarting, + isError: false, + }, + { + status: StatusOK, + isError: false, + }, + { + status: StatusRecoverableError, + isError: true, + }, + { + status: StatusPermanentError, + isError: true, + }, + { + status: StatusFatalError, + isError: true, + }, + { + status: StatusStopping, + isError: false, + }, + { + status: StatusStopped, + isError: false, + }, + } { + name := fmt.Sprintf("StatusIsError(%s) is %t", tc.status, tc.isError) + t.Run(name, func(t *testing.T) { + assert.Equal(t, tc.isError, StatusIsError(tc.status)) + }) + } +} + +func TestLastEvent(t *testing.T) { + expectedID := &InstanceID{ + ID: NewIDWithName(DataTypeTraces, "0"), + Kind: KindReceiver, + } + + eventMap := map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusOK), + {}: NewStatusEvent(StatusOK), + expectedID: NewStatusEvent(StatusStopping), + } + + lastID, lastEvent := LastStatusEvent(eventMap) + + assert.Equal(t, expectedID, lastID) + assert.Equal(t, StatusStopping, lastEvent.Status()) +} + +func TestLastErrorEvent(t *testing.T) { + expectedID := &InstanceID{ + ID: NewIDWithName(DataTypeTraces, "0"), + Kind: KindReceiver, + } + + eventMap := map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusRecoverableError), + {}: NewStatusEvent(StatusPermanentError), + expectedID: NewStatusEvent(StatusFatalError), + {}: 
NewStatusEvent(StatusStopping), + } + + lastID, lastEvent := LastErrorEvent(eventMap) + + assert.Equal(t, expectedID, lastID) + assert.Equal(t, StatusFatalError, lastEvent.Status()) +} From f821467f9b14427bc1509ede7546e0d26e754806 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Sun, 17 Sep 2023 14:26:19 -0700 Subject: [PATCH 26/40] Automatically report status for extensions during startup/shutdown --- service/extensions/extensions.go | 25 ++++-- service/extensions/extensions_test.go | 120 ++++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 6 deletions(-) diff --git a/service/extensions/extensions.go b/service/extensions/extensions.go index 3a1cf997d88..50cf297c443 100644 --- a/service/extensions/extensions.go +++ b/service/extensions/extensions.go @@ -23,8 +23,9 @@ const zExtensionName = "zextensionname" // Extensions is a map of extensions created from extension configs. type Extensions struct { - telemetry servicetelemetry.TelemetrySettings - extMap map[component.ID]extension.Extension + telemetry servicetelemetry.TelemetrySettings + extMap map[component.ID]extension.Extension + instanceIDs map[component.ID]*component.InstanceID } // Start starts all extensions. 
@@ -33,7 +34,10 @@ func (bes *Extensions) Start(ctx context.Context, host component.Host) error { for extID, ext := range bes.extMap { extLogger := components.ExtensionLogger(bes.telemetry.Logger, extID) extLogger.Info("Extension is starting...") + instanceID := bes.instanceIDs[extID] + _ = bes.telemetry.ReportComponentStatus(instanceID, component.NewStatusEvent(component.StatusStarting)) if err := ext.Start(ctx, components.NewHostWrapper(host, extLogger)); err != nil { + _ = bes.telemetry.ReportComponentStatus(instanceID, component.NewPermanentErrorEvent(err)) return err } extLogger.Info("Extension started.") @@ -45,8 +49,15 @@ func (bes *Extensions) Start(ctx context.Context, host component.Host) error { func (bes *Extensions) Shutdown(ctx context.Context) error { bes.telemetry.Logger.Info("Stopping extensions...") var errs error - for _, ext := range bes.extMap { - errs = multierr.Append(errs, ext.Shutdown(ctx)) + for extID, ext := range bes.extMap { + instanceID := bes.instanceIDs[extID] + _ = bes.telemetry.ReportComponentStatus(instanceID, component.NewStatusEvent(component.StatusStopping)) + if err := ext.Shutdown(ctx); err != nil { + _ = bes.telemetry.ReportComponentStatus(instanceID, component.NewPermanentErrorEvent(err)) + errs = multierr.Append(errs, err) + continue + } + _ = bes.telemetry.ReportComponentStatus(instanceID, component.NewStatusEvent(component.StatusStopped)) } return errs @@ -139,8 +150,9 @@ type Settings struct { // New creates a new Extensions from Config. 
func New(ctx context.Context, set Settings, cfg Config) (*Extensions, error) { exts := &Extensions{ - telemetry: set.Telemetry, - extMap: make(map[component.ID]extension.Extension), + telemetry: set.Telemetry, + extMap: make(map[component.ID]extension.Extension), + instanceIDs: make(map[component.ID]*component.InstanceID), } for _, extID := range cfg { instanceID := &component.InstanceID{ @@ -165,6 +177,7 @@ func New(ctx context.Context, set Settings, cfg Config) (*Extensions, error) { } exts.extMap[extID] = ext + exts.instanceIDs[extID] = instanceID } return exts, nil diff --git a/service/extensions/extensions_test.go b/service/extensions/extensions_test.go index f973d8cba68..100603dbbbd 100644 --- a/service/extensions/extensions_test.go +++ b/service/extensions/extensions_test.go @@ -12,10 +12,12 @@ import ( "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/component/componenttest" "go.opentelemetry.io/collector/confmap" "go.opentelemetry.io/collector/extension" "go.opentelemetry.io/collector/extension/extensiontest" "go.opentelemetry.io/collector/service/internal/servicetelemetry" + "go.opentelemetry.io/collector/service/internal/status" ) func TestBuildExtensions(t *testing.T) { @@ -241,3 +243,121 @@ func newCreateErrorExtensionFactory() extension.Factory { component.StabilityLevelDevelopment, ) } + +func TestStatusReportedOnStartupShutdown(t *testing.T) { + // compare two slices of status events ignoring timestamp + assertEqualStatuses := func(t *testing.T, evts1, evts2 []*component.StatusEvent) { + assert.Equal(t, len(evts1), len(evts2)) + for i := 0; i < len(evts1); i++ { + ev1 := evts1[i] + ev2 := evts2[i] + assert.Equal(t, ev1.Status(), ev2.Status()) + assert.Equal(t, ev1.Err(), ev2.Err()) + } + } + + for _, tc := range []struct { + name string + expectedStatuses []*component.StatusEvent + startErr error + shutdownErr error + }{ + { + name: "successful startup/shutdown", + expectedStatuses: 
[]*component.StatusEvent{ + component.NewStatusEvent(component.StatusStarting), + component.NewStatusEvent(component.StatusStopping), + component.NewStatusEvent(component.StatusStopped), + }, + startErr: nil, + shutdownErr: nil, + }, + { + name: "start error", + expectedStatuses: []*component.StatusEvent{ + component.NewStatusEvent(component.StatusStarting), + component.NewPermanentErrorEvent(assert.AnError), + }, + startErr: assert.AnError, + shutdownErr: nil, + }, + { + name: "shutdown error", + expectedStatuses: []*component.StatusEvent{ + component.NewStatusEvent(component.StatusStarting), + component.NewStatusEvent(component.StatusStopping), + component.NewPermanentErrorEvent(assert.AnError), + }, + startErr: nil, + shutdownErr: assert.AnError, + }, + } { + t.Run(tc.name, func(t *testing.T) { + compID := component.NewID("statustest") + factory := newStatusTestExtensionFactory("statustest", tc.startErr, tc.shutdownErr) + config := factory.CreateDefaultConfig() + extensions, err := New( + context.Background(), + Settings{ + Telemetry: servicetelemetry.NewNopTelemetrySettings(), + BuildInfo: component.NewDefaultBuildInfo(), + Configs: map[component.ID]component.Config{ + compID: config, + }, + Factories: map[component.Type]extension.Factory{ + "statustest": factory, + }, + }, + []component.ID{compID}, + ) + + assert.NoError(t, err) + + var actualStatuses []*component.StatusEvent + init, statusFunc := status.NewServiceStatusFunc(func(id *component.InstanceID, ev *component.StatusEvent) { + actualStatuses = append(actualStatuses, ev) + }) + extensions.telemetry.ReportComponentStatus = statusFunc + init() + + assert.Equal(t, tc.startErr, extensions.Start(context.Background(), componenttest.NewNopHost())) + if tc.startErr == nil { + assert.Equal(t, tc.shutdownErr, extensions.Shutdown(context.Background())) + } + assertEqualStatuses(t, tc.expectedStatuses, actualStatuses) + }) + } +} + +type statusTestExtension struct { + startErr error + shutdownErr error +} + +func 
(ext *statusTestExtension) Start(_ context.Context, _ component.Host) error { + return ext.startErr +} + +func (ext *statusTestExtension) Shutdown(_ context.Context) error { + return ext.shutdownErr +} + +func newStatusTestExtension(startErr, shutdownErr error) *statusTestExtension { + return &statusTestExtension{ + startErr: startErr, + shutdownErr: shutdownErr, + } +} + +func newStatusTestExtensionFactory(name component.Type, startErr, shutdownErr error) extension.Factory { + return extension.NewFactory( + name, + func() component.Config { + return &struct{}{} + }, + func(ctx context.Context, set extension.CreateSettings, extension component.Config) (extension.Extension, error) { + return newStatusTestExtension(startErr, shutdownErr), nil + }, + component.StabilityLevelDevelopment, + ) +} From 178f8859a378cfa62757d02151da1a786ef777f1 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Sun, 17 Sep 2023 16:26:20 -0700 Subject: [PATCH 27/40] Replace LastErrorEvent with more flexible LastEventByStatus --- component/status.go | 8 +++--- component/status_test.go | 53 +++++++++++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/component/status.go b/component/status.go index b4f25e389f3..4b189d29ce5 100644 --- a/component/status.go +++ b/component/status.go @@ -175,11 +175,11 @@ func LastStatusEvent[K comparable](eventMap map[K]*StatusEvent) (lastKey K, last return } -// LastErrorEvent returns the key and last StatusEvent with an Error status from the provided -// map. Results will be nil if there is not an error event in the map. -func LastErrorEvent[K comparable](eventMap map[K]*StatusEvent) (lastKey K, lastEvent *StatusEvent) { +// LastEventByStatus returns the key and last StatusEvent of the given status from the provided map. +// Results will be nil if there is not an event with the given status in the map. 
+func LastEventByStatus[K comparable](eventMap map[K]*StatusEvent, status Status) (lastKey K, lastEvent *StatusEvent) { for key, event := range eventMap { - if StatusIsError(event.Status()) && + if status == event.Status() && (lastEvent == nil || lastEvent.timestamp.Before(event.timestamp)) { lastKey = key lastEvent = event diff --git a/component/status_test.go b/component/status_test.go index 8ff6b5fdf15..ed86bee0978 100644 --- a/component/status_test.go +++ b/component/status_test.go @@ -5,6 +5,7 @@ package component import ( "fmt" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -189,6 +190,10 @@ func TestLastEvent(t *testing.T) { expectedID: NewStatusEvent(StatusStopping), } + // ensure expected event is sufficiently more recent + ev := eventMap[expectedID] + ev.timestamp = ev.timestamp.Add(time.Duration(2) * time.Second) + lastID, lastEvent := LastStatusEvent(eventMap) assert.Equal(t, expectedID, lastID) @@ -196,20 +201,46 @@ func TestLastEvent(t *testing.T) { } func TestLastErrorEvent(t *testing.T) { - expectedID := &InstanceID{ - ID: NewIDWithName(DataTypeTraces, "0"), - Kind: KindReceiver, + statuses := []Status{ + StatusStarting, + StatusOK, + StatusRecoverableError, + StatusPermanentError, + StatusFatalError, + StatusStopping, + StatusStopped, } - eventMap := map[*InstanceID]*StatusEvent{ - {}: NewStatusEvent(StatusRecoverableError), - {}: NewStatusEvent(StatusPermanentError), - expectedID: NewStatusEvent(StatusFatalError), - {}: NewStatusEvent(StatusStopping), + // populate an eventMap and reverse lookup (expectedIDs) for later assertions + expectedIDs := make(map[Status]*InstanceID) + eventMap := make(map[*InstanceID]*StatusEvent) + // append duplicate statuses to ensure the latest is returned + statusesWithDups := statuses + statusesWithDups = append(statusesWithDups, StatusOK, StatusPermanentError, StatusStopped) + for i, st := range statusesWithDups { + id := &InstanceID{ + ID: 
NewIDWithName(DataTypeTraces, fmt.Sprint(i)), + } + // expectedID will be overwritten for dups + expectedIDs[st] = id + ev := NewStatusEvent(st) + // pad the time between events + ev.timestamp = ev.timestamp.Add(time.Duration(i) * time.Second) + // events with duplicate statuses will exist in the eventMap + eventMap[id] = ev } - lastID, lastEvent := LastErrorEvent(eventMap) + // multiple for events for some statuses + assert.Greater(t, len(eventMap), len(statuses)) + // one id per status + assert.Equal(t, len(statuses), len(expectedIDs)) + + for _, st := range statuses { + t.Run(fmt.Sprintf("with %s", st), func(t *testing.T) { + lastID, lastEvent := LastEventByStatus(eventMap, st) + assert.Equal(t, expectedIDs[st], lastID) + assert.Equal(t, st, lastEvent.Status()) + }) + } - assert.Equal(t, expectedID, lastID) - assert.Equal(t, StatusFatalError, lastEvent.Status()) } From 6f265c1994148ff33e9460d2fd58e4bbbb4f646d Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Sun, 17 Sep 2023 21:54:36 -0700 Subject: [PATCH 28/40] Add component.EffectiveStatus method --- component/status.go | 26 +++++++ component/status_test.go | 151 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 177 insertions(+) diff --git a/component/status.go b/component/status.go index 4b189d29ce5..09ec5df5f22 100644 --- a/component/status.go +++ b/component/status.go @@ -187,3 +187,29 @@ func LastEventByStatus[K comparable](eventMap map[K]*StatusEvent, status Status) } return } + +// EffectiveStatus returns a status event where: +// - The status is set to the aggregate status of the events in the eventMap +// - The timestamp is set to the latest timestamp of the events in the eventMap +// - For an error status, the event will have same error as the most current event of the same +// error type from the eventMap +func EffectiveStatus[K comparable](eventMap map[K]*StatusEvent) *StatusEvent { + aggregateStatus := AggregateStatus[K](eventMap) + _, lastEvent := LastStatusEvent[K](eventMap) + // the 
effective status matches an existing event + if lastEvent.Status() == aggregateStatus { + return lastEvent + } + + // the effective status requires a synthetic event + effectiveStatus := &StatusEvent{ + status: aggregateStatus, + timestamp: lastEvent.timestamp, + } + if StatusIsError(aggregateStatus) { + _, errorEvent := LastEventByStatus[K](eventMap, aggregateStatus) + effectiveStatus.err = errorEvent.err + } + + return effectiveStatus +} diff --git a/component/status_test.go b/component/status_test.go index ed86bee0978..a92ba9f278b 100644 --- a/component/status_test.go +++ b/component/status_test.go @@ -244,3 +244,154 @@ func TestLastErrorEvent(t *testing.T) { } } + +func TestEffectiveStatus(t *testing.T) { + // maxTime is used to make sure we select the event with the latest timestamp + maxTime := time.Unix(1<<63-62135596801, 999999999) + // latest sets the timestamp for an event to maxTime + latest := func(ev *StatusEvent) *StatusEvent { + ev.timestamp = maxTime + return ev + } + + for _, tc := range []struct { + name string + statusMap map[*InstanceID]*StatusEvent + expectedStatus *StatusEvent + }{ + { + name: "FatalError - existing event", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStarting), + {}: NewStatusEvent(StatusOK), + {}: latest(NewFatalErrorEvent(assert.AnError)), + {}: NewStatusEvent(StatusRecoverableError), + }, + expectedStatus: &StatusEvent{ + status: StatusFatalError, + timestamp: maxTime, + err: assert.AnError, + }, + }, + { + name: "FatalError - synthetic event", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStarting), + {}: NewStatusEvent(StatusOK), + {}: NewFatalErrorEvent(assert.AnError), + {}: latest(NewStatusEvent(StatusRecoverableError)), + }, + expectedStatus: &StatusEvent{ + status: StatusFatalError, + timestamp: maxTime, + err: assert.AnError, + }, + }, + { + name: "PermanentError - existing event", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStarting), + {}: 
NewStatusEvent(StatusOK), + {}: latest(NewPermanentErrorEvent(assert.AnError)), + {}: NewStatusEvent(StatusRecoverableError), + }, + expectedStatus: &StatusEvent{ + status: StatusPermanentError, + timestamp: maxTime, + err: assert.AnError, + }, + }, + { + name: "PermanentError - synthetic event", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStarting), + {}: NewStatusEvent(StatusOK), + {}: NewPermanentErrorEvent(assert.AnError), + {}: latest(NewStatusEvent(StatusRecoverableError)), + }, + expectedStatus: &StatusEvent{ + status: StatusPermanentError, + timestamp: maxTime, + err: assert.AnError, + }, + }, + { + name: "Stopping - existing event", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStarting), + {}: NewStatusEvent(StatusOK), + {}: NewStatusEvent(StatusRecoverableError), + {}: latest(NewStatusEvent(StatusStopping)), + }, + expectedStatus: &StatusEvent{ + status: StatusStopping, + timestamp: maxTime, + }, + }, + { + name: "Stopping - synthetic event", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStarting), + {}: NewStatusEvent(StatusOK), + {}: NewStatusEvent(StatusRecoverableError), + {}: latest(NewStatusEvent(StatusStopped)), + }, + expectedStatus: &StatusEvent{ + status: StatusStopping, + timestamp: maxTime, + }, + }, + { + name: "Stopped - existing event", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStopped), + {}: latest(NewStatusEvent(StatusStopped)), + {}: NewStatusEvent(StatusStopped), + }, + expectedStatus: &StatusEvent{ + status: StatusStopped, + timestamp: maxTime, + }, + }, + { + name: "RecoverableError - existing event", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStarting), + {}: NewStatusEvent(StatusOK), + {}: latest(NewRecoverableErrorEvent(assert.AnError)), + }, + expectedStatus: &StatusEvent{ + status: StatusRecoverableError, + timestamp: maxTime, + err: assert.AnError, + }, + }, + { + name: "Starting - synthetic event", 
+ statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusStarting), + {}: latest(NewStatusEvent(StatusOK)), + }, + expectedStatus: &StatusEvent{ + status: StatusStarting, + timestamp: maxTime, + }, + }, + { + name: "OK - existing event", + statusMap: map[*InstanceID]*StatusEvent{ + {}: NewStatusEvent(StatusOK), + {}: latest(NewStatusEvent(StatusOK)), + {}: NewStatusEvent(StatusOK), + }, + expectedStatus: &StatusEvent{ + status: StatusOK, + timestamp: maxTime, + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expectedStatus, EffectiveStatus(tc.statusMap)) + }) + } +} From 20bd41d041eda6be84580b91bb97960124ed6249 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Tue, 19 Sep 2023 18:19:30 -0700 Subject: [PATCH 29/40] Fix out of date comment Co-authored-by: Daniel Jaglowski --- component/telemetry.go | 1 - 1 file changed, 1 deletion(-) diff --git a/component/telemetry.go b/component/telemetry.go index 7c47bf2af2e..c9a9bad24a4 100644 --- a/component/telemetry.go +++ b/component/telemetry.go @@ -37,7 +37,6 @@ type TelemetrySettingsBase[T any] struct { // be returned are: // // - An illegal state transition - // - Using the WithError() option with a non-error status // - Calling this method before component startup // // If the API is being used properly, these errors are safe to ignore. From bed7e505d011eb01133736cdfd8205e2a5152551 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Tue, 19 Sep 2023 18:36:14 -0700 Subject: [PATCH 30/40] Rename EffectiveStatus to AggregateStatusEvent; add more comments --- component/status.go | 14 +++++++++----- component/status_test.go | 4 ++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/component/status.go b/component/status.go index 09ec5df5f22..e5731e7113b 100644 --- a/component/status.go +++ b/component/status.go @@ -126,12 +126,15 @@ func AggregateStatus[K comparable](eventMap map[K]*StatusEvent) Status { seen[ev.Status()] = struct{}{} } + // All statuses are the same. 
Note, this will handle StatusOK and StatusStopped as these two + // cases require all components be in the same state. if len(seen) == 1 { for st := range seen { return st } } + // Handle mixed status cases if _, isFatal := seen[StatusFatalError]; isFatal { return StatusFatalError } @@ -152,6 +155,7 @@ func AggregateStatus[K comparable](eventMap map[K]*StatusEvent) Status { return StatusRecoverableError } + // By process of elimination, this is the last possible status; no check necessary. return StatusStarting } @@ -188,12 +192,12 @@ func LastEventByStatus[K comparable](eventMap map[K]*StatusEvent, status Status) return } -// EffectiveStatus returns a status event where: +// AggregateStatusEvent returns a status event where: // - The status is set to the aggregate status of the events in the eventMap // - The timestamp is set to the latest timestamp of the events in the eventMap // - For an error status, the event will have same error as the most current event of the same // error type from the eventMap -func EffectiveStatus[K comparable](eventMap map[K]*StatusEvent) *StatusEvent { +func AggregateStatusEvent[K comparable](eventMap map[K]*StatusEvent) *StatusEvent { aggregateStatus := AggregateStatus[K](eventMap) _, lastEvent := LastStatusEvent[K](eventMap) // the effective status matches an existing event @@ -202,14 +206,14 @@ func EffectiveStatus[K comparable](eventMap map[K]*StatusEvent) *StatusEvent { } // the effective status requires a synthetic event - effectiveStatus := &StatusEvent{ + aggregateEvent := &StatusEvent{ status: aggregateStatus, timestamp: lastEvent.timestamp, } if StatusIsError(aggregateStatus) { _, errorEvent := LastEventByStatus[K](eventMap, aggregateStatus) - effectiveStatus.err = errorEvent.err + aggregateEvent.err = errorEvent.err } - return effectiveStatus + return aggregateEvent } diff --git a/component/status_test.go b/component/status_test.go index a92ba9f278b..ac41e780ede 100644 --- a/component/status_test.go +++ 
b/component/status_test.go @@ -245,7 +245,7 @@ func TestLastErrorEvent(t *testing.T) { } -func TestEffectiveStatus(t *testing.T) { +func TestAggregateStatusEvent(t *testing.T) { // maxTime is used to make sure we select the event with the latest timestamp maxTime := time.Unix(1<<63-62135596801, 999999999) // latest sets the timestamp for an event to maxTime @@ -391,7 +391,7 @@ func TestEffectiveStatus(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - assert.Equal(t, tc.expectedStatus, EffectiveStatus(tc.statusMap)) + assert.Equal(t, tc.expectedStatus, AggregateStatusEvent(tc.statusMap)) }) } } From 5cb9626dd16bdb5aa8128edb4ce90a1011a4501c Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Tue, 19 Sep 2023 18:58:22 -0700 Subject: [PATCH 31/40] Fix flaky test --- otelcol/collector_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otelcol/collector_test.go b/otelcol/collector_test.go index e9fad20f7d0..f1a59c54430 100644 --- a/otelcol/collector_test.go +++ b/otelcol/collector_test.go @@ -218,7 +218,7 @@ func TestComponentStatusWatcher(t *testing.T) { // components having the same IDs (having same ID for different component instances // is a normal situation for processors). 
return len(changedComponents) == 3 - }, time.Second, time.Millisecond*10) + }, 2*time.Second, time.Millisecond*100) col.Shutdown() wg.Wait() From e4fb9acff15d09805ea76e034e22a06ea691ba7a Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Wed, 27 Sep 2023 14:07:48 -0700 Subject: [PATCH 32/40] Correct comments in component/telemetry.go Co-authored-by: Evan Bradley <11745660+evan-bradley@users.noreply.github.com> --- component/telemetry.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/component/telemetry.go b/component/telemetry.go index c9a9bad24a4..5eb6bcf457a 100644 --- a/component/telemetry.go +++ b/component/telemetry.go @@ -33,7 +33,7 @@ type TelemetrySettingsBase[T any] struct { // ReportComponentStatus allows a component to report runtime changes in status. The service // will automatically report status for a component during startup and shutdown. Components can // use this method to report status after start and before shutdown. ReportComponentStatus - // will only return errors if the API used incorrectly. The three scenarios where an error will + // will only return errors if the API used incorrectly. The two scenarios where an error will // be returned are: // // - An illegal state transition From 59ad517df045d656720b21a0bf3b9cb9da3b19c2 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Fri, 29 Sep 2023 14:14:03 -0700 Subject: [PATCH 33/40] Improve comments Co-authored-by: Evan Bradley <11745660+evan-bradley@users.noreply.github.com> --- component/status.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/component/status.go b/component/status.go index e5731e7113b..68fae379aaa 100644 --- a/component/status.go +++ b/component/status.go @@ -104,7 +104,7 @@ func NewFatalErrorEvent(err error) *StatusEvent { type StatusWatcher interface { // ComponentStatusChanged notifies about a change in the source component status. 
// Extensions that implement this interface must be ready that the ComponentStatusChanged - // may be called before, after or concurrently with Component.Shutdown() call. + // may be called before, after or concurrently with calls to Component.Start() and Component.Shutdown(). // The function may be called concurrently with itself. ComponentStatusChanged(source *InstanceID, event *StatusEvent) } @@ -117,7 +117,7 @@ type StatusFunc func(*StatusEvent) error // 2. If any instance encounters a fatal error, the component is in a Fatal Error state. // 3. If any instance is in a Permanent Error state, the component status is Permanent Error. // 4. If any instance is Stopping, the component is in a Stopping state. -// 5. An instance is Stopped, but not all instances are Stopping, we must be in the process of Stopping the component. +// 5. An instance is Stopped, but not all instances are Stopped, we must be in the process of Stopping the component. // 6. If any instance is in a Recoverable Error state, the component status is Recoverable Error. // 7. By process of elimination, the only remaining state is starting. 
func AggregateStatus[K comparable](eventMap map[K]*StatusEvent) Status { From 0a646ad1ec56f5ccc7382b192ae172049a832176 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Fri, 29 Sep 2023 14:21:08 -0700 Subject: [PATCH 34/40] Move StatusWatcher interface to extension package --- component/status.go | 11 ----------- extension/extension.go | 11 +++++++++++ .../extensiontest/statuswatcher_extension_test.go | 3 ++- service/extensions/extensions.go | 2 +- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/component/status.go b/component/status.go index 68fae379aaa..baf34fe5309 100644 --- a/component/status.go +++ b/component/status.go @@ -98,17 +98,6 @@ func NewFatalErrorEvent(err error) *StatusEvent { return ev } -// StatusWatcher is an extra interface for Extension hosted by the OpenTelemetry -// Collector that is to be implemented by extensions interested in changes to component -// status. -type StatusWatcher interface { - // ComponentStatusChanged notifies about a change in the source component status. - // Extensions that implement this interface must be ready that the ComponentStatusChanged - // may be called before, after or concurrently with calls to Component.Start() and Component.Shutdown(). - // The function may be called concurrently with itself. - ComponentStatusChanged(source *InstanceID, event *StatusEvent) -} - // StatusFunc is the expected type of ReportComponentStatus for component.TelemetrySettings type StatusFunc func(*StatusEvent) error diff --git a/extension/extension.go b/extension/extension.go index 6b8df571b81..2521fc65a18 100644 --- a/extension/extension.go +++ b/extension/extension.go @@ -40,6 +40,17 @@ type ConfigWatcher interface { NotifyConfig(ctx context.Context, conf *confmap.Conf) error } +// StatusWatcher is an extra interface for Extension hosted by the OpenTelemetry +// Collector that is to be implemented by extensions interested in changes to component +// status. 
+type StatusWatcher interface { + // ComponentStatusChanged notifies about a change in the source component status. + // Extensions that implement this interface must be ready that the ComponentStatusChanged + // may be called before, after or concurrently with calls to Component.Start() and Component.Shutdown(). + // The function may be called concurrently with itself. + ComponentStatusChanged(source *component.InstanceID, event *component.StatusEvent) +} + // CreateSettings is passed to Factory.Create(...) function. type CreateSettings struct { // ID returns the ID of the component that will be created. diff --git a/extension/extensiontest/statuswatcher_extension_test.go b/extension/extensiontest/statuswatcher_extension_test.go index 16163fbc650..0bfdf4f5eda 100644 --- a/extension/extensiontest/statuswatcher_extension_test.go +++ b/extension/extensiontest/statuswatcher_extension_test.go @@ -12,6 +12,7 @@ import ( "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/component/componenttest" + "go.opentelemetry.io/collector/extension" ) func TestStatusWatcherExtension(t *testing.T) { @@ -31,7 +32,7 @@ func TestStatusWatcherExtension(t *testing.T) { assert.NoError(t, ext.Start(context.Background(), componenttest.NewNopHost())) assert.False(t, statusChanged) - ext.(component.StatusWatcher).ComponentStatusChanged(&component.InstanceID{}, &component.StatusEvent{}) + ext.(extension.StatusWatcher).ComponentStatusChanged(&component.InstanceID{}, &component.StatusEvent{}) assert.True(t, statusChanged) assert.NoError(t, ext.Shutdown(context.Background())) diff --git a/service/extensions/extensions.go b/service/extensions/extensions.go index 50cf297c443..bb073f74cc5 100644 --- a/service/extensions/extensions.go +++ b/service/extensions/extensions.go @@ -98,7 +98,7 @@ func (bes *Extensions) NotifyConfig(ctx context.Context, conf *confmap.Conf) err func (bes *Extensions) NotifyComponentStatusChange(source *component.InstanceID, event *component.StatusEvent) 
{ for _, ext := range bes.extMap { - if sw, ok := ext.(component.StatusWatcher); ok { + if sw, ok := ext.(extension.StatusWatcher); ok { sw.ComponentStatusChanged(source, event) } } From 4ada43fa80124eb3746ad2d4e8764f0d1cd108e1 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Fri, 29 Sep 2023 14:52:02 -0700 Subject: [PATCH 35/40] Reduce public API for event utility methods --- component/status.go | 41 ++++++++---------------- component/status_test.go | 67 ---------------------------------------- 2 files changed, 13 insertions(+), 95 deletions(-) diff --git a/component/status.go b/component/status.go index baf34fe5309..ffe423330d1 100644 --- a/component/status.go +++ b/component/status.go @@ -156,39 +156,25 @@ func StatusIsError(status Status) bool { status == StatusFatalError } -// LastStatusEvent returns the key and last StatusEvent by timestamp from the map provided. -// Results will be nil for an empty map. -func LastStatusEvent[K comparable](eventMap map[K]*StatusEvent) (lastKey K, lastEvent *StatusEvent) { - for key, event := range eventMap { - if lastEvent == nil || lastEvent.timestamp.Before(event.timestamp) { - lastKey = key - lastEvent = event - } - } - return -} - -// LastEventByStatus returns the key and last StatusEvent of the given status from the provided map. -// Results will be nil if there is not an event with the given status in the map. 
-func LastEventByStatus[K comparable](eventMap map[K]*StatusEvent, status Status) (lastKey K, lastEvent *StatusEvent) { - for key, event := range eventMap { - if status == event.Status() && - (lastEvent == nil || lastEvent.timestamp.Before(event.timestamp)) { - lastKey = key - lastEvent = event - } - } - return -} - // AggregateStatusEvent returns a status event where: // - The status is set to the aggregate status of the events in the eventMap // - The timestamp is set to the latest timestamp of the events in the eventMap // - For an error status, the event will have same error as the most current event of the same // error type from the eventMap func AggregateStatusEvent[K comparable](eventMap map[K]*StatusEvent) *StatusEvent { + var lastEvent, lastMatchingEvent *StatusEvent aggregateStatus := AggregateStatus[K](eventMap) - _, lastEvent := LastStatusEvent[K](eventMap) + + for _, ev := range eventMap { + if lastEvent == nil || lastEvent.timestamp.Before(ev.timestamp) { + lastEvent = ev + } + if aggregateStatus == ev.Status() && + (lastMatchingEvent == nil || lastMatchingEvent.timestamp.Before(ev.timestamp)) { + lastMatchingEvent = ev + } + } + // the effective status matches an existing event if lastEvent.Status() == aggregateStatus { return lastEvent @@ -200,8 +186,7 @@ func AggregateStatusEvent[K comparable](eventMap map[K]*StatusEvent) *StatusEven timestamp: lastEvent.timestamp, } if StatusIsError(aggregateStatus) { - _, errorEvent := LastEventByStatus[K](eventMap, aggregateStatus) - aggregateEvent.err = errorEvent.err + aggregateEvent.err = lastMatchingEvent.err } return aggregateEvent diff --git a/component/status_test.go b/component/status_test.go index ac41e780ede..13755d078a5 100644 --- a/component/status_test.go +++ b/component/status_test.go @@ -178,73 +178,6 @@ func TestStatusIsError(t *testing.T) { } } -func TestLastEvent(t *testing.T) { - expectedID := &InstanceID{ - ID: NewIDWithName(DataTypeTraces, "0"), - Kind: KindReceiver, - } - - eventMap := 
map[*InstanceID]*StatusEvent{ - {}: NewStatusEvent(StatusOK), - {}: NewStatusEvent(StatusOK), - expectedID: NewStatusEvent(StatusStopping), - } - - // ensure expected event is sufficiently more recent - ev := eventMap[expectedID] - ev.timestamp = ev.timestamp.Add(time.Duration(2) * time.Second) - - lastID, lastEvent := LastStatusEvent(eventMap) - - assert.Equal(t, expectedID, lastID) - assert.Equal(t, StatusStopping, lastEvent.Status()) -} - -func TestLastErrorEvent(t *testing.T) { - statuses := []Status{ - StatusStarting, - StatusOK, - StatusRecoverableError, - StatusPermanentError, - StatusFatalError, - StatusStopping, - StatusStopped, - } - - // populate an eventMap and reverse lookup (expectedIDs) for later assertions - expectedIDs := make(map[Status]*InstanceID) - eventMap := make(map[*InstanceID]*StatusEvent) - // append duplicate statuses to ensure the latest is returned - statusesWithDups := statuses - statusesWithDups = append(statusesWithDups, StatusOK, StatusPermanentError, StatusStopped) - for i, st := range statusesWithDups { - id := &InstanceID{ - ID: NewIDWithName(DataTypeTraces, fmt.Sprint(i)), - } - // expectedID will be overwritten for dups - expectedIDs[st] = id - ev := NewStatusEvent(st) - // pad the time between events - ev.timestamp = ev.timestamp.Add(time.Duration(i) * time.Second) - // events with duplicate statuses will exist in the eventMap - eventMap[id] = ev - } - - // multiple for events for some statuses - assert.Greater(t, len(eventMap), len(statuses)) - // one id per status - assert.Equal(t, len(statuses), len(expectedIDs)) - - for _, st := range statuses { - t.Run(fmt.Sprintf("with %s", st), func(t *testing.T) { - lastID, lastEvent := LastEventByStatus(eventMap, st) - assert.Equal(t, expectedIDs[st], lastID) - assert.Equal(t, st, lastEvent.Status()) - }) - } - -} - func TestAggregateStatusEvent(t *testing.T) { // maxTime is used to make sure we select the event with the latest timestamp maxTime := time.Unix(1<<63-62135596801, 
999999999) From 9f0811f8b094f548610f26d2740d0983e3e0a521 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Tue, 3 Oct 2023 08:13:43 -0700 Subject: [PATCH 36/40] Update comment in component/status.go Co-authored-by: Tigran Najaryan <4194920+tigrannajaryan@users.noreply.github.com> --- component/status.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/component/status.go b/component/status.go index ffe423330d1..36a449edc12 100644 --- a/component/status.go +++ b/component/status.go @@ -65,7 +65,7 @@ func (ev *StatusEvent) Timestamp() time.Time { } // NewStatusEvent creates and returns a StatusEvent with the specified status and sets the timestamp -// time.Now(). To provide set an error on the event for an error status use one of the dedicated +// time.Now(). To set an error on the event for an error status use one of the dedicated // constructors (e.g. NewRecoverableErrorEvent, NewPermanentErrorEvent, NewFatalErrorEvent) func NewStatusEvent(status Status) *StatusEvent { return &StatusEvent{ From 9455b7fef9afb1928dbc7c67171cf7cbc757d915 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Wed, 4 Oct 2023 17:09:39 -0700 Subject: [PATCH 37/40] Fix automatic status reporting for SharedComponents Automatic status reporting for a SharedComponent needs to go through its telemetry settings in order to keep status reporting for the component and its instances in sync. This overrides the automatic status reporting that occurs in graph, and makes the reporting in graph essentially a no-op as the SharedComponent preemptively transitions state during start and shutdown.
--- internal/sharedcomponent/sharedcomponent.go | 19 +++- .../sharedcomponent/sharedcomponent_test.go | 95 +++++++++++++++++++ 2 files changed, 113 insertions(+), 1 deletion(-) diff --git a/internal/sharedcomponent/sharedcomponent.go b/internal/sharedcomponent/sharedcomponent.go index 2c974f1b428..cddebb59902 100644 --- a/internal/sharedcomponent/sharedcomponent.go +++ b/internal/sharedcomponent/sharedcomponent.go @@ -85,7 +85,14 @@ func (r *SharedComponent[V]) Unwrap() V { func (r *SharedComponent[V]) Start(ctx context.Context, host component.Host) error { var err error r.startOnce.Do(func() { - err = r.component.Start(ctx, host) + // It's important that status for a sharedcomponent is reported through its + // telemetrysettings to keep status in sync and avoid race conditions. This logic duplicates + // and takes priority over the automated status reporting that happens in graph, making the + // status reporting in graph a no-op. + _ = r.telemetry.ReportComponentStatus(component.NewStatusEvent(component.StatusStarting)) + if err = r.component.Start(ctx, host); err != nil { + _ = r.telemetry.ReportComponentStatus(component.NewPermanentErrorEvent(err)) + } }) return err } @@ -94,7 +101,17 @@ func (r *SharedComponent[V]) Start(ctx context.Context, host component.Host) err func (r *SharedComponent[V]) Shutdown(ctx context.Context) error { var err error r.stopOnce.Do(func() { + // It's important that status for a sharedcomponent is reported through its + // telemetrysettings to keep status in sync and avoid race conditions. This logic duplicates + // and takes priority over the automated status reporting that happens in graph, making the + // the status reporting in graph a no-op. 
+ _ = r.telemetry.ReportComponentStatus(component.NewStatusEvent(component.StatusStopping)) err = r.component.Shutdown(ctx) + if err != nil { + _ = r.telemetry.ReportComponentStatus(component.NewPermanentErrorEvent(err)) + } else { + _ = r.telemetry.ReportComponentStatus(component.NewStatusEvent(component.StatusStopped)) + } r.removeFunc() }) return err diff --git a/internal/sharedcomponent/sharedcomponent_test.go b/internal/sharedcomponent/sharedcomponent_test.go index 0ab3d9e07c8..c0a123b76a7 100644 --- a/internal/sharedcomponent/sharedcomponent_test.go +++ b/internal/sharedcomponent/sharedcomponent_test.go @@ -188,6 +188,101 @@ func TestSharedComponentsReportStatus(t *testing.T) { } } +func TestReportStatusOnStartShutdown(t *testing.T) { + for _, tc := range []struct { + name string + startErr error + shutdownErr error + expectedStatuses []component.Status + }{ + { + name: "successful start/stop", + startErr: nil, + shutdownErr: nil, + expectedStatuses: []component.Status{ + component.StatusStarting, + component.StatusOK, + component.StatusStopping, + component.StatusStopped, + }, + }, + { + name: "start error", + startErr: assert.AnError, + shutdownErr: nil, + expectedStatuses: []component.Status{ + component.StatusStarting, + component.StatusPermanentError, + }, + }, + { + name: "shutdown error", + shutdownErr: assert.AnError, + expectedStatuses: []component.Status{ + component.StatusStarting, + component.StatusOK, + component.StatusStopping, + component.StatusPermanentError, + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + reportedStatuses := make(map[*component.InstanceID][]component.Status) + newStatusFunc := func() func(*component.StatusEvent) error { + instanceID := &component.InstanceID{} + return func(ev *component.StatusEvent) error { + reportedStatuses[instanceID] = append(reportedStatuses[instanceID], ev.Status()) + return nil + } + } + base := &baseComponent{} + if tc.startErr != nil { + base.StartFunc = func(context.Context, component.Host) 
error { + return tc.startErr + } + } + if tc.shutdownErr != nil { + base.ShutdownFunc = func(context.Context) error { + return tc.shutdownErr + } + } + comps := NewSharedComponents[component.ID, *baseComponent]() + var comp *SharedComponent[*baseComponent] + var err error + for i := 0; i < 3; i++ { + telemetrySettings := newNopTelemetrySettings() + telemetrySettings.ReportComponentStatus = newStatusFunc() + if i == 0 { + base.telemetry = telemetrySettings + } + comp, err = comps.GetOrAdd( + id, + func() (*baseComponent, error) { return base, nil }, + telemetrySettings, + ) + require.NoError(t, err) + } + + err = comp.Start(context.Background(), componenttest.NewNopHost()) + require.Equal(t, tc.startErr, err) + + if tc.startErr == nil { + err = comp.telemetry.ReportComponentStatus(component.NewStatusEvent(component.StatusOK)) + require.NoError(t, err) + + err = comp.Shutdown(context.Background()) + require.Equal(t, tc.shutdownErr, err) + } + + require.Equal(t, 3, len(reportedStatuses)) + + for _, actualStatuses := range reportedStatuses { + require.Equal(t, tc.expectedStatuses, actualStatuses) + } + }) + } +} + // newNopTelemetrySettings streamlines getting a pointer to a NopTelemetrySettings func newNopTelemetrySettings() *component.TelemetrySettings { set := componenttest.NewNopTelemetrySettings() From 5361925fe4eadfad0b4d8c35a1ecc478faff57f1 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Thu, 5 Oct 2023 12:22:24 -0700 Subject: [PATCH 38/40] Update version in deprecation comments Co-authored-by: Alex Boten --- component/host.go | 2 +- service/host.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/component/host.go b/component/host.go index 98f4fd79d3c..732e37c8c44 100644 --- a/component/host.go +++ b/component/host.go @@ -12,7 +12,7 @@ type Host interface { // // ReportFatalError should be called by the component anytime after Component.Start() ends and // before Component.Shutdown() begins. 
- // Deprecated: [x.x.x] Use TelemetrySettings.ReportComponentStatus instead (with an event + // Deprecated: [0.87.0] Use TelemetrySettings.ReportComponentStatus instead (with an event // component.StatusFatalError) ReportFatalError(err error) diff --git a/service/host.go b/service/host.go index 3396d70f841..b749564b0dd 100644 --- a/service/host.go +++ b/service/host.go @@ -33,7 +33,7 @@ type serviceHost struct { // ReportFatalError is used to report to the host that the receiver encountered // a fatal error (i.e.: an error that the instance can't recover from) after // its start function has already returned. -// Deprecated: [x.x.x] Replaced by servicetelemetry.Settings.ReportComponentStatus +// Deprecated: [0.87.0] Replaced by servicetelemetry.Settings.ReportComponentStatus func (host *serviceHost) ReportFatalError(err error) { host.asyncErrorChannel <- err } From 711fb73303da22dc3a215de434683a17414ac492 Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Thu, 5 Oct 2023 12:29:52 -0700 Subject: [PATCH 39/40] Streamline error assertions --- internal/sharedcomponent/sharedcomponent_test.go | 1 - service/internal/status/status_test.go | 3 --- service/service_test.go | 1 - 3 files changed, 5 deletions(-) diff --git a/internal/sharedcomponent/sharedcomponent_test.go b/internal/sharedcomponent/sharedcomponent_test.go index c0a123b76a7..5ce081fa752 100644 --- a/internal/sharedcomponent/sharedcomponent_test.go +++ b/internal/sharedcomponent/sharedcomponent_test.go @@ -161,7 +161,6 @@ func TestSharedComponentsReportStatus(t *testing.T) { // simulate an error err = comp.telemetry.ReportComponentStatus(component.NewStatusEvent(component.StatusNone)) - require.Error(t, err) require.ErrorIs(t, err, assert.AnError) // stopping diff --git a/service/internal/status/status_test.go b/service/internal/status/status_test.go index 87a1d3231d9..c439cea39af 100644 --- a/service/internal/status/status_test.go +++ b/service/internal/status/status_test.go @@ -163,13 +163,11 @@ func 
TestValidSeqsToStopped(t *testing.T) { require.NoError(t, fsm.transition(ev)) // skipping to stopped is not allowed err := fsm.transition(component.NewStatusEvent(component.StatusStopped)) - require.Error(t, err) require.ErrorIs(t, err, errInvalidStateTransition) // stopping -> stopped is allowed for non-fatal, non-permanent errors err = fsm.transition(component.NewStatusEvent(component.StatusStopping)) if ev.Status() == component.StatusPermanentError || ev.Status() == component.StatusFatalError { - require.Error(t, err) require.ErrorIs(t, err, errInvalidStateTransition) } else { require.NoError(t, err) @@ -261,7 +259,6 @@ func TestStatusFuncReady(t *testing.T) { id := &component.InstanceID{} err := serviceStatusFn(id, component.NewStatusEvent(component.StatusStarting)) - require.Error(t, err) require.ErrorIs(t, err, errStatusNotReady) init() diff --git a/service/service_test.go b/service/service_test.go index f8c1d0e4988..8b47218dc6b 100644 --- a/service/service_test.go +++ b/service/service_test.go @@ -433,7 +433,6 @@ func TestServiceFatalError(t *testing.T) { err = <-srv.host.asyncErrorChannel - require.Error(t, err) require.ErrorIs(t, err, assert.AnError) } From 6e09fb0af2c34710f77500c606887efd12e8028f Mon Sep 17 00:00:00 2001 From: Matthew Wear Date: Fri, 6 Oct 2023 11:07:47 -0700 Subject: [PATCH 40/40] Fix test post rebase --- service/extensions/extensions_test.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/service/extensions/extensions_test.go b/service/extensions/extensions_test.go index 100603dbbbd..4b8a574f4f0 100644 --- a/service/extensions/extensions_test.go +++ b/service/extensions/extensions_test.go @@ -296,17 +296,18 @@ func TestStatusReportedOnStartupShutdown(t *testing.T) { compID := component.NewID("statustest") factory := newStatusTestExtensionFactory("statustest", tc.startErr, tc.shutdownErr) config := factory.CreateDefaultConfig() + extensionsConfigs := map[component.ID]component.Config{ + compID: config, 
+ } + factories := map[component.Type]extension.Factory{ + "statustest": factory, + } extensions, err := New( context.Background(), Settings{ - Telemetry: servicetelemetry.NewNopTelemetrySettings(), - BuildInfo: component.NewDefaultBuildInfo(), - Configs: map[component.ID]component.Config{ - compID: config, - }, - Factories: map[component.Type]extension.Factory{ - "statustest": factory, - }, + Telemetry: servicetelemetry.NewNopTelemetrySettings(), + BuildInfo: component.NewDefaultBuildInfo(), + Extensions: extension.NewBuilder(extensionsConfigs, factories), }, []component.ID{compID}, )