open-telemetry · codeboten · Oct 6, 2023 · Nov 15, 2022 · Aug 1, 2023 · Aug 2, 2023
@@ -0,0 +1,16 @@
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: enhancement
+
+# The name of the component, or a single word describing the area of concern (e.g. otlpreceiver)
+component: core
+
+# A brief description of the change.  Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Adds the ability for components to report status and for extensions to subscribe to status events by implementing an optional StatusWatcher interface.
+
+# One or more tracking issues or pull requests related to the change
+issues: [7682]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
@@ -175,3 +175,10 @@ type CreateDefaultConfigFunc func() Config
 func (f CreateDefaultConfigFunc) CreateDefaultConfig() Config {
 	// Delegate to the wrapped function and hand back whatever Config it produces.
 	cfg := f()
 	return cfg
 }
+
+// InstanceID uniquely identifies a component instance.
+//
+// Because it contains a map field, an InstanceID value cannot be compared with == and
+// cannot be used as a map key.
+type InstanceID struct {
+	// ID is the ID of the component.
+	// NOTE(review): presumably the configured component ID; confirm against callers.
+	ID          ID
+	// Kind is the Kind of the component.
+	Kind        Kind
+	// PipelineIDs is a set of IDs: only the keys matter, the empty-struct values carry no data.
+	// NOTE(review): presumably the pipelines this instance is part of; confirm against callers.
+	PipelineIDs map[ID]struct{}
+}
@@ -21,5 +21,8 @@
 		MeterProvider:  noop.NewMeterProvider(),
 		MetricsLevel:   configtelemetry.LevelNone,
 		Resource:       pcommon.NewResource(),
+		ReportComponentStatus: func(*component.StatusEvent) error {
+			return nil
+		},
 	}
 }
@@ -12,6 +12,8 @@ type Host interface {
 	//
 	// ReportFatalError should be called by the component anytime after Component.Start() ends and
 	// before Component.Shutdown() begins.
+	//
+	// Deprecated: [0.87.0] Use TelemetrySettings.ReportComponentStatus instead, with an event
+	// whose status is component.StatusFatalError.
 	ReportFatalError(err error)
 
 	// GetFactory of the specified kind. Returns the factory for a component type.

@@ -0,0 +1,193 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package component // import "go.opentelemetry.io/collector/component"
+
+import (
+	"time"
+)
+
+// Status enumerates the possible component statuses. Its zero value is StatusNone.
+type Status int32
+
+// Enumeration of possible component statuses. The values are assigned sequentially with iota,
+// starting at zero, so StatusNone is the zero value of Status.
+const (
+	// StatusNone is the zero value. Status.String also reports any unrecognized value under
+	// this name.
+	StatusNone Status = iota
+	StatusStarting
+	StatusOK
+	// StatusRecoverableError, StatusPermanentError, and StatusFatalError are the error
+	// statuses, i.e. the ones for which StatusIsError returns true.
+	StatusRecoverableError
+	StatusPermanentError
+	StatusFatalError
+	StatusStopping
+	StatusStopped
+)
+
+// String returns the name of the Status constant that s corresponds to. Any value that is
+// not one of the named statuses is reported as "StatusNone".
+func (s Status) String() string {
+	// Index-keyed table: each constant maps to its own name.
+	names := [...]string{
+		StatusNone:             "StatusNone",
+		StatusStarting:         "StatusStarting",
+		StatusOK:               "StatusOK",
+		StatusRecoverableError: "StatusRecoverableError",
+		StatusPermanentError:   "StatusPermanentError",
+		StatusFatalError:       "StatusFatalError",
+		StatusStopping:         "StatusStopping",
+		StatusStopped:          "StatusStopped",
+	}
+	// Out-of-range values (negative or past the last constant) fall back to the default name.
+	if s < StatusNone || int(s) >= len(names) {
+		return "StatusNone"
+	}
+	return names[s]
+}
+
+// StatusEvent contains a status and timestamp, and can contain an error.
+// The fields are unexported; read them through the Status, Err, and Timestamp methods and
+// create events with NewStatusEvent or one of the error-event constructors.
+type StatusEvent struct {
+	// status is the Status reported by this event.
+	status    Status
+	// err is the error attached to the event; it stays nil unless a constructor sets it.
+	err       error
+	// timestamp records when the event was created (the constructors use time.Now()).
+	timestamp time.Time
+}
+
+// Status returns the Status (enum) associated with the StatusEvent.
+// The receiver must be non-nil, since the status is read through the pointer.
+func (ev *StatusEvent) Status() Status {
+	return ev.status
+}
+
+// Err returns the error associated with the StatusEvent.
+// It is nil for events created with NewStatusEvent, which does not set an error.
+func (ev *StatusEvent) Err() error {
+	return ev.err
+}
+
+// Timestamp returns the timestamp associated with the StatusEvent.
+// For events created with NewStatusEvent this is the time.Now() value taken at construction.
+func (ev *StatusEvent) Timestamp() time.Time {
+	return ev.timestamp
+}
+
+// NewStatusEvent builds a StatusEvent carrying the given status and stamps it with
+// time.Now(). The event's error is left unset; for an error status that should carry an
+// error, use one of the dedicated constructors instead (e.g. NewRecoverableErrorEvent,
+// NewPermanentErrorEvent, NewFatalErrorEvent).
+func NewStatusEvent(status Status) *StatusEvent {
+	event := new(StatusEvent)
+	event.status = status
+	event.timestamp = time.Now()
+	return event
+}
+
+// NewRecoverableErrorEvent builds a StatusEvent whose status is StatusRecoverableError,
+// whose error is err, and whose timestamp is time.Now().
+func NewRecoverableErrorEvent(err error) *StatusEvent {
+	return &StatusEvent{
+		status:    StatusRecoverableError,
+		err:       err,
+		timestamp: time.Now(),
+	}
+}
+
+// NewPermanentErrorEvent builds a StatusEvent whose status is StatusPermanentError,
+// whose error is err, and whose timestamp is time.Now().
+func NewPermanentErrorEvent(err error) *StatusEvent {
+	return &StatusEvent{
+		status:    StatusPermanentError,
+		err:       err,
+		timestamp: time.Now(),
+	}
+}
+
+// NewFatalErrorEvent builds a StatusEvent whose status is StatusFatalError,
+// whose error is err, and whose timestamp is time.Now().
+func NewFatalErrorEvent(err error) *StatusEvent {
+	return &StatusEvent{
+		status:    StatusFatalError,
+		err:       err,
+		timestamp: time.Now(),
+	}
+}
+
+// StatusFunc is the expected type of ReportComponentStatus for component.TelemetrySettings.
+// It receives the StatusEvent being reported and returns an error.
+// NOTE(review): the conditions under which an implementation returns a non-nil error are
+// not defined here; confirm against the implementation that is wired in.
+type StatusFunc func(*StatusEvent) error
+
+// AggregateStatus will derive a status for the given input using the following rules in order:
+//  0. If eventMap is empty (or nil), there are no instances to aggregate, return StatusNone.
+//  1. If all instances have the same status, there is nothing to aggregate, return it.
+//  2. If any instance encounters a fatal error, the component is in a Fatal Error state.
+//  3. If any instance is in a Permanent Error state, the component status is Permanent Error.
+//  4. If any instance is Stopping, the component is in a Stopping state.
+//  5. If any instance is Stopped, but not all instances are Stopped, we must be in the process
+//     of Stopping the component.
+//  6. If any instance is in a Recoverable Error state, the component status is Recoverable Error.
+//  7. Otherwise the status is Starting. This is reached only by mixes of StatusNone,
+//     StatusStarting, and StatusOK.
+//
+// Every event in eventMap must be non-nil.
+func AggregateStatus[K comparable](eventMap map[K]*StatusEvent) Status {
+	// No instances at all: none of the rules below can apply, and reporting Starting would
+	// claim a state that no instance is actually in.
+	if len(eventMap) == 0 {
+		return StatusNone
+	}
+
+	// Collect the set of distinct statuses present in the input.
+	seen := make(map[Status]struct{})
+	for _, ev := range eventMap {
+		seen[ev.Status()] = struct{}{}
+	}
+
+	// All statuses are the same. Note, this will handle StatusOK and StatusStopped as these two
+	// cases require all components be in the same state.
+	if len(seen) == 1 {
+		for st := range seen {
+			return st
+		}
+	}
+
+	// Handle mixed status cases; the first rule that matches wins.
+	if _, isFatal := seen[StatusFatalError]; isFatal {
+		return StatusFatalError
+	}
+
+	if _, isPermanent := seen[StatusPermanentError]; isPermanent {
+		return StatusPermanentError
+	}
+
+	if _, isStopping := seen[StatusStopping]; isStopping {
+		return StatusStopping
+	}
+
+	// Some, but not all, instances are Stopped: the component as a whole is still stopping.
+	if _, isStopped := seen[StatusStopped]; isStopped {
+		return StatusStopping
+	}
+
+	if _, isRecoverable := seen[StatusRecoverableError]; isRecoverable {
+		return StatusRecoverableError
+	}
+
+	// Only StatusNone, StatusStarting, and StatusOK can remain, and they are mixed.
+	return StatusStarting
+}
+
+// StatusIsError reports whether status is one of the error statuses, i.e.
+// StatusRecoverableError, StatusPermanentError, or StatusFatalError.
+func StatusIsError(status Status) bool {
+	switch status {
+	case StatusRecoverableError, StatusPermanentError, StatusFatalError:
+		return true
+	}
+	return false
+}
+
+// AggregateStatusEvent returns a status event where:
+//   - The status is set to the aggregate status of the events in the eventMap
+//   - The timestamp is set to the latest timestamp of the events in the eventMap
+//   - For an error status, the event will have same error as the most current event of the same
+//     error type from the eventMap
+//
+// When the most recent event already carries the aggregate status, that event itself is
+// returned (not a copy); otherwise a new synthetic event is created.
+//
+// If eventMap is empty (or nil) there is nothing to aggregate: a StatusNone event with no
+// error and a zero timestamp is returned rather than dereferencing a nil event.
+//
+// Every event in eventMap must be non-nil.
+func AggregateStatusEvent[K comparable](eventMap map[K]*StatusEvent) *StatusEvent {
+	// Guard the empty case explicitly: the loop below would leave lastEvent nil and the
+	// subsequent lastEvent.Status() call would panic.
+	if len(eventMap) == 0 {
+		return &StatusEvent{status: StatusNone}
+	}
+
+	var lastEvent, lastMatchingEvent *StatusEvent
+	aggregateStatus := AggregateStatus(eventMap)
+
+	for _, ev := range eventMap {
+		// Track the most recent event overall.
+		if lastEvent == nil || lastEvent.timestamp.Before(ev.timestamp) {
+			lastEvent = ev
+		}
+		// Track the most recent event whose status equals the aggregate status.
+		if aggregateStatus == ev.Status() &&
+			(lastMatchingEvent == nil || lastMatchingEvent.timestamp.Before(ev.timestamp)) {
+			lastMatchingEvent = ev
+		}
+	}
+
+	// the effective status matches an existing event
+	if lastEvent.Status() == aggregateStatus {
+		return lastEvent
+	}
+
+	// the effective status requires a synthetic event
+	aggregateEvent := &StatusEvent{
+		status:    aggregateStatus,
+		timestamp: lastEvent.timestamp,
+	}
+	// An aggregate error status is only ever produced when at least one event carries that
+	// status, so lastMatchingEvent is non-nil on this path.
+	if StatusIsError(aggregateStatus) {
+		aggregateEvent.err = lastMatchingEvent.err
+	}
+
+	return aggregateEvent
+}