Skip to content

Commit

Permalink
Convert status in cluster-autoscaler-status to yaml and add error inf…
Browse files Browse the repository at this point in the history
…o for backoff

Change-Id: Ic68e0d67b7ce9912b605b6c0a3356b4d0e177911
  • Loading branch information
walidghallab committed Dec 15, 2023
1 parent b95adf1 commit 802c17b
Show file tree
Hide file tree
Showing 10 changed files with 466 additions and 487 deletions.
162 changes: 137 additions & 25 deletions cluster-autoscaler/clusterstate/api/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,36 +69,148 @@ const (
ClusterAutoscalerBackoff ClusterAutoscalerConditionStatus = "Backoff"
)

// ClusterAutoscalerCondition describes some aspect of ClusterAutoscaler work.
type ClusterAutoscalerCondition struct {
// Type defines the aspect that the condition describes. For example, it can be Health or ScaleUp/Down activity.
Type ClusterAutoscalerConditionType `json:"type,omitempty"`
// Status of the condition.
Status ClusterAutoscalerConditionStatus `json:"status,omitempty"`
// Message is a free text extra information about the condition. It may contain some
// extra debugging data, like why the cluster is unhealthy.
Message string `json:"message,omitempty"`
// Reason is a unique, one-word, CamelCase reason for the condition's last transition.
Reason string `json:"reason,omitempty"`
// RegisteredUnreadyNodeCount contains node counts of registered but unready nodes.
// Neither field uses omitempty, so zero counts are serialized explicitly.
type RegisteredUnreadyNodeCount struct {
// Total is the total number of registered but unready nodes.
Total int `json:"total" yaml:"total"`
// ResourceUnready is the number of registered but unready nodes due to a missing resource (e.g. GPU).
ResourceUnready int `json:"resourceUnready" yaml:"resourceUnready"`
}

// RegisteredNodeCount contains node counts of registered nodes.
type RegisteredNodeCount struct {
// Total is the total number of registered nodes.
Total int `json:"total" yaml:"total"`
// Ready is the number of registered nodes that are ready.
Ready int `json:"ready" yaml:"ready"`
// NotStarted is the number of registered nodes counted as not started.
// NOTE(review): the exact criteria are defined by the code that fills this struct — confirm there.
NotStarted int `json:"notStarted" yaml:"notStarted"`
// Unready contains the counts of registered but unready nodes.
Unready RegisteredUnreadyNodeCount `json:"unready,omitempty" yaml:"unready,omitempty"`
}

// HealthConditionNodeCount contains number of nodes that satisfy different criteria for health condition.
type HealthConditionNodeCount struct {
// Registered contains the counts of registered nodes.
Registered RegisteredNodeCount `json:"registered,omitempty" yaml:"registered,omitempty"`
// LongUnregistered is the number of nodes counted as long-unregistered.
// NOTE(review): presumably nodes that stayed unregistered past some threshold — confirm against the code that populates it.
LongUnregistered int `json:"longUnregistered" yaml:"longUnregistered"`
// Unregistered is the number of nodes counted as unregistered.
Unregistered int `json:"unregistered" yaml:"unregistered"`
}

// ClusterHealthCondition contains information about health condition for the whole cluster:
// its status, the node counts behind it, and the probe/transition timestamps.
//
// NOTE(review): LastProbeTime and LastTransitionTime are each declared twice below
// (once with only a json tag, once with json and yaml tags). This looks like removed
// and added diff lines rendered together — confirm which declaration belongs.
type ClusterHealthCondition struct {
// Status of cluster health.
Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
// NodeCounts contains number of nodes that satisfy different criteria for health condition.
NodeCounts HealthConditionNodeCount `json:"nodeCounts,omitempty" yaml:"nodeCounts,omitempty"`
// LastProbeTime is the last time we probed the condition.
LastProbeTime metav1.Time `json:"lastProbeTime,omitempty"`
LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
// LastTransitionTime is the time since when the condition was in the given state.
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"`
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
}

// ClusterAutoscalerStatus contains ClusterAutoscaler status.
type ClusterAutoscalerStatus struct {
// NodeGroupStatuses contains status information of individual node groups on which CA works.
NodeGroupStatuses []NodeGroupStatus `json:"nodeGroupStatuses,omitempty"`
// ClusterwideConditions contains conditions that apply to the whole autoscaler.
ClusterwideConditions []ClusterAutoscalerCondition `json:"clusterwideConditions,omitempty"`
// NodeGroupHealthCondition contains information about health condition for a node group.
type NodeGroupHealthCondition struct {
// Status of node group health.
Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
// NodeCounts contains number of nodes that satisfy different criteria for health condition.
NodeCounts HealthConditionNodeCount `json:"nodeCounts,omitempty" yaml:"nodeCounts,omitempty"`
// CloudProviderTarget is the target size set by cloud provider.
CloudProviderTarget int `json:"cloudProviderTarget" yaml:"cloudProviderTarget"`
// MinSize is the CA min size of a node group.
MinSize int `json:"minSize" yaml:"minSize"`
// MaxSize is the CA max size of a node group.
MaxSize int `json:"maxSize" yaml:"maxSize"`
// LastProbeTime is the last time we probed the condition.
LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
// LastTransitionTime is the time since when the condition was in the given state.
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
}

// ClusterScaleUpNodeCount contains number of nodes that satisfy different criteria for scale up condition of the whole cluster.
type ClusterScaleUpNodeCount struct {
// Ready is the number of ready nodes in the cluster.
Ready int `json:"ready" yaml:"ready"`
// Registered is the number of registered nodes in the cluster.
Registered int `json:"registered" yaml:"registered"`
}

// NodeGroupScaleUpNodeCount contains number of nodes that satisfy different criteria for scale up condition of a node group.
type NodeGroupScaleUpNodeCount struct {
// Ready is the number of ready nodes in the node group.
Ready int `json:"ready" yaml:"ready"`
// CloudProviderTarget is the target size set by cloud provider.
CloudProviderTarget int `json:"cloudProviderTarget" yaml:"cloudProviderTarget"`
}

// ClusterScaleUpCondition contains information about scale up condition for the whole cluster:
// its status, the node counts behind it, and the probe/transition timestamps.
type ClusterScaleUpCondition struct {
// Status of the scale up.
Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
// NodeCounts contains number of nodes that satisfy different criteria for scale up condition.
NodeCounts ClusterScaleUpNodeCount `json:"nodeCounts,omitempty" yaml:"nodeCounts,omitempty"`
// LastProbeTime is the last time we probed the condition.
LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
// LastTransitionTime is the time since when the condition was in the given state.
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
}

// BackoffInfo contains error information that caused the backoff.
// Both fields use omitempty, so empty strings are left out of the serialized output.
type BackoffInfo struct {
// ErrorCode is a specific error code for the error condition.
ErrorCode string `json:"errorCode,omitempty" yaml:"errorCode,omitempty"`
// ErrorMessage is a human-readable description of the error condition.
ErrorMessage string `json:"errorMessage,omitempty" yaml:"errorMessage,omitempty"`
}

// NodeGroupStatus contains status of a group of nodes controlled by ClusterAutoscaler.
// NodeGroupScaleUpCondition contains information about scale up condition for a node group.
type NodeGroupScaleUpCondition struct {
// Status of the scale up.
Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
// NodeCounts contains number of nodes that satisfy different criteria for scale up condition.
NodeCounts NodeGroupScaleUpNodeCount `json:"nodeCounts,omitempty" yaml:"nodeCounts,omitempty"`
// BackoffInfo contains error information that caused the backoff.
BackoffInfo BackoffInfo `json:"backoffInfo,omitempty" yaml:"backoffInfo,omitempty"`
// LastProbeTime is the last time we probed the condition.
LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
// LastTransitionTime is the time since when the condition was in the given state.
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
}

// ScaleDownCondition contains information about scale down condition for a node group or the whole cluster.
type ScaleDownCondition struct {
// Status of the scale down.
Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
// Candidates is the number of candidates for the scale down.
// Because of omitempty, a count of zero is left out of the serialized output.
Candidates int `json:"candidates,omitempty" yaml:"candidates,omitempty"`
// LastProbeTime is the last time we probed the condition.
LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
// LastTransitionTime is the time since when the condition was in the given state.
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
}

// ClusterWideStatus contains status that applies to the whole cluster.
//
// NOTE(review): encoding/json never treats a struct value as empty, so the json
// omitempty on these struct-typed fields has no effect there; whether the yaml
// omitempty drops zero-valued structs depends on the YAML library in use — confirm.
type ClusterWideStatus struct {
// Health contains information about health condition of the cluster.
Health ClusterHealthCondition `json:"health,omitempty" yaml:"health,omitempty"`
// ScaleUp contains information about scale up condition of the cluster.
ScaleUp ClusterScaleUpCondition `json:"scaleUp,omitempty" yaml:"scaleUp,omitempty"`
// ScaleDown contains information about scale down condition of the cluster.
ScaleDown ScaleDownCondition `json:"scaleDown,omitempty" yaml:"scaleDown,omitempty"`
}

// NodeGroupStatus contains status of an individual node group on which CA works.
type NodeGroupStatus struct {
// ProviderID is the cloud-provider-specific name of the node group. On GCE it will be equal
// to MIG url, on AWS it will be ASG name, etc.
ProviderID string `json:"providerID,omitempty"`
// Conditions is a list of conditions that describe the state of the node group.
Conditions []ClusterAutoscalerCondition `json:"conditions,omitempty"`
// Name of the node group.
Name string `json:"name,omitempty" yaml:"name,omitempty"`
// Health contains information about health condition of the node group.
Health NodeGroupHealthCondition `json:"health,omitempty" yaml:"health,omitempty"`
// ScaleUp contains information about scale up condition of the node group.
ScaleUp NodeGroupScaleUpCondition `json:"scaleUp,omitempty" yaml:"scaleUp,omitempty"`
// ScaleDown contains information about scale down condition of the node group.
ScaleDown ScaleDownCondition `json:"scaleDown,omitempty" yaml:"scaleDown,omitempty"`
}

// ClusterAutoscalerStatus contains ClusterAutoscaler status: a timestamp, the overall
// autoscaler state, an optional message, cluster-wide conditions and per-node-group statuses.
type ClusterAutoscalerStatus struct {
// Time of the cluster autoscaler status, stored as a string.
// NOTE(review): the timestamp format is not visible here — confirm against the code that sets it.
Time string `json:"time,omitempty" yaml:"time,omitempty"`
// AutoscalerStatus contains status of ClusterAutoscaler (e.g. 'Initializing' & 'Running').
AutoscalerStatus string `json:"autoscalerStatus,omitempty" yaml:"autoscalerStatus,omitempty"`
// Message contains extra information about the status.
Message string `json:"message,omitempty" yaml:"message,omitempty"`
// ClusterWide contains conditions that apply to the whole cluster.
ClusterWide ClusterWideStatus `json:"clusterWide,omitempty" yaml:"clusterWide,omitempty"`
// NodeGroups contains status information of individual node groups on which CA works.
NodeGroups []NodeGroupStatus `json:"nodeGroups,omitempty" yaml:"nodeGroups,omitempty"`
}
92 changes: 0 additions & 92 deletions cluster-autoscaler/clusterstate/api/utils.go

This file was deleted.

91 changes: 0 additions & 91 deletions cluster-autoscaler/clusterstate/api/utils_test.go

This file was deleted.

Loading

0 comments on commit 802c17b

Please sign in to comment.