Skip to content

Commit

Permalink
Convert status in cluster-autoscaler-status to yaml and add error inf…
Browse files Browse the repository at this point in the history
…o for backoff and more node counts.

Change-Id: Ic68e0d67b7ce9912b605b6c0a3356b4d0e177911
  • Loading branch information
walidghallab committed Dec 28, 2023
1 parent 5469d79 commit f8d78b6
Show file tree
Hide file tree
Showing 11 changed files with 506 additions and 511 deletions.
166 changes: 130 additions & 36 deletions cluster-autoscaler/clusterstate/api/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,14 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// ClusterAutoscalerConditionType is the type of ClusterAutoscalerCondition.
type ClusterAutoscalerConditionType string
// ClusterAutoscalerStatusCondition is the status of the cluster autoscaler.
type ClusterAutoscalerStatusCondition string

const (
// ClusterAutoscalerHealth - is a condition that explains what is the current health
// of ClusterAutoscaler or its node groups.
ClusterAutoscalerHealth ClusterAutoscalerConditionType = "Health"
// ClusterAutoscalerScaleDown is a condition that explains what is the current status
// of a node group with regard to scale down activities.
ClusterAutoscalerScaleDown ClusterAutoscalerConditionType = "ScaleDown"
// ClusterAutoscalerScaleUp is a condition that explains what is the current status
// of a node group with regard to scale up activities.
ClusterAutoscalerScaleUp ClusterAutoscalerConditionType = "ScaleUp"
// ClusterAutoscalerRunning status means that the cluster autoscaler has been initialized and is running.
ClusterAutoscalerRunning ClusterAutoscalerStatusCondition = "Running"
// ClusterAutoscalerInitializing status means that cluster autoscaler is currently being initialized.
ClusterAutoscalerInitializing ClusterAutoscalerStatusCondition = "Initializing"
)

// ClusterAutoscalerConditionStatus is a status of ClusterAutoscalerCondition.
Expand Down Expand Up @@ -69,36 +64,135 @@ const (
ClusterAutoscalerBackoff ClusterAutoscalerConditionStatus = "Backoff"
)

// ClusterAutoscalerCondition describes some aspect of ClusterAutoscaler work.
type ClusterAutoscalerCondition struct {
// Type defines the aspect that the condition describes. For example, it can be Health or ScaleUp/Down activity.
Type ClusterAutoscalerConditionType `json:"type,omitempty"`
// Status of the condition.
Status ClusterAutoscalerConditionStatus `json:"status,omitempty"`
// Message is a free text extra information about the condition. It may contain some
// extra debugging data, like why the cluster is unhealthy.
Message string `json:"message,omitempty"`
// Reason is a unique, one-word, CamelCase reason for the condition's last transition.
Reason string `json:"reason,omitempty"`
// RegisteredUnreadyNodeCount contains node counts of registered but unready nodes.
// Neither field uses omitempty, so both counts are always serialized, even when zero.
type RegisteredUnreadyNodeCount struct {
// Total is the total number of registered but unready nodes.
Total int `json:"total" yaml:"total"`
// ResourceUnready is the number of registered but unready nodes due to a missing resource (e.g. GPU).
ResourceUnready int `json:"resourceUnready" yaml:"resourceUnready"`
}

// RegisteredNodeCount contains node counts of registered nodes.
type RegisteredNodeCount struct {
// Total is the overall number of registered nodes.
// NOTE(review): whether this includes nodes that are being deleted is not visible here - confirm.
Total int `json:"total" yaml:"total"`
// Ready is the number of registered nodes that are ready (presumably; confirm against the code that fills it).
Ready int `json:"ready" yaml:"ready"`
// NotStarted is presumably the number of registered nodes that have not started yet - TODO confirm.
NotStarted int `json:"notStarted" yaml:"notStarted"`
// BeingDeleted is the number of nodes that are being currently deleted. They exist in K8S but are
// not included in NodeGroup.TargetSize(). Unlike the counts above, this field is tagged omitempty,
// so it is left out of the output when it is zero.
BeingDeleted int `json:"beingDeleted,omitempty" yaml:"beingDeleted,omitempty"`
// Unready holds the counts of registered but unready nodes.
// NOTE(review): with encoding/json, omitempty never omits a struct-typed field; the YAML
// behaviour depends on the YAML library in use.
Unready RegisteredUnreadyNodeCount `json:"unready,omitempty" yaml:"unready,omitempty"`
}

// NodeCount contains number of nodes that satisfy different criteria.
type NodeCount struct {
// Registered holds the counts of registered nodes.
// NOTE(review): with encoding/json, omitempty never omits a struct-typed field; the YAML
// behaviour depends on the YAML library in use.
Registered RegisteredNodeCount `json:"registered,omitempty" yaml:"registered,omitempty"`
// LongUnregistered is the number of long-unregistered nodes.
// NOTE(review): what separates "long unregistered" from "unregistered" (presumably a time
// threshold) is decided by the code that populates this struct - confirm there.
LongUnregistered int `json:"longUnregistered" yaml:"longUnregistered"`
// Unregistered is the number of unregistered nodes.
Unregistered int `json:"unregistered" yaml:"unregistered"`
}

// ClusterHealthCondition contains information about health condition for the whole cluster.
type ClusterHealthCondition struct {
// Status of cluster health.
Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
// NodeCounts contains number of nodes that satisfy different criteria in the cluster.
NodeCounts NodeCount `json:"nodeCounts,omitempty" yaml:"nodeCounts,omitempty"`
// LastProbeTime is the last time we probed the condition.
LastProbeTime metav1.Time `json:"lastProbeTime,omitempty"`
LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
// LastTransitionTime is the time since when the condition was in the given state.
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"`
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
}

// ClusterAutoscalerStatus contains ClusterAutoscaler status.
type ClusterAutoscalerStatus struct {
// NodeGroupStatuses contains status information of individual node groups on which CA works.
NodeGroupStatuses []NodeGroupStatus `json:"nodeGroupStatuses,omitempty"`
// ClusterwideConditions contains conditions that apply to the whole autoscaler.
ClusterwideConditions []ClusterAutoscalerCondition `json:"clusterwideConditions,omitempty"`
// NodeGroupHealthCondition contains information about health condition for a node group.
type NodeGroupHealthCondition struct {
// Status of node group health.
Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
// NodeCounts contains number of nodes that satisfy different criteria in the node group.
NodeCounts NodeCount `json:"nodeCounts,omitempty" yaml:"nodeCounts,omitempty"`
// CloudProviderTarget is the target size set by cloud provider.
CloudProviderTarget int `json:"cloudProviderTarget" yaml:"cloudProviderTarget"`
// MinSize is the CA min size of a node group.
MinSize int `json:"minSize" yaml:"minSize"`
// MaxSize is the CA max size of a node group.
MaxSize int `json:"maxSize" yaml:"maxSize"`
// LastProbeTime is the last time we probed the condition.
LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
// LastTransitionTime is the time since when the condition was in the given state.
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
}

// ClusterScaleUpCondition contains information about scale up condition for the whole cluster.
type ClusterScaleUpCondition struct {
// Status is the status of the cluster-wide scale up.
Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
// LastProbeTime is the last time we probed the condition.
LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
// LastTransitionTime is the time since when the condition was in the given state.
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
}

// BackoffInfo contains error information that caused the backoff.
// Both fields are tagged omitempty, so an empty string is left out of the output.
type BackoffInfo struct {
// ErrorCode is a specific error code for the error condition.
ErrorCode string `json:"errorCode,omitempty" yaml:"errorCode,omitempty"`
// ErrorMessage is a human readable description of the error condition.
ErrorMessage string `json:"errorMessage,omitempty" yaml:"errorMessage,omitempty"`
}

// NodeGroupStatus contains status of a group of nodes controlled by ClusterAutoscaler.
// NodeGroupScaleUpCondition contains information about scale up condition for a node group.
type NodeGroupScaleUpCondition struct {
// Status of the scale up.
Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
// BackoffInfo contains error information that caused the backoff.
// NOTE(review): presumably only meaningful while Status is Backoff - confirm against the code that fills it.
BackoffInfo BackoffInfo `json:"backoffInfo,omitempty" yaml:"backoffInfo,omitempty"`
// LastProbeTime is the last time we probed the condition.
LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
// LastTransitionTime is the time since when the condition was in the given state.
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
}

// ScaleDownCondition contains information about scale down condition for a node group or the whole cluster.
type ScaleDownCondition struct {
// Status of the scale down.
Status ClusterAutoscalerConditionStatus `json:"status,omitempty" yaml:"status,omitempty"`
// Candidates is the number of candidates for the scale down. It is tagged omitempty, so a
// count of zero is left out of the output rather than printed as 0.
Candidates int `json:"candidates,omitempty" yaml:"candidates,omitempty"`
// LastProbeTime is the last time we probed the condition.
LastProbeTime metav1.Time `json:"lastProbeTime,omitempty" yaml:"lastProbeTime,omitempty"`
// LastTransitionTime is the time since when the condition was in the given state.
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" yaml:"lastTransitionTime,omitempty"`
}

// ClusterWideStatus contains status that applies to the whole cluster.
type ClusterWideStatus struct {
// Health contains information about health condition of the cluster.
Health ClusterHealthCondition `json:"health,omitempty" yaml:"health,omitempty"`
// ScaleUp contains information about scale up condition of the cluster.
ScaleUp ClusterScaleUpCondition `json:"scaleUp,omitempty" yaml:"scaleUp,omitempty"`
// ScaleDown contains information about scale down condition of the cluster.
ScaleDown ScaleDownCondition `json:"scaleDown,omitempty" yaml:"scaleDown,omitempty"`
}

// NodeGroupStatus contains status of an individual node group on which CA works.
type NodeGroupStatus struct {
// ProviderID is the cloud-provider-specific name of the node group. On GCE it will be equal
// to MIG url, on AWS it will be ASG name, etc.
ProviderID string `json:"providerID,omitempty"`
// Conditions is a list of conditions that describe the state of the node group.
Conditions []ClusterAutoscalerCondition `json:"conditions,omitempty"`
// Name of the node group.
Name string `json:"name,omitempty" yaml:"name,omitempty"`
// Health contains information about health condition of the node group.
Health NodeGroupHealthCondition `json:"health,omitempty" yaml:"health,omitempty"`
// ScaleUp contains information about scale up condition of the node group.
ScaleUp NodeGroupScaleUpCondition `json:"scaleUp,omitempty" yaml:"scaleUp,omitempty"`
// ScaleDown contains information about scale down condition of the node group.
ScaleDown ScaleDownCondition `json:"scaleDown,omitempty" yaml:"scaleDown,omitempty"`
}

// ClusterAutoscalerStatus contains ClusterAutoscaler status.
type ClusterAutoscalerStatus struct {
// Time of the cluster autoscaler status, stored as a string.
// NOTE(review): the timestamp format is chosen by the code that fills this field - confirm there.
Time string `json:"time,omitempty" yaml:"time,omitempty"`
// AutoscalerStatus contains status of ClusterAutoscaler (e.g. 'Initializing' & 'Running').
AutoscalerStatus ClusterAutoscalerStatusCondition `json:"autoscalerStatus,omitempty" yaml:"autoscalerStatus,omitempty"`
// Message contains extra information about the status.
Message string `json:"message,omitempty" yaml:"message,omitempty"`
// ClusterWide contains conditions that apply to the whole cluster.
ClusterWide ClusterWideStatus `json:"clusterWide,omitempty" yaml:"clusterWide,omitempty"`
// NodeGroups contains status information of individual node groups on which CA works.
NodeGroups []NodeGroupStatus `json:"nodeGroups,omitempty" yaml:"nodeGroups,omitempty"`
}
92 changes: 0 additions & 92 deletions cluster-autoscaler/clusterstate/api/utils.go

This file was deleted.

91 changes: 0 additions & 91 deletions cluster-autoscaler/clusterstate/api/utils_test.go

This file was deleted.

Loading

0 comments on commit f8d78b6

Please sign in to comment.