diff --git a/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go b/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go index e91e10fb6e84..bcebbfa9db7e 100644 --- a/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go @@ -479,7 +479,12 @@ func (tng *TestNodeGroup) Nodes() ([]cloudprovider.Instance, error) { instances := make([]cloudprovider.Instance, 0) for node, nodegroup := range tng.cloudProvider.nodes { if nodegroup == tng.id { - instances = append(instances, cloudprovider.Instance{Id: node}) + instances = append(instances, cloudprovider.Instance{ + Id: node, + Status: &cloudprovider.InstanceStatus{ + State: cloudprovider.InstanceRunning, + }, + }) } } return instances, nil diff --git a/cluster-autoscaler/clusterstate/clusterstate.go b/cluster-autoscaler/clusterstate/clusterstate.go index 21affaeae393..19cffd1dc2ba 100644 --- a/cluster-autoscaler/clusterstate/clusterstate.go +++ b/cluster-autoscaler/clusterstate/clusterstate.go @@ -988,7 +988,9 @@ func (csr *ClusterStateRegistry) getCloudProviderNodeInstances() (map[string][]c return csr.cloudProviderNodeInstancesCache.GetCloudProviderNodeInstances() } -// Calculates which of the existing cloud provider nodes are not registered in Kubernetes. +// Calculates which of the existing cloud provider nodes are not yet registered in Kubernetes. +// As we are expecting for those instances to be Ready soon (O(~minutes)), to speed up the scaling process, +// we are injecting a temporary, fake nodes to continue scaling based on in-memory cluster state. func getNotRegisteredNodes(allNodes []*apiv1.Node, cloudProviderNodeInstances map[string][]cloudprovider.Instance, time time.Time) []UnregisteredNode { registered := sets.NewString() for _, node := range allNodes { @@ -997,7 +999,7 @@ func getNotRegisteredNodes(allNodes []*apiv1.Node, cloudProviderNodeInstances ma notRegistered := make([]UnregisteredNode, 0) for _, instances := range cloudProviderNodeInstances { for _, instance := range instances { - if !registered.Has(instance.Id) { + if !registered.Has(instance.Id) && expectedToRegister(instance) { notRegistered = append(notRegistered, UnregisteredNode{ Node: fakeNode(instance, cloudprovider.FakeNodeUnregistered), UnregisteredSince: time, @@ -1008,6 +1010,10 @@ func getNotRegisteredNodes(allNodes []*apiv1.Node, cloudProviderNodeInstances ma return notRegistered } +func expectedToRegister(instance cloudprovider.Instance) bool { + return instance.Status != nil && instance.Status.State != cloudprovider.InstanceDeleting && instance.Status.ErrorInfo == nil +} + // Calculates which of the registered nodes in Kubernetes that do not exist in cloud provider. func (csr *ClusterStateRegistry) getCloudProviderDeletedNodes(allNodes []*apiv1.Node) []*apiv1.Node { nodesRemoved := make([]*apiv1.Node, 0) diff --git a/cluster-autoscaler/clusterstate/utils/node_instances_cache_test.go b/cluster-autoscaler/clusterstate/utils/node_instances_cache_test.go index 68006a5a1f19..54881fd62974 100644 --- a/cluster-autoscaler/clusterstate/utils/node_instances_cache_test.go +++ b/cluster-autoscaler/clusterstate/utils/node_instances_cache_test.go @@ -29,20 +29,20 @@ import ( func TestCloudProviderNodeInstancesCache(t *testing.T) { // Fresh entry for node group in cache. nodeNg1_1 := BuildTestNode("ng1-1", 1000, 1000) - instanceNg1_1 := cloudprovider.Instance{Id: nodeNg1_1.Name} + instanceNg1_1 := buildRunningInstance(nodeNg1_1.Name) // Fresh entry for node group in cache - checks Invalidate function. nodeNg2_1 := BuildTestNode("ng2-1", 1000, 1000) - instanceNg2_1 := cloudprovider.Instance{Id: nodeNg2_1.Name} + instanceNg2_1 := buildRunningInstance(nodeNg2_1.Name) nodeNg2_2 := BuildTestNode("ng2-2", 1000, 1000) - instanceNg2_2 := cloudprovider.Instance{Id: nodeNg2_2.Name} + instanceNg2_2 := buildRunningInstance(nodeNg2_2.Name) // Stale entry for node group in cache - check Refresh function. nodeNg3_1 := BuildTestNode("ng3-1", 1000, 1000) - instanceNg3_1 := cloudprovider.Instance{Id: nodeNg3_1.Name} + instanceNg3_1 := buildRunningInstance(nodeNg3_1.Name) nodeNg3_2 := BuildTestNode("ng3-2", 1000, 1000) - instanceNg3_2 := cloudprovider.Instance{Id: nodeNg3_2.Name} + instanceNg3_2 := buildRunningInstance(nodeNg3_2.Name) // Removed node group. nodeNg4_1 := BuildTestNode("ng4-1", 1000, 1000) - instanceNg4_1 := cloudprovider.Instance{Id: nodeNg4_1.Name} + instanceNg4_1 := buildRunningInstance(nodeNg4_1.Name) provider := testprovider.NewTestCloudProvider(nil, nil) provider.AddNodeGroup("ng1", 1, 10, 1) @@ -90,3 +90,12 @@ func TestCloudProviderNodeInstancesCache(t *testing.T) { assert.Equal(t, map[string][]cloudprovider.Instance{"ng1": {instanceNg1_1}, "ng2": {instanceNg2_2}, "ng3": {instanceNg3_2}}, results) assert.Equal(t, 3, len(cache.cloudProviderNodeInstances)) } + +func buildRunningInstance(name string) cloudprovider.Instance { + return cloudprovider.Instance{ + Id: name, + Status: &cloudprovider.InstanceStatus{ + State: cloudprovider.InstanceRunning, + }, + } +}