From 5a781e67c9007eab31a10dec19abb63584c79f9c Mon Sep 17 00:00:00 2001
From: Dominic
Date: Wed, 3 Jul 2024 06:15:26 +0000
Subject: [PATCH] WIP add support for node pool placement group config

See #5919
---
Note: the added lines were revised by hand; the blob ids on the "index"
lines below still refer to the original revision of this patch.

 .../hetzner/hetzner_cloud_provider.go         | 82 +++++++++++++++++++
 .../cloudprovider/hetzner/hetzner_manager.go  |  7 +-
 .../hetzner/hetzner_node_group.go             |  1 +
 3 files changed, 87 insertions(+), 3 deletions(-)

diff --git a/cluster-autoscaler/cloudprovider/hetzner/hetzner_cloud_provider.go b/cluster-autoscaler/cloudprovider/hetzner/hetzner_cloud_provider.go
index ad9e35d2e85e..8bad29d5ec70 100644
--- a/cluster-autoscaler/cloudprovider/hetzner/hetzner_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/hetzner/hetzner_cloud_provider.go
@@ -17,6 +17,7 @@ limitations under the License.
 package hetzner
 
 import (
+	"context"
 	"fmt"
 	"regexp"
 	"strconv"
@@ -27,6 +28,7 @@ import (
 	apiv1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/hetzner/hcloud-go/hcloud"
 	"k8s.io/autoscaler/cluster-autoscaler/config"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
@@ -179,6 +181,36 @@ func (d *HetznerCloudProvider) Refresh() error {
 	return nil
 }
 
+// getLargePlacementGroups returns the placement groups whose node groups could
+// together grow beyond threshold nodes, judged by the sum of their maxSize.
+// Node groups without a placement group are skipped. Totals are keyed by
+// placement group name because nodeGroup.placementGroup is a pointer and each
+// node group holds its own pointer to the group it shares with others.
+func getLargePlacementGroups(nodeGroups map[string]*hetznerNodeGroup, threshold int) []hcloud.PlacementGroup {
+	placementGroups := make(map[string]*hcloud.PlacementGroup)
+	placementGroupTotals := make(map[string]int)
+
+	// Calculate totals for each placement group
+	for _, nodeGroup := range nodeGroups {
+		placementGroup := nodeGroup.placementGroup
+		if placementGroup == nil { // No placement group defined
+			continue
+		}
+		placementGroups[placementGroup.Name] = placementGroup
+		placementGroupTotals[placementGroup.Name] += nodeGroup.maxSize
+	}
+
+	// Collect placement groups with total maxSize > threshold
+	var largePlacementGroups []hcloud.PlacementGroup
+	for name, totalMaxSize := range placementGroupTotals {
+		if totalMaxSize > threshold {
+			largePlacementGroups = append(largePlacementGroups, *placementGroups[name])
+		}
+	}
+
+	return largePlacementGroups
+}
+
 // BuildHetzner builds the Hetzner cloud provider.
 func BuildHetzner(_ config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
 	manager, err := newManager()
@@ -226,6 +258,56 @@ func BuildHetzner(_ config.AutoscalingOptions, do cloudprovider.NodeGroupDiscove
 			targetSize:         len(servers),
 			clusterUpdateMutex: &clusterUpdateLock,
 		}
+
+		// Resolve the placement group configured for this node pool, if any.
+		// Only the new config format carries per-pool settings.
+		var placementGroupRef string
+		if manager.clusterConfig.IsUsingNewFormat {
+			placementGroupRef = manager.clusterConfig.NodeConfigs[spec.name].PlacementGroup
+		}
+
+		if placementGroupRef != "" {
+			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+			placementGroup, _, err := manager.client.PlacementGroup.Get(ctx, placementGroupRef)
+			timedOut := ctx.Err() == context.DeadlineExceeded
+			// Cancel explicitly: a defer inside this loop would only run
+			// once BuildHetzner returns.
+			cancel()
+
+			if err != nil {
+				// The returned error may wrap the context error, so the
+				// timeout is detected via the context itself.
+				if timedOut {
+					klog.Fatalf("Timed out checking if placement group `%s` exists.", placementGroupRef)
+				}
+				klog.Fatalf("Failed to verify if placement group `%s` exists error: %v", placementGroupRef, err)
+			}
+
+			// A nil placement group without an error means no match was found.
+			if placementGroup == nil {
+				klog.Fatalf("The requested placement group `%s` does not appear to exist.", placementGroupRef)
+			}
+
+			manager.nodeGroups[spec.name].placementGroup = placementGroup
+		}
+	}
+
+	// Maximum number of nodes allowed in a single placement group.
+	const maxPlacementGroupSize = 10
+
+	// Fail if any placement group could grow over the max size
+	largePlacementGroups := getLargePlacementGroups(manager.nodeGroups, maxPlacementGroupSize)
+	if len(largePlacementGroups) > 0 {
+		// Gather placement group names
+		var placementGroupNames string
+		for i, pg := range largePlacementGroups {
+			if i > 0 {
+				placementGroupNames += ", "
+			}
+			placementGroupNames += pg.Name
+		}
+
+		klog.Fatalf("The following placement groups have a potential size over the allowed maximum of %d: %s.", maxPlacementGroupSize, placementGroupNames)
 	}
 
 	return provider
diff --git a/cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go b/cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go
index a4071b213792..4f5f5a127920 100644
--- a/cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go
+++ b/cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go
@@ -73,9 +73,10 @@ type ImageList struct {
 
 // NodeConfig holds the configuration for a single nodepool
 type NodeConfig struct {
-	CloudInit string
-	Taints    []apiv1.Taint
-	Labels    map[string]string
+	CloudInit      string
+	PlacementGroup string
+	Taints         []apiv1.Taint
+	Labels         map[string]string
 }
 
 // LegacyConfig holds the configuration in the legacy format
diff --git a/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go b/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go
index 6fef8f37c5fd..99af3900bc3f 100644
--- a/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go
+++ b/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go
@@ -47,6 +47,7 @@ type hetznerNodeGroup struct {
 	instanceType string
 
 	clusterUpdateMutex *sync.Mutex
+	placementGroup     *hcloud.PlacementGroup
 }
 
 type hetznerNodeGroupSpec struct {