From 4a52633443e8553f1cc9c80037923e8e5379c2c3 Mon Sep 17 00:00:00 2001 From: Waleed Malik Date: Mon, 22 Apr 2024 20:47:41 +0500 Subject: [PATCH] Fix vSphere anti-affinity Signed-off-by: Waleed Malik --- .../provider/vsphere/provider.go | 16 +++-- pkg/cloudprovider/provider/vsphere/rule.go | 62 ++++++------------- 2 files changed, 26 insertions(+), 52 deletions(-) diff --git a/pkg/cloudprovider/provider/vsphere/provider.go b/pkg/cloudprovider/provider/vsphere/provider.go index c3452bde0..a55ba2c73 100644 --- a/pkg/cloudprovider/provider/vsphere/provider.go +++ b/pkg/cloudprovider/provider/vsphere/provider.go @@ -384,8 +384,7 @@ func (p *provider) create(ctx context.Context, log *zap.SugaredLogger, machine * } if config.VMAntiAffinity { - machineSetName := machine.Name[:strings.LastIndex(machine.Name, "-")] - if err := p.createOrUpdateVMAntiAffinityRule(ctx, session, machineSetName, config); err != nil { + if err := p.createOrUpdateVMAntiAffinityRule(ctx, session, machine, config); err != nil { return nil, fmt.Errorf("failed to add VM to anti affinity rule: %w", err) } } @@ -452,6 +451,12 @@ func (p *provider) Cleanup(ctx context.Context, log *zap.SugaredLogger, machine return false, fmt.Errorf("failed to delete tags: %w", err) } + if config.VMAntiAffinity { + if err := p.createOrUpdateVMAntiAffinityRule(ctx, session, machine, config); err != nil { + return false, fmt.Errorf("failed to update VMs in anti-affinity rule: %w", err) + } + } + powerState, err := virtualMachine.PowerState(ctx) if err != nil { return false, fmt.Errorf("failed to get virtual machine power state: %w", err) @@ -507,13 +512,6 @@ func (p *provider) Cleanup(ctx context.Context, log *zap.SugaredLogger, machine return false, fmt.Errorf("failed to destroy vm %s: %w", virtualMachine.Name(), err) } - if config.VMAntiAffinity { - machineSetName := machine.Name[:strings.LastIndex(machine.Name, "-")] - if err := p.createOrUpdateVMAntiAffinityRule(ctx, session, machineSetName, config); err != nil { - return false, fmt.Errorf("failed to add VM to anti affinity rule: %w", err) - } - } - if pc.OperatingSystem != providerconfigtypes.OperatingSystemFlatcar { filemanager := datastore.NewFileManager(session.Datacenter, false) diff --git a/pkg/cloudprovider/provider/vsphere/rule.go b/pkg/cloudprovider/provider/vsphere/rule.go index 8df7f3811..0026220bd 100644 --- a/pkg/cloudprovider/provider/vsphere/rule.go +++ b/pkg/cloudprovider/provider/vsphere/rule.go @@ -20,21 +20,21 @@ import ( "context" "errors" "fmt" - "reflect" "strings" - "time" "github.com/aws/smithy-go/ptr" "github.com/vmware/govmomi/find" "github.com/vmware/govmomi/object" "github.com/vmware/govmomi/vim25/mo" "github.com/vmware/govmomi/vim25/types" + + clusterv1alpha1 "github.com/kubermatic/machine-controller/pkg/apis/cluster/v1alpha1" ) // createOrUpdateVMAntiAffinityRule creates or updates an anti affinity rule with the name in the given cluster. // VMs are attached to the rule based on their folder path and name prefix in vsphere. // A minimum of two VMs is required. -func (p *provider) createOrUpdateVMAntiAffinityRule(ctx context.Context, session *Session, name string, config *Config) error { +func (p *provider) createOrUpdateVMAntiAffinityRule(ctx context.Context, session *Session, machine *clusterv1alpha1.Machine, config *Config) error { p.mutex.Lock() defer p.mutex.Unlock() @@ -43,27 +43,31 @@ func (p *provider) createOrUpdateVMAntiAffinityRule(ctx context.Context, session return err } + machineSetName := machine.Name[:strings.LastIndex(machine.Name, "-")] vmsInFolder, err := session.Finder.VirtualMachineList(ctx, strings.Join([]string{config.Folder, "*"}, "/")) if err != nil { if errors.Is(err, &find.NotFoundError{}) { - return removeVMAntiAffinityRule(ctx, session, config.Cluster, name) + return removeVMAntiAffinityRule(ctx, session, config.Cluster, machineSetName) } return err } var ruleVMRef []types.ManagedObjectReference for _, vm := range vmsInFolder { - if strings.HasPrefix(vm.Name(), name) { + // Only add VMs with the same machineSetName to the rule and exclude the machine itself if it is being deleted + if strings.HasPrefix(vm.Name(), machineSetName) && !(vm.Name() == machine.Name && machine.DeletionTimestamp != nil) { ruleVMRef = append(ruleVMRef, vm.Reference()) } } - // minimum of two vms required + // DRS rule must have at least two virtual machine members. if len(ruleVMRef) < 2 { - return removeVMAntiAffinityRule(ctx, session, config.Cluster, name) + return nil + } else if len(ruleVMRef) == 0 { + return removeVMAntiAffinityRule(ctx, session, config.Cluster, machineSetName) } - info, err := findClusterAntiAffinityRuleByName(ctx, cluster, name) + info, err := findClusterAntiAffinityRuleByName(ctx, cluster, machineSetName) if err != nil { return err } @@ -76,7 +80,7 @@ func (p *provider) createOrUpdateVMAntiAffinityRule(ctx context.Context, session ClusterRuleInfo: types.ClusterRuleInfo{ Enabled: ptr.Bool(true), Mandatory: ptr.Bool(false), - Name: name, + Name: machineSetName, UserCreated: ptr.Bool(true), }, } @@ -100,44 +104,16 @@ func (p *provider) createOrUpdateVMAntiAffinityRule(ctx context.Context, session return err } - err = task.WaitEx(ctx) + taskResult, err := task.WaitForResultEx(ctx) if err != nil { - return err + return fmt.Errorf("error waiting for cluster %v reconfiguration to complete", cluster.Name()) } - return waitForRule(ctx, cluster, info) -} - -// waitForRule checks periodically the vsphere api for the ClusterAntiAffinityRule and returns error if the rule was not found after a timeout. -func waitForRule(ctx context.Context, cluster *object.ClusterComputeResource, rule *types.ClusterAntiAffinityRuleSpec) error { - timeout := time.NewTimer(10 * time.Second) - ticker := time.NewTicker(500 * time.Millisecond) - defer timeout.Stop() - defer ticker.Stop() - - for { - select { - case <-timeout.C: - - info, err := findClusterAntiAffinityRuleByName(ctx, cluster, rule.Name) - if err != nil { - return err - } - - if !reflect.DeepEqual(rule, info) { - return fmt.Errorf("expected anti affinity changes not found in vsphere") - } - case <-ticker.C: - info, err := findClusterAntiAffinityRuleByName(ctx, cluster, rule.Name) - if err != nil { - return err - } - - if reflect.DeepEqual(rule, info) { - return nil - } - } + if taskResult.State != types.TaskInfoStateSuccess { + return fmt.Errorf("cluster %v reconfiguration task was not successful", cluster.Name()) } + + return nil } // removeVMAntiAffinityRule removes an anti affinity rule with the name in the given cluster.