From d6f04ddeacb1e157f7c0befe45492932c96d6136 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Emmanuel=20BENO=C3=8ET?=
Date: Sun, 20 Aug 2023 16:33:02 +0200
Subject: [PATCH 1/5] fix(vm): wait for VMs to actually stop when sending a shutdown command

Due to how a Proxmox cluster reacts to a VM shutdown command when running in
HA mode, the VM might still be running when the shutdown API call returns.
This commit adds a loop that actively waits for the VM's status to change to
"stopped" (while also accounting for the shutdown timeout) after the call's
return.
---
 proxmoxtf/resource/vm.go | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/proxmoxtf/resource/vm.go b/proxmoxtf/resource/vm.go
index 2ee3956d4..e7110207b 100644
--- a/proxmoxtf/resource/vm.go
+++ b/proxmoxtf/resource/vm.go
@@ -1591,7 +1591,9 @@ func deleteIdeDrives(ctx context.Context, vmAPI *vms.Client, itf1 string, itf2 s
 	return nil
 }
 
-// Shutdown the VM.
+// Shutdown the VM. When the API call returns without error, we check the VM's actual state: if it is still running,
+// the VM is most likely used as a High Availability resource, and we have to wait for the HA manager to actually
+// shut it down.
 func vmShutdown(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData) diag.Diagnostics {
 	tflog.Debug(ctx, "Shutting down VM")
 
@@ -1606,7 +1608,24 @@ func vmShutdown(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData)
 		return diag.FromErr(e)
 	}
 
-	return nil
+	// Wait for VM to stop
+	for {
+		vmStatus, err := vmAPI.GetVMStatus(ctx)
+		if err != nil {
+			return diag.FromErr(err)
+		}
+
+		if vmStatus.Status == "stopped" {
+			return nil
+		}
+
+		if shutdownTimeout > 0 {
+			time.Sleep(time.Second)
+			shutdownTimeout--
+		} else {
+			return diag.Errorf("timed out waiting for VM to stop")
+		}
+	}
 }
 
 func vmCreateClone(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics {

From 7e32977e1ddc982c7f8645e57fcf47d68361357c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Emmanuel=20BENO=C3=8ET?=
Date: Sun, 20 Aug 2023 16:45:49 +0200
Subject: [PATCH 2/5] chore(refactoring): extracted VM state change wait loop into a separate function

---
 proxmoxtf/resource/vm.go | 41 ++++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/proxmoxtf/resource/vm.go b/proxmoxtf/resource/vm.go
index e7110207b..9ad9cce54 100644
--- a/proxmoxtf/resource/vm.go
+++ b/proxmoxtf/resource/vm.go
@@ -1591,6 +1591,28 @@ func deleteIdeDrives(ctx context.Context, vmAPI *vms.Client, itf1 string, itf2 s
 	return nil
 }
 
+// Wait until a VM reaches the desired state. An error is returned if the VM doesn't reach the
+// desired state within the given timeout.
+func vmWaitState(ctx context.Context, vmAPI *vms.Client, state string, timeout int) diag.Diagnostics {
+	for {
+		vmStatus, err := vmAPI.GetVMStatus(ctx)
+		if err != nil {
+			return diag.FromErr(err)
+		}
+
+		if vmStatus.Status == state {
+			return nil
+		}
+
+		if timeout > 0 {
+			time.Sleep(time.Second)
+			timeout--
+		} else {
+			return diag.Errorf("timed out waiting for VM to stop")
+		}
+	}
+}
+
 // Shutdown the VM. When the API call returns without error, we check the VM's actual state: if it is still running,
 // the VM is most likely used as a High Availability resource, and we have to wait for the HA manager to actually
 // shut it down.
@@ -1608,24 +1630,7 @@ func vmShutdown(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData)
 		return diag.FromErr(e)
 	}
 
-	// Wait for VM to stop
-	for {
-		vmStatus, err := vmAPI.GetVMStatus(ctx)
-		if err != nil {
-			return diag.FromErr(err)
-		}
-
-		if vmStatus.Status == "stopped" {
-			return nil
-		}
-
-		if shutdownTimeout > 0 {
-			time.Sleep(time.Second)
-			shutdownTimeout--
-		} else {
-			return diag.Errorf("timed out waiting for VM to stop")
-		}
-	}
+	return vmWaitState(ctx, vmAPI, "stopped", shutdownTimeout)
 }
 
 func vmCreateClone(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics {

From f33cd20e0b71a2f185169bbc40faf4056503a72c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Emmanuel=20BENO=C3=8ET?=
Date: Sun, 20 Aug 2023 17:02:54 +0200
Subject: [PATCH 3/5] fix(vm): wait for VMs to actually start after requesting them from the cluster

This commit forces the plugin to wait for a VM to actually run after
requesting it to be started. This avoids problems with Proxmox's High
Availability mode, where a start request may not be immediately honoured
by the cluster.
---
 proxmoxtf/resource/vm.go | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/proxmoxtf/resource/vm.go b/proxmoxtf/resource/vm.go
index 9ad9cce54..70c536754 100644
--- a/proxmoxtf/resource/vm.go
+++ b/proxmoxtf/resource/vm.go
@@ -1613,9 +1613,22 @@ func vmWaitState(ctx context.Context, vmAPI *vms.Client, state string, timeout i
 	}
 }
 
-// Shutdown the VM. When the API call returns without error, we check the VM's actual state: if it is still running,
-// the VM is most likely used as a High Availability resource, and we have to wait for the HA manager to actually
-// shut it down.
+// Start the VM, then wait for it to actually start; it may not be started immediately if running in HA mode.
+func vmStart(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData) diag.Diagnostics {
+	tflog.Debug(ctx, "Starting VM")
+
+	startVMTimeout := d.Get(mkResourceVirtualEnvironmentVMTimeoutStartVM).(int)
+
+	e := vmAPI.StartVM(ctx, startVMTimeout)
+	if e != nil {
+		return diag.FromErr(e)
+	}
+
+	return vmWaitState(ctx, vmAPI, "running", startVMTimeout)
+}
+
+// Shutdown the VM, then wait for it to actually shut down (it may not be shut down immediately if
+// running in HA mode).
 func vmShutdown(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData) diag.Diagnostics {
 	tflog.Debug(ctx, "Shutting down VM")
 
@@ -2765,10 +2778,8 @@ func vmCreateStart(ctx context.Context, d *schema.ResourceData, m interface{}) d
 	vmAPI := api.Node(nodeName).VM(vmID)
 
 	// Start the virtual machine and wait for it to reach a running state before continuing.
-	startVMTimeout := d.Get(mkResourceVirtualEnvironmentVMTimeoutStartVM).(int)
-	err = vmAPI.StartVM(ctx, startVMTimeout)
-	if err != nil {
-		return diag.FromErr(err)
+	if diags := vmStart(ctx, vmAPI, d); diags != nil {
+		return diags
 	}
 
 	if reboot {
@@ -5429,11 +5440,8 @@ func vmUpdate(ctx context.Context, d *schema.ResourceData, m interface{}) diag.D
 	if (d.HasChange(mkResourceVirtualEnvironmentVMStarted) || stoppedBeforeUpdate) && !bool(template) {
 		started := d.Get(mkResourceVirtualEnvironmentVMStarted).(bool)
 		if started {
-			startVMTimeout := d.Get(mkResourceVirtualEnvironmentVMTimeoutStartVM).(int)
-
-			e = vmAPI.StartVM(ctx, startVMTimeout)
-			if e != nil {
-				return diag.FromErr(e)
+			if diags := vmStart(ctx, vmAPI, d); diags != nil {
+				return diags
 			}
 		} else {
 			if e := vmShutdown(ctx, vmAPI, d); e != nil {
@@ -5590,10 +5598,8 @@ func vmUpdateDiskLocationAndSize(
 	}
 
 	if shutdownForDisksRequired && started && !template {
-		startVMTimeout := d.Get(mkResourceVirtualEnvironmentVMTimeoutStartVM).(int)
-		err = vmAPI.StartVM(ctx, startVMTimeout)
-		if err != nil {
-			return diag.FromErr(err)
+		if diags := vmStart(ctx, vmAPI, d); diags != nil {
+			return diags
 		}
 
 		// This concludes an equivalent of a reboot, avoid doing another.

From 6b66b642ac50dbf3a35de155fd90bb5b628b175d Mon Sep 17 00:00:00 2001
From: Pavel Boldyrev <627562+bpg@users.noreply.github.com>
Date: Sun, 20 Aug 2023 19:58:15 -0400
Subject: [PATCH 4/5] fix: linter errors

---
 proxmoxtf/resource/vm.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/proxmoxtf/resource/vm.go b/proxmoxtf/resource/vm.go
index 6262a8cde..cf0bd491a 100644
--- a/proxmoxtf/resource/vm.go
+++ b/proxmoxtf/resource/vm.go
@@ -1965,6 +1965,7 @@ func vmCreateClone(ctx context.Context, d *schema.ResourceData, m interface{}) d
 		tflog.Trace(ctx, fmt.Sprintf("CloudInit IDE interface is '%s'", initializationInterface))
 
 		const cdromCloudInitEnabled = true
+
 		cdromCloudInitFileID := fmt.Sprintf("%s:cloudinit", initializationDatastoreID)
 		cdromCloudInitMedia := "cdrom"
 		ideDevices[initializationInterface] = vms.CustomStorageDevice{
@@ -3534,6 +3535,7 @@ func vmReadCustom(
 	vmStatus *vms.GetStatusResponseData,
 ) diag.Diagnostics {
 	config := m.(proxmoxtf.ProviderConfiguration)
+
 	api, err := config.GetClient()
 	if err != nil {
 		return diag.FromErr(err)

From d834138fac8acde4ca4410202450520c71a4167d Mon Sep 17 00:00:00 2001
From: Pavel Boldyrev <627562+bpg@users.noreply.github.com>
Date: Sun, 20 Aug 2023 20:04:07 -0400
Subject: [PATCH 5/5] fix: use `vmAPI.WaitForVMState`

---
 proxmoxtf/resource/vm.go | 26 ++------------------------
 1 file changed, 2 insertions(+), 24 deletions(-)

diff --git a/proxmoxtf/resource/vm.go b/proxmoxtf/resource/vm.go
index cf0bd491a..ba7e1c8a7 100644
--- a/proxmoxtf/resource/vm.go
+++ b/proxmoxtf/resource/vm.go
@@ -1600,28 +1600,6 @@ func deleteIdeDrives(ctx context.Context, vmAPI *vms.Client, itf1 string, itf2 s
 	return nil
 }
 
-// Wait until a VM reaches the desired state. An error is returned if the VM doesn't reach the
-// desired state within the given timeout.
-func vmWaitState(ctx context.Context, vmAPI *vms.Client, state string, timeout int) diag.Diagnostics {
-	for {
-		vmStatus, err := vmAPI.GetVMStatus(ctx)
-		if err != nil {
-			return diag.FromErr(err)
-		}
-
-		if vmStatus.Status == state {
-			return nil
-		}
-
-		if timeout > 0 {
-			time.Sleep(time.Second)
-			timeout--
-		} else {
-			return diag.Errorf("timed out waiting for VM to stop")
-		}
-	}
-}
-
 // Start the VM, then wait for it to actually start; it may not be started immediately if running in HA mode.
 func vmStart(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData) diag.Diagnostics {
 	tflog.Debug(ctx, "Starting VM")
@@ -1633,7 +1611,7 @@ func vmStart(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData) dia
 		return diag.FromErr(e)
 	}
 
-	return vmWaitState(ctx, vmAPI, "running", startVMTimeout)
+	return diag.FromErr(vmAPI.WaitForVMState(ctx, "running", startVMTimeout, 1))
 }
 
 // Shutdown the VM, then wait for it to actually shut down (it may not be shut down immediately if
@@ -1652,7 +1630,7 @@ func vmShutdown(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData)
 		return diag.FromErr(e)
 	}
 
-	return vmWaitState(ctx, vmAPI, "stopped", shutdownTimeout)
+	return diag.FromErr(vmAPI.WaitForVMState(ctx, "stopped", shutdownTimeout, 1))
 }
 
 func vmCreateClone(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics {
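
For readers who want the final behaviour in one self-contained piece rather than spread across hunks, below is a minimal sketch of the poll-until-state pattern that the last commit delegates to vmAPI.WaitForVMState(ctx, state, timeout, 1), where the trailing argument is presumably the polling interval in seconds. The statusGetter interface, waitForState helper and fakeVM type are stand-ins invented for this illustration; they are not part of the provider code.

// Illustrative sketch only (not provider code): the poll-until-state pattern
// the series converges on. statusGetter and fakeVM are invented stand-ins
// for the provider's vms.Client.
package main

import (
	"context"
	"fmt"
	"time"
)

type statusGetter interface {
	// GetStatus returns the VM's current status, e.g. "running" or "stopped".
	GetStatus(ctx context.Context) (string, error)
}

// waitForState polls the VM status every delay seconds until it reports the
// requested state or the timeout (in seconds) elapses.
func waitForState(ctx context.Context, vm statusGetter, state string, timeout, delay int) error {
	deadline := time.Now().Add(time.Duration(timeout) * time.Second)

	for {
		status, err := vm.GetStatus(ctx)
		if err != nil {
			return err
		}

		if status == state {
			return nil
		}

		if time.Now().After(deadline) {
			return fmt.Errorf("timed out waiting for VM to reach state %q", state)
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(time.Duration(delay) * time.Second):
		}
	}
}

// fakeVM pretends the HA manager needs a few polls before the VM is stopped.
type fakeVM struct{ polls int }

func (f *fakeVM) GetStatus(ctx context.Context) (string, error) {
	f.polls++
	if f.polls < 3 {
		return "running", nil
	}
	return "stopped", nil
}

func main() {
	err := waitForState(context.Background(), &fakeVM{}, "stopped", 300, 1)
	fmt.Println("wait result:", err)
}

Compared with the decrementing-counter loop from the first commit, checking a wall-clock deadline keeps the timeout accurate even when the status call itself is slow.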