From d6f04ddeacb1e157f7c0befe45492932c96d6136 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Emmanuel=20BENO=C3=8ET?=
Date: Sun, 20 Aug 2023 16:33:02 +0200
Subject: [PATCH 1/5] fix(vm): wait for VMs to actually stop when sending a shutdown command

Due to how a Proxmox cluster reacts to a VM shutdown command when running in
HA mode, the VM might still be running when the shutdown API call returns.
This commit adds a loop that actively waits for the VM's status to change to
"stopped" (while also accounting for the shutdown timeout) after the call's
return.
---
 proxmoxtf/resource/vm.go | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/proxmoxtf/resource/vm.go b/proxmoxtf/resource/vm.go
index 2ee3956d4..e7110207b 100644
--- a/proxmoxtf/resource/vm.go
+++ b/proxmoxtf/resource/vm.go
@@ -1591,7 +1591,9 @@ func deleteIdeDrives(ctx context.Context, vmAPI *vms.Client, itf1 string, itf2 s
 	return nil
 }
 
-// Shutdown the VM.
+// Shutdown the VM. When the API call returns without error, we check the VM's actual state: if it is still running,
+// the VM is most likely used as a High Availability resource, and we have to wait for the HA manager to actually
+// shut it down.
 func vmShutdown(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData) diag.Diagnostics {
 	tflog.Debug(ctx, "Shutting down VM")
 
@@ -1606,7 +1608,24 @@ func vmShutdown(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData)
 		return diag.FromErr(e)
 	}
 
-	return nil
+	// Wait for VM to stop
+	for {
+		vmStatus, err := vmAPI.GetVMStatus(ctx)
+		if err != nil {
+			return diag.FromErr(err)
+		}
+
+		if vmStatus.Status == "stopped" {
+			return nil
+		}
+
+		if shutdownTimeout > 0 {
+			time.Sleep(time.Second)
+			shutdownTimeout--
+		} else {
+			return diag.Errorf("timed out waiting for VM to stop")
+		}
+	}
 }
 
 func vmCreateClone(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics {

From 7e32977e1ddc982c7f8645e57fcf47d68361357c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Emmanuel=20BENO=C3=8ET?=
Date: Sun, 20 Aug 2023 16:45:49 +0200
Subject: [PATCH 2/5] chore(refactoring): extracted VM state change wait loop into a separate function

---
 proxmoxtf/resource/vm.go | 41 ++++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/proxmoxtf/resource/vm.go b/proxmoxtf/resource/vm.go
index e7110207b..9ad9cce54 100644
--- a/proxmoxtf/resource/vm.go
+++ b/proxmoxtf/resource/vm.go
@@ -1591,6 +1591,28 @@ func deleteIdeDrives(ctx context.Context, vmAPI *vms.Client, itf1 string, itf2 s
 	return nil
 }
 
+// Wait until a VM reaches the desired state. An error is returned if the VM doesn't reach the
+// desired state within the given timeout.
+func vmWaitState(ctx context.Context, vmAPI *vms.Client, state string, timeout int) diag.Diagnostics {
+	for {
+		vmStatus, err := vmAPI.GetVMStatus(ctx)
+		if err != nil {
+			return diag.FromErr(err)
+		}
+
+		if vmStatus.Status == state {
+			return nil
+		}
+
+		if timeout > 0 {
+			time.Sleep(time.Second)
+			timeout--
+		} else {
+			return diag.Errorf("timed out waiting for VM to stop")
+		}
+	}
+}
+
 // Shutdown the VM. When the API call returns without error, we check the VM's actual state: if it is still running,
 // the VM is most likely used as a High Availability resource, and we have to wait for the HA manager to actually
 // shut it down.
@@ -1608,24 +1630,7 @@ func vmShutdown(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData)
 		return diag.FromErr(e)
 	}
 
-	// Wait for VM to stop
-	for {
-		vmStatus, err := vmAPI.GetVMStatus(ctx)
-		if err != nil {
-			return diag.FromErr(err)
-		}
-
-		if vmStatus.Status == "stopped" {
-			return nil
-		}
-
-		if shutdownTimeout > 0 {
-			time.Sleep(time.Second)
-			shutdownTimeout--
-		} else {
-			return diag.Errorf("timed out waiting for VM to stop")
-		}
-	}
+	return vmWaitState(ctx, vmAPI, "stopped", shutdownTimeout)
 }
 
 func vmCreateClone(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics {

From f33cd20e0b71a2f185169bbc40faf4056503a72c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Emmanuel=20BENO=C3=8ET?=
Date: Sun, 20 Aug 2023 17:02:54 +0200
Subject: [PATCH 3/5] fix(vm): wait for VMs to actually start after requesting them from the cluster

This commit forces the plugin to wait for a VM to actually run after
requesting it to be started. This avoids problems with Proxmox's High
Availability mode, where a start request may not be immediately honoured
by the cluster.
---
 proxmoxtf/resource/vm.go | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/proxmoxtf/resource/vm.go b/proxmoxtf/resource/vm.go
index 9ad9cce54..70c536754 100644
--- a/proxmoxtf/resource/vm.go
+++ b/proxmoxtf/resource/vm.go
@@ -1613,9 +1613,22 @@ func vmWaitState(ctx context.Context, vmAPI *vms.Client, state string, timeout i
 	}
 }
 
-// Shutdown the VM. When the API call returns without error, we check the VM's actual state: if it is still running,
-// the VM is most likely used as a High Availability resource, and we have to wait for the HA manager to actually
-// shut it down.
+// Start the VM, then wait for it to actually start; it may not be started immediately if running in HA mode.
+func vmStart(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData) diag.Diagnostics {
+	tflog.Debug(ctx, "Starting VM")
+
+	startVMTimeout := d.Get(mkResourceVirtualEnvironmentVMTimeoutStartVM).(int)
+
+	e := vmAPI.StartVM(ctx, startVMTimeout)
+	if e != nil {
+		return diag.FromErr(e)
+	}
+
+	return vmWaitState(ctx, vmAPI, "running", startVMTimeout)
+}
+
+// Shutdown the VM, then wait for it to actually shut down (it may not be shut down immediately if
+// running in HA mode).
 func vmShutdown(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData) diag.Diagnostics {
 	tflog.Debug(ctx, "Shutting down VM")
 
@@ -2765,10 +2778,8 @@ func vmCreateStart(ctx context.Context, d *schema.ResourceData, m interface{}) d
 	vmAPI := api.Node(nodeName).VM(vmID)
 
 	// Start the virtual machine and wait for it to reach a running state before continuing.
-	startVMTimeout := d.Get(mkResourceVirtualEnvironmentVMTimeoutStartVM).(int)
-	err = vmAPI.StartVM(ctx, startVMTimeout)
-	if err != nil {
-		return diag.FromErr(err)
+	if diags := vmStart(ctx, vmAPI, d); diags != nil {
+		return diags
 	}
 
 	if reboot {
@@ -5429,11 +5440,8 @@ func vmUpdate(ctx context.Context, d *schema.ResourceData, m interface{}) diag.D
 	if (d.HasChange(mkResourceVirtualEnvironmentVMStarted) || stoppedBeforeUpdate) && !bool(template) {
 		started := d.Get(mkResourceVirtualEnvironmentVMStarted).(bool)
 		if started {
-			startVMTimeout := d.Get(mkResourceVirtualEnvironmentVMTimeoutStartVM).(int)
-
-			e = vmAPI.StartVM(ctx, startVMTimeout)
-			if e != nil {
-				return diag.FromErr(e)
+			if diags := vmStart(ctx, vmAPI, d); diags != nil {
+				return diags
 			}
 		} else {
 			if e := vmShutdown(ctx, vmAPI, d); e != nil {
@@ -5590,10 +5598,8 @@ func vmUpdateDiskLocationAndSize(
 	}
 
 	if shutdownForDisksRequired && started && !template {
-		startVMTimeout := d.Get(mkResourceVirtualEnvironmentVMTimeoutStartVM).(int)
-		err = vmAPI.StartVM(ctx, startVMTimeout)
-		if err != nil {
-			return diag.FromErr(err)
+		if diags := vmStart(ctx, vmAPI, d); diags != nil {
+			return diags
 		}
 
 		// This concludes an equivalent of a reboot, avoid doing another.

From 6b66b642ac50dbf3a35de155fd90bb5b628b175d Mon Sep 17 00:00:00 2001
From: Pavel Boldyrev <627562+bpg@users.noreply.github.com>
Date: Sun, 20 Aug 2023 19:58:15 -0400
Subject: [PATCH 4/5] fix: linter errors

---
 proxmoxtf/resource/vm.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/proxmoxtf/resource/vm.go b/proxmoxtf/resource/vm.go
index 6262a8cde..cf0bd491a 100644
--- a/proxmoxtf/resource/vm.go
+++ b/proxmoxtf/resource/vm.go
@@ -1965,6 +1965,7 @@ func vmCreateClone(ctx context.Context, d *schema.ResourceData, m interface{}) d
 		tflog.Trace(ctx, fmt.Sprintf("CloudInit IDE interface is '%s'", initializationInterface))
 
 		const cdromCloudInitEnabled = true
+
 		cdromCloudInitFileID := fmt.Sprintf("%s:cloudinit", initializationDatastoreID)
 		cdromCloudInitMedia := "cdrom"
 		ideDevices[initializationInterface] = vms.CustomStorageDevice{
@@ -3534,6 +3535,7 @@ func vmReadCustom(
 	vmStatus *vms.GetStatusResponseData,
 ) diag.Diagnostics {
 	config := m.(proxmoxtf.ProviderConfiguration)
+
 	api, err := config.GetClient()
 	if err != nil {
 		return diag.FromErr(err)

From d834138fac8acde4ca4410202450520c71a4167d Mon Sep 17 00:00:00 2001
From: Pavel Boldyrev <627562+bpg@users.noreply.github.com>
Date: Sun, 20 Aug 2023 20:04:07 -0400
Subject: [PATCH 5/5] fix: use `vmAPI.WaitForVMState`

---
 proxmoxtf/resource/vm.go | 26 ++------------------------
 1 file changed, 2 insertions(+), 24 deletions(-)

diff --git a/proxmoxtf/resource/vm.go b/proxmoxtf/resource/vm.go
index cf0bd491a..ba7e1c8a7 100644
--- a/proxmoxtf/resource/vm.go
+++ b/proxmoxtf/resource/vm.go
@@ -1600,28 +1600,6 @@ func deleteIdeDrives(ctx context.Context, vmAPI *vms.Client, itf1 string, itf2 s
 	return nil
 }
 
-// Wait until a VM reaches the desired state. An error is returned if the VM doesn't reach the
-// desired state within the given timeout.
-func vmWaitState(ctx context.Context, vmAPI *vms.Client, state string, timeout int) diag.Diagnostics {
-	for {
-		vmStatus, err := vmAPI.GetVMStatus(ctx)
-		if err != nil {
-			return diag.FromErr(err)
-		}
-
-		if vmStatus.Status == state {
-			return nil
-		}
-
-		if timeout > 0 {
-			time.Sleep(time.Second)
-			timeout--
-		} else {
-			return diag.Errorf("timed out waiting for VM to stop")
-		}
-	}
-}
-
 // Start the VM, then wait for it to actually start; it may not be started immediately if running in HA mode.
 func vmStart(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData) diag.Diagnostics {
 	tflog.Debug(ctx, "Starting VM")
@@ -1633,7 +1611,7 @@ func vmStart(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData) dia
 		return diag.FromErr(e)
 	}
 
-	return vmWaitState(ctx, vmAPI, "running", startVMTimeout)
+	return diag.FromErr(vmAPI.WaitForVMState(ctx, "running", startVMTimeout, 1))
 }
 
 // Shutdown the VM, then wait for it to actually shut down (it may not be shut down immediately if
@@ -1652,7 +1630,7 @@ func vmShutdown(ctx context.Context, vmAPI *vms.Client, d *schema.ResourceData)
 		return diag.FromErr(e)
 	}
 
-	return vmWaitState(ctx, vmAPI, "stopped", shutdownTimeout)
+	return diag.FromErr(vmAPI.WaitForVMState(ctx, "stopped", shutdownTimeout, 1))
 }
 
 func vmCreateClone(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics {
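
For readers who want the final behaviour in one self-contained piece rather than spread across hunks, below is a minimal sketch of the poll-until-state pattern that the last commit delegates to vmAPI.WaitForVMState(ctx, state, timeout, 1), where the trailing argument is presumably the polling interval in seconds. The statusGetter interface, waitForState helper and fakeVM type are stand-ins invented for this illustration; they are not part of the provider code.

// Illustrative sketch only (not provider code): the poll-until-state pattern
// the series converges on. statusGetter and fakeVM are invented stand-ins
// for the provider's vms.Client.
package main

import (
	"context"
	"fmt"
	"time"
)

type statusGetter interface {
	// GetStatus returns the VM's current status, e.g. "running" or "stopped".
	GetStatus(ctx context.Context) (string, error)
}

// waitForState polls the VM status every delay seconds until it reports the
// requested state or the timeout (in seconds) elapses.
func waitForState(ctx context.Context, vm statusGetter, state string, timeout, delay int) error {
	deadline := time.Now().Add(time.Duration(timeout) * time.Second)

	for {
		status, err := vm.GetStatus(ctx)
		if err != nil {
			return err
		}

		if status == state {
			return nil
		}

		if time.Now().After(deadline) {
			return fmt.Errorf("timed out waiting for VM to reach state %q", state)
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(time.Duration(delay) * time.Second):
		}
	}
}

// fakeVM pretends the HA manager needs a few polls before the VM is stopped.
type fakeVM struct{ polls int }

func (f *fakeVM) GetStatus(ctx context.Context) (string, error) {
	f.polls++
	if f.polls < 3 {
		return "running", nil
	}
	return "stopped", nil
}

func main() {
	err := waitForState(context.Background(), &fakeVM{}, "stopped", 300, 1)
	fmt.Println("wait result:", err)
}

Compared with the decrementing-counter loop from the first commit, checking a wall-clock deadline keeps the timeout accurate even when the status call itself is slow.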