From 9de444efa78f913a1cc3adbac33b3d0fb661e9c7 Mon Sep 17 00:00:00 2001
From: Tim Gross
Date: Wed, 8 Jun 2022 11:55:01 -0400
Subject: [PATCH] CSI: skip node unpublish on GC'd or down nodes

If the node has been GC'd or is down, we can't send it a node
unpublish. The CSI spec requires that we don't send the controller
unpublish before the node unpublish, but in the case where a node is
gone we can't know the final fate of the node unpublish step.

The `csi_hook` on the client will unpublish if the allocation has
stopped, and if the host is terminated there's no mount for the volume
anyway. So we'll now assume that the node has unpublished at its end.
If it hasn't, any controller unpublish will potentially hang or error
and need to be retried.
---
 .changelog/13301.txt       |  3 +++
 nomad/csi_endpoint.go      | 22 ++++++++++++++++++++--
 nomad/csi_endpoint_test.go | 14 ++++++++++++--
 3 files changed, 35 insertions(+), 4 deletions(-)
 create mode 100644 .changelog/13301.txt

diff --git a/.changelog/13301.txt b/.changelog/13301.txt
new file mode 100644
index 000000000000..c6ee35f40bc5
--- /dev/null
+++ b/.changelog/13301.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+csi: Fixed a bug where volume claims on lost or garbage collected nodes could not be freed
+```
diff --git a/nomad/csi_endpoint.go b/nomad/csi_endpoint.go
index 89a0af1d5f3e..7286450d19cc 100644
--- a/nomad/csi_endpoint.go
+++ b/nomad/csi_endpoint.go
@@ -686,6 +686,25 @@ RELEASE_CLAIM:
 
 func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
 	v.logger.Trace("node unpublish", "vol", vol.ID)
+
+	store := v.srv.fsm.State()
+
+	// If the node has been GC'd or is down, we can't send it a node
+	// unpublish. We need to assume the node has unpublished at its
+	// end. If it hasn't, any controller unpublish will potentially
+	// hang or error and need to be retried.
+	if claim.NodeID != "" {
+		node, err := store.NodeByID(memdb.NewWatchSet(), claim.NodeID)
+		if err != nil {
+			return err
+		}
+		if node == nil || node.Status == structs.NodeStatusDown {
+			v.logger.Debug("skipping node unpublish for down or GC'd node")
+			claim.State = structs.CSIVolumeClaimStateNodeDetached
+			return v.checkpointClaim(vol, claim)
+		}
+	}
+
 	if claim.AllocationID != "" {
 		err := v.nodeUnpublishVolumeImpl(vol, claim)
 		if err != nil {
@@ -698,8 +717,7 @@ func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.C
 	// The RPC sent from the 'nomad node detach' command or GC won't have an
 	// allocation ID set so we try to unpublish every terminal or invalid
 	// alloc on the node, all of which will be in PastClaims after denormalizing
-	state := v.srv.fsm.State()
-	vol, err := state.CSIVolumeDenormalize(memdb.NewWatchSet(), vol)
+	vol, err := store.CSIVolumeDenormalize(memdb.NewWatchSet(), vol)
 	if err != nil {
 		return err
 	}
diff --git a/nomad/csi_endpoint_test.go b/nomad/csi_endpoint_test.go
index 84fae9a48de3..9646590729f6 100644
--- a/nomad/csi_endpoint_test.go
+++ b/nomad/csi_endpoint_test.go
@@ -504,22 +504,32 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
 	type tc struct {
 		name           string
 		startingState  structs.CSIVolumeClaimState
+		nodeID         string
 		expectedErrMsg string
 	}
 	testCases := []tc{
 		{
 			name:          "success",
 			startingState: structs.CSIVolumeClaimStateControllerDetached,
+			nodeID:        node.ID,
 		},
 		{
 			name:           "unpublish previously detached node",
 			startingState:  structs.CSIVolumeClaimStateNodeDetached,
 			expectedErrMsg: "could not detach from controller: controller detach volume: No path to node",
+			nodeID:         node.ID,
+		},
+		{
+			name:           "unpublish claim on garbage collected node",
+			startingState:  structs.CSIVolumeClaimStateTaken,
+			expectedErrMsg: "could not detach from controller: controller detach volume: No path to node",
+			nodeID:         uuid.Generate(),
 		},
 		{
 			name:           "first unpublish",
 			startingState:  structs.CSIVolumeClaimStateTaken,
 			expectedErrMsg: "could not detach from controller: controller detach volume: No path to node",
+			nodeID:         node.ID,
 		},
 	}
 
@@ -545,7 +555,7 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
 
 			// setup: create an alloc that will claim our volume
 			alloc := mock.BatchAlloc()
-			alloc.NodeID = node.ID
+			alloc.NodeID = tc.nodeID
 			alloc.ClientStatus = structs.AllocClientStatusFailed
 
 			index++
@@ -554,7 +564,7 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) {
 			// setup: claim the volume for our alloc
 			claim := &structs.CSIVolumeClaim{
 				AllocationID:   alloc.ID,
-				NodeID:         node.ID,
+				NodeID:         tc.nodeID,
 				ExternalNodeID: "i-example",
 				Mode:           structs.CSIVolumeClaimRead,
 			}
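
For reviewers who want the new control flow in isolation, here is a minimal, self-contained Go sketch of the check the patch adds to `nodeUnpublishVolume`. The types and names below are simplified stand-ins invented for illustration, not Nomad's real `structs` package, state store, or RPC plumbing; the sketch only shows the decision to treat a GC'd (missing) or down node as already unpublished so the claim can move straight to the node-detached state.

```go
package main

import "fmt"

// Simplified stand-ins for the real types; in the server the node is
// looked up in the FSM state store and the claim state is checkpointed
// through Raft.
type Node struct {
	ID     string
	Status string
}

const NodeStatusDown = "down"

type ClaimState string

const (
	ClaimStateTaken        ClaimState = "taken"
	ClaimStateNodeDetached ClaimState = "node-detached"
)

type Claim struct {
	NodeID string
	State  ClaimState
}

// skipNodeUnpublish mirrors the new check: if the node record has been
// garbage collected (nil) or the node is down, we can't send it a node
// unpublish RPC, so we assume the client already unpublished on its end.
func skipNodeUnpublish(node *Node) bool {
	return node == nil || node.Status == NodeStatusDown
}

func main() {
	claim := &Claim{NodeID: "lost-node", State: ClaimStateTaken}

	// Simulate a state-store lookup that finds no node record (GC'd node).
	var node *Node

	if skipNodeUnpublish(node) {
		// Move straight to node-detached so the controller unpublish
		// step can proceed (and be retried if it fails).
		claim.State = ClaimStateNodeDetached
	}
	fmt.Println("claim state:", claim.State)
}
```

If the node does in fact still have the volume mounted, the subsequent controller unpublish is the step that will error and be retried, which is the trade-off described in the commit message.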