diff --git a/.changelog/14484.txt b/.changelog/14484.txt
new file mode 100644
index 000000000000..305e9b065ad8
--- /dev/null
+++ b/.changelog/14484.txt
@@ -0,0 +1,11 @@
+```release-note:bug
+csi: Fixed a bug where the server would not send controller unpublish for a failed allocation.
+```
+
+```release-note:bug
+csi: Fixed a data race in the volume unpublish endpoint that could result in claims being incorrectly marked as freed before being persisted to raft.
+```
+
+```release-note:bug
+api: Fixed a bug where the List Volume API did not include the `ControllerRequired` and `ResourceExhausted` fields.
+```
diff --git a/api/csi.go b/api/csi.go
index 120c239fde8f..8b7f46df6974
--- a/api/csi.go
+++ b/api/csi.go
@@ -292,6 +292,8 @@ type CSIVolumeListStub struct {
 	Topologies     []*CSITopology
 	AccessMode     CSIVolumeAccessMode
 	AttachmentMode CSIVolumeAttachmentMode
+	CurrentReaders int
+	CurrentWriters int
 	Schedulable    bool
 	PluginID       string
 	Provider       string
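Note: the new `CurrentReaders`/`CurrentWriters` stub fields above surface through the Go API client. A minimal sketch of reading them, assuming a reachable Nomad agent and this repo's `api` package (not part of the patch):

```go
package main

import (
	"fmt"

	"github.com/hashicorp/nomad/api"
)

func main() {
	// Connect using the default config (NOMAD_ADDR, etc.).
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		panic(err)
	}

	// List CSI volumes; each entry is a CSIVolumeListStub, which now
	// carries the claim counts added in this change.
	vols, _, err := client.CSIVolumes().List(nil)
	if err != nil {
		panic(err)
	}
	for _, v := range vols {
		fmt.Printf("%s: readers=%d writers=%d\n",
			v.ID, v.CurrentReaders, v.CurrentWriters)
	}
}
```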
diff --git a/nomad/csi_endpoint.go b/nomad/csi_endpoint.go
index 545afaf4f68e..cdf3d4cd395d
--- a/nomad/csi_endpoint.go
+++ b/nomad/csi_endpoint.go
@@ -586,12 +586,14 @@ func (v *CSIVolume) Unpublish(args *structs.CSIVolumeUnpublishRequest, reply *st
 	case structs.CSIVolumeClaimStateReadyToFree:
 		goto RELEASE_CLAIM
 	}
+	vol = vol.Copy()
 	err = v.nodeUnpublishVolume(vol, claim)
 	if err != nil {
 		return err
 	}
 
 NODE_DETACHED:
+	vol = vol.Copy()
 	err = v.controllerUnpublishVolume(vol, claim)
 	if err != nil {
 		return err
@@ -611,6 +613,10 @@ RELEASE_CLAIM:
 	return nil
 }
 
+// nodeUnpublishVolume handles sending the RPCs to the Node plugin to unmount
+// the volume. Typically this task is already completed on the client, but we
+// need to have this here so that GC can re-send it in case of client-side
+// problems. This function should only be called on a copy of the volume.
 func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
 	v.logger.Trace("node unpublish", "vol", vol.ID)
 	if claim.AllocationID != "" {
@@ -685,8 +691,12 @@ func (v *CSIVolume) nodeUnpublishVolumeImpl(vol *structs.CSIVolume, claim *struc
 	return nil
 }
 
+// controllerUnpublishVolume handles sending the RPCs to the Controller plugin
+// to unpublish the volume (detach it from its host). This function should
+// only be called on a copy of the volume.
 func (v *CSIVolume) controllerUnpublishVolume(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
 	v.logger.Trace("controller unpublish", "vol", vol.ID)
+
 	if !vol.ControllerRequired {
 		claim.State = structs.CSIVolumeClaimStateReadyToFree
 		return nil
@@ -701,26 +711,39 @@ func (v *CSIVolume) controllerUnpublishVolume(vol *structs.CSIVolume, claim *str
 	} else if plugin == nil {
 		return fmt.Errorf("no such plugin: %q", vol.PluginID)
 	}
+
 	if !plugin.HasControllerCapability(structs.CSIControllerSupportsAttachDetach) {
+		claim.State = structs.CSIVolumeClaimStateReadyToFree
 		return nil
 	}
-	// we only send a controller detach if a Nomad client no longer has
-	// any claim to the volume, so we need to check the status of claimed
-	// allocations
 	vol, err = state.CSIVolumeDenormalize(ws, vol)
 	if err != nil {
 		return err
 	}
-	for _, alloc := range vol.ReadAllocs {
-		if alloc != nil && alloc.NodeID == claim.NodeID && !alloc.TerminalStatus() {
+
+	// we only send a controller detach if a Nomad client no longer has any
+	// claim to the volume, so we need to check the status of any other claimed
+	// allocations
+	shouldCancel := func(alloc *structs.Allocation) bool {
+		if alloc != nil && alloc.ID != claim.AllocationID &&
+			alloc.NodeID == claim.NodeID && !alloc.TerminalStatus() {
 			claim.State = structs.CSIVolumeClaimStateReadyToFree
+			v.logger.Debug(
+				"controller unpublish canceled: another non-terminal alloc is on this node",
+				"vol", vol.ID, "alloc", alloc.ID)
+			return true
+		}
+		return false
+	}
+
+	for _, alloc := range vol.ReadAllocs {
+		if shouldCancel(alloc) {
 			return nil
 		}
 	}
 	for _, alloc := range vol.WriteAllocs {
-		if alloc != nil && alloc.NodeID == claim.NodeID && !alloc.TerminalStatus() {
-			claim.State = structs.CSIVolumeClaimStateReadyToFree
+		if shouldCancel(alloc) {
 			return nil
 		}
 	}
@@ -746,6 +769,8 @@ func (v *CSIVolume) controllerUnpublishVolume(vol *structs.CSIVolume, claim *str
 	if err != nil {
 		return fmt.Errorf("could not detach from controller: %v", err)
 	}
+
+	v.logger.Trace("controller detach complete", "vol", vol.ID)
 	claim.State = structs.CSIVolumeClaimStateReadyToFree
 	return v.checkpointClaim(vol, claim)
 }
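Note: the `vol = vol.Copy()` calls above are the data-race fix from the changelog: objects returned by the state store are shared with concurrent readers, so the endpoint must mutate a private copy and persist the result through raft. A minimal, self-contained sketch of that rule, using hypothetical stand-in types rather than Nomad's actual state store:

```go
package main

import "fmt"

// Volume is a hypothetical stand-in for a state-store object such as
// structs.CSIVolume; it exists only to illustrate the pattern.
type Volume struct {
	ID     string
	Claims map[string]string
}

// Copy returns a deep copy so a writer can mutate without racing
// concurrent readers of the shared object.
func (v *Volume) Copy() *Volume {
	out := &Volume{ID: v.ID, Claims: make(map[string]string, len(v.Claims))}
	for alloc, state := range v.Claims {
		out.Claims[alloc] = state
	}
	return out
}

func main() {
	// Pretend this pointer came from the state store: other goroutines
	// may be reading it at any time.
	shared := &Volume{ID: "vol-1", Claims: map[string]string{"alloc-1": "taken"}}

	// Wrong: shared.Claims["alloc-1"] = "freed" would make the claim look
	// freed to readers before the change is committed via raft.

	// Right: mutate a private copy, then submit that copy through the
	// normal apply path (elided here).
	mine := shared.Copy()
	mine.Claims["alloc-1"] = "freed"

	fmt.Println(shared.Claims["alloc-1"], mine.Claims["alloc-1"]) // taken freed
}
```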
expectedErrMsg: "could not detach from controller: controller detach volume: No path to node", + nodeID: node.ID, + otherNodeID: uuid.Generate(), + }, + { + name: "unpublish claim on garbage collected node", + startingState: structs.CSIVolumeClaimStateTaken, + endState: structs.CSIVolumeClaimStateNodeDetached, + expectedErrMsg: "could not detach from controller: controller detach volume: No path to node", + nodeID: uuid.Generate(), + otherNodeID: uuid.Generate(), }, { name: "first unpublish", startingState: structs.CSIVolumeClaimStateTaken, + endState: structs.CSIVolumeClaimStateNodeDetached, expectedErrMsg: "could not detach from controller: controller detach volume: No path to node", + nodeID: node.ID, + otherNodeID: uuid.Generate(), }, } @@ -544,8 +571,13 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) { alloc.NodeID = node.ID alloc.ClientStatus = structs.AllocClientStatusFailed + otherAlloc := mock.BatchAlloc() + otherAlloc.NodeID = tc.otherNodeID + otherAlloc.ClientStatus = structs.AllocClientStatusRunning + index++ - require.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc})) + require.NoError(t, state.UpsertAllocs(structs.MsgTypeTestSetup, index, + []*structs.Allocation{alloc, otherAlloc})) // setup: claim the volume for our alloc claim := &structs.CSIVolumeClaim{ @@ -560,6 +592,19 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) { err = state.CSIVolumeClaim(index, ns, volID, claim) require.NoError(t, err) + // setup: claim the volume for our other alloc + otherClaim := &structs.CSIVolumeClaim{ + AllocationID: otherAlloc.ID, + NodeID: tc.otherNodeID, + ExternalNodeID: "i-example", + Mode: structs.CSIVolumeClaimRead, + } + + index++ + otherClaim.State = structs.CSIVolumeClaimStateTaken + err = state.CSIVolumeClaim(index, ns, volID, otherClaim) + require.NoError(t, err) + // test: unpublish and check the results claim.State = tc.startingState req := &structs.CSIVolumeUnpublishRequest{ @@ -575,17 +620,23 @@ func TestCSIVolumeEndpoint_Unpublish(t *testing.T) { err = msgpackrpc.CallWithCodec(codec, "CSIVolume.Unpublish", req, &structs.CSIVolumeUnpublishResponse{}) + vol, volErr := state.CSIVolumeByID(nil, ns, volID) + require.NoError(t, volErr) + require.NotNil(t, vol) + if tc.expectedErrMsg == "" { require.NoError(t, err) - vol, err = state.CSIVolumeByID(nil, ns, volID) - require.NoError(t, err) - require.NotNil(t, vol) - require.Len(t, vol.ReadAllocs, 0) + assert.Len(t, vol.ReadAllocs, 1) } else { require.Error(t, err) - require.True(t, strings.Contains(err.Error(), tc.expectedErrMsg), - "error message %q did not contain %q", err.Error(), tc.expectedErrMsg) + assert.Len(t, vol.ReadAllocs, 2) + assert.True(t, strings.Contains(err.Error(), tc.expectedErrMsg), + "error %v did not contain %q", err, tc.expectedErrMsg) + claim = vol.PastClaims[alloc.ID] + require.NotNil(t, claim) + assert.Equal(t, tc.endState, claim.State) } + }) } diff --git a/nomad/structs/csi.go b/nomad/structs/csi.go index d8eae4d3ea9e..c2149f4415f0 100644 --- a/nomad/structs/csi.go +++ b/nomad/structs/csi.go @@ -309,12 +309,15 @@ type CSIVolListStub struct { Schedulable bool PluginID string Provider string + ControllerRequired bool ControllersHealthy int ControllersExpected int NodesHealthy int NodesExpected int - CreateIndex uint64 - ModifyIndex uint64 + ResourceExhausted time.Time + + CreateIndex uint64 + ModifyIndex uint64 } // NewCSIVolume creates the volume struct. 
diff --git a/nomad/structs/csi.go b/nomad/structs/csi.go
index d8eae4d3ea9e..c2149f4415f0
--- a/nomad/structs/csi.go
+++ b/nomad/structs/csi.go
@@ -309,12 +309,15 @@ type CSIVolListStub struct {
 	Schedulable         bool
 	PluginID            string
 	Provider            string
+	ControllerRequired  bool
 	ControllersHealthy  int
 	ControllersExpected int
 	NodesHealthy        int
 	NodesExpected       int
-	CreateIndex         uint64
-	ModifyIndex         uint64
+	ResourceExhausted   time.Time
+
+	CreateIndex uint64
+	ModifyIndex uint64
 }
 
 // NewCSIVolume creates the volume struct. No side-effects
@@ -351,7 +354,7 @@ func (v *CSIVolume) RemoteID() string {
 }
 
 func (v *CSIVolume) Stub() *CSIVolListStub {
-	stub := CSIVolListStub{
+	return &CSIVolListStub{
 		ID:             v.ID,
 		Namespace:      v.Namespace,
 		Name:           v.Name,
@@ -364,15 +367,15 @@ func (v *CSIVolume) Stub() *CSIVolListStub {
 		Schedulable:         v.Schedulable,
 		PluginID:            v.PluginID,
 		Provider:            v.Provider,
+		ControllerRequired:  v.ControllerRequired,
 		ControllersHealthy:  v.ControllersHealthy,
 		ControllersExpected: v.ControllersExpected,
 		NodesHealthy:        v.NodesHealthy,
 		NodesExpected:       v.NodesExpected,
+		ResourceExhausted:   v.ResourceExhausted,
 		CreateIndex:         v.CreateIndex,
 		ModifyIndex:         v.ModifyIndex,
 	}
-
-	return &stub
 }
 
 // ReadSchedulable determines if the volume is potentially schedulable
diff --git a/website/content/api-docs/volumes.mdx b/website/content/api-docs/volumes.mdx
index 5221799455fe..1e4c7f9d32f3
--- a/website/content/api-docs/volumes.mdx
+++ b/website/content/api-docs/volumes.mdx
@@ -64,6 +64,8 @@ $ curl \
       ],
       "AccessMode": "multi-node-single-writer",
       "AttachmentMode": "file-system",
+      "CurrentReaders": 2,
+      "CurrentWriters": 1,
      "Schedulable": true,
      "PluginID": "plugin-id1",
      "Provider": "ebs",
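Note: taken together, the stub changes mean the List Volumes response now reports claim counts alongside `ControllerRequired` and `ResourceExhausted`. A small decoding sketch against the documented JSON shape; the field subset and sample values here are illustrative only:

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// listEntry models only the fields discussed here from the
// GET /v1/volumes?type=csi response; names match the documented JSON keys.
type listEntry struct {
	ID                 string
	CurrentReaders     int
	CurrentWriters     int
	ControllerRequired bool
	ResourceExhausted  time.Time
}

func main() {
	// Sample payload shaped like the api-docs excerpt above.
	body := []byte(`[{"ID":"volume-id1","CurrentReaders":2,"CurrentWriters":1,
		"ControllerRequired":true,"ResourceExhausted":"0001-01-01T00:00:00Z"}]`)

	var vols []listEntry
	if err := json.Unmarshal(body, &vols); err != nil {
		panic(err)
	}
	for _, v := range vols {
		// A zero ResourceExhausted timestamp means the volume has not
		// reported exhaustion.
		fmt.Printf("%s readers=%d writers=%d controller=%v exhausted=%v\n",
			v.ID, v.CurrentReaders, v.CurrentWriters,
			v.ControllerRequired, !v.ResourceExhausted.IsZero())
	}
}
```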