From 2c40dbaac47b737f30c69b3882fcfe3275f5c92b Mon Sep 17 00:00:00 2001
From: Tim Gross
Date: Tue, 22 Mar 2022 15:40:24 -0400
Subject: [PATCH] csi: fix handling of garbage collected node in node unpublish (#12350)

When a node is garbage collected, we assume that the volume is no
longer attached to it and ignore the `ErrUnknownNode` error. But we
used `errors.Is` to check for a wrapped error, and RPC flattens the
errors during serialization. This results in an error check that works
in automated testing but not in real clusters. Use a string contains
check instead.
---
 .changelog/12350.txt  | 3 +++
 nomad/csi_endpoint.go | 6 ++++--
 2 files changed, 7 insertions(+), 2 deletions(-)
 create mode 100644 .changelog/12350.txt

diff --git a/.changelog/12350.txt b/.changelog/12350.txt
new file mode 100644
index 000000000000..a70ffae56a25
--- /dev/null
+++ b/.changelog/12350.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+csi: Fixed a bug where garbage collected nodes would block releasing a volume
+```
diff --git a/nomad/csi_endpoint.go b/nomad/csi_endpoint.go
index 1fcaf57d3783..43b92450b1b0 100644
--- a/nomad/csi_endpoint.go
+++ b/nomad/csi_endpoint.go
@@ -1,8 +1,8 @@
 package nomad
 
 import (
-	"errors"
 	"fmt"
+	"strings"
 	"time"
 
 	metrics "github.com/armon/go-metrics"
@@ -666,7 +666,9 @@ func (v *CSIVolume) nodeUnpublishVolumeImpl(vol *structs.CSIVolume, claim *struc
 		// we should only get this error if the Nomad node disconnects and
 		// is garbage-collected, so at this point we don't have any reason
 		// to operate as though the volume is attached to it.
-		if !errors.Is(err, structs.ErrUnknownNode) {
+		// note: errors.Is cannot be used because the RPC call breaks
+		// error wrapping.
+		if !strings.Contains(err.Error(), structs.ErrUnknownNode.Error()) {
 			return fmt.Errorf("could not detach from node: %w", err)
 		}
 	}