Skip to content

Commit

Permalink
improve error handling and fix bug for ineligible nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
tgross committed Jul 20, 2023
1 parent 299d897 commit 2657637
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 8 deletions.
8 changes: 8 additions & 0 deletions .changelog/17996.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
```release-note:bug
csi: Fixed a bug in sending concurrent requests to CSI controller plugins by serializing them per plugin
```

```release-note:bug
csi: Fixed a bug where CSI controller requests could be sent to unhealthy plugins
```

```release-note:bug
csi: Fixed a bug where CSI controller requests could not be sent to controllers on nodes ineligible for scheduling
```
43 changes: 35 additions & 8 deletions nomad/client_csi_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
metrics "github.com/armon/go-metrics"
log "github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb"
multierror "github.com/hashicorp/go-multierror"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/nomad/structs"
)
Expand Down Expand Up @@ -275,21 +276,47 @@ func (a *ClientCSI) clientIDsForController(pluginID string) ([]string, error) {

clientIDs := []string{}

var merr multierror.Error

if len(plugin.Controllers) == 0 {
return nil, fmt.Errorf("failed to find instances of controller plugin %q", pluginID)
}

for clientID, controller := range plugin.Controllers {
if !controller.IsController() || !controller.Healthy {
// we don't have separate types for CSIInfo depending on
// whether it's a controller or node. this error shouldn't
// make it to production but is to aid developers during
// development
if !controller.IsController() {
// we don't have separate types for CSIInfo depending on whether
// it's a controller or node. this error should never make it to
// production
multierror.Append(&merr, fmt.Errorf(
"plugin instance %q is not a controller but was registered as one - this is always a bug", controller.AllocID))
continue
}

if !controller.Healthy {
multierror.Append(&merr, fmt.Errorf(
"plugin instance %q is not healthy", controller.AllocID))
continue
}

node, err := getNodeForRpc(snap, clientID)
if err == nil && node != nil && node.Ready() {
clientIDs = append(clientIDs, clientID)
if err != nil || node == nil {
multierror.Append(&merr, fmt.Errorf(
"cannot find node %q for plugin instance %q", clientID, controller.AllocID))
continue
}

if node.Status != structs.NodeStatusReady {
multierror.Append(&merr, fmt.Errorf(
"node %q for plugin instance %q is not ready", clientID, controller.AllocID))
continue
}

clientIDs = append(clientIDs, clientID)
}

if len(clientIDs) == 0 {
return nil, fmt.Errorf("failed to find clients running controller plugin %q", pluginID)
return nil, fmt.Errorf("failed to find clients running controller plugin %q: %v",
pluginID, merr.ErrorOrNil())
}

// Many plugins don't handle concurrent requests as described in the spec,
Expand Down

0 comments on commit 2657637

Please sign in to comment.