diff --git a/.changelog/12324.txt b/.changelog/12324.txt new file mode 100644 index 000000000000..c7086c64482c --- /dev/null +++ b/.changelog/12324.txt @@ -0,0 +1,3 @@ +```release-note:improvement +drainer: defer draining CSI plugin jobs until system jobs are drained +``` diff --git a/nomad/drainer/draining_node.go b/nomad/drainer/draining_node.go index 5a9ee1c15981..348556e4fa94 100644 --- a/nomad/drainer/draining_node.go +++ b/nomad/drainer/draining_node.go @@ -139,7 +139,7 @@ func (n *drainingNode) DrainingJobs() ([]structs.NamespacedID, error) { jobIDs := make(map[structs.NamespacedID]struct{}) var jobs []structs.NamespacedID for _, alloc := range allocs { - if alloc.TerminalStatus() || alloc.Job.Type == structs.JobTypeSystem { + if alloc.TerminalStatus() || alloc.Job.Type == structs.JobTypeSystem || alloc.Job.IsPlugin() { continue } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 325c07f76222..c4ac6a652cfb 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -4566,6 +4566,18 @@ func (j *Job) IsMultiregion() bool { return j.Multiregion != nil && j.Multiregion.Regions != nil && len(j.Multiregion.Regions) > 0 } +// IsPlugin returns whether a job implements a plugin (currently just CSI) +func (j *Job) IsPlugin() bool { + for _, tg := range j.TaskGroups { + for _, task := range tg.Tasks { + if task.CSIPluginConfig != nil { + return true + } + } + } + return false +} + // VaultPolicies returns the set of Vault policies per task group, per task func (j *Job) VaultPolicies() map[string]map[string]*Vault { policies := make(map[string]map[string]*Vault, len(j.TaskGroups)) diff --git a/website/content/docs/commands/node/drain.mdx b/website/content/docs/commands/node/drain.mdx index 171304bed2ef..83a87d2ef228 100644 --- a/website/content/docs/commands/node/drain.mdx +++ b/website/content/docs/commands/node/drain.mdx @@ -70,9 +70,8 @@ capability. without being force stopped after a certain deadline. 
- `-ignore-system`: Ignore system allows the drain to complete without - stopping system job allocations. By default system jobs are stopped - last. You should always use this flag when draining a node running - [CSI node plugins][internals-csi]. + stopping system job allocations. By default system jobs (and CSI + plugins) are stopped last, after the `deadline` time has expired. - `-keep-ineligible`: Keep ineligible will maintain the node's scheduling ineligibility even if the drain is being disabled. This is useful when an