From b8db8f3642152073c7a3a21fc8995dc461df3e39 Mon Sep 17 00:00:00 2001 From: kainoaseto Date: Tue, 18 Jan 2022 16:40:44 -0800 Subject: [PATCH 01/33] fix auto promote of canary task groups when deploying them alongside rolling deploy taskgroups that do not use the canary deployment system --- nomad/deploymentwatcher/deployment_watcher.go | 9 +- .../deployments_watcher_test.go | 66 +++++++++++---- nomad/mock/mock.go | 82 +++++++++++++++++++ 3 files changed, 139 insertions(+), 18 deletions(-) diff --git a/nomad/deploymentwatcher/deployment_watcher.go b/nomad/deploymentwatcher/deployment_watcher.go index f12357d15514..bb7bc1f52584 100644 --- a/nomad/deploymentwatcher/deployment_watcher.go +++ b/nomad/deploymentwatcher/deployment_watcher.go @@ -283,9 +283,16 @@ func (w *deploymentWatcher) autoPromoteDeployment(allocs []*structs.AllocListStu return nil } - // AutoPromote iff every task group is marked auto_promote and is healthy. The whole + // AutoPromote iff every task group with canaries is marked auto_promote and is healthy. The whole // job version has been incremented, so we promote together. See also AutoRevert for _, dstate := range d.TaskGroups { + + // skip auto promote canary validation if the task group has no canaries + // to prevent auto promote hanging on mixed canary/non-canary taskgroup deploys + if dstate.DesiredCanaries < 1 { + continue + } + if !dstate.AutoPromote || dstate.DesiredCanaries != len(dstate.PlacedCanaries) { return nil } diff --git a/nomad/deploymentwatcher/deployments_watcher_test.go b/nomad/deploymentwatcher/deployments_watcher_test.go index 64fc6a724a4c..2ff17d01011e 100644 --- a/nomad/deploymentwatcher/deployments_watcher_test.go +++ b/nomad/deploymentwatcher/deployments_watcher_test.go @@ -535,15 +535,19 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) { w, m := defaultTestDeploymentWatcher(t) now := time.Now() - // Create 1 UpdateStrategy, 1 job (1 TaskGroup), 2 canaries, and 1 deployment - upd := structs.DefaultUpdateStrategy.Copy() - upd.AutoPromote = true - upd.MaxParallel = 2 - upd.Canary = 2 - upd.ProgressDeadline = 5 * time.Second + // Create 1 UpdateStrategy, 1 job (2 TaskGroups), 2 canaries, and 1 deployment + canaryUpd := structs.DefaultUpdateStrategy.Copy() + canaryUpd.AutoPromote = true + canaryUpd.MaxParallel = 2 + canaryUpd.Canary = 2 + canaryUpd.ProgressDeadline = 5 * time.Second - j := mock.Job() - j.TaskGroups[0].Update = upd + rollingUpd := structs.DefaultUpdateStrategy.Copy() + rollingUpd.ProgressDeadline = 5 * time.Second + + j := mock.MultiTaskGroupJob() + j.TaskGroups[0].Update = canaryUpd + j.TaskGroups[1].Update = rollingUpd d := mock.Deployment() d.JobID = j.ID @@ -551,14 +555,20 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) { // UpdateStrategy are copied in d.TaskGroups = map[string]*structs.DeploymentState{ "web": { - AutoPromote: upd.AutoPromote, - AutoRevert: upd.AutoRevert, - ProgressDeadline: upd.ProgressDeadline, + AutoPromote: canaryUpd.AutoPromote, + AutoRevert: canaryUpd.AutoRevert, + ProgressDeadline: canaryUpd.ProgressDeadline, + DesiredTotal: 2, + }, + "api": { + AutoPromote: rollingUpd.AutoPromote, + AutoRevert: rollingUpd.AutoRevert, + ProgressDeadline: rollingUpd.ProgressDeadline, DesiredTotal: 2, }, } - alloc := func() *structs.Allocation { + canaryAlloc := func() *structs.Allocation { a := mock.Alloc() a.DeploymentID = d.ID a.CreateTime = now.UnixNano() @@ -569,14 +579,36 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) { return a } - a := alloc() - b := alloc() + rollingAlloc := func() 
*structs.Allocation { + a := mock.Alloc() + a.DeploymentID = d.ID + a.CreateTime = now.UnixNano() + a.ModifyTime = now.UnixNano() + a.TaskGroup = "api" + a.AllocatedResources.Tasks["api"] = a.AllocatedResources.Tasks["web"].Copy() + delete(a.AllocatedResources.Tasks, "web") + a.TaskResources["api"] = a.TaskResources["web"].Copy() + delete(a.TaskResources, "web") + a.DeploymentStatus = &structs.AllocDeploymentStatus{ + Canary: false, + } + return a + } + + // Web taskgroup (0) + a := canaryAlloc() + b := canaryAlloc() + + // Api taskgroup (1) + c := rollingAlloc() + e := rollingAlloc() d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID, b.ID} d.TaskGroups[a.TaskGroup].DesiredCanaries = 2 + d.TaskGroups[c.TaskGroup].PlacedAllocs = 2 require.NoError(t, m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), j), "UpsertJob") require.NoError(t, m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") - require.NoError(t, m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a, b}), "UpsertAllocs") + require.NoError(t, m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a, b, c, e}), "UpsertAllocs") // ============================================================= // Support method calls @@ -595,7 +627,7 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) { matchConfig1 := &matchDeploymentAllocHealthRequestConfig{ DeploymentID: d.ID, - Healthy: []string{a.ID, b.ID}, + Healthy: []string{a.ID, b.ID, c.ID, e.ID}, Eval: true, } matcher1 := matchDeploymentAllocHealthRequest(matchConfig1) @@ -629,7 +661,7 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) { // Mark the canaries healthy req := &structs.DeploymentAllocHealthRequest{ DeploymentID: d.ID, - HealthyAllocationIDs: []string{a.ID, b.ID}, + HealthyAllocationIDs: []string{a.ID, b.ID, c.ID, e.ID}, } var resp structs.DeploymentUpdateResponse // Calls w.raft.UpdateDeploymentAllocHealth, which is implemented by StateStore in diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 95886654624c..beda95a7cf16 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -337,6 +337,88 @@ func Job() *structs.Job { return job } +func MultiTaskGroupJob() *structs.Job { + job := Job() + apiTaskGroup := &structs.TaskGroup{ + Name: "api", + Count: 10, + EphemeralDisk: &structs.EphemeralDisk{ + SizeMB: 150, + }, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 3, + Interval: 10 * time.Minute, + Delay: 1 * time.Minute, + Mode: structs.RestartPolicyModeDelay, + }, + ReschedulePolicy: &structs.ReschedulePolicy{ + Attempts: 2, + Interval: 10 * time.Minute, + Delay: 5 * time.Second, + DelayFunction: "constant", + }, + Migrate: structs.DefaultMigrateStrategy(), + Networks: []*structs.NetworkResource{ + { + Mode: "host", + DynamicPorts: []structs.Port{ + {Label: "http"}, + {Label: "admin"}, + }, + }, + }, + Tasks: []*structs.Task{ + { + Name: "api", + Driver: "exec", + Config: map[string]interface{}{ + "command": "/bin/date", + }, + Env: map[string]string{ + "FOO": "bar", + }, + Services: []*structs.Service{ + { + Name: "${TASK}-backend", + PortLabel: "http", + Tags: []string{"pci:${meta.pci-dss}", "datacenter:${node.datacenter}"}, + Checks: []*structs.ServiceCheck{ + { + Name: "check-table", + Type: structs.ServiceCheckScript, + Command: "/usr/local/check-table-${meta.database}", + Args: []string{"${meta.version}"}, + Interval: 30 * time.Second, + Timeout: 5 * time.Second, + }, + }, + }, + { + Name: "${TASK}-admin", + PortLabel: "admin", + }, + }, + LogConfig: 
structs.DefaultLogConfig(), + Resources: &structs.Resources{ + CPU: 500, + MemoryMB: 256, + }, + Meta: map[string]string{ + "foo": "bar", + }, + }, + }, + Meta: map[string]string{ + "elb_check_type": "http", + "elb_check_interval": "30s", + "elb_check_min": "3", + }, + } + job.TaskGroups = append(job.TaskGroups, apiTaskGroup) + job.Canonicalize() + return job +} + func LifecycleSideTask(resources structs.Resources, i int) *structs.Task { return &structs.Task{ Name: fmt.Sprintf("side-%d", i), From 08d3032313939c8258bf0aecf0bf94ccf3eef7fd Mon Sep 17 00:00:00 2001 From: James Rasell Date: Wed, 26 Jan 2022 15:56:16 +0100 Subject: [PATCH 02/33] docs: add `cores` to client reserved config block. --- website/content/docs/configuration/client.mdx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/website/content/docs/configuration/client.mdx b/website/content/docs/configuration/client.mdx index 22e9c2261447..d536c48e648c 100644 --- a/website/content/docs/configuration/client.mdx +++ b/website/content/docs/configuration/client.mdx @@ -336,6 +336,8 @@ see the [drivers documentation](/docs/drivers). - `cpu` `(int: 0)` - Specifies the amount of CPU to reserve, in MHz. +- `cores` `(int: 0)` - Specifies the number of CPU cores to reserve. + - `memory` `(int: 0)` - Specifies the amount of memory to reserve, in MB. - `disk` `(int: 0)` - Specifies the amount of disk to reserve, in MB. From 2c4a9d2ca78c565b4137414639976f79cb7d94ff Mon Sep 17 00:00:00 2001 From: Jai Bhagat Date: Wed, 26 Jan 2022 12:48:39 -0500 Subject: [PATCH 03/33] ui: add npm script for running ember test server --- ui/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/package.json b/ui/package.json index 8f5a619ba917..5bd526ca3412 100644 --- a/ui/package.json +++ b/ui/package.json @@ -20,7 +20,7 @@ "build-storybook": "STORYBOOK=true ember build && build-storybook -s dist", "storybook": "STORYBOOK=true start-storybook -p 6006 -s dist", "test": "npm-run-all lint:* test:*", - "test:ember": "ember test" + "test:ember": "ember test --server --query=dockcontainer" }, "husky": { "hooks": { From 765c04c40d2ec42f3be7147306e7ef57c8bb3d68 Mon Sep 17 00:00:00 2001 From: Jai Bhagat Date: Wed, 26 Jan 2022 13:04:08 -0500 Subject: [PATCH 04/33] ui: add ember-exam --- ui/package.json | 1 + ui/yarn.lock | 109 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 106 insertions(+), 4 deletions(-) diff --git a/ui/package.json b/ui/package.json index 5bd526ca3412..ebfae7f1f512 100644 --- a/ui/package.json +++ b/ui/package.json @@ -83,6 +83,7 @@ "ember-data": "~3.24", "ember-data-model-fragments": "5.0.0-beta.2", "ember-decorators": "^6.1.1", + "ember-exam": "^6.1.0", "ember-export-application-global": "^2.0.1", "ember-fetch": "^8.0.2", "ember-inflector": "^3.0.0", diff --git a/ui/yarn.lock b/ui/yarn.lock index 576b50462cde..de86220c420e 100644 --- a/ui/yarn.lock +++ b/ui/yarn.lock @@ -2493,6 +2493,19 @@ resolve "^1.8.1" semver "^7.3.2" +"@embroider/shared-internals@^1.0.0": + version "1.0.0" + resolved "https://registry.yarnpkg.com/@embroider/shared-internals/-/shared-internals-1.0.0.tgz#b081708ac79e4582f17ba0f3e3796e6612a8976c" + integrity sha512-Vx3dmejJxI5MG/qC7or3EUZY0AZBSBNOAR50PYotX3LxUSb4lAm5wISPnFbwEY4bbo2VhL/6XtWjMv8ZMcaP+g== + dependencies: + babel-import-util "^1.1.0" + ember-rfc176-data "^0.3.17" + fs-extra "^9.1.0" + lodash "^4.17.21" + resolve-package-path "^4.0.1" + semver "^7.3.5" + typescript-memoize "^1.0.1" + "@embroider/util@^0.36.0": version "0.36.0" resolved 
"https://registry.yarnpkg.com/@embroider/util/-/util-0.36.0.tgz#b2ffb2b06ac491f157a771392191ce91ef2216a6" @@ -4868,6 +4881,11 @@ babel-helpers@^6.24.1: babel-runtime "^6.22.0" babel-template "^6.24.1" +babel-import-util@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/babel-import-util/-/babel-import-util-1.1.0.tgz#4156b16ef090c4f0d3cdb869ff799202f24aeb93" + integrity sha512-sfzgAiJsUT1es9yrHAuJZuJfBkkOE7Og6rovAIwK/gNJX6MjDfWTprbPngdJZTd5ye4F3FvpvpQmvKXObRzVYA== + babel-loader@^8.0.6: version "8.2.3" resolved "https://registry.yarnpkg.com/babel-loader/-/babel-loader-8.2.3.tgz#8986b40f1a64cacfcb4b8429320085ef68b1342d" @@ -7007,6 +7025,15 @@ cli-table3@0.6.0: optionalDependencies: colors "^1.1.2" +cli-table3@^0.6.0: + version "0.6.1" + resolved "https://registry.yarnpkg.com/cli-table3/-/cli-table3-0.6.1.tgz#36ce9b7af4847f288d3cdd081fbd09bf7bd237b8" + integrity sha512-w0q/enDHhPLq44ovMGdQeeDLvwxwavsJX7oQGYt/LrBlYsyaxyDnp6z3QzFut/6kLLKnlcUVJLrpB7KBfgG/RA== + dependencies: + string-width "^4.2.0" + optionalDependencies: + colors "1.4.0" + cli-table@^0.3.1: version "0.3.4" resolved "https://registry.yarnpkg.com/cli-table/-/cli-table-0.3.4.tgz#5b37fd723751f1a6e9e70d55953a75e16eab958e" @@ -7153,7 +7180,7 @@ colorette@^2.0.16: resolved "https://registry.yarnpkg.com/colorette/-/colorette-2.0.16.tgz#713b9af84fdb000139f04546bd4a93f62a5085da" integrity sha512-hUewv7oMjCp+wkBv5Rm0v87eJhq4woh5rSR+42YSQJKecCqgIqNkZ6lAlQms/BwHPJA5NKMRlpxPRv0n8HQW6g== -colors@^1.1.2, colors@^1.4.0: +colors@1.4.0, colors@^1.1.2, colors@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/colors/-/colors-1.4.0.tgz#c50491479d4c1bdaed2c9ced32cf7c7dc2360f78" integrity sha512-a+UqTh4kgZg/SlGvfbzDHpgRu7AAQOmmqRHJnxhRZICKFUT91brVhNNt58CMWU9PsBbv3PDCZUHbVxuDiH2mtA== @@ -7911,6 +7938,13 @@ debug@^4.0.0, debug@^4.0.1, debug@^4.1.0, debug@^4.1.1, debug@^4.3.1: dependencies: ms "2.1.2" +debug@^4.2.0: + version "4.3.3" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.3.tgz#04266e0b70a98d4462e6e288e38259213332b664" + integrity sha512-/zxw5+vh1Tfv+4Qn7a5nsbcJKPaSvCDhojn6FEl9vupwK2VCSDtEiEtqr8DFtzYFOdz63LBkxec7DYuc2jon6Q== + dependencies: + ms "2.1.2" + debug@^4.3.2, debug@~4.3.1, debug@~4.3.2: version "4.3.2" resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.2.tgz#f0a49c18ac8779e31d4a0c6029dfb76873c7428b" @@ -8420,6 +8454,41 @@ ember-auto-import@^1.10.0, ember-auto-import@^1.2.19, ember-auto-import@^1.6.0: walk-sync "^0.3.3" webpack "^4.43.0" +ember-auto-import@^1.10.1: + version "1.12.1" + resolved "https://registry.yarnpkg.com/ember-auto-import/-/ember-auto-import-1.12.1.tgz#09967bd35cd56ac45f413c48deabf7cfb3a785f6" + integrity sha512-Jm0vWKNAy/wYMrdSQIrG8sRsvarIRHZ2sS/CGhMdMqVKJR48AhGU7NgPJ5SIlO/+seL2VSO+dtv7aEOEIaT6BA== + dependencies: + "@babel/core" "^7.1.6" + "@babel/preset-env" "^7.10.2" + "@babel/traverse" "^7.1.6" + "@babel/types" "^7.1.6" + "@embroider/shared-internals" "^1.0.0" + babel-core "^6.26.3" + babel-loader "^8.0.6" + babel-plugin-syntax-dynamic-import "^6.18.0" + babylon "^6.18.0" + broccoli-debug "^0.6.4" + broccoli-node-api "^1.7.0" + broccoli-plugin "^4.0.0" + broccoli-source "^3.0.0" + debug "^3.1.0" + ember-cli-babel "^7.0.0" + enhanced-resolve "^4.0.0" + fs-extra "^6.0.1" + fs-tree-diff "^2.0.0" + handlebars "^4.3.1" + js-string-escape "^1.0.1" + lodash "^4.17.19" + mkdirp "^0.5.1" + resolve-package-path "^3.1.0" + rimraf "^2.6.2" + semver "^7.3.4" + symlink-or-copy "^1.2.0" + typescript-memoize "^1.0.0-alpha.3" + walk-sync "^0.3.3" + webpack "^4.43.0" + 
ember-basic-dropdown@^3.0.16: version "3.0.16" resolved "https://registry.yarnpkg.com/ember-basic-dropdown/-/ember-basic-dropdown-3.0.16.tgz#287fcde57b5a37405d89cc65e0a4ad9a2e8e1b0b" @@ -9244,6 +9313,26 @@ ember-element-helper@^0.3.2: ember-cli-htmlbars "^5.1.0" ember-compatibility-helpers "^1.2.1" +ember-exam@^6.1.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/ember-exam/-/ember-exam-6.1.0.tgz#1ea2c0ece27ac8ad6a80d959b1c207611b7dfdd7" + integrity sha512-H9tg7eUgqkjAsr1/15UzxGyZobGLgsyTi56Ng0ySnkYGCRfvVpwtVc3xgcNOFnUaa9RExUFpxC0adjW3K87Uxw== + dependencies: + "@embroider/macros" "^0.36.0" + chalk "^4.1.0" + cli-table3 "^0.6.0" + debug "^4.2.0" + ember-auto-import "^1.10.1" + ember-cli-babel "^7.21.0" + ember-cli-version-checker "^5.1.2" + execa "^4.0.3" + fs-extra "^9.0.1" + js-yaml "^3.14.0" + npmlog "^4.1.2" + rimraf "^3.0.2" + semver "^7.3.2" + silent-error "^1.1.1" + ember-export-application-global@^2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/ember-export-application-global/-/ember-export-application-global-2.0.1.tgz#b120a70e322ab208defc9e2daebe8d0dfc2dcd46" @@ -10242,7 +10331,7 @@ execa@^3.0.0: signal-exit "^3.0.2" strip-final-newline "^2.0.0" -execa@^4.0.0: +execa@^4.0.0, execa@^4.0.3: version "4.1.0" resolved "https://registry.yarnpkg.com/execa/-/execa-4.1.0.tgz#4e5491ad1572f2f17a77d388c6c857135b22847a" integrity sha512-j5W0//W7f8UxAn8hXVnwG8tLwdiUy4FJLcSupCg6maBYZDpyBvTApK7KyuI4bKj8KOh1r2YH+6ucuYtJv1bTZA== @@ -10952,7 +11041,7 @@ fs-extra@^8.0.0, fs-extra@^8.0.1, fs-extra@^8.1.0: jsonfile "^4.0.0" universalify "^0.1.0" -fs-extra@^9.0.0, fs-extra@^9.0.1: +fs-extra@^9.0.0, fs-extra@^9.0.1, fs-extra@^9.1.0: version "9.1.0" resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-9.1.0.tgz#5954460c764a8da2094ba3554bf839e6b9a7c86d" integrity sha512-hcg3ZmepS30/7BSFqRvoo3DOMQu7IjqxO5nCDt+zM9XWjb33Wg7ziNT+Qvqbuc3+gWpzO02JubVyk2G4Zvo1OQ== @@ -12771,7 +12860,7 @@ js-tokens@^3.0.2: resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-3.0.2.tgz#9866df395102130e38f7f996bceb65443209c25b" integrity sha1-mGbfOVECEw449/mWvOtlRDIJwls= -js-yaml@^3.13.1, js-yaml@^3.2.5, js-yaml@^3.2.7: +js-yaml@^3.13.1, js-yaml@^3.14.0, js-yaml@^3.2.5, js-yaml@^3.2.7: version "3.14.1" resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-3.14.1.tgz#dae812fdb3825fa306609a8717383c50c36a0537" integrity sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g== @@ -16324,6 +16413,13 @@ resolve-package-path@^3.1.0: path-root "^0.1.1" resolve "^1.17.0" +resolve-package-path@^4.0.1: + version "4.0.3" + resolved "https://registry.yarnpkg.com/resolve-package-path/-/resolve-package-path-4.0.3.tgz#31dab6897236ea6613c72b83658d88898a9040aa" + integrity sha512-SRpNAPW4kewOaNUt8VPqhJ0UMxawMwzJD8V7m1cJfdSTK9ieZwS6K7Dabsm4bmLFM96Z5Y/UznrpG5kt1im8yA== + dependencies: + path-root "^0.1.1" + resolve-path@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/resolve-path/-/resolve-path-1.4.0.tgz#c4bda9f5efb2fce65247873ab36bb4d834fe16f7" @@ -18197,6 +18293,11 @@ typescript-memoize@^1.0.0-alpha.3: resolved "https://registry.yarnpkg.com/typescript-memoize/-/typescript-memoize-1.0.0-alpha.4.tgz#fd97ab63807c3392af5d0ac5f4754254a4fcd634" integrity sha512-woA2UUWSvx8ugkEjPN8DMuNjukBp8NQeLmz+LRXbEsQIvhLR8LSlD+8Qxdk7NmgE8xeJabJdU8zSrO4ozijGjg== +typescript-memoize@^1.0.1: + version "1.1.0" + resolved "https://registry.yarnpkg.com/typescript-memoize/-/typescript-memoize-1.1.0.tgz#4a8f512d06fc995167c703a3592219901db8bc79" + integrity 
sha512-LQPKVXK8QrBBkL/zclE6YgSWn0I8ew5m0Lf+XL00IwMhlotqRLlzHV+BRrljVQIc+NohUAuQP7mg4HQwrx5Xbg== + uc.micro@^1.0.0, uc.micro@^1.0.1, uc.micro@^1.0.5: version "1.0.6" resolved "https://registry.yarnpkg.com/uc.micro/-/uc.micro-1.0.6.tgz#9c411a802a409a91fc6cf74081baba34b24499ac" From b5e3e32dd6e16417852e7341a09203a268136723 Mon Sep 17 00:00:00 2001 From: Jai Bhagat Date: Wed, 26 Jan 2022 13:06:56 -0500 Subject: [PATCH 05/33] ui: allow parallel test-runs --- ui/testem.js | 1 + 1 file changed, 1 insertion(+) diff --git a/ui/testem.js b/ui/testem.js index f0d81dfdfa9d..0d47b107fe77 100644 --- a/ui/testem.js +++ b/ui/testem.js @@ -11,6 +11,7 @@ const config = { launch_in_ci: ['Chrome'], launch_in_dev: ['Chrome'], browser_start_timeout: 120, + parallel: -1, browser_args: { // New format in testem/master, but not in a release yet // Chrome: { From 757799d469bee7afa26b01fa6a184a8e807e8886 Mon Sep 17 00:00:00 2001 From: Jai Bhagat Date: Wed, 26 Jan 2022 13:08:31 -0500 Subject: [PATCH 06/33] ui: replace qunit start tests with ember-exam start --- ui/tests/test-helper.js | 2 +- ui/yarn.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/tests/test-helper.js b/ui/tests/test-helper.js index b246c315936a..a8efa1f76517 100644 --- a/ui/tests/test-helper.js +++ b/ui/tests/test-helper.js @@ -2,7 +2,7 @@ import 'core-js'; import Application from 'nomad-ui/app'; import config from 'nomad-ui/config/environment'; import { setApplication } from '@ember/test-helpers'; -import { start } from 'ember-qunit'; +import start from 'ember-exam/test-support/start'; import { useNativeEvents } from 'ember-cli-page-object/extend'; useNativeEvents(); diff --git a/ui/yarn.lock b/ui/yarn.lock index de86220c420e..739b5ba96726 100644 --- a/ui/yarn.lock +++ b/ui/yarn.lock @@ -9313,7 +9313,7 @@ ember-element-helper@^0.3.2: ember-cli-htmlbars "^5.1.0" ember-compatibility-helpers "^1.2.1" -ember-exam@^6.1.0: +ember-exam@6.1.0: version "6.1.0" resolved "https://registry.yarnpkg.com/ember-exam/-/ember-exam-6.1.0.tgz#1ea2c0ece27ac8ad6a80d959b1c207611b7dfdd7" integrity sha512-H9tg7eUgqkjAsr1/15UzxGyZobGLgsyTi56Ng0ySnkYGCRfvVpwtVc3xgcNOFnUaa9RExUFpxC0adjW3K87Uxw== From 9863aa45e02e4ab638e60864cceac5d0926fbe8c Mon Sep 17 00:00:00 2001 From: Jai Bhagat Date: Wed, 26 Jan 2022 13:36:26 -0500 Subject: [PATCH 07/33] ui: add local testing script --- ui/package.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ui/package.json b/ui/package.json index ebfae7f1f512..6bf13e25e36f 100644 --- a/ui/package.json +++ b/ui/package.json @@ -20,7 +20,8 @@ "build-storybook": "STORYBOOK=true ember build && build-storybook -s dist", "storybook": "STORYBOOK=true start-storybook -p 6006 -s dist", "test": "npm-run-all lint:* test:*", - "test:ember": "ember test --server --query=dockcontainer" + "test:ember": "ember test --server --query=dockcontainer", + "test:local": "ember exam --server --load-balance --parallel=8" }, "husky": { "hooks": { @@ -83,7 +84,7 @@ "ember-data": "~3.24", "ember-data-model-fragments": "5.0.0-beta.2", "ember-decorators": "^6.1.1", - "ember-exam": "^6.1.0", + "ember-exam": "6.1.0", "ember-export-application-global": "^2.0.1", "ember-fetch": "^8.0.2", "ember-inflector": "^3.0.0", From 7f5e0b8256131fd6110cff1b026b5be97b848546 Mon Sep 17 00:00:00 2001 From: Jai Bhagat Date: Thu, 27 Jan 2022 09:19:03 -0500 Subject: [PATCH 08/33] fix: differentiate commands for circleci and local use --- ui/package.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ui/package.json 
b/ui/package.json index 6bf13e25e36f..e7f5845be2bb 100644 --- a/ui/package.json +++ b/ui/package.json @@ -20,8 +20,9 @@ "build-storybook": "STORYBOOK=true ember build && build-storybook -s dist", "storybook": "STORYBOOK=true start-storybook -p 6006 -s dist", "test": "npm-run-all lint:* test:*", - "test:ember": "ember test --server --query=dockcontainer", - "test:local": "ember exam --server --load-balance --parallel=8" + "test:ember": "ember test", + "local:qunitdom": "ember test --server --query=dockcontainer", + "local:exam": "ember exam --server --load-balance --parallel=4" }, "husky": { "hooks": { From 87d54b8c21617d6186e20543e8709c1ff4d84de5 Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Wed, 26 Jan 2022 06:19:39 -0600 Subject: [PATCH 09/33] client: change test to not poke cgroupv2 edge case This PR tweaks the TestCpusetManager_AddAlloc unit test to not break when being run on a machine using cgroupsv2. The behavior of writing an empty cpuset.cpu changes in cgroupv2, where such a group now inherits the value of its parent group, rather than remaining empty. The test in question was written such that a task would consume all available cores shared on an alloc, causing the empty set to be written to the shared group, which works fine on cgroupsv1 but breaks on cgroupsv2. By adjusting the test to consume only 1 core instead of all cores, it no longer triggers that edge case. The actual fix for the new cgroupsv2 behavior will be in #11933 --- .../lib/cgutil/cpuset_manager_linux_test.go | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/client/lib/cgutil/cpuset_manager_linux_test.go b/client/lib/cgutil/cpuset_manager_linux_test.go index c2ed95a55c98..e2bb00679c36 100644 --- a/client/lib/cgutil/cpuset_manager_linux_test.go +++ b/client/lib/cgutil/cpuset_manager_linux_test.go @@ -57,27 +57,31 @@ func TestCpusetManager_Init(t *testing.T) { require.DirExists(t, filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName)) } -func TestCpusetManager_AddAlloc(t *testing.T) { +func TestCpusetManager_AddAlloc_single(t *testing.T) { manager, cleanup := tmpCpusetManager(t) defer cleanup() require.NoError(t, manager.Init()) alloc := mock.Alloc() - alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores = manager.parentCpuset.ToSlice() + // reserve just one core (the 0th core, which probably exists) + alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores = cpuset.New(0).ToSlice() manager.AddAlloc(alloc) + // force reconcile manager.reconcileCpusets() - // check that no more cores exist in the shared cgroup + // check that the 0th core is no longer available in the shared group + // actual contents of shared group depends on machine core count require.DirExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName)) require.FileExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) sharedCpusRaw, err := ioutil.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) require.NoError(t, err) sharedCpus, err := cpuset.Parse(string(sharedCpusRaw)) require.NoError(t, err) - require.Empty(t, sharedCpus.ToSlice()) + require.NotEmpty(t, sharedCpus.ToSlice()) + require.NotContains(t, sharedCpus.ToSlice(), uint16(0)) - // check that all cores are allocated to reserved cgroup + // check that the 0th core is allocated to reserved cgroup require.DirExists(t, filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName)) reservedCpusRaw, err := 
ioutil.ReadFile(filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName, "cpuset.cpus")) require.NoError(t, err) @@ -100,6 +104,17 @@ func TestCpusetManager_AddAlloc(t *testing.T) { require.Exactly(t, alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores, taskCpus.ToSlice()) } +func TestCpusetManager_AddAlloc_subset(t *testing.T) { + t.Skip("todo: add test for #11933") +} + +func TestCpusetManager_AddAlloc_all(t *testing.T) { + // cgroupsv2 changes behavior of writing empty cpuset.cpu, which is what + // happens to the /shared group when one or more allocs consume all available + // cores. + t.Skip("todo: add test for #11933") +} + func TestCpusetManager_RemoveAlloc(t *testing.T) { manager, cleanup := tmpCpusetManager(t) defer cleanup() From d0624fc09f5acf536e87db15dce154575c96daa7 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 27 Jan 2022 09:30:03 -0500 Subject: [PATCH 10/33] CSI: resolve invalid claim states (#11890) * csi: resolve invalid claim states on read It's currently possible for CSI volumes to be claimed by allocations that no longer exist. This changeset asserts a reasonable state at the state store level by registering these nil allocations as "past claims" on any read. This will cause any pass through the periodic GC or volumewatcher to trigger the unpublishing workflow for those claims. * csi: make feasibility check errors more understandable When the feasibility checker finds we have no free write claims, it checks to see if any of those claims are for the job we're currently scheduling (so that earlier versions of a job can't block claims for new versions) and reports a conflict if the volume can't be scheduled so that the user can fix their claims. But when the checker hits a claim that has a GCd allocation, the state is recoverable by the server once claim reaping completes and no user intervention is required; the blocked eval should complete. Differentiate the scheduler error produced by these two conditions. --- .changelog/11890.txt | 3 + nomad/core_sched.go | 4 + nomad/core_sched_test.go | 55 +++++- nomad/state/state_store.go | 26 +++ nomad/state/testing.go | 187 ++++++++++++++++++++ nomad/volumewatcher/volume_watcher_test.go | 29 +++ nomad/volumewatcher/volumes_watcher_test.go | 7 +- scheduler/feasible.go | 38 ++-- 8 files changed, 329 insertions(+), 20 deletions(-) create mode 100644 .changelog/11890.txt diff --git a/.changelog/11890.txt b/.changelog/11890.txt new file mode 100644 index 000000000000..1074aa29cb44 --- /dev/null +++ b/.changelog/11890.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed a bug where garbage collected allocations could block new claims on a volume +``` diff --git a/nomad/core_sched.go b/nomad/core_sched.go index cffb6114ba20..4306783efcb8 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -782,6 +782,10 @@ NEXT_VOLUME: continue } + // TODO(tgross): consider moving the TerminalStatus check into + // the denormalize volume logic so that we can just check the + // volume for past claims + // we only call the claim release RPC if the volume has claims // that no longer have valid allocations. otherwise we'd send // out a lot of do-nothing RPCs. 
diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index a19d4395b07d..095975a31f66 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -2383,19 +2383,64 @@ func TestCoreScheduler_CSIVolumeClaimGC(t *testing.T) { c := core.(*CoreScheduler) require.NoError(c.csiVolumeClaimGC(gc)) - // the volumewatcher will hit an error here because there's no - // path to the node. but we can't update the claim to bypass the - // client RPCs without triggering the volumewatcher's normal code - // path. + // TODO(tgross): the condition below means this test doesn't tell + // us much; ideally we should be intercepting the claim request + // and verifying that we send the expected claims but we don't + // have test infra in place to do that for server RPCs + + // sending the GC claim will trigger the volumewatcher's normal + // code path. but the volumewatcher will hit an error here + // because there's no path to the node, so we shouldn't see + // the WriteClaims removed require.Eventually(func() bool { vol, _ := state.CSIVolumeByID(ws, ns, volID) return len(vol.WriteClaims) == 1 && len(vol.WriteAllocs) == 1 && - len(vol.PastClaims) == 0 + len(vol.PastClaims) == 1 }, time.Second*1, 10*time.Millisecond, "claims were released unexpectedly") } +func TestCoreScheduler_CSIBadState_ClaimGC(t *testing.T) { + t.Parallel() + require := require.New(t) + + srv, shutdown := TestServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + + defer shutdown() + testutil.WaitForLeader(t, srv.RPC) + + err := state.TestBadCSIState(t, srv.State()) + require.NoError(err) + + snap, err := srv.State().Snapshot() + require.NoError(err) + core := NewCoreScheduler(srv, snap) + + index, _ := srv.State().LatestIndex() + index++ + gc := srv.coreJobEval(structs.CoreJobForceGC, index) + c := core.(*CoreScheduler) + require.NoError(c.csiVolumeClaimGC(gc)) + + require.Eventually(func() bool { + vol, _ := srv.State().CSIVolumeByID(nil, + structs.DefaultNamespace, "csi-volume-nfs0") + if len(vol.PastClaims) != 2 { + return false + } + for _, claim := range vol.PastClaims { + if claim.State != structs.CSIVolumeClaimStateUnpublishing { + return false + } + } + return true + }, time.Second*1, 10*time.Millisecond, "invalid claims should be marked for GC") + +} + func TestCoreScheduler_FailLoop(t *testing.T) { t.Parallel() require := require.New(t) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 7d8decfdd798..7551ad045354 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -2513,6 +2513,18 @@ func (s *StateStore) CSIVolumeDenormalizeTxn(txn Txn, ws memdb.WatchSet, vol *st State: structs.CSIVolumeClaimStateTaken, } } + } else if _, ok := vol.PastClaims[id]; !ok { + // ensure that any allocs that have been GC'd since + // our last read are marked as past claims + vol.PastClaims[id] = &structs.CSIVolumeClaim{ + AllocationID: id, + Mode: structs.CSIVolumeClaimRead, + State: structs.CSIVolumeClaimStateUnpublishing, + } + readClaim := vol.ReadClaims[id] + if readClaim != nil { + vol.PastClaims[id].NodeID = readClaim.NodeID + } } } @@ -2531,6 +2543,20 @@ func (s *StateStore) CSIVolumeDenormalizeTxn(txn Txn, ws memdb.WatchSet, vol *st State: structs.CSIVolumeClaimStateTaken, } } + } else if _, ok := vol.PastClaims[id]; !ok { + // ensure that any allocs that have been GC'd since + // our last read are marked as past claims + + vol.PastClaims[id] = &structs.CSIVolumeClaim{ + AllocationID: id, + Mode: structs.CSIVolumeClaimWrite, + State: 
structs.CSIVolumeClaimStateUnpublishing, + } + writeClaim := vol.WriteClaims[id] + if writeClaim != nil { + vol.PastClaims[id].NodeID = writeClaim.NodeID + } + } } diff --git a/nomad/state/testing.go b/nomad/state/testing.go index 460df609773c..0d570a95ade4 100644 --- a/nomad/state/testing.go +++ b/nomad/state/testing.go @@ -1,7 +1,9 @@ package state import ( + "math" "testing" + "time" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/helper/uuid" @@ -124,3 +126,188 @@ func createTestCSIPlugin(s *StateStore, id string, requiresController bool) func s.DeleteNode(structs.MsgTypeTestSetup, index, ids) } } + +func TestBadCSIState(t testing.TB, store *StateStore) error { + + pluginID := "org.democratic-csi.nfs" + + controllerInfo := func(isHealthy bool) map[string]*structs.CSIInfo { + desc := "healthy" + if !isHealthy { + desc = "failed fingerprinting with error" + } + return map[string]*structs.CSIInfo{ + pluginID: { + PluginID: pluginID, + AllocID: uuid.Generate(), + Healthy: isHealthy, + HealthDescription: desc, + RequiresControllerPlugin: true, + ControllerInfo: &structs.CSIControllerInfo{ + SupportsReadOnlyAttach: true, + SupportsAttachDetach: true, + }, + }, + } + } + + nodeInfo := func(nodeName string, isHealthy bool) map[string]*structs.CSIInfo { + desc := "healthy" + if !isHealthy { + desc = "failed fingerprinting with error" + } + return map[string]*structs.CSIInfo{ + pluginID: { + PluginID: pluginID, + AllocID: uuid.Generate(), + Healthy: isHealthy, + HealthDescription: desc, + RequiresControllerPlugin: true, + NodeInfo: &structs.CSINodeInfo{ + ID: nodeName, + MaxVolumes: math.MaxInt64, + RequiresNodeStageVolume: true, + }, + }, + } + } + + nodes := make([]*structs.Node, 3) + for i := range nodes { + n := mock.Node() + n.Attributes["nomad.version"] = "1.2.4" + nodes[i] = n + } + + nodes[0].CSIControllerPlugins = controllerInfo(true) + nodes[0].CSINodePlugins = nodeInfo("nomad-client0", true) + + drainID := uuid.Generate() + + // drained node + nodes[1].CSIControllerPlugins = controllerInfo(false) + nodes[1].CSINodePlugins = nodeInfo("nomad-client1", false) + + nodes[1].LastDrain = &structs.DrainMetadata{ + StartedAt: time.Now().Add(-10 * time.Minute), + UpdatedAt: time.Now().Add(-30 * time.Second), + Status: structs.DrainStatusComplete, + AccessorID: drainID, + } + nodes[1].SchedulingEligibility = structs.NodeSchedulingIneligible + + // previously drained but now eligible + nodes[2].CSIControllerPlugins = controllerInfo(true) + nodes[2].CSINodePlugins = nodeInfo("nomad-client2", true) + nodes[2].LastDrain = &structs.DrainMetadata{ + StartedAt: time.Now().Add(-15 * time.Minute), + UpdatedAt: time.Now().Add(-5 * time.Minute), + Status: structs.DrainStatusComplete, + AccessorID: drainID, + } + nodes[2].SchedulingEligibility = structs.NodeSchedulingEligible + + // Insert nodes into the state store + index := uint64(999) + for _, n := range nodes { + index++ + err := store.UpsertNode(structs.MsgTypeTestSetup, index, n) + if err != nil { + return err + } + } + + allocID0 := uuid.Generate() // nil alloc + allocID2 := uuid.Generate() // nil alloc + + alloc1 := mock.Alloc() + alloc1.ClientStatus = "complete" + alloc1.DesiredStatus = "stop" + + // Insert allocs into the state store + err := store.UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc1}) + if err != nil { + return err + } + + vol := &structs.CSIVolume{ + ID: "csi-volume-nfs0", + Name: "csi-volume-nfs0", + ExternalID: "csi-volume-nfs0", + Namespace: "default", + AccessMode: 
structs.CSIVolumeAccessModeSingleNodeWriter, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + MountOptions: &structs.CSIMountOptions{ + MountFlags: []string{"noatime"}, + }, + Context: map[string]string{ + "node_attach_driver": "nfs", + "provisioner_driver": "nfs-client", + "server": "192.168.56.69", + }, + Capacity: 0, + RequestedCapacityMin: 107374182, + RequestedCapacityMax: 107374182, + RequestedCapabilities: []*structs.CSIVolumeCapability{ + { + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + AccessMode: structs.CSIVolumeAccessModeMultiNodeMultiWriter, + }, + { + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + AccessMode: structs.CSIVolumeAccessModeSingleNodeWriter, + }, + { + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + AccessMode: structs.CSIVolumeAccessModeSingleNodeReader, + }, + }, + WriteAllocs: map[string]*structs.Allocation{ + allocID0: nil, + alloc1.ID: nil, + allocID2: nil, + }, + WriteClaims: map[string]*structs.CSIVolumeClaim{ + allocID0: { + AllocationID: allocID0, + NodeID: nodes[0].ID, + Mode: structs.CSIVolumeClaimWrite, + AccessMode: structs.CSIVolumeAccessModeSingleNodeWriter, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + State: structs.CSIVolumeClaimStateTaken, + }, + alloc1.ID: { + AllocationID: alloc1.ID, + NodeID: nodes[1].ID, + Mode: structs.CSIVolumeClaimWrite, + AccessMode: structs.CSIVolumeAccessModeMultiNodeMultiWriter, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + State: structs.CSIVolumeClaimStateTaken, + }, + allocID2: { + AllocationID: allocID2, + NodeID: nodes[2].ID, + Mode: structs.CSIVolumeClaimWrite, + AccessMode: structs.CSIVolumeAccessModeMultiNodeMultiWriter, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + State: structs.CSIVolumeClaimStateTaken, + }, + }, + Schedulable: true, + PluginID: pluginID, + Provider: pluginID, + ProviderVersion: "1.4.3", + ControllerRequired: true, + ControllersHealthy: 2, + ControllersExpected: 2, + NodesHealthy: 2, + NodesExpected: 0, + } + + err = store.CSIVolumeRegister(index, []*structs.CSIVolume{vol}) + if err != nil { + return err + } + + return nil +} diff --git a/nomad/volumewatcher/volume_watcher_test.go b/nomad/volumewatcher/volume_watcher_test.go index 848ca58b925a..4e8a556a49c2 100644 --- a/nomad/volumewatcher/volume_watcher_test.go +++ b/nomad/volumewatcher/volume_watcher_test.go @@ -75,3 +75,32 @@ func TestVolumeWatch_Reap(t *testing.T) { require.NoError(err) require.Len(vol.PastClaims, 2) // alloc claim + GC claim } + +func TestVolumeReapBadState(t *testing.T) { + + store := state.TestStateStore(t) + err := state.TestBadCSIState(t, store) + require.NoError(t, err) + srv := &MockRPCServer{ + state: store, + } + + vol, err := srv.state.CSIVolumeByID(nil, + structs.DefaultNamespace, "csi-volume-nfs0") + require.NoError(t, err) + srv.state.CSIVolumeDenormalize(nil, vol) + + ctx, exitFn := context.WithCancel(context.Background()) + w := &volumeWatcher{ + v: vol, + rpc: srv, + state: srv.State(), + ctx: ctx, + exitFn: exitFn, + logger: testlog.HCLogger(t), + } + + err = w.volumeReapImpl(vol) + require.NoError(t, err) + require.Equal(t, 2, srv.countCSIUnpublish) +} diff --git a/nomad/volumewatcher/volumes_watcher_test.go b/nomad/volumewatcher/volumes_watcher_test.go index 7f0365be3518..2271c2f2036d 100644 --- a/nomad/volumewatcher/volumes_watcher_test.go +++ b/nomad/volumewatcher/volumes_watcher_test.go @@ -70,10 +70,15 @@ func TestVolumeWatch_LeadershipTransition(t *testing.T) { alloc.ClientStatus = 
structs.AllocClientStatusComplete vol := testVolume(plugin, alloc, node.ID) + index++ + err := srv.State().UpsertAllocs(structs.MsgTypeTestSetup, index, + []*structs.Allocation{alloc}) + require.NoError(err) + watcher.SetEnabled(true, srv.State(), "") index++ - err := srv.State().CSIVolumeRegister(index, []*structs.CSIVolume{vol}) + err = srv.State().CSIVolumeRegister(index, []*structs.CSIVolume{vol}) require.NoError(err) // we should get or start up a watcher when we get an update for diff --git a/scheduler/feasible.go b/scheduler/feasible.go index 206544249653..3b10331c5c88 100644 --- a/scheduler/feasible.go +++ b/scheduler/feasible.go @@ -15,17 +15,18 @@ import ( ) const ( - FilterConstraintHostVolumes = "missing compatible host volumes" - FilterConstraintCSIPluginTemplate = "CSI plugin %s is missing from client %s" - FilterConstraintCSIPluginUnhealthyTemplate = "CSI plugin %s is unhealthy on client %s" - FilterConstraintCSIPluginMaxVolumesTemplate = "CSI plugin %s has the maximum number of volumes on client %s" - FilterConstraintCSIVolumesLookupFailed = "CSI volume lookup failed" - FilterConstraintCSIVolumeNotFoundTemplate = "missing CSI Volume %s" - FilterConstraintCSIVolumeNoReadTemplate = "CSI volume %s is unschedulable or has exhausted its available reader claims" - FilterConstraintCSIVolumeNoWriteTemplate = "CSI volume %s is unschedulable or is read-only" - FilterConstraintCSIVolumeInUseTemplate = "CSI volume %s has exhausted its available writer claims" // - FilterConstraintDrivers = "missing drivers" - FilterConstraintDevices = "missing devices" + FilterConstraintHostVolumes = "missing compatible host volumes" + FilterConstraintCSIPluginTemplate = "CSI plugin %s is missing from client %s" + FilterConstraintCSIPluginUnhealthyTemplate = "CSI plugin %s is unhealthy on client %s" + FilterConstraintCSIPluginMaxVolumesTemplate = "CSI plugin %s has the maximum number of volumes on client %s" + FilterConstraintCSIVolumesLookupFailed = "CSI volume lookup failed" + FilterConstraintCSIVolumeNotFoundTemplate = "missing CSI Volume %s" + FilterConstraintCSIVolumeNoReadTemplate = "CSI volume %s is unschedulable or has exhausted its available reader claims" + FilterConstraintCSIVolumeNoWriteTemplate = "CSI volume %s is unschedulable or is read-only" + FilterConstraintCSIVolumeInUseTemplate = "CSI volume %s has exhausted its available writer claims" + FilterConstraintCSIVolumeGCdAllocationTemplate = "CSI volume %s has exhausted its available writer claims and is claimed by a garbage collected allocation %s; waiting for claim to be released" + FilterConstraintDrivers = "missing drivers" + FilterConstraintDevices = "missing devices" ) var ( @@ -320,11 +321,20 @@ func (c *CSIVolumeChecker) isFeasible(n *structs.Node) (bool, string) { return false, fmt.Sprintf(FilterConstraintCSIVolumeNoWriteTemplate, vol.ID) } if !vol.WriteFreeClaims() { - // Check the blocking allocations to see if they belong to this job for id := range vol.WriteAllocs { a, err := c.ctx.State().AllocByID(ws, id) - if err != nil || a == nil || - a.Namespace != c.namespace || a.JobID != c.jobID { + // the alloc for this blocking claim has been + // garbage collected but the volumewatcher hasn't + // finished releasing the claim (and possibly + // detaching the volume), so we need to block + // until it can be scheduled + if err != nil || a == nil { + return false, fmt.Sprintf( + FilterConstraintCSIVolumeGCdAllocationTemplate, vol.ID, id) + } else if a.Namespace != c.namespace || a.JobID != c.jobID { + // the blocking claim is for 
another live job + // so it's legitimately blocking more write + // claims return false, fmt.Sprintf( FilterConstraintCSIVolumeInUseTemplate, vol.ID) } From b588a7bd73b5c0f8607ba70b7ecd6f338e4ff9b6 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 27 Jan 2022 10:05:41 -0500 Subject: [PATCH 11/33] csi: ensure that PastClaims are populated with correct mode (#11932) In the client's `(*csiHook) Postrun()` method, we make an unpublish RPC that includes a claim in the `CSIVolumeClaimStateUnpublishing` state and using the mode from the client. But then in the `(*CSIVolume) Unpublish` RPC handler, we query the volume from the state store (because we only get an ID from the client). And when we make the client RPC for the node unpublish step, we use the _current volume's_ view of the mode. If the volume's mode has been changed before the old allocations can have their claims released, then we end up making a CSI RPC that will never succeed. Why does this code path get the mode from the volume and not the claim? Because the claim written by the GC job in `(*CoreScheduler) csiVolumeClaimGC` doesn't have a mode. Instead it just writes a claim in the unpublishing state to ensure the volumewatcher detects a "past claim" change and reaps all the claims on the volumes. Fix this by ensuring that the `CSIVolumeDenormalize` creates past claims for all nil allocations with a correct access mode set. --- nomad/state/state_store.go | 136 ++++++++++++++++++------------------- nomad/state/testing.go | 5 +- 2 files changed, 71 insertions(+), 70 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 7551ad045354..952730168870 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -2194,7 +2194,7 @@ func (s *StateStore) CSIVolumeByID(ws memdb.WatchSet, namespace, id string) (*st // we return the volume with the plugins denormalized by default, // because the scheduler needs them for feasibility checking - return s.CSIVolumeDenormalizePluginsTxn(txn, vol.Copy()) + return s.csiVolumeDenormalizePluginsTxn(txn, vol.Copy()) } // CSIVolumesByPluginID looks up csi_volumes by pluginID. Caller should @@ -2326,11 +2326,11 @@ func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, claim *s } } - volume, err := s.CSIVolumeDenormalizePluginsTxn(txn, orig.Copy()) + volume, err := s.csiVolumeDenormalizePluginsTxn(txn, orig.Copy()) if err != nil { return err } - volume, err = s.CSIVolumeDenormalizeTxn(txn, nil, volume) + volume, err = s.csiVolumeDenormalizeTxn(txn, nil, volume) if err != nil { return err } @@ -2414,7 +2414,7 @@ func (s *StateStore) CSIVolumeDeregister(index uint64, namespace string, ids []s // volSafeToForce checks if the any of the remaining allocations // are in a non-terminal state. func (s *StateStore) volSafeToForce(txn Txn, v *structs.CSIVolume) bool { - vol, err := s.CSIVolumeDenormalizeTxn(txn, nil, v) + vol, err := s.csiVolumeDenormalizeTxn(txn, nil, v) if err != nil { return false } @@ -2443,15 +2443,12 @@ func (s *StateStore) CSIVolumeDenormalizePlugins(ws memdb.WatchSet, vol *structs } txn := s.db.ReadTxn() defer txn.Abort() - return s.CSIVolumeDenormalizePluginsTxn(txn, vol) + return s.csiVolumeDenormalizePluginsTxn(txn, vol) } -// CSIVolumeDenormalizePluginsTxn returns a CSIVolume with current health and -// plugins, but without allocations. -// Use this for current volume metadata, handling lists of volumes. -// Use CSIVolumeDenormalize for volumes containing both health and current -// allocations. 
-func (s *StateStore) CSIVolumeDenormalizePluginsTxn(txn Txn, vol *structs.CSIVolume) (*structs.CSIVolume, error) { +// csiVolumeDenormalizePluginsTxn implements +// CSIVolumeDenormalizePlugins, inside a transaction. +func (s *StateStore) csiVolumeDenormalizePluginsTxn(txn Txn, vol *structs.CSIVolume) (*structs.CSIVolume, error) { if vol == nil { return nil, nil } @@ -2484,80 +2481,83 @@ func (s *StateStore) CSIVolumeDenormalizePluginsTxn(txn Txn, vol *structs.CSIVol return vol, nil } -// CSIVolumeDenormalize returns a CSIVolume with allocations +// CSIVolumeDenormalize returns a CSIVolume with its current +// Allocations and Claims, including creating new PastClaims for +// terminal or garbage collected allocations. This ensures we have a +// consistent state. Note that it mutates the original volume and so +// should always be called on a Copy after reading from the state +// store. func (s *StateStore) CSIVolumeDenormalize(ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { txn := s.db.ReadTxn() - return s.CSIVolumeDenormalizeTxn(txn, ws, vol) + return s.csiVolumeDenormalizeTxn(txn, ws, vol) } -// CSIVolumeDenormalizeTxn populates a CSIVolume with allocations -func (s *StateStore) CSIVolumeDenormalizeTxn(txn Txn, ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { +// csiVolumeDenormalizeTxn implements CSIVolumeDenormalize inside a transaction +func (s *StateStore) csiVolumeDenormalizeTxn(txn Txn, ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { if vol == nil { return nil, nil } - for id := range vol.ReadAllocs { - a, err := s.allocByIDImpl(txn, ws, id) - if err != nil { - return nil, err - } - if a != nil { - vol.ReadAllocs[id] = a - // COMPAT(1.0): the CSIVolumeClaim fields were added - // after 0.11.1, so claims made before that may be - // missing this value. (same for WriteAlloc below) - if _, ok := vol.ReadClaims[id]; !ok { - vol.ReadClaims[id] = &structs.CSIVolumeClaim{ + + // note: denormalize mutates the maps we pass in! + denormalize := func( + currentAllocs map[string]*structs.Allocation, + currentClaims, pastClaims map[string]*structs.CSIVolumeClaim, + fallbackMode structs.CSIVolumeClaimMode) error { + + for id := range currentAllocs { + a, err := s.allocByIDImpl(txn, ws, id) + if err != nil { + return err + } + pastClaim := pastClaims[id] + currentClaim := currentClaims[id] + if currentClaim == nil { + // COMPAT(1.4.0): the CSIVolumeClaim fields were added + // after 0.11.1, so claims made before that may be + // missing this value. No clusters should see this + // anymore, so warn nosily in the logs so that + // operators ask us about it. 
Remove this block and + // the now-unused fallbackMode parameter, and return + // an error if currentClaim is nil in 1.4.0 + s.logger.Warn("volume was missing claim for allocation", + "volume_id", vol.ID, "alloc", id) + currentClaim = &structs.CSIVolumeClaim{ AllocationID: a.ID, NodeID: a.NodeID, - Mode: structs.CSIVolumeClaimRead, + Mode: fallbackMode, State: structs.CSIVolumeClaimStateTaken, } + currentClaims[id] = currentClaim } - } else if _, ok := vol.PastClaims[id]; !ok { - // ensure that any allocs that have been GC'd since - // our last read are marked as past claims - vol.PastClaims[id] = &structs.CSIVolumeClaim{ - AllocationID: id, - Mode: structs.CSIVolumeClaimRead, - State: structs.CSIVolumeClaimStateUnpublishing, - } - readClaim := vol.ReadClaims[id] - if readClaim != nil { - vol.PastClaims[id].NodeID = readClaim.NodeID - } - } - } - for id := range vol.WriteAllocs { - a, err := s.allocByIDImpl(txn, ws, id) - if err != nil { - return nil, err - } - if a != nil { - vol.WriteAllocs[id] = a - if _, ok := vol.WriteClaims[id]; !ok { - vol.WriteClaims[id] = &structs.CSIVolumeClaim{ - AllocationID: a.ID, - NodeID: a.NodeID, - Mode: structs.CSIVolumeClaimWrite, - State: structs.CSIVolumeClaimStateTaken, + currentAllocs[id] = a + if a == nil && pastClaim == nil { + // the alloc is garbage collected but nothing has written a PastClaim, + // so create one now + pastClaim = &structs.CSIVolumeClaim{ + AllocationID: id, + NodeID: currentClaim.NodeID, + Mode: currentClaim.Mode, + State: structs.CSIVolumeClaimStateUnpublishing, + AccessMode: currentClaim.AccessMode, + AttachmentMode: currentClaim.AttachmentMode, } - } - } else if _, ok := vol.PastClaims[id]; !ok { - // ensure that any allocs that have been GC'd since - // our last read are marked as past claims - - vol.PastClaims[id] = &structs.CSIVolumeClaim{ - AllocationID: id, - Mode: structs.CSIVolumeClaimWrite, - State: structs.CSIVolumeClaimStateUnpublishing, - } - writeClaim := vol.WriteClaims[id] - if writeClaim != nil { - vol.PastClaims[id].NodeID = writeClaim.NodeID + pastClaims[id] = pastClaim } } + return nil + } + + err := denormalize(vol.ReadAllocs, vol.ReadClaims, vol.PastClaims, + structs.CSIVolumeClaimRead) + if err != nil { + return nil, err + } + err = denormalize(vol.WriteAllocs, vol.WriteClaims, vol.PastClaims, + structs.CSIVolumeClaimWrite) + if err != nil { + return nil, err } // COMPAT: the AccessMode and AttachmentMode fields were added to claims diff --git a/nomad/state/testing.go b/nomad/state/testing.go index 0d570a95ade4..c7a2f3e8e27c 100644 --- a/nomad/state/testing.go +++ b/nomad/state/testing.go @@ -221,8 +221,8 @@ func TestBadCSIState(t testing.TB, store *StateStore) error { allocID2 := uuid.Generate() // nil alloc alloc1 := mock.Alloc() - alloc1.ClientStatus = "complete" - alloc1.DesiredStatus = "stop" + alloc1.ClientStatus = structs.AllocClientStatusRunning + alloc1.DesiredStatus = structs.AllocDesiredStatusRun // Insert allocs into the state store err := store.UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc1}) @@ -303,6 +303,7 @@ func TestBadCSIState(t testing.TB, store *StateStore) error { NodesHealthy: 2, NodesExpected: 0, } + vol = vol.Copy() // canonicalize err = store.CSIVolumeRegister(index, []*structs.CSIVolume{vol}) if err != nil { From 2e3571634e144a3a438d2668b01d96a0cefa0a00 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 27 Jan 2022 10:39:08 -0500 Subject: [PATCH 12/33] CSI: move terminal alloc handling into denormalization (#11931) * The volume claim GC method and 
volumewatcher both have logic collecting terminal allocations that duplicates most of the logic that's now in the state store's `CSIVolumeDenormalize` method. Copy this logic into the state store so that all code paths have the same view of the past claims. * Remove logic in the volume claim GC that now lives in the state store's `CSIVolumeDenormalize` method. * Remove logic in the volumewatcher that now lives in the state store's `CSIVolumeDenormalize` method. * Remove logic in the node unpublish RPC that now lives in the state store's `CSIVolumeDenormalize` method. --- nomad/core_sched.go | 33 ++---------------- nomad/csi_endpoint.go | 38 +++++++-------------- nomad/state/state_store.go | 2 +- nomad/volumewatcher/volume_watcher.go | 15 ++------ nomad/volumewatcher/volume_watcher_test.go | 4 +++ nomad/volumewatcher/volumes_watcher_test.go | 2 +- 6 files changed, 24 insertions(+), 70 deletions(-) diff --git a/nomad/core_sched.go b/nomad/core_sched.go index 4306783efcb8..f6aa3c112812 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -773,7 +773,6 @@ func (c *CoreScheduler) csiVolumeClaimGC(eval *structs.Evaluation) error { "index", oldThreshold, "csi_volume_claim_gc_threshold", c.srv.config.CSIVolumeClaimGCThreshold) -NEXT_VOLUME: for i := iter.Next(); i != nil; i = iter.Next() { vol := i.(*structs.CSIVolume) @@ -782,38 +781,12 @@ NEXT_VOLUME: continue } - // TODO(tgross): consider moving the TerminalStatus check into - // the denormalize volume logic so that we can just check the - // volume for past claims - // we only call the claim release RPC if the volume has claims // that no longer have valid allocations. otherwise we'd send // out a lot of do-nothing RPCs. - for id := range vol.ReadClaims { - alloc, err := c.snap.AllocByID(ws, id) - if err != nil { - return err - } - if alloc == nil || alloc.TerminalStatus() { - err = gcClaims(vol.Namespace, vol.ID) - if err != nil { - return err - } - goto NEXT_VOLUME - } - } - for id := range vol.WriteClaims { - alloc, err := c.snap.AllocByID(ws, id) - if err != nil { - return err - } - if alloc == nil || alloc.TerminalStatus() { - err = gcClaims(vol.Namespace, vol.ID) - if err != nil { - return err - } - goto NEXT_VOLUME - } + vol, err := c.snap.CSIVolumeDenormalize(ws, vol) + if err != nil { + return err } if len(vol.PastClaims) > 0 { err = gcClaims(vol.Namespace, vol.ID) diff --git a/nomad/csi_endpoint.go b/nomad/csi_endpoint.go index fea730dfc20b..ac63b8fa5659 100644 --- a/nomad/csi_endpoint.go +++ b/nomad/csi_endpoint.go @@ -615,39 +615,25 @@ func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.C return v.checkpointClaim(vol, claim) } - // The RPC sent from the 'nomad node detach' command won't have an + // The RPC sent from the 'nomad node detach' command or GC won't have an // allocation ID set so we try to unpublish every terminal or invalid - // alloc on the node - allocIDs := []string{} + // alloc on the node, all of which will be in PastClaims after denormalizing state := v.srv.fsm.State() vol, err := state.CSIVolumeDenormalize(memdb.NewWatchSet(), vol) if err != nil { return err } - for allocID, alloc := range vol.ReadAllocs { - if alloc == nil { - rclaim, ok := vol.ReadClaims[allocID] - if ok && rclaim.NodeID == claim.NodeID { - allocIDs = append(allocIDs, allocID) - } - } else if alloc.NodeID == claim.NodeID && alloc.TerminalStatus() { - allocIDs = append(allocIDs, allocID) - } - } - for allocID, alloc := range vol.WriteAllocs { - if alloc == nil { - wclaim, ok := vol.WriteClaims[allocID] - if 
ok && wclaim.NodeID == claim.NodeID { - allocIDs = append(allocIDs, allocID) - } - } else if alloc.NodeID == claim.NodeID && alloc.TerminalStatus() { - allocIDs = append(allocIDs, allocID) + + claimsToUnpublish := []*structs.CSIVolumeClaim{} + for _, pastClaim := range vol.PastClaims { + if claim.NodeID == pastClaim.NodeID { + claimsToUnpublish = append(claimsToUnpublish, pastClaim) } } + var merr multierror.Error - for _, allocID := range allocIDs { - claim.AllocationID = allocID - err := v.nodeUnpublishVolumeImpl(vol, claim) + for _, pastClaim := range claimsToUnpublish { + err := v.nodeUnpublishVolumeImpl(vol, pastClaim) if err != nil { merr.Errors = append(merr.Errors, err) } @@ -668,8 +654,8 @@ func (v *CSIVolume) nodeUnpublishVolumeImpl(vol *structs.CSIVolume, claim *struc ExternalID: vol.RemoteID(), AllocID: claim.AllocationID, NodeID: claim.NodeID, - AttachmentMode: vol.AttachmentMode, - AccessMode: vol.AccessMode, + AttachmentMode: claim.AttachmentMode, + AccessMode: claim.AccessMode, ReadOnly: claim.Mode == structs.CSIVolumeClaimRead, } err := v.srv.RPC("ClientCSI.NodeDetachVolume", diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 952730168870..f40d44e936c6 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -2531,7 +2531,7 @@ func (s *StateStore) csiVolumeDenormalizeTxn(txn Txn, ws memdb.WatchSet, vol *st } currentAllocs[id] = a - if a == nil && pastClaim == nil { + if (a == nil || a.TerminalStatus()) && pastClaim == nil { // the alloc is garbage collected but nothing has written a PastClaim, // so create one now pastClaim = &structs.CSIVolumeClaim{ diff --git a/nomad/volumewatcher/volume_watcher.go b/nomad/volumewatcher/volume_watcher.go index 28fc94f35366..fe69bca4189c 100644 --- a/nomad/volumewatcher/volume_watcher.go +++ b/nomad/volumewatcher/volume_watcher.go @@ -177,17 +177,10 @@ func (vw *volumeWatcher) isUnclaimed(vol *structs.CSIVolume) bool { return len(vol.ReadClaims) == 0 && len(vol.WriteClaims) == 0 && len(vol.PastClaims) == 0 } +// volumeReapImpl unpublished all the volume's PastClaims. PastClaims +// will be populated from nil or terminal allocs when we call +// CSIVolumeDenormalize(), so this assumes we've done so in the caller func (vw *volumeWatcher) volumeReapImpl(vol *structs.CSIVolume) error { - - // PastClaims written by a volume GC core job will have no allocation, - // so we need to find out which allocs are eligible for cleanup. 
- for _, claim := range vol.PastClaims { - if claim.AllocationID == "" { - vol = vw.collectPastClaims(vol) - break // only need to collect once - } - } - var result *multierror.Error for _, claim := range vol.PastClaims { err := vw.unpublish(vol, claim) @@ -195,9 +188,7 @@ func (vw *volumeWatcher) volumeReapImpl(vol *structs.CSIVolume) error { result = multierror.Append(result, err) } } - return result.ErrorOrNil() - } func (vw *volumeWatcher) collectPastClaims(vol *structs.CSIVolume) *structs.CSIVolume { diff --git a/nomad/volumewatcher/volume_watcher_test.go b/nomad/volumewatcher/volume_watcher_test.go index 4e8a556a49c2..4bb4ddae4b66 100644 --- a/nomad/volumewatcher/volume_watcher_test.go +++ b/nomad/volumewatcher/volume_watcher_test.go @@ -37,6 +37,7 @@ func TestVolumeWatch_Reap(t *testing.T) { logger: testlog.HCLogger(t), } + vol, _ = srv.State().CSIVolumeDenormalize(nil, vol.Copy()) err := w.volumeReapImpl(vol) require.NoError(err) @@ -48,6 +49,7 @@ func TestVolumeWatch_Reap(t *testing.T) { State: structs.CSIVolumeClaimStateNodeDetached, }, } + vol, _ = srv.State().CSIVolumeDenormalize(nil, vol.Copy()) err = w.volumeReapImpl(vol) require.NoError(err) require.Len(vol.PastClaims, 1) @@ -59,6 +61,7 @@ func TestVolumeWatch_Reap(t *testing.T) { Mode: structs.CSIVolumeClaimGC, }, } + vol, _ = srv.State().CSIVolumeDenormalize(nil, vol.Copy()) err = w.volumeReapImpl(vol) require.NoError(err) require.Len(vol.PastClaims, 2) // alloc claim + GC claim @@ -71,6 +74,7 @@ func TestVolumeWatch_Reap(t *testing.T) { Mode: structs.CSIVolumeClaimRead, }, } + vol, _ = srv.State().CSIVolumeDenormalize(nil, vol.Copy()) err = w.volumeReapImpl(vol) require.NoError(err) require.Len(vol.PastClaims, 2) // alloc claim + GC claim diff --git a/nomad/volumewatcher/volumes_watcher_test.go b/nomad/volumewatcher/volumes_watcher_test.go index 2271c2f2036d..c66411631b1a 100644 --- a/nomad/volumewatcher/volumes_watcher_test.go +++ b/nomad/volumewatcher/volumes_watcher_test.go @@ -67,7 +67,7 @@ func TestVolumeWatch_LeadershipTransition(t *testing.T) { plugin := mock.CSIPlugin() node := testNode(plugin, srv.State()) alloc := mock.Alloc() - alloc.ClientStatus = structs.AllocClientStatusComplete + alloc.ClientStatus = structs.AllocClientStatusRunning vol := testVolume(plugin, alloc, node.ID) index++ From 8364eda1d7bc21ac23b6f77cc332b7995298653b Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 28 Jan 2022 08:30:31 -0500 Subject: [PATCH 13/33] CSI: node unmount from the client before unpublish RPC (#11892) When an allocation stops, the `csi_hook` makes an unpublish RPC to the servers to unpublish via the CSI RPCs: first to the node plugins and then the controller plugins. The controller RPCs must happen after the node RPCs so that the node has had a chance to unmount the volume before the controller tries to detach the associated device. But the client has local access to the node plugins and can independently determine if it's safe to send unpublish RPC to those plugins. This will allow the server to treat the node plugin as abandoned if a client is disconnected and `stop_on_client_disconnect` is set. This will let the server try to send unpublish RPCs to the controller plugins, under the assumption that the client will be trying to unmount the volume on its end first. Note that the CSI `NodeUnpublishVolume`/`NodeUnstageVolume` RPCs can return ignorable errors in the case where the volume has already been unmounted from the node. 
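A rough sketch of what that can look like on the client side (illustrative only, not the code in this patch; unmountVolume is a hypothetical stand-in for the node-plugin RPC path, while structs.ErrCSIClientRPCIgnorable is the sentinel the client code uses for this class of failure):

    // Treat "volume already unmounted" style failures as success; anything
    // else is left to the retry loop described below.
    func unmountIgnoringBenignErrors(ctx context.Context, volID string) error {
        err := unmountVolume(ctx, volID) // hypothetical helper wrapping NodeUnpublish/NodeUnstage
        if errors.Is(err, structs.ErrCSIClientRPCIgnorable) {
            return nil // the volume is already gone from this node; safe to unpublish
        }
        return err
    }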
Handle all other errors by retrying until we get success so as to give operators the opportunity to reschedule a failed node plugin (ex. in the case where they accidentally drained a node without `-ignore-system`). Fan-out the work for each volume into its own goroutine so that we can release a subset of volumes if only one is stuck. --- client/allocrunner/csi_hook.go | 160 ++++++++++++++++++---- client/allocrunner/csi_hook_test.go | 10 +- client/pluginmanager/csimanager/volume.go | 7 +- 3 files changed, 143 insertions(+), 34 deletions(-) diff --git a/client/allocrunner/csi_hook.go b/client/allocrunner/csi_hook.go index e7e7385a3e8a..6fb1b2866abb 100644 --- a/client/allocrunner/csi_hook.go +++ b/client/allocrunner/csi_hook.go @@ -3,6 +3,8 @@ package allocrunner import ( "context" "fmt" + "sync" + "time" hclog "github.com/hashicorp/go-hclog" multierror "github.com/hashicorp/go-multierror" @@ -24,7 +26,9 @@ type csiHook struct { updater hookResourceSetter nodeSecret string - volumeRequests map[string]*volumeAndRequest + volumeRequests map[string]*volumeAndRequest + maxBackoffInterval time.Duration + maxBackoffDuration time.Duration } // implemented by allocrunner @@ -42,6 +46,8 @@ func newCSIHook(alloc *structs.Allocation, logger hclog.Logger, csi csimanager.M updater: updater, nodeSecret: nodeSecret, volumeRequests: map[string]*volumeAndRequest{}, + maxBackoffInterval: time.Minute, + maxBackoffDuration: time.Hour * 24, } } @@ -103,41 +109,43 @@ func (c *csiHook) Postrun() error { return nil } - var mErr *multierror.Error + var wg sync.WaitGroup + errs := make(chan error, len(c.volumeRequests)) for _, pair := range c.volumeRequests { + wg.Add(1) + + // CSI RPCs can potentially fail for a very long time if a + // node plugin has failed. split the work into goroutines so + // that operators could potentially reuse one of a set of + // volumes even if this hook is stuck waiting on the others + go func(pair *volumeAndRequest) { + defer wg.Done() + + // we can recover an unmount failure if the operator + // brings the plugin back up, so retry every few minutes + // but eventually give up + err := c.unmountWithRetry(pair) + if err != nil { + errs <- err + return + } - mode := structs.CSIVolumeClaimRead - if !pair.request.ReadOnly { - mode = structs.CSIVolumeClaimWrite - } + // we can't recover from this RPC error client-side; the + // volume claim GC job will have to clean up for us once + // the allocation is marked terminal + errs <- c.unpublish(pair) + }(pair) + } - source := pair.request.Source - if pair.request.PerAlloc { - // NOTE: PerAlloc can't be set if we have canaries - source = source + structs.AllocSuffix(c.alloc.Name) - } + wg.Wait() + close(errs) // so we don't block waiting if there were no errors - req := &structs.CSIVolumeUnpublishRequest{ - VolumeID: source, - Claim: &structs.CSIVolumeClaim{ - AllocationID: c.alloc.ID, - NodeID: c.alloc.NodeID, - Mode: mode, - State: structs.CSIVolumeClaimStateUnpublishing, - }, - WriteRequest: structs.WriteRequest{ - Region: c.alloc.Job.Region, - Namespace: c.alloc.Job.Namespace, - AuthToken: c.nodeSecret, - }, - } - err := c.rpcClient.RPC("CSIVolume.Unpublish", - req, &structs.CSIVolumeUnpublishResponse{}) - if err != nil { - mErr = multierror.Append(mErr, err) - } + var mErr *multierror.Error + for err := range errs { + mErr = multierror.Append(mErr, err) } + return mErr.ErrorOrNil() } @@ -231,3 +239,95 @@ func (c *csiHook) shouldRun() bool { return false } + +func (c *csiHook) unpublish(pair *volumeAndRequest) error { + + mode := 
structs.CSIVolumeClaimRead + if !pair.request.ReadOnly { + mode = structs.CSIVolumeClaimWrite + } + + source := pair.request.Source + if pair.request.PerAlloc { + // NOTE: PerAlloc can't be set if we have canaries + source = source + structs.AllocSuffix(c.alloc.Name) + } + + req := &structs.CSIVolumeUnpublishRequest{ + VolumeID: source, + Claim: &structs.CSIVolumeClaim{ + AllocationID: c.alloc.ID, + NodeID: c.alloc.NodeID, + Mode: mode, + State: structs.CSIVolumeClaimStateUnpublishing, + }, + WriteRequest: structs.WriteRequest{ + Region: c.alloc.Job.Region, + Namespace: c.alloc.Job.Namespace, + AuthToken: c.nodeSecret, + }, + } + + return c.rpcClient.RPC("CSIVolume.Unpublish", + req, &structs.CSIVolumeUnpublishResponse{}) + +} + +// unmountWithRetry tries to unmount/unstage the volume, retrying with +// exponential backoff capped to a maximum interval +func (c *csiHook) unmountWithRetry(pair *volumeAndRequest) error { + + // note: allocrunner hooks don't have access to the client's + // shutdown context, just the allocrunner's shutdown; if we make + // it available in the future we should thread it through here so + // that retry can exit gracefully instead of dropping the + // in-flight goroutine + ctx, cancel := context.WithTimeout(context.TODO(), c.maxBackoffDuration) + defer cancel() + var err error + backoff := time.Second + ticker := time.NewTicker(backoff) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return err + case <-ticker.C: + } + + err = c.unmountImpl(pair) + if err == nil { + break + } + + if backoff < c.maxBackoffInterval { + backoff = backoff * 2 + if backoff > c.maxBackoffInterval { + backoff = c.maxBackoffInterval + } + } + ticker.Reset(backoff) + } + return nil +} + +// unmountImpl implements the call to the CSI plugin manager to +// unmount the volume. 
Each retry will write an "Unmount volume" +// NodeEvent +func (c *csiHook) unmountImpl(pair *volumeAndRequest) error { + + mounter, err := c.csimanager.MounterForPlugin(context.TODO(), pair.volume.PluginID) + if err != nil { + return err + } + + usageOpts := &csimanager.UsageOptions{ + ReadOnly: pair.request.ReadOnly, + AttachmentMode: pair.request.AttachmentMode, + AccessMode: pair.request.AccessMode, + MountOptions: pair.request.MountOptions, + } + + return mounter.UnmountVolume(context.TODO(), + pair.volume.ID, pair.volume.RemoteID(), c.alloc.ID, usageOpts) +} diff --git a/client/allocrunner/csi_hook_test.go b/client/allocrunner/csi_hook_test.go index 045ef3e0afce..d05d07385c3d 100644 --- a/client/allocrunner/csi_hook_test.go +++ b/client/allocrunner/csi_hook_test.go @@ -5,6 +5,7 @@ import ( "fmt" "path/filepath" "testing" + "time" "github.com/stretchr/testify/require" @@ -59,7 +60,7 @@ func TestCSIHook(t *testing.T) { "test-alloc-dir/%s/testvolume0/ro-file-system-single-node-reader-only", alloc.ID)}, }, expectedMountCalls: 1, - expectedUnmountCalls: 0, // not until this is done client-side + expectedUnmountCalls: 1, expectedClaimCalls: 1, expectedUnpublishCalls: 1, }, @@ -83,7 +84,7 @@ func TestCSIHook(t *testing.T) { "test-alloc-dir/%s/testvolume0/ro-file-system-single-node-reader-only", alloc.ID)}, }, expectedMountCalls: 1, - expectedUnmountCalls: 0, // not until this is done client-side + expectedUnmountCalls: 1, expectedClaimCalls: 1, expectedUnpublishCalls: 1, }, @@ -122,7 +123,7 @@ func TestCSIHook(t *testing.T) { // "test-alloc-dir/%s/testvolume0/ro-file-system-multi-node-reader-only", alloc.ID)}, // }, // expectedMountCalls: 1, - // expectedUnmountCalls: 0, // not until this is done client-side + // expectedUnmountCalls: 1, // expectedClaimCalls: 1, // expectedUnpublishCalls: 1, // }, @@ -144,6 +145,9 @@ func TestCSIHook(t *testing.T) { }, } hook := newCSIHook(alloc, logger, mgr, rpcer, ar, ar, "secret") + hook.maxBackoffInterval = 100 * time.Millisecond + hook.maxBackoffDuration = 2 * time.Second + require.NotNil(t, hook) require.NoError(t, hook.Prerun()) diff --git a/client/pluginmanager/csimanager/volume.go b/client/pluginmanager/csimanager/volume.go index 9bca471cf085..4c6bf1d144e4 100644 --- a/client/pluginmanager/csimanager/volume.go +++ b/client/pluginmanager/csimanager/volume.go @@ -353,11 +353,16 @@ func (v *volumeManager) UnmountVolume(ctx context.Context, volID, remoteID, allo } } + if errors.Is(err, structs.ErrCSIClientRPCIgnorable) { + logger.Trace("unmounting volume failed with ignorable error", "error", err) + err = nil + } + event := structs.NewNodeEvent(). SetSubsystem(structs.NodeEventSubsystemStorage). SetMessage("Unmount volume"). 
AddDetail("volume_id", volID) - if err == nil || errors.Is(err, structs.ErrCSIClientRPCIgnorable) { + if err == nil { event.AddDetail("success", "true") } else { event.AddDetail("success", "false") From 0b70c1a4cd5c2e8b0d5bb7606d5839a3b00dd202 Mon Sep 17 00:00:00 2001 From: Jai Bhagat Date: Mon, 20 Dec 2021 15:11:01 -0500 Subject: [PATCH 14/33] feat: add evalutions view with table --- ui/app/controllers/evaluations.js | 14 ++++++++ ui/app/router.js | 2 ++ ui/app/routes/evaluations.js | 21 ++++++++++++ ui/app/templates/evaluations.hbs | 57 +++++++++++++++++++++++++++++++ 4 files changed, 94 insertions(+) create mode 100644 ui/app/controllers/evaluations.js create mode 100644 ui/app/routes/evaluations.js create mode 100644 ui/app/templates/evaluations.hbs diff --git a/ui/app/controllers/evaluations.js b/ui/app/controllers/evaluations.js new file mode 100644 index 000000000000..f462b944dacd --- /dev/null +++ b/ui/app/controllers/evaluations.js @@ -0,0 +1,14 @@ +import Controller from '@ember/controller'; +import { action } from '@ember/object'; +import { tracked } from '@glimmer/tracking'; + +export default class EvaluationsController extends Controller { + queryParams = ['pageSize']; + + @tracked pageSize = 25; + + @action + onChange(newPageSize) { + this.pageSize = newPageSize; + } +} diff --git a/ui/app/router.js b/ui/app/router.js index d567714f18b7..d944e12856ba 100644 --- a/ui/app/router.js +++ b/ui/app/router.js @@ -74,5 +74,7 @@ Router.map(function () { this.route('tokens'); }); + this.route('evaluations'); + this.route('not-found', { path: '/*' }); }); diff --git a/ui/app/routes/evaluations.js b/ui/app/routes/evaluations.js new file mode 100644 index 000000000000..0967f9cee17c --- /dev/null +++ b/ui/app/routes/evaluations.js @@ -0,0 +1,21 @@ +import { inject as service } from '@ember/service'; +import Route from '@ember/routing/route'; + +const ALL_NAMESPACE_WILDCARD = '*'; + +export default class EvaluationsRoute extends Route { + @service store; + + queryParams = { + pageSize: { + refreshModel: true, + }, + }; + + model({ pageSize }) { + return this.store.query('evaluation', { + namespace: ALL_NAMESPACE_WILDCARD, + per_page: pageSize, + }); + } +} diff --git a/ui/app/templates/evaluations.hbs b/ui/app/templates/evaluations.hbs new file mode 100644 index 000000000000..ad6a85696674 --- /dev/null +++ b/ui/app/templates/evaluations.hbs @@ -0,0 +1,57 @@ + + + + Evaluation ID + + + Resource + + + Priority + + + Created + + + Triggered By + + + Status + + + Placement Failures + + + + + + {{row.model.shortId}} + + + Resource Placeholder + + + {{row.model.priority}} + + + {{format-month-ts row.model.createTime}} + + + {{row.model.triggeredBy}} + + + {{row.model.status}} + + + {{#if (eq row.model.status "blocked")}} + N/A - In Progress + {{else if row.model.hasPlacementFailures}} + True + {{else}} + False + {{/if}} + + + + + \ No newline at end of file From 2ef93947d904114539781528896b45160e5b7109 Mon Sep 17 00:00:00 2001 From: Jai Bhagat Date: Mon, 20 Dec 2021 15:38:41 -0500 Subject: [PATCH 15/33] chore: run prettier on gutter-menu --- ui/app/templates/components/gutter-menu.hbs | 63 ++++++++++++++------- 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/ui/app/templates/components/gutter-menu.hbs b/ui/app/templates/components/gutter-menu.hbs index f2ef4048f39b..9e7da209db53 100644 --- a/ui/app/templates/components/gutter-menu.hbs +++ b/ui/app/templates/components/gutter-menu.hbs @@ -1,7 +1,12 @@
- + @@ -12,7 +17,8 @@ {{#if this.system.shouldShowRegions}}
{{yield}}
-
+
\ No newline at end of file From 6ffee67500bce5b801af319941771b16becb29a0 Mon Sep 17 00:00:00 2001 From: Jai Bhagat Date: Mon, 20 Dec 2021 15:40:30 -0500 Subject: [PATCH 16/33] fix: move evaluations template to index and inside page layout --- .../{evaluations.js => evaluations/index.js} | 0 ui/app/router.js | 3 +- .../{evaluations.js => evaluations/index.js} | 2 +- ui/app/templates/components/gutter-menu.hbs | 10 +++ ui/app/templates/evaluations.hbs | 60 +----------------- ui/app/templates/evaluations/index.hbs | 62 +++++++++++++++++++ 6 files changed, 78 insertions(+), 59 deletions(-) rename ui/app/controllers/{evaluations.js => evaluations/index.js} (100%) rename ui/app/routes/{evaluations.js => evaluations/index.js} (86%) create mode 100644 ui/app/templates/evaluations/index.hbs diff --git a/ui/app/controllers/evaluations.js b/ui/app/controllers/evaluations/index.js similarity index 100% rename from ui/app/controllers/evaluations.js rename to ui/app/controllers/evaluations/index.js diff --git a/ui/app/router.js b/ui/app/router.js index d944e12856ba..c54fc9322cd1 100644 --- a/ui/app/router.js +++ b/ui/app/router.js @@ -74,7 +74,8 @@ Router.map(function () { this.route('tokens'); }); - this.route('evaluations'); + // if we don't include function() the outlet won't render + this.route('evaluations', function() {}); this.route('not-found', { path: '/*' }); }); diff --git a/ui/app/routes/evaluations.js b/ui/app/routes/evaluations/index.js similarity index 86% rename from ui/app/routes/evaluations.js rename to ui/app/routes/evaluations/index.js index 0967f9cee17c..b3581310e468 100644 --- a/ui/app/routes/evaluations.js +++ b/ui/app/routes/evaluations/index.js @@ -3,7 +3,7 @@ import Route from '@ember/routing/route'; const ALL_NAMESPACE_WILDCARD = '*'; -export default class EvaluationsRoute extends Route { +export default class EvaluationsIndexRoute extends Route { @service store; queryParams = { diff --git a/ui/app/templates/components/gutter-menu.hbs b/ui/app/templates/components/gutter-menu.hbs index 9e7da209db53..fc9d25c49915 100644 --- a/ui/app/templates/components/gutter-menu.hbs +++ b/ui/app/templates/components/gutter-menu.hbs @@ -79,6 +79,16 @@ + + {{#if this.system.agent.version}}