From 8282f86d9c0b57574795a81ec830ab4a76daa3dd Mon Sep 17 00:00:00 2001 From: stefanprodan Date: Sun, 22 Sep 2019 13:23:19 +0300 Subject: [PATCH 1/4] Implement confirm-promotion hook The confirm promotion hooks are executed right before the promotion step. The canary promotion is paused until the hooks return HTTP 200. While the promotion is paused, Flagger will continue to run the metrics checks and load tests. --- pkg/apis/flagger/v1alpha3/types.go | 2 ++ pkg/controller/scheduler.go | 34 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/pkg/apis/flagger/v1alpha3/types.go b/pkg/apis/flagger/v1alpha3/types.go index 9cd7cea43..20a98cd2a 100755 --- a/pkg/apis/flagger/v1alpha3/types.go +++ b/pkg/apis/flagger/v1alpha3/types.go @@ -139,6 +139,8 @@ const ( PostRolloutHook HookType = "post-rollout" // ConfirmRolloutHook halt canary analysis until webhook returns HTTP 200 ConfirmRolloutHook HookType = "confirm-rollout" + // ConfirmPromotionHook halt canary promotion until webhook returns HTTP 200 + ConfirmPromotionHook HookType = "confirm-promotion" ) // CanaryWebhook holds the reference to external checks used for canary analysis diff --git a/pkg/controller/scheduler.go b/pkg/controller/scheduler.go index 230eb9d07..2dbeea82d 100644 --- a/pkg/controller/scheduler.go +++ b/pkg/controller/scheduler.go @@ -323,6 +323,11 @@ func (c *Controller) advanceCanary(name string, namespace string, skipLivenessCh return } + // check promotion gate + if promote := c.runConfirmPromotionHooks(cd); !promote { + return + } + // promote canary - max iterations reached if cd.Spec.CanaryAnalysis.Iterations == cd.Status.Iterations { c.recordEventInfof(cd, "Copying %s.%s template spec to %s.%s", @@ -375,6 +380,11 @@ func (c *Controller) advanceCanary(name string, namespace string, skipLivenessCh return } + // check promotion gate + if promote := c.runConfirmPromotionHooks(cd); !promote { + return + } + // route all traffic to canary - max iterations reached if 
cd.Spec.CanaryAnalysis.Iterations == cd.Status.Iterations { if provider != "kubernetes" { @@ -445,6 +455,13 @@ func (c *Controller) advanceCanary(name string, namespace string, skipLivenessCh primaryWeight = 100 } + // check promotion gate + if canaryWeight >= maxWeight { + if promote := c.runConfirmPromotionHooks(cd); !promote { + return + } + } + if err := meshRouter.SetRoutes(cd, primaryWeight, canaryWeight); err != nil { c.recordEventWarningf(cd, "%v", err) return @@ -637,6 +654,23 @@ func (c *Controller) runConfirmRolloutHooks(canary *flaggerv1.Canary) bool { return true } +func (c *Controller) runConfirmPromotionHooks(canary *flaggerv1.Canary) bool { + for _, webhook := range canary.Spec.CanaryAnalysis.Webhooks { + if webhook.Type == flaggerv1.ConfirmPromotionHook { + err := CallWebhook(canary.Name, canary.Namespace, flaggerv1.CanaryPhaseProgressing, webhook) + if err != nil { + c.recordEventWarningf(canary, "Halt %s.%s advancement waiting for promotion approval %s", + canary.Name, canary.Namespace, webhook.Name) + c.sendNotification(canary, "Canary promotion is waiting for approval.", false, false) + return false + } else { + c.recordEventInfof(canary, "Confirm-promotion check %s passed", webhook.Name) + } + } + } + return true +} + func (c *Controller) runPreRolloutHooks(canary *flaggerv1.Canary) bool { for _, webhook := range canary.Spec.CanaryAnalysis.Webhooks { if webhook.Type == flaggerv1.PreRolloutHook { From 4a4c261a886362c1a312a44add7565502565aa1a Mon Sep 17 00:00:00 2001 From: stefanprodan Date: Sun, 22 Sep 2019 13:36:07 +0300 Subject: [PATCH 2/4] Add confirm-promotion webhook type to CRD --- artifacts/flagger/crd.yaml | 3 ++- charts/flagger/templates/crd.yaml | 3 ++- kustomize/base/flagger/crd.yaml | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/artifacts/flagger/crd.yaml b/artifacts/flagger/crd.yaml index c9689050f..a02004aab 100644 --- a/artifacts/flagger/crd.yaml +++ b/artifacts/flagger/crd.yaml @@ -215,7 +215,7 @@ spec: 
properties: items: type: object - required: ["name", "url", "timeout"] + required: ["name", "url"] properties: name: description: Name of the webhook @@ -228,6 +228,7 @@ spec: - confirm-rollout - pre-rollout - rollout + - confirm-promotion - post-rollout url: description: URL address of this webhook diff --git a/charts/flagger/templates/crd.yaml b/charts/flagger/templates/crd.yaml index b8863630a..bca2a1379 100644 --- a/charts/flagger/templates/crd.yaml +++ b/charts/flagger/templates/crd.yaml @@ -216,7 +216,7 @@ spec: properties: items: type: object - required: ['name', 'url', 'timeout'] + required: ["name", "url"] properties: name: description: Name of the webhook @@ -229,6 +229,7 @@ spec: - confirm-rollout - pre-rollout - rollout + - confirm-promotion - post-rollout url: description: URL address of this webhook diff --git a/kustomize/base/flagger/crd.yaml b/kustomize/base/flagger/crd.yaml index c9689050f..a02004aab 100644 --- a/kustomize/base/flagger/crd.yaml +++ b/kustomize/base/flagger/crd.yaml @@ -215,7 +215,7 @@ spec: properties: items: type: object - required: ["name", "url", "timeout"] + required: ["name", "url"] properties: name: description: Name of the webhook @@ -228,6 +228,7 @@ spec: - confirm-rollout - pre-rollout - rollout + - confirm-promotion - post-rollout url: description: URL address of this webhook From b8a64c79be710534784d3851fa57d948beca134a Mon Sep 17 00:00:00 2001 From: stefanprodan Date: Sun, 22 Sep 2019 13:44:55 +0300 Subject: [PATCH 3/4] Add confirm-promotion webhook to e2e tests --- test/e2e-tests.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/e2e-tests.sh b/test/e2e-tests.sh index f0e04da72..66371c80e 100755 --- a/test/e2e-tests.sh +++ b/test/e2e-tests.sh @@ -257,6 +257,9 @@ spec: type: cmd cmd: "hey -z 10m -q 10 -c 2 -H 'Cookie: type=insider' http://podinfo-canary.test:9898/" logCmdOutput: "true" + - name: promote-gate + type: confirm-promotion + url: http://flagger-loadtester.test/gate/approve - name: post type: 
post-rollout url: http://flagger-loadtester.test/ From 96ccfa54fb9dbfa476d421478e81392e11a37b42 Mon Sep 17 00:00:00 2001 From: stefanprodan Date: Sun, 22 Sep 2019 14:10:29 +0300 Subject: [PATCH 4/4] Add confirm-promotion hook example to docs --- Makefile | 11 ++--------- docs/gitbook/how-it-works.md | 23 +++++++++++++++++++++-- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 5685b9755..e0ceb3def 100644 --- a/Makefile +++ b/Makefile @@ -7,15 +7,8 @@ LT_VERSION?=$(shell grep 'VERSION' cmd/loadtester/main.go | awk '{ print $$4 }' TS=$(shell date +%Y-%m-%d_%H-%M-%S) run: - GO111MODULE=on go run cmd/flagger/* -kubeconfig=$$HOME/.kube/config -log-level=info -mesh-provider=istio -namespace=test \ - -metrics-server=https://prometheus.istio.weavedx.com \ - -enable-leader-election=true - -run2: - GO111MODULE=on go run cmd/flagger/* -kubeconfig=$$HOME/.kube/config -log-level=info -mesh-provider=istio -namespace=test \ - -metrics-server=https://prometheus.istio.weavedx.com \ - -enable-leader-election=true \ - -port=9092 + GO111MODULE=on go run cmd/flagger/* -kubeconfig=$$HOME/.kube/config -log-level=info -mesh-provider=istio -namespace=test-istio \ + -metrics-server=https://prometheus.istio.flagger.dev run-appmesh: GO111MODULE=on go run cmd/flagger/* -kubeconfig=$$HOME/.kube/config -log-level=info -mesh-provider=appmesh \ diff --git a/docs/gitbook/how-it-works.md b/docs/gitbook/how-it-works.md index 1546c9ce6..32626ad5e 100644 --- a/docs/gitbook/how-it-works.md +++ b/docs/gitbook/how-it-works.md @@ -663,6 +663,9 @@ The canary advancement is paused if a pre-rollout hook fails and if the number o threshold the canary will be rollback. * Rollout hooks are executed during the analysis on each iteration before the metric checks. If a rollout hook call fails the canary advancement is paused and eventfully rolled back. +* Confirm-promotion hooks are executed before the promotion step. 
+The canary promotion is paused until the hooks return HTTP 200. +While the promotion is paused, Flagger will continue to run the metrics checks and rollout hooks. * Post-rollout hooks are executed after the canary has been promoted or rolled back. If a post rollout hook fails the error is logged. @@ -687,6 +690,9 @@ Spec: timeout: 15s metadata: cmd: "hey -z 1m -q 5 -c 2 http://podinfo-canary.test:9898/" + - name: "promotion gate" + type: confirm-promotion + url: http://flagger-loadtester.test/gate/approve - name: "notify" type: post-rollout url: http://telegram.bot:8080/ @@ -914,8 +920,8 @@ Note that you should create a ConfigMap with your Bats tests and mount it inside ### Manual Gating -For manual approval of a canary deployment you can use the `confirm-rollout` webhook. -The confirmation hooks are executed before the pre-rollout hooks. +For manual approval of a canary deployment you can use the `confirm-rollout` and `confirm-promotion` webhooks. +The `confirm-rollout` hooks are executed before the pre-rollout hooks. Flagger will halt the canary traffic shifting and analysis until the confirm webhook returns HTTP status 200. Manual gating with Flagger's tester: @@ -974,3 +980,16 @@ kubectl get canary/podinfo NAME STATUS WEIGHT podinfo Waiting 0 ``` + +The `confirm-promotion` hook type can be used to manually approve the canary promotion. +While the promotion is paused, Flagger will continue to run the metrics checks and load tests. + +```yaml + canaryAnalysis: + webhooks: + - name: "promotion gate" + type: confirm-promotion + url: http://flagger-loadtester.test/gate/halt +``` + +If you have notifications enabled, Flagger will post a message to Slack or MS Teams if a canary promotion is waiting for approval.