From d418a597d193b415b58ee8ee637f03fe7ee799b8 Mon Sep 17 00:00:00 2001 From: Yusuke Kuoka Date: Sat, 1 Jan 2022 04:57:02 +0000 Subject: [PATCH] refactoring: Utilize early returns ot make cell reconcilation logic a bit more readable --- pkg/cell/cell.go | 912 ++++++++++++++++++++++++----------------------- 1 file changed, 461 insertions(+), 451 deletions(-) diff --git a/pkg/cell/cell.go b/pkg/cell/cell.go index a1f3c34..a371e7c 100644 --- a/pkg/cell/cell.go +++ b/pkg/cell/cell.go @@ -103,7 +103,7 @@ func Sync(config SyncInput) error { labelKeys = []string{okrav1alpha1.DefaultVersionLabelKey} } - desiredVer, latestTGs, err := awstargetgroupset.ListLatestAWSTargetGroups(awstargetgroupset.ListLatestAWSTargetGroupsInput{ + latestVer, latestTGs, err := awstargetgroupset.ListLatestAWSTargetGroups(awstargetgroupset.ListLatestAWSTargetGroupsInput{ ListAWSTargetGroupsInput: awstargetgroupset.ListAWSTargetGroupsInput{ NS: cell.Namespace, Selector: tgSelector.String(), @@ -114,6 +114,8 @@ func Sync(config SyncInput) error { return err } + var desiredVer *semver.Version + if cell.Spec.Version != "" { ver, err := semver.Parse(cell.Spec.Version) if err != nil { @@ -121,9 +123,11 @@ func Sync(config SyncInput) error { return err } - log.Printf("Detected a manual rollback request from %s to %s", desiredVer, ver) + log.Printf("Using cell.Spec.Version(%s) instead of latest version(%s)", ver, desiredVer) desiredVer = &ver + } else { + desiredVer = latestVer } currentTGs, err := awstargetgroupset.ListAWSTargetGroups(awstargetgroupset.ListAWSTargetGroupsInput{ @@ -199,7 +203,11 @@ func Sync(config SyncInput) error { } log.Printf("Created target groups and weights to: %v", updated) - } else if currentALBConfigSpecHash != desiredALBConfigSpecHash { + + return nil + } + + if currentALBConfigSpecHash != desiredALBConfigSpecHash { metav1.SetMetaDataAnnotation(&albConfig.ObjectMeta, LabelKeyALBConfigHash, desiredALBConfigSpecHash) albConfig.Spec = desiredALBConfigSpec @@ -207,603 +215,605 @@ func Sync(config SyncInput) error { if err := runtimeClient.Update(ctx, &albConfig); err != nil { return fmt.Errorf("updating albconfig: %w", err) } - } else { - // This is a standard cell update for releasing a new app/cluster version. - // Do a canary release. - // Ensure that the previous analysis run has been successful, if any + return nil + } - var currentStableTGsWeight, currentCanaryTGsWeight, canaryTGsWeight int + // This is a standard cell update for releasing a new app/cluster version. + // Do a canary release. - var ( - stableTGs []okrav1alpha1.ForwardTargetGroup - canaryTGs []okrav1alpha1.ForwardTargetGroup - ) + // Ensure that the previous analysis run has been successful, if any - for _, tg := range albConfig.Spec.Listener.Rule.Forward.TargetGroups { - tg := tg + var currentStableTGsWeight, currentCanaryTGsWeight, canaryTGsWeight int - if _, ok := desiredTGs[tg.Name]; ok { - currentCanaryTGsWeight += tg.Weight - canaryTGs = append(canaryTGs, tg) - continue - } + var ( + stableTGs []okrav1alpha1.ForwardTargetGroup + canaryTGs []okrav1alpha1.ForwardTargetGroup + ) - stableTGs = append(stableTGs, tg) + for _, tg := range albConfig.Spec.Listener.Rule.Forward.TargetGroups { + tg := tg - currentStableTGsWeight += tg.Weight + if _, ok := desiredTGs[tg.Name]; ok { + currentCanaryTGsWeight += tg.Weight + canaryTGs = append(canaryTGs, tg) + continue } - var ( - desiredAndCanaryAreSameVersion bool - rollbackRequested bool - ) - - if len(canaryTGs) > 0 { - for _, tg := range canaryTGs { - ver := currentTGNameToVer[tg.Name] - if ver == desiredVer.String() { - desiredAndCanaryAreSameVersion = true - break - } - - currentVer, err := semver.Parse(ver) - if err != nil { - log.Printf("Skipped incorrect label value %s: %v", ver, err) - continue - } + stableTGs = append(stableTGs, tg) - if desiredVer.LT(currentVer) { - rollbackRequested = true - } - } - } - - // Do update immediately without analysis or step update when - // it seems to have been triggered by an additional cluster that might have been - // added to deal with more load. - scaleRequested := desiredAndCanaryAreSameVersion && len(desiredTGs) != len(canaryTGs) + currentStableTGsWeight += tg.Weight + } - if rollbackRequested || scaleRequested { - // Immediately update LB config as quickly as possible when - // either a rollback or a scale in/out is requested. + var ( + desiredAndCanaryAreSameVersion bool + rollbackRequested bool + ) - albConfig.Spec.Listener.Rule.Forward.TargetGroups = nil - for _, tg := range desiredTGs { - albConfig.Spec.Listener.Rule.Forward.TargetGroups = append(albConfig.Spec.Listener.Rule.Forward.TargetGroups, tg) + if len(canaryTGs) > 0 { + for _, tg := range canaryTGs { + ver := currentTGNameToVer[tg.Name] + if ver == desiredVer.String() { + desiredAndCanaryAreSameVersion = true + break } - if err := runtimeClient.Update(ctx, &albConfig); err != nil { - return fmt.Errorf("updating albconfig: %w", err) + currentVer, err := semver.Parse(ver) + if err != nil { + log.Printf("Skipped incorrect label value %s: %v", ver, err) + continue } - updated := make(map[string]int) - for _, tg := range desiredTGs { - updated[tg.Name] = tg.Weight + if desiredVer.LT(currentVer) { + rollbackRequested = true } + } + } - log.Printf("Updated target groups and weights to: %v", updated) + // Do update immediately without analysis or step update when + // it seems to have been triggered by an additional cluster that might have been + // added to deal with more load. + scaleRequested := desiredAndCanaryAreSameVersion && len(desiredTGs) != len(canaryTGs) - if rollbackRequested { - log.Printf("Finished rollback") - } else if scaleRequested { - log.Printf("Finished scaling") - } + if rollbackRequested || scaleRequested { + // Immediately update LB config as quickly as possible when + // either a rollback or a scale in/out is requested. + + albConfig.Spec.Listener.Rule.Forward.TargetGroups = nil + for _, tg := range desiredTGs { + albConfig.Spec.Listener.Rule.Forward.TargetGroups = append(albConfig.Spec.Listener.Rule.Forward.TargetGroups, tg) + } - return nil + if err := runtimeClient.Update(ctx, &albConfig); err != nil { + return fmt.Errorf("updating albconfig: %w", err) } - var updatedTGs []okrav1alpha1.ForwardTargetGroup + updated := make(map[string]int) + for _, tg := range desiredTGs { + updated[tg.Name] = tg.Weight + } - var ( - passedAllCanarySteps bool - anyAnalysisRunFailed bool - experimentFailed bool - desiredVerIsBlocked bool - ) + log.Printf("Updated target groups and weights to: %v", updated) - // TODO Use client.MatchingLabels? - ownedByCellLabelSelector, err := labels.Parse(LabelKeyCell + "=" + cell.Name) - if err != nil { - return err + if rollbackRequested { + log.Printf("Finished rollback") + } else if scaleRequested { + log.Printf("Finished scaling") } - desiredStableTGsWeight := 100 + return nil + } - var bl okrav1alpha1.VersionBlocklist + var updatedTGs []okrav1alpha1.ForwardTargetGroup - if err := runtimeClient.Get(ctx, types.NamespacedName{Namespace: cell.Namespace, Name: cell.Name}, &bl); err != nil { - if !kerrors.IsNotFound(err) { - return err - } + var ( + passedAllCanarySteps bool + anyAnalysisRunFailed bool + experimentFailed bool + desiredVerIsBlocked bool + ) + + // TODO Use client.MatchingLabels? + ownedByCellLabelSelector, err := labels.Parse(LabelKeyCell + "=" + cell.Name) + if err != nil { + return err + } + + desiredStableTGsWeight := 100 + + var bl okrav1alpha1.VersionBlocklist + + if err := runtimeClient.Get(ctx, types.NamespacedName{Namespace: cell.Namespace, Name: cell.Name}, &bl); err != nil { + if !kerrors.IsNotFound(err) { + return err } + } - for _, item := range bl.Spec.Items { - if item.Version == desiredVer.String() { - desiredVerIsBlocked = true - break - } + for _, item := range bl.Spec.Items { + if item.Version == desiredVer.String() { + desiredVerIsBlocked = true + break } + } - if !desiredVerIsBlocked { - canarySteps := cell.Spec.UpdateStrategy.Canary.Steps + if !desiredVerIsBlocked { + canarySteps := cell.Spec.UpdateStrategy.Canary.Steps - passedAllCanarySteps = currentCanaryTGsWeight == 100 + passedAllCanarySteps = currentCanaryTGsWeight == 100 - if len(canarySteps) > 0 && !passedAllCanarySteps { - var analysisRunList rolloutsv1alpha1.AnalysisRunList + if len(canarySteps) > 0 && !passedAllCanarySteps { + var analysisRunList rolloutsv1alpha1.AnalysisRunList - if err := runtimeClient.List(ctx, &analysisRunList, &client.ListOptions{ - LabelSelector: ownedByCellLabelSelector, - }); err != nil { - return err - } + if err := runtimeClient.List(ctx, &analysisRunList, &client.ListOptions{ + LabelSelector: ownedByCellLabelSelector, + }); err != nil { + return err + } - var maxSuccessfulAnalysisRunStepIndex int - for _, ar := range analysisRunList.Items { - if ar.Status.Phase.Completed() { - stepIndexStr, ok := ar.Labels[LabelKeyStepIndex] - if !ok { - log.Printf("AnalysisRun %q does not have as step-index label. Perhaps this is not the one managed by okra? Skipping.", ar.Name) - continue - } - stepIndex, err := strconv.Atoi(stepIndexStr) - if err != nil { - return fmt.Errorf("parsing step index %q: %v", stepIndexStr, err) - } + var maxSuccessfulAnalysisRunStepIndex int + for _, ar := range analysisRunList.Items { + if ar.Status.Phase.Completed() { + stepIndexStr, ok := ar.Labels[LabelKeyStepIndex] + if !ok { + log.Printf("AnalysisRun %q does not have as step-index label. Perhaps this is not the one managed by okra? Skipping.", ar.Name) + continue + } + stepIndex, err := strconv.Atoi(stepIndexStr) + if err != nil { + return fmt.Errorf("parsing step index %q: %v", stepIndexStr, err) + } - if stepIndex > maxSuccessfulAnalysisRunStepIndex { - maxSuccessfulAnalysisRunStepIndex = stepIndex - } + if stepIndex > maxSuccessfulAnalysisRunStepIndex { + maxSuccessfulAnalysisRunStepIndex = stepIndex } } + } - STEPS: - for stepIndex, step := range canarySteps { - stepIndexStr := strconv.Itoa(stepIndex) + STEPS: + for stepIndex, step := range canarySteps { + stepIndexStr := strconv.Itoa(stepIndex) - if step.Analysis != nil { - // - // Ensure that the previous analysis run has been successful, if any - // + if step.Analysis != nil { + // + // Ensure that the previous analysis run has been successful, if any + // - var analysisRunList rolloutsv1alpha1.AnalysisRunList + var analysisRunList rolloutsv1alpha1.AnalysisRunList - labelSelector, err := labels.Parse(LabelKeyStepIndex + "=" + stepIndexStr) - if err != nil { - return err - } + labelSelector, err := labels.Parse(LabelKeyStepIndex + "=" + stepIndexStr) + if err != nil { + return err + } - if err := runtimeClient.List(ctx, &analysisRunList, &client.ListOptions{ - LabelSelector: labelSelector, - }); err != nil { - return err - } + if err := runtimeClient.List(ctx, &analysisRunList, &client.ListOptions{ + LabelSelector: labelSelector, + }); err != nil { + return err + } - switch len(analysisRunList.Items) { - case 0: - tmpl := step.Analysis.Templates[0] + switch len(analysisRunList.Items) { + case 0: + tmpl := step.Analysis.Templates[0] - var args []rolloutsv1alpha1.Argument - argsMap := make(map[string]rolloutsv1alpha1.Argument) + var args []rolloutsv1alpha1.Argument + argsMap := make(map[string]rolloutsv1alpha1.Argument) - var at rolloutsv1alpha1.AnalysisTemplate - nsName := types.NamespacedName{Namespace: cell.Namespace, Name: tmpl.TemplateName} - if err := runtimeClient.Get(ctx, nsName, &at); err != nil { - log.Printf("Failed getting analysistemplate %s: %v", nsName, err) - return err - } + var at rolloutsv1alpha1.AnalysisTemplate + nsName := types.NamespacedName{Namespace: cell.Namespace, Name: tmpl.TemplateName} + if err := runtimeClient.Get(ctx, nsName, &at); err != nil { + log.Printf("Failed getting analysistemplate %s: %v", nsName, err) + return err + } - for _, a := range at.Spec.Args { - argsMap[a.Name] = *a.DeepCopy() - } + for _, a := range at.Spec.Args { + argsMap[a.Name] = *a.DeepCopy() + } - for _, a := range step.Analysis.Args { - fromTemplate, ok := argsMap[a.Name] - if ok { - if a.Value != "" { - fromTemplate.Value = &a.Value - } - argsMap[a.Name] = fromTemplate - } else { - arg := rolloutsv1alpha1.Argument{ - Name: a.Name, - } - - if a.Value != "" { - arg.Value = &a.Value - } - - argsMap[a.Name] = arg + for _, a := range step.Analysis.Args { + fromTemplate, ok := argsMap[a.Name] + if ok { + if a.Value != "" { + fromTemplate.Value = &a.Value + } + argsMap[a.Name] = fromTemplate + } else { + arg := rolloutsv1alpha1.Argument{ + Name: a.Name, } - } - for _, a := range argsMap { - args = append(args, a) - } + if a.Value != "" { + arg.Value = &a.Value + } - ar := rolloutsv1alpha1.AnalysisRun{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: cell.Namespace, - Name: fmt.Sprintf("%s-%d-%s", cell.Name, stepIndex, tmpl.TemplateName), - Labels: map[string]string{ - LabelKeyStepIndex: stepIndexStr, - LabelKeyCell: cell.Name, - }, - }, - Spec: rolloutsv1alpha1.AnalysisRunSpec{ - Args: args, - Metrics: at.Spec.Metrics, - }, - } - if err := ctrl.SetControllerReference(&cell, &ar, scheme); err != nil { - log.Printf("Failed setting controller reference on %s/%s: %v", ar.Namespace, ar.Name, err) + argsMap[a.Name] = arg } + } - if err := runtimeClient.Create(ctx, &ar); err != nil { - return err - } + for _, a := range argsMap { + args = append(args, a) + } - log.Printf("Created analysisrun %s", ar.Name) + ar := rolloutsv1alpha1.AnalysisRun{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: cell.Namespace, + Name: fmt.Sprintf("%s-%d-%s", cell.Name, stepIndex, tmpl.TemplateName), + Labels: map[string]string{ + LabelKeyStepIndex: stepIndexStr, + LabelKeyCell: cell.Name, + }, + }, + Spec: rolloutsv1alpha1.AnalysisRunSpec{ + Args: args, + Metrics: at.Spec.Metrics, + }, + } + if err := ctrl.SetControllerReference(&cell, &ar, scheme); err != nil { + log.Printf("Failed setting controller reference on %s/%s: %v", ar.Namespace, ar.Name, err) + } - break STEPS - case 1: - for _, ar := range analysisRunList.Items { - if ar.Status.Phase != rolloutsv1alpha1.AnalysisPhaseSuccessful { - if ar.Status.Phase == rolloutsv1alpha1.AnalysisPhaseFailed { - // TODO Suspend and mark it as permanent failure when analysis run timed out - log.Printf("AnalysisRun %s failed", ar.Name) + if err := runtimeClient.Create(ctx, &ar); err != nil { + return err + } - anyAnalysisRunFailed = true - break STEPS - } + log.Printf("Created analysisrun %s", ar.Name) - log.Printf("Waiting for analysisrun %s of %s to become %s", ar.Name, ar.Status.Phase, rolloutsv1alpha1.AnalysisPhaseSuccessful) + break STEPS + case 1: + for _, ar := range analysisRunList.Items { + if ar.Status.Phase != rolloutsv1alpha1.AnalysisPhaseSuccessful { + if ar.Status.Phase == rolloutsv1alpha1.AnalysisPhaseFailed { + // TODO Suspend and mark it as permanent failure when analysis run timed out + log.Printf("AnalysisRun %s failed", ar.Name) - // We need to wait for this analysis run to succeed + anyAnalysisRunFailed = true break STEPS } - } - default: - return errors.New("too many analysis runs") - } - } else if step.Experiment != nil { - // - // Ensure that the previous experiments has been successful, if any - // - var experimentList rolloutsv1alpha1.ExperimentList + log.Printf("Waiting for analysisrun %s of %s to become %s", ar.Name, ar.Status.Phase, rolloutsv1alpha1.AnalysisPhaseSuccessful) - labelSelector, err := labels.Parse(LabelKeyStepIndex + "=" + stepIndexStr) - if err != nil { - return err + // We need to wait for this analysis run to succeed + break STEPS + } } + default: + return errors.New("too many analysis runs") + } + } else if step.Experiment != nil { + // + // Ensure that the previous experiments has been successful, if any + // - if err := runtimeClient.List(ctx, &experimentList, &client.ListOptions{ - LabelSelector: labelSelector, - }); err != nil { - return err - } + var experimentList rolloutsv1alpha1.ExperimentList - numExperiments := len(experimentList.Items) + labelSelector, err := labels.Parse(LabelKeyStepIndex + "=" + stepIndexStr) + if err != nil { + return err + } - if numExperiments == 0 { - exTemplate := step.Experiment + if err := runtimeClient.List(ctx, &experimentList, &client.ListOptions{ + LabelSelector: labelSelector, + }); err != nil { + return err + } - d := exTemplate.Duration + numExperiments := len(experimentList.Items) - var templates []rolloutsv1alpha1.TemplateSpec + if numExperiments == 0 { + exTemplate := step.Experiment - for _, t := range exTemplate.Templates { - var rs appsv1.ReplicaSet - nsName := types.NamespacedName{Namespace: cell.Namespace, Name: string(t.SpecRef)} - if err := runtimeClient.Get(ctx, nsName, &rs); err != nil { - log.Printf("Failed getting experiment template replicaset %s: %v", nsName, err) - return err - } + d := exTemplate.Duration - templates = append(templates, rolloutsv1alpha1.TemplateSpec{ - Name: t.Name, - Replicas: t.Replicas, - Selector: t.Selector, - Template: rs.Spec.Template, - }) - } + var templates []rolloutsv1alpha1.TemplateSpec - var analyses []rolloutsv1alpha1.ExperimentAnalysisTemplateRef - for _, a := range exTemplate.Analyses { - var args []rolloutsv1alpha1.Argument - for _, arg := range a.Args { - args = append(args, rolloutsv1alpha1.Argument{ - Name: arg.Name, - // TODO - Value: &arg.Value, - }) - } - analyses = append(analyses, rolloutsv1alpha1.ExperimentAnalysisTemplateRef{ - Name: a.Name, - TemplateName: a.TemplateName, - Args: args, - RequiredForCompletion: a.RequiredForCompletion, - }) + for _, t := range exTemplate.Templates { + var rs appsv1.ReplicaSet + nsName := types.NamespacedName{Namespace: cell.Namespace, Name: string(t.SpecRef)} + if err := runtimeClient.Get(ctx, nsName, &rs); err != nil { + log.Printf("Failed getting experiment template replicaset %s: %v", nsName, err) + return err } - ex := rolloutsv1alpha1.Experiment{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: cell.Namespace, - Name: fmt.Sprintf("%s-%d-%s", cell.Name, stepIndex, "experiment"), - Labels: map[string]string{ - LabelKeyStepIndex: stepIndexStr, - LabelKeyCell: cell.Name, - }, - }, - Spec: rolloutsv1alpha1.ExperimentSpec{ - Duration: d, - Templates: templates, - Analyses: analyses, - }, - } - if err := ctrl.SetControllerReference(&cell, &ex, scheme); err != nil { - log.Printf("Failed setting controller reference on %s/%s: %v", ex.Namespace, ex.Name, err) - } + templates = append(templates, rolloutsv1alpha1.TemplateSpec{ + Name: t.Name, + Replicas: t.Replicas, + Selector: t.Selector, + Template: rs.Spec.Template, + }) + } - if err := runtimeClient.Create(ctx, &ex); err != nil { - return err + var analyses []rolloutsv1alpha1.ExperimentAnalysisTemplateRef + for _, a := range exTemplate.Analyses { + var args []rolloutsv1alpha1.Argument + for _, arg := range a.Args { + args = append(args, rolloutsv1alpha1.Argument{ + Name: arg.Name, + // TODO + Value: &arg.Value, + }) } + analyses = append(analyses, rolloutsv1alpha1.ExperimentAnalysisTemplateRef{ + Name: a.Name, + TemplateName: a.TemplateName, + Args: args, + RequiredForCompletion: a.RequiredForCompletion, + }) + } - log.Printf("Created experiment %s", ex.Name) + ex := rolloutsv1alpha1.Experiment{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: cell.Namespace, + Name: fmt.Sprintf("%s-%d-%s", cell.Name, stepIndex, "experiment"), + Labels: map[string]string{ + LabelKeyStepIndex: stepIndexStr, + LabelKeyCell: cell.Name, + }, + }, + Spec: rolloutsv1alpha1.ExperimentSpec{ + Duration: d, + Templates: templates, + Analyses: analyses, + }, + } + if err := ctrl.SetControllerReference(&cell, &ex, scheme); err != nil { + log.Printf("Failed setting controller reference on %s/%s: %v", ex.Namespace, ex.Name, err) + } - break STEPS + if err := runtimeClient.Create(ctx, &ex); err != nil { + return err } - if numExperiments == 1 { - for _, ex := range experimentList.Items { - if ex.Status.Phase != rolloutsv1alpha1.AnalysisPhaseSuccessful { - if ex.Status.Phase == rolloutsv1alpha1.AnalysisPhaseFailed { - // TODO Suspend and mark it as permanent failure when experiment timed out - log.Printf("Experiment %s failed", ex.Name) + log.Printf("Created experiment %s", ex.Name) - experimentFailed = true - break STEPS - } + break STEPS + } - log.Printf("Waiting for experiment %s of %s to become %s", ex.Name, ex.Status.Phase, rolloutsv1alpha1.AnalysisPhaseSuccessful) + if numExperiments == 1 { + for _, ex := range experimentList.Items { + if ex.Status.Phase != rolloutsv1alpha1.AnalysisPhaseSuccessful { + if ex.Status.Phase == rolloutsv1alpha1.AnalysisPhaseFailed { + // TODO Suspend and mark it as permanent failure when experiment timed out + log.Printf("Experiment %s failed", ex.Name) - // We need to wait for this analysis run to succeed + experimentFailed = true break STEPS } + + log.Printf("Waiting for experiment %s of %s to become %s", ex.Name, ex.Status.Phase, rolloutsv1alpha1.AnalysisPhaseSuccessful) + + // We need to wait for this analysis run to succeed + break STEPS } } + } - return errors.New("too many experiments") - } else if step.SetWeight != nil { - desiredStableTGsWeight -= int(*step.SetWeight) + return errors.New("too many experiments") + } else if step.SetWeight != nil { + desiredStableTGsWeight -= int(*step.SetWeight) - if desiredStableTGsWeight < currentStableTGsWeight { - break STEPS - } - } else if step.Pause != nil { - // TODO List Pause resource and break if it isn't expired yet - var pauseList okrav1alpha1.PauseList + if desiredStableTGsWeight < currentStableTGsWeight { + break STEPS + } + } else if step.Pause != nil { + // TODO List Pause resource and break if it isn't expired yet + var pauseList okrav1alpha1.PauseList - ns := cell.Namespace + ns := cell.Namespace - labels := map[string]string{ - LabelKeyStepIndex: stepIndexStr, - LabelKeyCell: cell.Name, - } + labels := map[string]string{ + LabelKeyStepIndex: stepIndexStr, + LabelKeyCell: cell.Name, + } - if err := runtimeClient.List(ctx, &pauseList, client.InNamespace(ns), client.MatchingLabels(labels)); err != nil { - return err + if err := runtimeClient.List(ctx, &pauseList, client.InNamespace(ns), client.MatchingLabels(labels)); err != nil { + return err + } + + switch c := len(pauseList.Items); c { + case 0: + t := metav1.Time{ + Time: time.Now().Add(time.Duration(time.Second.Nanoseconds() * int64(step.Pause.DurationSeconds()))), } - switch c := len(pauseList.Items); c { - case 0: - t := metav1.Time{ - Time: time.Now().Add(time.Duration(time.Second.Nanoseconds() * int64(step.Pause.DurationSeconds()))), - } + pause := okrav1alpha1.Pause{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: ns, + Name: fmt.Sprintf("%s-%d-%s", cell.Name, stepIndex, "pause"), + Labels: labels, + }, + Spec: okrav1alpha1.PauseSpec{ + ExpireTime: t, + }, + } + ctrl.SetControllerReference(&cell, &pause, scheme) - pause := okrav1alpha1.Pause{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: ns, - Name: fmt.Sprintf("%s-%d-%s", cell.Name, stepIndex, "pause"), - Labels: labels, - }, - Spec: okrav1alpha1.PauseSpec{ - ExpireTime: t, - }, - } - ctrl.SetControllerReference(&cell, &pause, scheme) + if err := runtimeClient.Create(ctx, &pause); err != nil { + return err + } - if err := runtimeClient.Create(ctx, &pause); err != nil { - return err - } + log.Printf("Initiated pause %s until %s", pause.Name, t) - log.Printf("Initiated pause %s until %s", pause.Name, t) + break STEPS + case 1: + pause := pauseList.Items[0] + switch phase := pause.Status.Phase; phase { + case okrav1alpha1.PausePhaseCancelled: + log.Printf("Observed that pause %s had been cancelled. Continuing to the next step", pause.Name) + case okrav1alpha1.PausePhaseExpired: + log.Printf("Observed that pause %s had expired. Continuing to the next step", pause.Name) + case okrav1alpha1.PausePhaseStarted: + log.Printf("Still waiting for pause %s to expire or get cancelled", pause.Name) + break STEPS + case "": + log.Printf("Still waiting for pause %s to start", pause.Name) break STEPS - case 1: - pause := pauseList.Items[0] - - switch phase := pause.Status.Phase; phase { - case okrav1alpha1.PausePhaseCancelled: - log.Printf("Observed that pause %s had been cancelled. Continuing to the next step", pause.Name) - case okrav1alpha1.PausePhaseExpired: - log.Printf("Observed that pause %s had expired. Continuing to the next step", pause.Name) - case okrav1alpha1.PausePhaseStarted: - log.Printf("Still waiting for pause %s to expire or get cancelled", pause.Name) - break STEPS - case "": - log.Printf("Still waiting for pause %s to start", pause.Name) - break STEPS - default: - return fmt.Errorf("unexpected pause phase: %s", phase) - } default: - return fmt.Errorf("unexpected number of pauses found: %d", c) + return fmt.Errorf("unexpected pause phase: %s", phase) } - } else { - return fmt.Errorf("steps[%d]: only setWeight, analysis, and pause step are supported. got %v", stepIndex, step) - } - - if stepIndex+1 == len(canarySteps) { - passedAllCanarySteps = true + default: + return fmt.Errorf("unexpected number of pauses found: %d", c) } + } else { + return fmt.Errorf("steps[%d]: only setWeight, analysis, and pause step are supported. got %v", stepIndex, step) } - } - if passedAllCanarySteps || len(canarySteps) == 0 { - desiredStableTGsWeight = 0 + if stepIndex+1 == len(canarySteps) { + passedAllCanarySteps = true + } } + } - if anyAnalysisRunFailed || experimentFailed { - desiredStableTGsWeight = 100 - } + if passedAllCanarySteps || len(canarySteps) == 0 { + desiredStableTGsWeight = 0 + } - if desiredStableTGsWeight < 0 { - return fmt.Errorf("stable tgs weight cannot be less than 0: %v", desiredStableTGsWeight) - } + if anyAnalysisRunFailed || experimentFailed { + desiredStableTGsWeight = 100 + } - log.Printf("stable weight: %d -> %d\n", currentStableTGsWeight, desiredStableTGsWeight) + if desiredStableTGsWeight < 0 { + return fmt.Errorf("stable tgs weight cannot be less than 0: %v", desiredStableTGsWeight) + } - // Do update by step weight + log.Printf("stable weight: %d -> %d\n", currentStableTGsWeight, desiredStableTGsWeight) - if desiredStableTGsWeight > 0 { - numStableTGs := len(stableTGs) + // Do update by step weight - updatedStableTGs := map[string]okrav1alpha1.ForwardTargetGroup{} + if desiredStableTGsWeight > 0 { + numStableTGs := len(stableTGs) - for i, tg := range stableTGs { - tg := tg + updatedStableTGs := map[string]okrav1alpha1.ForwardTargetGroup{} - weight := desiredStableTGsWeight / numStableTGs + for i, tg := range stableTGs { + tg := tg - if i == numStableTGs-1 && numStableTGs > 1 { - weight = desiredStableTGsWeight - (weight * (numStableTGs - 1)) - } + weight := desiredStableTGsWeight / numStableTGs - updatedStableTGs[tg.Name] = okrav1alpha1.ForwardTargetGroup{ - Name: tg.Name, - ARN: tg.ARN, - Weight: weight, - } + if i == numStableTGs-1 && numStableTGs > 1 { + weight = desiredStableTGsWeight - (weight * (numStableTGs - 1)) } - for _, tg := range updatedStableTGs { - updatedTGs = append(updatedTGs, tg) + updatedStableTGs[tg.Name] = okrav1alpha1.ForwardTargetGroup{ + Name: tg.Name, + ARN: tg.ARN, + Weight: weight, } } - canaryTGsWeight = 100 - desiredStableTGsWeight + for _, tg := range updatedStableTGs { + updatedTGs = append(updatedTGs, tg) + } + } + + canaryTGsWeight = 100 - desiredStableTGsWeight - if canaryTGsWeight > 0 { - var canaryVersion string - for _, tg := range latestTGs { - for _, l := range labelKeys { - v, ok := tg.Labels[l] - if ok { - canaryVersion = v - break - } + if canaryTGsWeight > 0 { + var canaryVersion string + for _, tg := range latestTGs { + for _, l := range labelKeys { + v, ok := tg.Labels[l] + if ok { + canaryVersion = v + break } } - log.Printf("canary(%s) weight: %d -> %d\n", canaryVersion, currentCanaryTGsWeight, canaryTGsWeight) + } + log.Printf("canary(%s) weight: %d -> %d\n", canaryVersion, currentCanaryTGsWeight, canaryTGsWeight) - updatedCanatyTGs := map[string]okrav1alpha1.ForwardTargetGroup{} + updatedCanatyTGs := map[string]okrav1alpha1.ForwardTargetGroup{} - for i, tg := range latestTGs { - weight := canaryTGsWeight / numLatestTGs + for i, tg := range latestTGs { + weight := canaryTGsWeight / numLatestTGs - if i == numLatestTGs-1 && numLatestTGs > 1 { - weight = canaryTGsWeight - (weight * (numLatestTGs - 1)) - } - - updatedCanatyTGs[tg.Name] = okrav1alpha1.ForwardTargetGroup{ - Name: tg.Name, - ARN: tg.Spec.ARN, - Weight: weight, - } + if i == numLatestTGs-1 && numLatestTGs > 1 { + weight = canaryTGsWeight - (weight * (numLatestTGs - 1)) } - for _, tg := range updatedCanatyTGs { - updatedTGs = append(updatedTGs, tg) + updatedCanatyTGs[tg.Name] = okrav1alpha1.ForwardTargetGroup{ + Name: tg.Name, + ARN: tg.Spec.ARN, + Weight: weight, } } - } - updated := make(map[string]int) - for _, tg := range updatedTGs { - updated[tg.Name] = tg.Weight + for _, tg := range updatedCanatyTGs { + updatedTGs = append(updatedTGs, tg) + } } + } - log.Printf("updating target groups and weights to: %v\n", updated) + updated := make(map[string]int) + for _, tg := range updatedTGs { + updated[tg.Name] = tg.Weight + } - albConfig.Spec.Listener.Rule.Forward.TargetGroups = updatedTGs + log.Printf("updating target groups and weights to: %v\n", updated) - if err := runtimeClient.Update(ctx, &albConfig); err != nil { - return err - } - - if anyAnalysisRunFailed { - var bl okrav1alpha1.VersionBlocklist + albConfig.Spec.Listener.Rule.Forward.TargetGroups = updatedTGs - item := okrav1alpha1.VersionBlocklistItem{ - Version: desiredVer.String(), - Cause: "AnalysisRun failed", - } + if err := runtimeClient.Update(ctx, &albConfig); err != nil { + return err + } - if err := runtimeClient.Get(ctx, types.NamespacedName{Namespace: cell.Namespace, Name: cell.Name}, &bl); err != nil { - if !kerrors.IsNotFound(err) { - return err - } + if anyAnalysisRunFailed { + var bl okrav1alpha1.VersionBlocklist - bl = okrav1alpha1.VersionBlocklist{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: cell.Namespace, - Name: cell.Name, - }, - Spec: okrav1alpha1.VersionBlocklistSpec{ - Items: []okrav1alpha1.VersionBlocklistItem{ - item, - }, - }, - } - if err := runtimeClient.Create(ctx, &bl); err != nil { - return err - } - } else { - bl.Spec.Items = append(bl.Spec.Items, item) + item := okrav1alpha1.VersionBlocklistItem{ + Version: desiredVer.String(), + Cause: "AnalysisRun failed", + } - if err := runtimeClient.Update(ctx, &bl); err != nil { - return err - } + if err := runtimeClient.Get(ctx, types.NamespacedName{Namespace: cell.Namespace, Name: cell.Name}, &bl); err != nil { + if !kerrors.IsNotFound(err) { + return err } - } - if desiredStableTGsWeight == 0 && passedAllCanarySteps || anyAnalysisRunFailed || desiredVerIsBlocked { - // Seems like we need to explicitly specify the namespace with client.InNamespace. - // Otherwise it results in `Error: the server could not find the requested resource (delete analysisruns.argoproj.io)` - if err := runtimeClient.DeleteAllOf(ctx, &rolloutsv1alpha1.AnalysisRun{}, client.InNamespace(cell.Namespace), &client.DeleteAllOfOptions{ - ListOptions: client.ListOptions{ - LabelSelector: ownedByCellLabelSelector, + bl = okrav1alpha1.VersionBlocklist{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: cell.Namespace, + Name: cell.Name, }, - }); err != nil { - log.Printf("Failed deleting analysis runs: %v", err) + Spec: okrav1alpha1.VersionBlocklistSpec{ + Items: []okrav1alpha1.VersionBlocklistItem{ + item, + }, + }, + } + if err := runtimeClient.Create(ctx, &bl); err != nil { return err } + } else { + bl.Spec.Items = append(bl.Spec.Items, item) - log.Printf("Deleted all analysis runs with %s, if any", ownedByCellLabelSelector) - - if err := runtimeClient.DeleteAllOf(ctx, &okrav1alpha1.Pause{}, client.InNamespace(cell.Namespace), &client.DeleteAllOfOptions{ - ListOptions: client.ListOptions{ - LabelSelector: ownedByCellLabelSelector, - }, - }); err != nil { + if err := runtimeClient.Update(ctx, &bl); err != nil { return err } + } + } - log.Printf("Deleted all pauses with %s as completed, if any", ownedByCellLabelSelector) + if desiredStableTGsWeight == 0 && passedAllCanarySteps || anyAnalysisRunFailed || desiredVerIsBlocked { + // Seems like we need to explicitly specify the namespace with client.InNamespace. + // Otherwise it results in `Error: the server could not find the requested resource (delete analysisruns.argoproj.io)` + if err := runtimeClient.DeleteAllOf(ctx, &rolloutsv1alpha1.AnalysisRun{}, client.InNamespace(cell.Namespace), &client.DeleteAllOfOptions{ + ListOptions: client.ListOptions{ + LabelSelector: ownedByCellLabelSelector, + }, + }); err != nil { + log.Printf("Failed deleting analysis runs: %v", err) + return err + } + + log.Printf("Deleted all analysis runs with %s, if any", ownedByCellLabelSelector) + + if err := runtimeClient.DeleteAllOf(ctx, &okrav1alpha1.Pause{}, client.InNamespace(cell.Namespace), &client.DeleteAllOfOptions{ + ListOptions: client.ListOptions{ + LabelSelector: ownedByCellLabelSelector, + }, + }); err != nil { + return err } + + log.Printf("Deleted all pauses with %s as completed, if any", ownedByCellLabelSelector) } return nil