diff --git a/pkg/client/actionclient.go b/pkg/client/actionclient.go index 00def322..a487f120 100644 --- a/pkg/client/actionclient.go +++ b/pkg/client/actionclient.go @@ -62,6 +62,7 @@ type ActionInterface interface { Get(name string, opts ...GetOption) (*release.Release, error) Install(name, namespace string, chrt *chart.Chart, vals map[string]interface{}, opts ...InstallOption) (*release.Release, error) Upgrade(name, namespace string, chrt *chart.Chart, vals map[string]interface{}, opts ...UpgradeOption) (*release.Release, error) + MarkFailed(release *release.Release, reason string) error Uninstall(name string, opts ...UninstallOption) (*release.UninstallReleaseResponse, error) Reconcile(rel *release.Release) error } @@ -180,6 +181,14 @@ func (c *actionClient) Upgrade(name, namespace string, chrt *chart.Chart, vals m return rel, nil } +func (c *actionClient) MarkFailed(rel *release.Release, reason string) error { + infoCopy := *rel.Info + releaseCopy := *rel + releaseCopy.Info = &infoCopy + releaseCopy.SetStatus(release.StatusFailed, reason) + return c.conf.Releases.Update(&releaseCopy) +} + func (c *actionClient) Uninstall(name string, opts ...UninstallOption) (*release.UninstallReleaseResponse, error) { uninstall := action.NewUninstall(c.conf) for _, o := range opts { diff --git a/pkg/reconciler/internal/conditions/conditions.go b/pkg/reconciler/internal/conditions/conditions.go index 55e2c656..86beb90b 100644 --- a/pkg/reconciler/internal/conditions/conditions.go +++ b/pkg/reconciler/internal/conditions/conditions.go @@ -41,6 +41,7 @@ const ( ReasonUpgradeError = status.ConditionReason("UpgradeError") ReasonReconcileError = status.ConditionReason("ReconcileError") ReasonUninstallError = status.ConditionReason("UninstallError") + ReasonPendingError = status.ConditionReason("PendingError") ) func Initialized(stat corev1.ConditionStatus, reason status.ConditionReason, message interface{}) status.Condition { diff --git a/pkg/reconciler/internal/fake/actionclient.go b/pkg/reconciler/internal/fake/actionclient.go index 92b7da63..cdf5f77f 100644 --- a/pkg/reconciler/internal/fake/actionclient.go +++ b/pkg/reconciler/internal/fake/actionclient.go @@ -48,17 +48,19 @@ func (hcg *fakeActionClientGetter) ActionClientFor(_ crclient.Object) (client.Ac } type ActionClient struct { - Gets []GetCall - Installs []InstallCall - Upgrades []UpgradeCall - Uninstalls []UninstallCall - Reconciles []ReconcileCall - - HandleGet func() (*release.Release, error) - HandleInstall func() (*release.Release, error) - HandleUpgrade func() (*release.Release, error) - HandleUninstall func() (*release.UninstallReleaseResponse, error) - HandleReconcile func() error + Gets []GetCall + Installs []InstallCall + Upgrades []UpgradeCall + MarkFaileds []MarkFailedCall + Uninstalls []UninstallCall + Reconciles []ReconcileCall + + HandleGet func() (*release.Release, error) + HandleInstall func() (*release.Release, error) + HandleUpgrade func() (*release.Release, error) + HandleMarkFailed func() error + HandleUninstall func() (*release.UninstallReleaseResponse, error) + HandleReconcile func() error } func NewActionClient() ActionClient { @@ -77,12 +79,14 @@ func NewActionClient() ActionClient { Upgrades: make([]UpgradeCall, 0), Uninstalls: make([]UninstallCall, 0), Reconciles: make([]ReconcileCall, 0), + MarkFaileds: make([]MarkFailedCall, 0), HandleGet: relFunc(errors.New("get not implemented")), HandleInstall: relFunc(errors.New("install not implemented")), HandleUpgrade: relFunc(errors.New("upgrade not implemented")), HandleUninstall: uninstFunc(errors.New("uninstall not implemented")), HandleReconcile: recFunc(errors.New("reconcile not implemented")), + HandleMarkFailed: recFunc(errors.New("mark failed not implemented")), } } @@ -109,6 +113,11 @@ type UpgradeCall struct { Opts []client.UpgradeOption } +type MarkFailedCall struct { + Release *release.Release + Reason string +} + type UninstallCall struct { Name string Opts []client.UninstallOption @@ -133,6 +142,11 @@ func (c *ActionClient) Upgrade(name, namespace string, chrt *chart.Chart, vals m return c.HandleUpgrade() } +func (c *ActionClient) MarkFailed(rel *release.Release, reason string) error { + c.MarkFaileds = append(c.MarkFaileds, MarkFailedCall{rel, reason}) + return c.HandleMarkFailed() +} + func (c *ActionClient) Uninstall(name string, opts ...client.UninstallOption) (*release.UninstallReleaseResponse, error) { c.Uninstalls = append(c.Uninstalls, UninstallCall{name, opts}) return c.HandleUninstall() diff --git a/pkg/reconciler/reconciler.go b/pkg/reconciler/reconciler.go index 533cfff5..410fdbfc 100644 --- a/pkg/reconciler/reconciler.go +++ b/pkg/reconciler/reconciler.go @@ -73,6 +73,7 @@ type Reconciler struct { skipDependentWatches bool maxConcurrentReconciles int reconcilePeriod time.Duration + markFailedAfter time.Duration maxHistory int annotSetupOnce sync.Once @@ -293,6 +294,18 @@ func WithMaxReleaseHistory(maxHistory int) Option { } } +// WithMarkFailedAfter specifies the duration after which the reconciler will mark a release in a pending (locked) +// state as false in order to allow rolling forward. +func WithMarkFailedAfter(duration time.Duration) Option { + return func(r *Reconciler) error { + if duration < 0 { + return errors.New("auto-rollback after duration must not be negative") + } + r.markFailedAfter = duration + return nil + } +} + // WithInstallAnnotations is an Option that configures Install annotations // to enable custom action.Install fields to be set based on the value of // annotations found in the custom resource watched by this reconciler. @@ -490,6 +503,10 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl. ) return ctrl.Result{}, err } + if state == statePending { + return r.handlePending(actionClient, rel, &u, log) + } + u.UpdateStatus(updater.EnsureCondition(conditions.Irreconcilable(corev1.ConditionFalse, "", ""))) for _, h := range r.preHooks { @@ -556,6 +573,7 @@ const ( stateNeedsInstall helmReleaseState = "needs install" stateNeedsUpgrade helmReleaseState = "needs upgrade" stateUnchanged helmReleaseState = "unchanged" + statePending helmReleaseState = "pending" stateError helmReleaseState = "error" ) @@ -604,6 +622,10 @@ func (r *Reconciler) getReleaseState(client helmclient.ActionInterface, obj meta return nil, stateNeedsInstall, nil } + if currentRelease.Info != nil && currentRelease.Info.Status.IsPending() { + return currentRelease, statePending, nil + } + var opts []helmclient.UpgradeOption if r.maxHistory > 0 { opts = append(opts, func(u *action.Upgrade) error { @@ -681,6 +703,35 @@ func (r *Reconciler) doUpgrade(actionClient helmclient.ActionInterface, u *updat return rel, nil } +func (r *Reconciler) handlePending(actionClient helmclient.ActionInterface, rel *release.Release, u *updater.Updater, log logr.Logger) (ctrl.Result, error) { + err := r.doHandlePending(actionClient, rel, log) + if err == nil { + err = errors.New("unknown error handling pending release") + } + u.UpdateStatus( + updater.EnsureCondition(conditions.Irreconcilable(corev1.ConditionTrue, conditions.ReasonPendingError, err))) + return ctrl.Result{}, err +} + +func (r *Reconciler) doHandlePending(actionClient helmclient.ActionInterface, rel *release.Release, log logr.Logger) error { + if r.markFailedAfter <= 0 { + return errors.New("Release is in a pending (locked) state and cannot be modified. User intervention is required.") + } + if rel.Info == nil || rel.Info.LastDeployed.IsZero() { + return errors.New("Release is in a pending (locked) state and lacks 'last deployed' timestamp. User intervention is required.") + } + if pendingSince := time.Since(rel.Info.LastDeployed.Time); pendingSince < r.markFailedAfter { + return fmt.Errorf("Release is in a pending (locked) state and cannot currently be modified. Release will be marked failed to allow a roll-forward in %v.", r.markFailedAfter-pendingSince) + } + + log.Info("Marking release as failed", "releaseName", rel.Name) + err := actionClient.MarkFailed(rel, fmt.Sprintf("operator marked pending (locked) release as failed after state did not change for %v", r.markFailedAfter)) + if err != nil { + return fmt.Errorf("Failed to mark pending (locked) release as failed: %w", err) + } + return fmt.Errorf("marked release %s as failed to allow upgrade to succeed in next reconcile attempt", rel.Name) +} + func (r *Reconciler) reportOverrideEvents(obj runtime.Object) { for k, v := range r.overrideValues { r.eventRecorder.Eventf(obj, "Warning", "ValueOverridden", diff --git a/pkg/reconciler/reconciler_test.go b/pkg/reconciler/reconciler_test.go index 290a5f39..fd5f89be 100644 --- a/pkg/reconciler/reconciler_test.go +++ b/pkg/reconciler/reconciler_test.go @@ -58,6 +58,7 @@ import ( helmfake "github.com/operator-framework/helm-operator-plugins/pkg/reconciler/internal/fake" "github.com/operator-framework/helm-operator-plugins/pkg/sdk/controllerutil" "github.com/operator-framework/helm-operator-plugins/pkg/values" + helmTime "helm.sh/helm/v3/pkg/time" ) var _ = Describe("Reconciler", func() { @@ -372,6 +373,12 @@ var _ = Describe("Reconciler", func() { Expect(r.valueMapper.Map(chartutil.Values{})).To(Equal(chartutil.Values{"mapped": true})) }) }) + var _ = Describe("WithMarkedFailAfter", func() { + It("should set the reconciler mark failed after duration", func() { + Expect(WithMarkFailedAfter(1*time.Minute)(r)).To(Succeed()) + Expect(r.markFailedAfter).To(Equal(1*time.Minute)) + }) + }) }) var _ = Describe("Reconcile", func() { @@ -464,6 +471,65 @@ var _ = Describe("Reconciler", func() { Expect(mgr.GetClient().Create(ctx, obj)).To(Succeed()) }) + When("release is in pending state", func() { + fakeClient := helmfake.NewActionClient() + deployTime := helmTime.Now().Add(-5 * time.Minute) + exampleRelease := &release.Release{ + Name: "example-release", + Info: &release.Info{ + Status: release.StatusPendingUpgrade, + LastDeployed: deployTime, + FirstDeployed: deployTime, + }, + } + + BeforeEach(func() { + r.actionClientGetter = helmclient.ActionClientGetterFunc(func(object client.Object) (helmclient.ActionInterface, error) { + fakeClient.HandleGet = func() (*release.Release, error) { + return exampleRelease, nil + } + fakeClient.HandleMarkFailed = func() error { + return nil + } + return &fakeClient, nil + }) + }) + AfterEach(func() { + r.actionClientGetter = nil + }) + + When("time elapsed since last deployment exceeds markFailedAfter duration", func() { + It("should be marked as failed", func() { + r.markFailedAfter = 1 * time.Minute + res, err := r.Reconcile(ctx, req) + Expect(res).To(Equal(reconcile.Result{})) + Expect(err).ToNot(BeNil()) + Expect(err).To(MatchError("marked release example-release as failed to allow upgrade to succeed in next reconcile attempt")) + Expect(len(fakeClient.MarkFaileds)).Should(Equal(1)) + }) + }) + + When("markFailedAfter is disabled", func() { + It("should require user intervention", func() { + r.markFailedAfter = 0 + res, err := r.Reconcile(ctx, req) + Expect(res).To(Equal(reconcile.Result{})) + Expect(err).ToNot(BeNil()) + Expect(err).To(MatchError("Release is in a pending (locked) state and cannot be modified. User intervention is required.")) + }) + }) + + When("time since last deployment is higher than markFiledAfter duration", func () { + It("should return duration until the release will be marked as failed", func () { + r.markFailedAfter = 10*time.Minute + res, err := r.Reconcile(ctx, req) + Expect(res).To(Equal(reconcile.Result{})) + Expect(err).ToNot(BeNil()) + Expect(err.Error()).Should(ContainSubstring("Release is in a pending (locked) state and cannot currently be modified. Release will be marked failed to allow a roll-forward in")) + }) + }) + }) + When("requested CR release is not present", func() { When("action client getter is not working", func() { It("returns an error getting the action client", func() {