-
Notifications
You must be signed in to change notification settings - Fork 47
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Allow marking releases stuck in a pending state as failed #116
base: main
Are you sure you want to change the base?
Changes from 4 commits
2961ecb
3deb69b
aa12c68
a98ee77
44e9cd1
3ef3256
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -77,6 +77,7 @@ type Reconciler struct { | |
skipDependentWatches bool | ||
maxConcurrentReconciles int | ||
reconcilePeriod time.Duration | ||
markFailedAfter time.Duration | ||
maxHistory int | ||
|
||
annotSetupOnce sync.Once | ||
|
@@ -297,6 +298,18 @@ func WithMaxReleaseHistory(maxHistory int) Option { | |
} | ||
} | ||
|
||
// WithMarkFailedAfter specifies the duration after which the reconciler will mark a release in a pending (locked) | ||
// state as false in order to allow rolling forward. | ||
func WithMarkFailedAfter(duration time.Duration) Option { | ||
return func(r *Reconciler) error { | ||
if duration < 0 { | ||
return errors.New("auto-rollback after duration must not be negative") | ||
} | ||
r.markFailedAfter = duration | ||
return nil | ||
} | ||
} | ||
|
||
// WithInstallAnnotations is an Option that configures Install annotations | ||
// to enable custom action.Install fields to be set based on the value of | ||
// annotations found in the custom resource watched by this reconciler. | ||
|
@@ -531,6 +544,10 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl. | |
) | ||
return ctrl.Result{}, err | ||
} | ||
if state == statePending { | ||
return r.handlePending(actionClient, rel, &u, log) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @joelanford To be honest, I haven't arrived at a good solution here and would like you to make the decision: 1. handlePending in actionClient 2. Moving handlePending to the |
||
} | ||
|
||
u.UpdateStatus(updater.EnsureCondition(conditions.Irreconcilable(corev1.ConditionFalse, "", ""))) | ||
|
||
for _, h := range r.preHooks { | ||
|
@@ -597,6 +614,7 @@ const ( | |
stateNeedsInstall helmReleaseState = "needs install" | ||
stateNeedsUpgrade helmReleaseState = "needs upgrade" | ||
stateUnchanged helmReleaseState = "unchanged" | ||
statePending helmReleaseState = "pending" | ||
stateError helmReleaseState = "error" | ||
) | ||
|
||
|
@@ -645,6 +663,10 @@ func (r *Reconciler) getReleaseState(client helmclient.ActionInterface, obj meta | |
return nil, stateNeedsInstall, nil | ||
} | ||
|
||
if currentRelease.Info != nil && currentRelease.Info.Status.IsPending() { | ||
return currentRelease, statePending, nil | ||
} | ||
|
||
var opts []helmclient.UpgradeOption | ||
if r.maxHistory > 0 { | ||
opts = append(opts, func(u *action.Upgrade) error { | ||
|
@@ -722,6 +744,35 @@ func (r *Reconciler) doUpgrade(actionClient helmclient.ActionInterface, u *updat | |
return rel, nil | ||
} | ||
|
||
func (r *Reconciler) handlePending(actionClient helmclient.ActionInterface, rel *release.Release, u *updater.Updater, log logr.Logger) (ctrl.Result, error) { | ||
err := r.doHandlePending(actionClient, rel, log) | ||
if err == nil { | ||
err = errors.New("unknown error handling pending release") | ||
} | ||
u.UpdateStatus( | ||
updater.EnsureCondition(conditions.Irreconcilable(corev1.ConditionTrue, conditions.ReasonPendingError, err))) | ||
return ctrl.Result{}, err | ||
} | ||
|
||
func (r *Reconciler) doHandlePending(actionClient helmclient.ActionInterface, rel *release.Release, log logr.Logger) error { | ||
if r.markFailedAfter <= 0 { | ||
return errors.New("Release is in a pending (locked) state and cannot be modified. User intervention is required.") | ||
} | ||
if rel.Info == nil || rel.Info.LastDeployed.IsZero() { | ||
return errors.New("Release is in a pending (locked) state and lacks 'last deployed' timestamp. User intervention is required.") | ||
} | ||
if pendingSince := time.Since(rel.Info.LastDeployed.Time); pendingSince < r.markFailedAfter { | ||
return fmt.Errorf("Release is in a pending (locked) state and cannot currently be modified. Release will be marked failed to allow a roll-forward in %v.", r.markFailedAfter-pendingSince) | ||
} | ||
|
||
log.Info("Marking release as failed", "releaseName", rel.Name) | ||
err := actionClient.MarkFailed(rel, fmt.Sprintf("operator marked pending (locked) release as failed after state did not change for %v", r.markFailedAfter)) | ||
if err != nil { | ||
return fmt.Errorf("Failed to mark pending (locked) release as failed: %w", err) | ||
} | ||
return fmt.Errorf("marked release %s as failed to allow upgrade to succeed in next reconcile attempt", rel.Name) | ||
} | ||
|
||
func (r *Reconciler) reportOverrideEvents(obj runtime.Object) { | ||
for k, v := range r.overrideValues { | ||
r.eventRecorder.Eventf(obj, "Warning", "ValueOverridden", | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While I see how it's convenient to put this functionality into the actionClient, I'm not convinced it makes a ton of sense otherwise. There's really only one way to mark a release as failed (this is it), so I'm wondering if we pull this back out of the action client interface and just put this logic directly into the reconciler.
The only missing piece I see for that is giving the Reconciler an `ActionConfigGetter` field, which would likely just involve adding the field, adding a `WithActionConfigGetter` functional option, and then tweaking the `addDefaults` function slightly to handle the fact that the reconciler may already have an `ActionConfigGetter` set up via the new functional option.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed. I extracted it and am now working on adding a fake implementation for the `ActionConfigGetter`. After that it should be finished. The implementation is still a bit rough, though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry for the delay @joelanford.
I've tried to fake the `ActionConfig`, but it was more complex than expected, i.e. it also involves interacting with the `storage.Storage` interface to call the `Update` func for the given release.
I stopped there and wondered whether it would fit the abstraction if the `MarkFailed` func were renamed to `Update` to be more generic, so that the `ActionClient` wraps all interactions with Helm in a single struct.
Also, the already existing fake client can be leveraged and easily extended.
If the `Update` func does not match the expectations, I would implement a fake `ActionConfig` with a memory release driver in a separate PR.