⚠️ Remove RequeueAfterError #3929

Merged
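The change running through every file below is the same: reconcile helpers no longer return capierrors.RequeueAfterError; they return a (ctrl.Result, error) pair and express a delayed retry as ctrl.Result{RequeueAfter: ...}. Here is a minimal sketch of that convention; the helper name, its body, and the 20-second delay are invented for illustration, only the return shape mirrors the diffs:

```go
package controllers

import (
	"context"
	"errors"
	"time"

	ctrl "sigs.k8s.io/controller-runtime"
)

// Reconciler stands in for the reconcilers touched by this PR
// (MachineReconciler, KubeadmControlPlaneReconciler, ...).
type Reconciler struct{}

// drainHelper shows the new return convention: a delayed retry is a
// ctrl.Result with RequeueAfter set, not a special error type.
func (r *Reconciler) drainHelper(ctx context.Context) (ctrl.Result, error) {
	if err := doDrain(ctx); err != nil {
		// Before this PR: return &capierrors.RequeueAfterError{RequeueAfter: 20 * time.Second}
		return ctrl.Result{RequeueAfter: 20 * time.Second}, nil
	}
	return ctrl.Result{}, nil
}

// doDrain is a placeholder for the real drain logic.
func doDrain(ctx context.Context) error {
	return errors.New("drain failed")
}
```

Callers then check `!result.IsZero() || err != nil` instead of type-asserting the error, as the machine and KCP controllers in the diffs below do.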
29 changes: 15 additions & 14 deletions controllers/machine_controller.go
@@ -36,7 +36,6 @@ import (
"sigs.k8s.io/cluster-api/controllers/external"
"sigs.k8s.io/cluster-api/controllers/noderefutil"
"sigs.k8s.io/cluster-api/controllers/remote"
capierrors "sigs.k8s.io/cluster-api/errors"
kubedrain "sigs.k8s.io/cluster-api/third_party/kubernetes-drain"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
@@ -333,10 +332,12 @@ func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *cluste
return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
}

if err := r.drainNode(ctx, cluster, m.Status.NodeRef.Name); err != nil {
conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDrainNode", "error draining Machine's node %q: %v", m.Status.NodeRef.Name, err)
return ctrl.Result{}, err
if result, err := r.drainNode(ctx, cluster, m.Status.NodeRef.Name); !result.IsZero() || err != nil {
if err != nil {
conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDrainNode", "error draining Machine's node %q: %v", m.Status.NodeRef.Name, err)
}
return result, err
}

conditions.MarkTrue(m, clusterv1.DrainingSucceededCondition)
@@ -489,28 +490,28 @@ func (r *MachineReconciler) isDeleteNodeAllowed(ctx context.Context, cluster *cl
}
}

func (r *MachineReconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster, nodeName string) error {
func (r *MachineReconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster, nodeName string) (ctrl.Result, error) {
log := ctrl.LoggerFrom(ctx, "cluster", cluster.Name, "node", nodeName)

restConfig, err := remote.RESTConfig(ctx, MachineControllerName, r.Client, util.ObjectKey(cluster))
if err != nil {
log.Error(err, "Error creating a remote client while deleting Machine, won't retry")
return nil
return ctrl.Result{}, nil
}
kubeClient, err := kubernetes.NewForConfig(restConfig)
if err != nil {
log.Error(err, "Error creating a remote client while deleting Machine, won't retry")
return nil
return ctrl.Result{}, nil
}

node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
if err != nil {
if apierrors.IsNotFound(err) {
// If an admin deletes the node directly, we'll end up here.
log.Error(err, "Could not find node from noderef, it may have already been deleted")
return nil
return ctrl.Result{}, nil
}
return errors.Errorf("unable to get node %q: %v", nodeName, err)
return ctrl.Result{}, errors.Errorf("unable to get node %q: %v", nodeName, err)
}

drainer := &kubedrain.Helper{
@@ -543,17 +544,17 @@ func (r *MachineReconciler) drainNode(ctx context.Context, cluster *clusterv1.Cl
if err := kubedrain.RunCordonOrUncordon(ctx, drainer, node, true); err != nil {
// Machine will be re-reconciled after a cordon failure.
log.Error(err, "Cordon failed")
return errors.Errorf("unable to cordon node %s: %v", node.Name, err)
return ctrl.Result{}, errors.Errorf("unable to cordon node %s: %v", node.Name, err)
}

if err := kubedrain.RunNodeDrain(ctx, drainer, node.Name); err != nil {
// Machine will be re-reconciled after a drain failure.
log.Error(err, "Drain failed")
return &capierrors.RequeueAfterError{RequeueAfter: 20 * time.Second}
log.Error(err, "Drain failed, retry in 20s")
return ctrl.Result{RequeueAfter: 20 * time.Second}, nil
}

log.Info("Drain successful")
return nil
return ctrl.Result{}, nil
}

func (r *MachineReconciler) deleteNode(ctx context.Context, cluster *clusterv1.Cluster, name string) error {
19 changes: 12 additions & 7 deletions controllers/machine_controller_phases.go
@@ -19,7 +19,6 @@ package controllers
import (
"context"
"fmt"
"strings"
"time"

"github.com/pkg/errors"
@@ -97,9 +96,8 @@ func (r *MachineReconciler) reconcileExternal(ctx context.Context, cluster *clus
obj, err := external.Get(ctx, r.Client, ref, m.Namespace)
if err != nil {
if apierrors.IsNotFound(errors.Cause(err)) {
return external.ReconcileOutput{}, errors.Wrapf(&capierrors.RequeueAfterError{RequeueAfter: externalReadyWait},
"could not find %v %q for Machine %q in namespace %q, requeuing",
ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace)
log.Info("could not find external ref, requeueing", "RefGVK", ref.GroupVersionKind(), "RefName", ref.Name, "Machine", m.Name, "Namespace", m.Namespace)
return external.ReconcileOutput{RequeueAfter: externalReadyWait}, nil
}
return external.ReconcileOutput{}, err
}
@@ -192,6 +190,9 @@ func (r *MachineReconciler) reconcileBootstrap(ctx context.Context, cluster *clu
if err != nil {
return ctrl.Result{}, err
}
if externalResult.RequeueAfter > 0 {
return ctrl.Result{RequeueAfter: externalResult.RequeueAfter}, nil
}
Comment on lines +193 to +195 (Member):

The use of external.ReconcileOutput here makes this quite confusing. Not sure I have any good suggestions on how to make it less confusing without requiring more invasive refactoring, though :(

if externalResult.Paused {
return ctrl.Result{}, nil
}
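Regarding the review comment above: external.ReconcileOutput is not a ctrl.Result, so reconcileBootstrap has to copy its RequeueAfter (and honor Paused) into the result it returns. A rough sketch of that translation, assuming a struct with only the two fields visible in this diff (the real external.ReconcileOutput carries more):

```go
package controllers

import (
	"time"

	ctrl "sigs.k8s.io/controller-runtime"
)

// reconcileOutput models the two fields of external.ReconcileOutput that this
// hunk relies on; the rest of the real struct is omitted here.
type reconcileOutput struct {
	RequeueAfter time.Duration // non-zero: caller should retry later
	Paused       bool          // external object is paused: stop without error
}

// toResult is the caller-side translation the comment finds confusing: the
// helper's output has to be mapped onto the ctrl.Result by hand.
func toResult(out reconcileOutput) (ctrl.Result, bool) {
	if out.RequeueAfter > 0 {
		return ctrl.Result{RequeueAfter: out.RequeueAfter}, true // return early, requeue
	}
	if out.Paused {
		return ctrl.Result{}, true // return early, no requeue
	}
	return ctrl.Result{}, false // keep reconciling
}
```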
@@ -240,14 +241,18 @@ func (r *MachineReconciler) reconcileInfrastructure(ctx context.Context, cluster
// Call generic external reconciler.
infraReconcileResult, err := r.reconcileExternal(ctx, cluster, m, &m.Spec.InfrastructureRef)
if err != nil {
if m.Status.InfrastructureReady && strings.Contains(err.Error(), "could not find") {
// Infra object went missing after the machine was up and running
return ctrl.Result{}, err
}
if infraReconcileResult.RequeueAfter > 0 {
Comment from the PR author (Member):

This is kind of brittle, but TBH also the previous implementation checking the error message wasn't bullet proof...

// Infra object went missing after the machine was up and running
if m.Status.InfrastructureReady {
log.Error(err, "Machine infrastructure reference has been deleted after being ready, setting failure state")
m.Status.FailureReason = capierrors.MachineStatusErrorPtr(capierrors.InvalidConfigurationMachineError)
m.Status.FailureMessage = pointer.StringPtr(fmt.Sprintf("Machine infrastructure resource %v with name %q has been deleted after being ready",
m.Spec.InfrastructureRef.GroupVersionKind(), m.Spec.InfrastructureRef.Name))
return ctrl.Result{}, errors.Errorf("could not find %v %q for Machine %q in namespace %q, requeueing", m.Spec.InfrastructureRef.GroupVersionKind().String(), m.Spec.InfrastructureRef.Name, m.Name, m.Namespace)
}
return ctrl.Result{}, err
return ctrl.Result{RequeueAfter: infraReconcileResult.RequeueAfter}, nil
}
// if the external object is paused, return without any further processing
if infraReconcileResult.Paused {
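The author's comment above contrasts two ways of detecting that the external object could not be found: the old error-string match and the new structured RequeueAfter signal. A side-by-side sketch of the two checks, where only the field name and the "could not find" substring come from this diff and everything else is illustrative:

```go
package controllers

import (
	"strings"
	"time"
)

// externalResult models the structured signal used by the new check.
type externalResult struct {
	RequeueAfter time.Duration
}

// missingViaErrorString is the pre-PR detection: match on the wording of the
// error returned by reconcileExternal. Brittle, because any rewording of the
// message silently breaks the check.
func missingViaErrorString(err error) bool {
	return err != nil && strings.Contains(err.Error(), "could not find")
}

// missingViaResult is the post-PR detection: a non-zero RequeueAfter means the
// external object was not found (yet). Still "kind of brittle", as the comment
// says, because it assumes RequeueAfter is only set in that one situation.
func missingViaResult(res externalResult) bool {
	return res.RequeueAfter > 0
}
```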
58 changes: 32 additions & 26 deletions controllers/machine_controller_phases_test.go
@@ -587,9 +587,9 @@ func TestReconcileBootstrap(t *testing.T) {
name string
bootstrapConfig map[string]interface{}
machine *clusterv1.Machine
expectResult ctrl.Result
expectError bool
expected func(g *WithT, m *clusterv1.Machine)
result *ctrl.Result
}{
{
name: "new machine, bootstrap config ready with data",
@@ -606,7 +606,8 @@
"dataSecretName": "secret-data",
},
},
expectError: false,
expectResult: ctrl.Result{},
expectError: false,
expected: func(g *WithT, m *clusterv1.Machine) {
g.Expect(m.Status.BootstrapReady).To(BeTrue())
g.Expect(m.Spec.Bootstrap.DataSecretName).ToNot(BeNil())
@@ -627,7 +628,8 @@ func TestReconcileBootstrap(t *testing.T) {
"ready": true,
},
},
expectError: true,
expectResult: ctrl.Result{},
expectError: true,
expected: func(g *WithT, m *clusterv1.Machine) {
g.Expect(m.Status.BootstrapReady).To(BeFalse())
g.Expect(m.Spec.Bootstrap.DataSecretName).To(BeNil())
@@ -645,8 +647,8 @@
"spec": map[string]interface{}{},
"status": map[string]interface{}{},
},
expectError: false,
result: &ctrl.Result{RequeueAfter: externalReadyWait},
expectResult: ctrl.Result{RequeueAfter: externalReadyWait},
expectError: false,
expected: func(g *WithT, m *clusterv1.Machine) {
g.Expect(m.Status.BootstrapReady).To(BeFalse())
},
@@ -663,7 +665,8 @@
"spec": map[string]interface{}{},
"status": map[string]interface{}{},
},
expectError: true,
expectResult: ctrl.Result{RequeueAfter: externalReadyWait},
expectError: false,
expected: func(g *WithT, m *clusterv1.Machine) {
g.Expect(m.Status.BootstrapReady).To(BeFalse())
},
@@ -680,7 +683,8 @@
"spec": map[string]interface{}{},
"status": map[string]interface{}{},
},
expectError: true,
expectResult: ctrl.Result{RequeueAfter: externalReadyWait},
expectError: false,
},
{
name: "existing machine, bootstrap data should not change",
@@ -716,7 +720,8 @@
BootstrapReady: true,
},
},
expectError: false,
expectResult: ctrl.Result{},
expectError: false,
expected: func(g *WithT, m *clusterv1.Machine) {
g.Expect(m.Status.BootstrapReady).To(BeTrue())
g.Expect(*m.Spec.Bootstrap.DataSecretName).To(BeEquivalentTo("secret-data"))
@@ -763,8 +768,8 @@
BootstrapReady: true,
},
},
expectError: false,
result: &ctrl.Result{RequeueAfter: externalReadyWait},
expectResult: ctrl.Result{RequeueAfter: externalReadyWait},
expectError: false,
expected: func(g *WithT, m *clusterv1.Machine) {
g.Expect(m.GetOwnerReferences()).NotTo(ContainRefOfGroupKind("cluster.x-k8s.io", "MachineSet"))
},
@@ -810,7 +815,8 @@
BootstrapReady: true,
},
},
expectError: true,
expectResult: ctrl.Result{},
expectError: true,
expected: func(g *WithT, m *clusterv1.Machine) {
g.Expect(m.GetOwnerReferences()).NotTo(ContainRefOfGroupKind("cluster.x-k8s.io", "MachineSet"))
},
@@ -839,6 +845,7 @@
}

res, err := r.reconcileBootstrap(ctx, defaultCluster, tc.machine)
g.Expect(res).To(Equal(tc.expectResult))
if tc.expectError {
g.Expect(err).ToNot(BeNil())
} else {
@@ -848,10 +855,6 @@
if tc.expected != nil {
tc.expected(g, tc.machine)
}

if tc.result != nil {
g.Expect(res).To(Equal(*tc.result))
}
})
}
}
@@ -889,14 +892,14 @@ func TestReconcileInfrastructure(t *testing.T) {
}

testCases := []struct {
name string
bootstrapConfig map[string]interface{}
infraConfig map[string]interface{}
machine *clusterv1.Machine
expectError bool
expectChanged bool
expectRequeueAfter bool
expected func(g *WithT, m *clusterv1.Machine)
name string
bootstrapConfig map[string]interface{}
infraConfig map[string]interface{}
machine *clusterv1.Machine
expectResult ctrl.Result
expectError bool
expectChanged bool
expected func(g *WithT, m *clusterv1.Machine)
}{
{
name: "new machine, infrastructure config ready",
@@ -933,6 +936,7 @@
},
},
},
expectResult: ctrl.Result{},
expectError: false,
expectChanged: true,
expected: func(g *WithT, m *clusterv1.Machine) {
@@ -985,8 +989,8 @@
"apiVersion": "infrastructure.cluster.x-k8s.io/v1alpha4",
"metadata": map[string]interface{}{},
},
expectError: true,
expectRequeueAfter: true,
expectResult: ctrl.Result{},
expectError: true,
expected: func(g *WithT, m *clusterv1.Machine) {
g.Expect(m.Status.InfrastructureReady).To(BeTrue())
g.Expect(m.Status.FailureMessage).ToNot(BeNil())
@@ -1023,6 +1027,7 @@
},
},
},
expectResult: ctrl.Result{},
expectError: false,
expectChanged: false,
expected: func(g *WithT, m *clusterv1.Machine) {
@@ -1052,8 +1057,9 @@
).Build(),
}

_, err := r.reconcileInfrastructure(ctx, defaultCluster, tc.machine)
result, err := r.reconcileInfrastructure(ctx, defaultCluster, tc.machine)
r.reconcilePhase(ctx, tc.machine)
g.Expect(result).To(Equal(tc.expectResult))
if tc.expectError {
g.Expect(err).ToNot(BeNil())
} else {
16 changes: 5 additions & 11 deletions controlplane/kubeadm/controllers/controller.go
@@ -36,7 +36,6 @@ import (
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha4"
"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal"
"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/machinefilters"
capierrors "sigs.k8s.io/cluster-api/errors"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/conditions"
@@ -161,13 +160,6 @@ func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.
}

defer func() {
if requeueErr, ok := errors.Cause(reterr).(capierrors.HasRequeueAfterError); ok {
if res.RequeueAfter == 0 {
res.RequeueAfter = requeueErr.GetRequeueAfter()
reterr = nil
}
}

// Always attempt to update status.
if err := r.updateStatus(ctx, kcp, cluster); err != nil {
var connFailure *internal.RemoteClusterConnectionError
@@ -265,9 +257,11 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
}

// Generate Cluster Kubeconfig if needed
if err := r.reconcileKubeconfig(ctx, cluster, kcp); err != nil {
log.Error(err, "failed to reconcile Kubeconfig")
return ctrl.Result{}, err
if result, err := r.reconcileKubeconfig(ctx, cluster, kcp); !result.IsZero() || err != nil {
if err != nil {
log.Error(err, "failed to reconcile Kubeconfig")
}
return result, err
}

controlPlaneMachines, err := r.managementClusterUncached.GetMachinesForCluster(ctx, util.ObjectKey(cluster), machinefilters.ControlPlaneMachines(cluster.Name))
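With the defer block that unwrapped capierrors.HasRequeueAfterError removed from the KCP Reconcile function, helpers such as reconcileKubeconfig hand their ctrl.Result straight back to the caller. A minimal sketch of that call-site guard; the helper body and the 30-second delay are placeholders, only the `!result.IsZero() || err != nil` pattern comes from the diff:

```go
package controllers

import (
	"context"
	"time"

	ctrl "sigs.k8s.io/controller-runtime"
)

type controlPlaneReconciler struct{}

// reconcileKubeconfig stands in for the real helper: it can now request a
// delayed retry (for example while a certificate secret is still missing) by
// returning a non-zero Result instead of a RequeueAfterError.
func (r *controlPlaneReconciler) reconcileKubeconfig(ctx context.Context) (ctrl.Result, error) {
	return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}

// reconcile shows the guard used throughout this PR: any non-zero Result or
// error from a helper short-circuits the rest of the reconcile.
func (r *controlPlaneReconciler) reconcile(ctx context.Context) (ctrl.Result, error) {
	if result, err := r.reconcileKubeconfig(ctx); !result.IsZero() || err != nil {
		return result, err
	}
	// ...continue with the remaining reconcile steps...
	return ctrl.Result{}, nil
}
```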