Remove RequeueAfterError
fabriziopandini committed Jan 15, 2021
1 parent 61dc332 commit d47a724
Showing 7 changed files with 69 additions and 134 deletions.
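The change is mechanical but repeated across the files below: functions that used to signal "try again later" by returning capierrors.RequeueAfterError (an error type carrying a delay) now return a ctrl.Result with RequeueAfter set, keeping the error return for real failures. A minimal sketch of the two patterns, with hypothetical helper names and a local stand-in for the removed error type:

```go
// Sketch only: contrasts the error-based requeue pattern removed by this commit
// with the ctrl.Result-based pattern it adopts. reconcileOld/reconcileNew are
// hypothetical stand-ins for the reconcile functions changed below.
package example

import (
	"fmt"
	"time"

	ctrl "sigs.k8s.io/controller-runtime"
)

// requeueAfterError mimics the removed capierrors.RequeueAfterError: a requeue
// request smuggled through the error return.
type requeueAfterError struct{ RequeueAfter time.Duration }

func (e *requeueAfterError) Error() string {
	return fmt.Sprintf("requeue in %s", e.RequeueAfter)
}

// Old pattern: callers (or a deferred block, as in the KCP controller below)
// had to type-assert the error to recover the requeue interval.
func reconcileOld() error {
	return &requeueAfterError{RequeueAfter: 20 * time.Second}
}

// New pattern: the interval travels in ctrl.Result and the error return is
// reserved for real failures.
func reconcileNew() (ctrl.Result, error) {
	return ctrl.Result{RequeueAfter: 20 * time.Second}, nil
}
```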
31 changes: 16 additions & 15 deletions controllers/machine_controller.go
@@ -36,7 +36,6 @@ import (
"sigs.k8s.io/cluster-api/controllers/external"
"sigs.k8s.io/cluster-api/controllers/noderefutil"
"sigs.k8s.io/cluster-api/controllers/remote"
capierrors "sigs.k8s.io/cluster-api/errors"
kubedrain "sigs.k8s.io/cluster-api/third_party/kubernetes-drain"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
@@ -328,10 +327,12 @@ func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *cluste
return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
}

if err := r.drainNode(ctx, cluster, m.Status.NodeRef.Name); err != nil {
conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDrainNode", "error draining Machine's node %q: %v", m.Status.NodeRef.Name, err)
return ctrl.Result{}, err
if result, err := r.drainNode(ctx, cluster, m.Status.NodeRef.Name); !result.IsZero() || err != nil {
if err != nil {
conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDrainNode", "error draining Machine's node %q: %v", m.Status.NodeRef.Name, err)
}
return result, err
}

conditions.MarkTrue(m, clusterv1.DrainingSucceededCondition)
@@ -484,28 +485,28 @@ func (r *MachineReconciler) isDeleteNodeAllowed(ctx context.Context, cluster *cl
}
}

func (r *MachineReconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster, nodeName string) error {
func (r *MachineReconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster, nodeName string) (ctrl.Result, error) {
log := ctrl.LoggerFrom(ctx, "cluster", cluster.Name, "node", nodeName)

restConfig, err := remote.RESTConfig(ctx, r.Client, util.ObjectKey(cluster))
if err != nil {
log.Error(err, "Error creating a remote client while deleting Machine, won't retry")
return nil
return ctrl.Result{}, nil
}
kubeClient, err := kubernetes.NewForConfig(restConfig)
if err != nil {
log.Error(err, "Error creating a remote client while deleting Machine, won't retry")
return nil
return ctrl.Result{}, nil
}

node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
if err != nil {
if apierrors.IsNotFound(err) {
// If an admin deletes the node directly, we'll end up here.
log.Error(err, "Could not find node from noderef, it may have already been deleted")
return nil
return ctrl.Result{}, nil
}
return errors.Errorf("unable to get node %q: %v", nodeName, err)
return ctrl.Result{}, errors.Errorf("unable to get node %q: %v", nodeName, err)
}

drainer := &kubedrain.Helper{
@@ -537,18 +538,18 @@ func (r *MachineReconciler) drainNode(ctx context.Context, cluster *clusterv1.Cl

if err := kubedrain.RunCordonOrUncordon(ctx, drainer, node, true); err != nil {
// Machine will be re-reconciled after a cordon failure.
log.Error(err, "Cordon failed")
return errors.Errorf("unable to cordon node %s: %v", node.Name, err)
log.Error(err, "Cordon failed, retry in 20s")
return ctrl.Result{}, errors.Errorf("unable to cordon node %s: %v", node.Name, err)
}

if err := kubedrain.RunNodeDrain(ctx, drainer, node.Name); err != nil {
// Machine will be re-reconciled after a drain failure.
log.Error(err, "Drain failed")
return &capierrors.RequeueAfterError{RequeueAfter: 20 * time.Second}
log.Error(err, "Drain failed, retry in 20s")
return ctrl.Result{RequeueAfter: 20 * time.Second}, nil
}

log.Info("Drain successful")
return nil
return ctrl.Result{}, nil
}

func (r *MachineReconciler) deleteNode(ctx context.Context, cluster *clusterv1.Cluster, name string) error {
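With drainNode returning (ctrl.Result, error), the caller in reconcileDelete has to tell a requested requeue apart from a failure: it only marks the DrainingSucceeded condition false and emits the FailedDrainNode event when err is non-nil, and otherwise just passes the result up. A sketch of that call-site pattern under the same assumptions, with doStep standing in for drainNode:

```go
// Sketch only: shows how a caller distinguishes "requeue later" from "failed",
// assuming a sub-reconciler with the (ctrl.Result, error) signature adopted in
// this commit. doStep is a hypothetical stand-in for drainNode.
package example

import (
	"time"

	ctrl "sigs.k8s.io/controller-runtime"
)

func doStep() (ctrl.Result, error) {
	// Transient wait: no error, just ask to be reconciled again in 20s.
	return ctrl.Result{RequeueAfter: 20 * time.Second}, nil
}

func reconcile() (ctrl.Result, error) {
	if result, err := doStep(); !result.IsZero() || err != nil {
		if err != nil {
			// Only a real error is reported (conditions, events, logs);
			// a bare requeue request is not treated as a failure.
		}
		// A non-zero result with a nil error is returned as-is, so the
		// controller requeues on schedule without backoff or error noise.
		return result, err
	}
	// Continue with the rest of the reconcile loop.
	return ctrl.Result{}, nil
}
```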
29 changes: 18 additions & 11 deletions controllers/machine_controller_phases.go
@@ -19,7 +19,6 @@ package controllers
import (
"context"
"fmt"
"strings"
"time"

"github.com/pkg/errors"
@@ -97,9 +96,18 @@ func (r *MachineReconciler) reconcileExternal(ctx context.Context, cluster *clus
obj, err := external.Get(ctx, r.Client, ref, m.Namespace)
if err != nil {
if apierrors.IsNotFound(errors.Cause(err)) {
return external.ReconcileOutput{}, errors.Wrapf(&capierrors.RequeueAfterError{RequeueAfter: externalReadyWait},
"could not find %v %q for Machine %q in namespace %q, requeuing",
ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace)
// If the missing reference is the infrastructureRef and the machine was up and running, this is an error.
if *ref == m.Spec.InfrastructureRef && m.Status.InfrastructureReady {
err := errors.Errorf("could not find %v %q for Machine %q in namespace %q, requeuing", ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace)
log.Error(err, "Machine infrastructure reference has been deleted after being ready, setting failure state")
m.Status.FailureReason = capierrors.MachineStatusErrorPtr(capierrors.InvalidConfigurationMachineError)
m.Status.FailureMessage = pointer.StringPtr(fmt.Sprintf("Machine infrastructure resource %v with name %q has been deleted after being ready",
m.Spec.InfrastructureRef.GroupVersionKind(), m.Spec.InfrastructureRef.Name))
return external.ReconcileOutput{}, err
}

log.Info(fmt.Sprintf("could not find %v %q for Machine %q in namespace %q, requeueing", ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace))
return external.ReconcileOutput{RequeueAfter: externalReadyWait}, nil
}
return external.ReconcileOutput{}, err
}
@@ -192,6 +200,9 @@ func (r *MachineReconciler) reconcileBootstrap(ctx context.Context, cluster *clu
if err != nil {
return ctrl.Result{}, err
}
if externalResult.RequeueAfter > 0 {
return ctrl.Result{RequeueAfter: externalResult.RequeueAfter}, nil
}
if externalResult.Paused {
return ctrl.Result{}, nil
}
@@ -240,15 +251,11 @@ func (r *MachineReconciler) reconcileInfrastructure(ctx context.Context, cluster
// Call generic external reconciler.
infraReconcileResult, err := r.reconcileExternal(ctx, cluster, m, &m.Spec.InfrastructureRef)
if err != nil {
if m.Status.InfrastructureReady && strings.Contains(err.Error(), "could not find") {
// Infra object went missing after the machine was up and running
log.Error(err, "Machine infrastructure reference has been deleted after being ready, setting failure state")
m.Status.FailureReason = capierrors.MachineStatusErrorPtr(capierrors.InvalidConfigurationMachineError)
m.Status.FailureMessage = pointer.StringPtr(fmt.Sprintf("Machine infrastructure resource %v with name %q has been deleted after being ready",
m.Spec.InfrastructureRef.GroupVersionKind(), m.Spec.InfrastructureRef.Name))
}
return ctrl.Result{}, err
}
if infraReconcileResult.RequeueAfter > 0 {
return ctrl.Result{RequeueAfter: infraReconcileResult.RequeueAfter}, nil
}
// if the external object is paused, return without any further processing
if infraReconcileResult.Paused {
return ctrl.Result{}, nil
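Here the not-found wait moves from an error wrapper into external.ReconcileOutput, and the callers (reconcileBootstrap, reconcileInfrastructure) convert it into a ctrl.Result. A rough sketch of that flow; the reconcileOutput struct below only models the RequeueAfter and Paused fields visible in this diff, and the externalReadyWait value is assumed:

```go
// Sketch only: how a requeue interval propagates out of reconcileExternal.
// This mirrors the hunks above rather than reproducing the real external package.
package example

import (
	"time"

	ctrl "sigs.k8s.io/controller-runtime"
)

const externalReadyWait = 30 * time.Second // assumed value; the real constant lives in this package

type reconcileOutput struct {
	RequeueAfter time.Duration
	Paused       bool
}

func reconcileExternal(found bool) (reconcileOutput, error) {
	if !found {
		// Referenced object not created yet: wait instead of returning an error.
		return reconcileOutput{RequeueAfter: externalReadyWait}, nil
	}
	return reconcileOutput{}, nil
}

func reconcileBootstrap(found bool) (ctrl.Result, error) {
	out, err := reconcileExternal(found)
	if err != nil {
		return ctrl.Result{}, err
	}
	if out.RequeueAfter > 0 {
		return ctrl.Result{RequeueAfter: out.RequeueAfter}, nil
	}
	if out.Paused {
		return ctrl.Result{}, nil
	}
	// ...continue reconciling the bootstrap config...
	return ctrl.Result{}, nil
}
```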
6 changes: 4 additions & 2 deletions controllers/machine_controller_phases_test.go
@@ -663,7 +663,8 @@ func TestReconcileBootstrap(t *testing.T) {
"spec": map[string]interface{}{},
"status": map[string]interface{}{},
},
expectError: true,
expectError: false,
result: &ctrl.Result{RequeueAfter: externalReadyWait},
expected: func(g *WithT, m *clusterv1.Machine) {
g.Expect(m.Status.BootstrapReady).To(BeFalse())
},
@@ -680,7 +681,8 @@
"spec": map[string]interface{}{},
"status": map[string]interface{}{},
},
expectError: true,
expectError: false,
result: &ctrl.Result{RequeueAfter: externalReadyWait},
},
{
name: "existing machine, bootstrap data should not change",
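The bootstrap test table now expects no error plus a requeue result for the missing-reference cases. The assertion code is outside this hunk; the sketch below shows the general shape such a table-driven check could take, with all names and the externalReadyWait value assumed rather than copied from the test file:

```go
// Sketch only: a plausible table-driven assertion on the new (ctrl.Result, error)
// contract. The struct fields mirror the entries above (expectError, result);
// everything else is illustrative, not the verbatim test code.
package example

import (
	"testing"
	"time"

	. "github.com/onsi/gomega"
	ctrl "sigs.k8s.io/controller-runtime"
)

const externalReadyWait = 30 * time.Second // assumed value

func reconcileStep(found bool) (ctrl.Result, error) {
	if !found {
		return ctrl.Result{RequeueAfter: externalReadyWait}, nil
	}
	return ctrl.Result{}, nil
}

func TestReconcileStep(t *testing.T) {
	tests := []struct {
		name        string
		found       bool
		expectError bool
		result      *ctrl.Result // non-nil when a specific result is expected
	}{
		{name: "missing ref requeues instead of erroring", found: false, result: &ctrl.Result{RequeueAfter: externalReadyWait}},
		{name: "existing ref reconciles cleanly", found: true},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			g := NewWithT(t)
			res, err := reconcileStep(tc.found)
			if tc.expectError {
				g.Expect(err).To(HaveOccurred())
			} else {
				g.Expect(err).ToNot(HaveOccurred())
			}
			if tc.result != nil {
				g.Expect(res).To(Equal(*tc.result))
			}
		})
	}
}
```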
16 changes: 5 additions & 11 deletions controlplane/kubeadm/controllers/controller.go
@@ -36,7 +36,6 @@ import (
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha4"
"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal"
"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/machinefilters"
capierrors "sigs.k8s.io/cluster-api/errors"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/conditions"
@@ -161,13 +160,6 @@ func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.
}

defer func() {
if requeueErr, ok := errors.Cause(reterr).(capierrors.HasRequeueAfterError); ok {
if res.RequeueAfter == 0 {
res.RequeueAfter = requeueErr.GetRequeueAfter()
reterr = nil
}
}

// Always attempt to update status.
if err := r.updateStatus(ctx, kcp, cluster); err != nil {
var connFailure *internal.RemoteClusterConnectionError
@@ -265,9 +257,11 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
}

// Generate Cluster Kubeconfig if needed
if err := r.reconcileKubeconfig(ctx, cluster, kcp); err != nil {
log.Error(err, "failed to reconcile Kubeconfig")
return ctrl.Result{}, err
if result, err := r.reconcileKubeconfig(ctx, cluster, kcp); !result.IsZero() || err != nil {
if err != nil {
log.Error(err, "failed to reconcile Kubeconfig")
}
return result, err
}

controlPlaneMachines, err := r.managementClusterUncached.GetMachinesForCluster(ctx, util.ObjectKey(cluster), machinefilters.ControlPlaneMachines(cluster.Name))
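The defer deleted from Reconcile above was the other half of the old mechanism: it dug a HasRequeueAfterError out of reterr and promoted it to res.RequeueAfter. Once sub-reconcilers like reconcileKubeconfig return ctrl.Result directly, that translation layer is dead code. A compressed sketch of what the removed defer did, with the dropped capierrors interface stubbed locally:

```go
// Sketch only: the error-unwrapping defer removed from Reconcile, reduced to its
// essentials. hasRequeueAfterError stands in for the dropped
// capierrors.HasRequeueAfterError interface.
package example

import (
	"time"

	"github.com/pkg/errors"
	ctrl "sigs.k8s.io/controller-runtime"
)

type hasRequeueAfterError interface {
	GetRequeueAfter() time.Duration
}

func reconcileWithDefer(inner func() (ctrl.Result, error)) (res ctrl.Result, reterr error) {
	defer func() {
		// Old behaviour: a requeue hidden inside the returned error was promoted
		// to res.RequeueAfter and the error swallowed. The new code returns the
		// ctrl.Result directly, so this block is no longer needed.
		if requeueErr, ok := errors.Cause(reterr).(hasRequeueAfterError); ok {
			if res.RequeueAfter == 0 {
				res.RequeueAfter = requeueErr.GetRequeueAfter()
				reterr = nil
			}
		}
	}()
	return inner()
}
```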
22 changes: 10 additions & 12 deletions controlplane/kubeadm/controllers/helpers.go
@@ -33,7 +33,6 @@ import (
"sigs.k8s.io/cluster-api/controllers/external"
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha4"
"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal"
capierrors "sigs.k8s.io/cluster-api/errors"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/certs"
"sigs.k8s.io/cluster-api/util/conditions"
@@ -43,12 +42,12 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
)

func (r *KubeadmControlPlaneReconciler) reconcileKubeconfig(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane) error {
func (r *KubeadmControlPlaneReconciler) reconcileKubeconfig(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane) (ctrl.Result, error) {
log := ctrl.LoggerFrom(ctx)

endpoint := cluster.Spec.ControlPlaneEndpoint
if endpoint.IsZero() {
return nil
return ctrl.Result{}, nil
}

controllerOwnerRef := *metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind("KubeadmControlPlane"))
Expand All @@ -64,41 +63,40 @@ func (r *KubeadmControlPlaneReconciler) reconcileKubeconfig(ctx context.Context,
controllerOwnerRef,
)
if errors.Is(createErr, kubeconfig.ErrDependentCertificateNotFound) {
return errors.Wrapf(&capierrors.RequeueAfterError{RequeueAfter: dependentCertRequeueAfter},
"could not find secret %q, requeuing", secret.ClusterCA)
return ctrl.Result{RequeueAfter: dependentCertRequeueAfter}, nil
}
// always return if we have just created in order to skip rotation checks
return createErr
return ctrl.Result{}, createErr
case err != nil:
return errors.Wrap(err, "failed to retrieve kubeconfig Secret")
return ctrl.Result{}, errors.Wrap(err, "failed to retrieve kubeconfig Secret")
}

// check if the kubeconfig secret was created by v1alpha2 controllers, and thus it has the Cluster as the owner instead of KCP;
// if yes, adopt it.
if util.IsOwnedByObject(configSecret, cluster) && !util.IsControlledBy(configSecret, kcp) {
if err := r.adoptKubeconfigSecret(ctx, cluster, configSecret, controllerOwnerRef); err != nil {
return err
return ctrl.Result{}, err
}
}

// only do rotation on owned secrets
if !util.IsControlledBy(configSecret, kcp) {
return nil
return ctrl.Result{}, nil
}

needsRotation, err := kubeconfig.NeedsClientCertRotation(configSecret, certs.ClientCertificateRenewalDuration)
if err != nil {
return err
return ctrl.Result{}, err
}

if needsRotation {
log.Info("rotating kubeconfig secret")
if err := kubeconfig.RegenerateSecret(ctx, r.Client, configSecret); err != nil {
return errors.Wrap(err, "failed to regenerate kubeconfig")
return ctrl.Result{}, errors.Wrap(err, "failed to regenerate kubeconfig")
}
}

return nil
return ctrl.Result{}, nil
}

func (r *KubeadmControlPlaneReconciler) adoptKubeconfigSecret(ctx context.Context, cluster *clusterv1.Cluster, configSecret *corev1.Secret, controllerOwnerRef metav1.OwnerReference) error {
21 changes: 16 additions & 5 deletions controlplane/kubeadm/controllers/helpers_test.go
@@ -35,6 +35,7 @@ import (
"sigs.k8s.io/cluster-api/util/conditions"
"sigs.k8s.io/cluster-api/util/kubeconfig"
"sigs.k8s.io/cluster-api/util/secret"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

@@ -76,7 +77,9 @@ func TestReconcileKubeconfigEmptyAPIEndpoints(t *testing.T) {
recorder: record.NewFakeRecorder(32),
}

g.Expect(r.reconcileKubeconfig(ctx, cluster, kcp)).To(Succeed())
result, err := r.reconcileKubeconfig(ctx, cluster, kcp)
g.Expect(err).ToNot(HaveOccurred())
g.Expect(result).To(BeZero())

kubeconfigSecret := &corev1.Secret{}
secretName := client.ObjectKey{
@@ -123,7 +126,9 @@ func TestReconcileKubeconfigMissingCACertificate(t *testing.T) {
recorder: record.NewFakeRecorder(32),
}

g.Expect(r.reconcileKubeconfig(ctx, cluster, kcp)).NotTo(Succeed())
result, err := r.reconcileKubeconfig(ctx, cluster, kcp)
g.Expect(err).ToNot(HaveOccurred())
g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: dependentCertRequeueAfter}))

kubeconfigSecret := &corev1.Secret{}
secretName := client.ObjectKey{
@@ -181,7 +186,9 @@ func TestReconcileKubeconfigSecretAdoptsV1alpha2Secrets(t *testing.T) {
recorder: record.NewFakeRecorder(32),
}

g.Expect(r.reconcileKubeconfig(ctx, cluster, kcp)).To(Succeed())
result, err := r.reconcileKubeconfig(ctx, cluster, kcp)
g.Expect(err).ToNot(HaveOccurred())
g.Expect(result).To(Equal(ctrl.Result{}))

kubeconfigSecret := &corev1.Secret{}
secretName := client.ObjectKey{
@@ -243,7 +250,9 @@ func TestReconcileKubeconfigSecretDoesNotAdoptsUserSecrets(t *testing.T) {
recorder: record.NewFakeRecorder(32),
}

g.Expect(r.reconcileKubeconfig(ctx, cluster, kcp)).To(Succeed())
result, err := r.reconcileKubeconfig(ctx, cluster, kcp)
g.Expect(err).To(Succeed())
g.Expect(result).To(BeZero())

kubeconfigSecret := &corev1.Secret{}
secretName := client.ObjectKey{
@@ -300,7 +309,9 @@ func TestKubeadmControlPlaneReconciler_reconcileKubeconfig(t *testing.T) {
Client: fakeClient,
recorder: record.NewFakeRecorder(32),
}
g.Expect(r.reconcileKubeconfig(ctx, cluster, kcp)).To(Succeed())
result, err := r.reconcileKubeconfig(ctx, cluster, kcp)
g.Expect(err).ToNot(HaveOccurred())
g.Expect(result).To(Equal(ctrl.Result{}))

kubeconfigSecret := &corev1.Secret{}
secretName := client.ObjectKey{