Skip to content

Commit

Permalink
馃尡 Reduce reconcilements
Browse files Browse the repository at this point in the history
- Filter HCloudMachine events so that status updates of the resource
  don't trigger another reconcilement of HCloudMachine
- Filter HetznerCluster events so that status updates of the resource
  don't trigger another reconcilement of HetznerCluster
- Filter HetznerCluster events so that some status updates that are not
  important don't trigger reconcilement of HCloudMachine
- Filter Machine events so that status updates of the resource don't
  trigger another reconcilement of HCloudMachine a
- Filter Cluster events so that status updates of the resource don't
  trigger another reconcilement of HetznerCluster
- Increase the time period to requeue if a server is starting
- Increase the time period to requeue if a server has not status yet
- Increase the time period to requeue if the kubeapi server is not yet
  reachable before adding it as target to the load balancer
- Don't trigger multiple shutdown calls for one server and increase the
  timeout to requeue after a shutdown is triggered
  • Loading branch information
janiskemper committed Jun 21, 2024
1 parent 31c3167 commit 0cd4ab0
Show file tree
Hide file tree
Showing 4 changed files with 244 additions and 14 deletions.
125 changes: 122 additions & 3 deletions controllers/hcloudmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,16 @@ func (r *HCloudMachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl
WithOptions(options).
For(&infrav1.HCloudMachine{}).
WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(log, r.WatchFilterValue)).
WithEventFilter(IgnoreInsignificantHCloudMachineStatusUpdates(log)).
Watches(
&clusterv1.Machine{},
handler.EnqueueRequestsFromMapFunc(util.MachineToInfrastructureMapFunc(infrav1.GroupVersion.WithKind("HCloudMachine"))),
builder.WithPredicates(IgnoreInsignificantMachineStatusUpdates(log)),
).
Watches(
&infrav1.HetznerCluster{},
handler.EnqueueRequestsFromMapFunc(r.HetznerClusterToHCloudMachines(ctx)),
builder.WithPredicates(IgnoreHetznerClusterConditionUpdates(log)),
builder.WithPredicates(IgnoreInsignificantHetznerClusterUpdates(log)),
).
Build(r)
if err != nil {
Expand Down Expand Up @@ -303,8 +305,8 @@ func (r *HCloudMachineReconciler) HetznerClusterToHCloudMachines(_ context.Conte
}
}

// IgnoreHetznerClusterConditionUpdates is a predicate used for ignoring HetznerCluster condition updates.
func IgnoreHetznerClusterConditionUpdates(logger logr.Logger) predicate.Funcs {
// IgnoreInsignificantHetznerClusterUpdates is a predicate used for ignoring insignificant HetznerCluster.Status updates.
func IgnoreInsignificantHetznerClusterUpdates(logger logr.Logger) predicate.Funcs {
return predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
log := logger.WithValues(
Expand Down Expand Up @@ -341,6 +343,23 @@ func IgnoreHetznerClusterConditionUpdates(logger logr.Logger) predicate.Funcs {
oldCluster.Status.Conditions = nil
newCluster.Status.Conditions = nil

if oldCluster.Status.ControlPlaneLoadBalancer != nil {
oldCluster.Status.ControlPlaneLoadBalancer.Target = nil
}
if newCluster.Status.ControlPlaneLoadBalancer != nil {
newCluster.Status.ControlPlaneLoadBalancer.Target = nil
}

oldCluster.Status.HCloudPlacementGroups = nil
newCluster.Status.HCloudPlacementGroups = nil

if oldCluster.Status.Network != nil {
oldCluster.Status.Network.AttachedServers = nil
}
if newCluster.Status.Network != nil {
newCluster.Status.Network.AttachedServers = nil
}

if reflect.DeepEqual(oldCluster, newCluster) {
// Only insignificant fields changed, no need to reconcile
return false
Expand All @@ -355,3 +374,103 @@ func IgnoreHetznerClusterConditionUpdates(logger logr.Logger) predicate.Funcs {
GenericFunc: func(_ event.GenericEvent) bool { return true },
}
}

// IgnoreInsignificantMachineStatusUpdates is a predicate used for ignoring insignificant Machine.Status updates.
func IgnoreInsignificantMachineStatusUpdates(logger logr.Logger) predicate.Funcs {
return predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
log := logger.WithValues(
"predicate", "IgnoreInsignificantMachineStatusUpdates",
"type", "update",
"namespace", e.ObjectNew.GetNamespace(),
"kind", strings.ToLower(e.ObjectNew.GetObjectKind().GroupVersionKind().Kind),
"name", e.ObjectNew.GetName(),
)

var oldMachine, newMachine *clusterv1.Machine
var ok bool
// This predicate only looks at Machine objects
if oldMachine, ok = e.ObjectOld.(*clusterv1.Machine); !ok {
return true
}
if newMachine, ok = e.ObjectNew.(*clusterv1.Machine); !ok {
// Something weird happened, and we received two different kinds of objects
return true
}

// We should not modify the original objects, this causes issues with code that relies on the original object.
oldMachine = oldMachine.DeepCopy()
newMachine = newMachine.DeepCopy()

oldMachine.ManagedFields = nil
newMachine.ManagedFields = nil

oldMachine.ResourceVersion = ""
newMachine.ResourceVersion = ""

oldMachine.Status = clusterv1.MachineStatus{}
newMachine.Status = clusterv1.MachineStatus{}

if reflect.DeepEqual(oldMachine, newMachine) {
// Only insignificant fields changed, no need to reconcile
return false
}

log.V(1).Info("Machine -> HCloudMachine event")
return true
},
CreateFunc: func(_ event.CreateEvent) bool { return true },
DeleteFunc: func(_ event.DeleteEvent) bool { return true },
GenericFunc: func(_ event.GenericEvent) bool { return true },
}
}

// IgnoreInsignificantHCloudMachineStatusUpdates is a predicate used for ignoring insignificant HCloudMachine.Status updates.
func IgnoreInsignificantHCloudMachineStatusUpdates(logger logr.Logger) predicate.Funcs {
return predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
log := logger.WithValues(
"predicate", "IgnoreInsignificantHCloudMachineStatusUpdates",
"type", "update",
"namespace", e.ObjectNew.GetNamespace(),
"kind", strings.ToLower(e.ObjectNew.GetObjectKind().GroupVersionKind().Kind),
"name", e.ObjectNew.GetName(),
)

var oldHCloudMachine, newHCloudMachine *infrav1.HCloudMachine
var ok bool
// This predicate only looks at HCloudMachine objects
if oldHCloudMachine, ok = e.ObjectOld.(*infrav1.HCloudMachine); !ok {
return true
}
if newHCloudMachine, ok = e.ObjectNew.(*infrav1.HCloudMachine); !ok {
// Something weird happened, and we received two different kinds of objects
return true
}

// We should not modify the original objects, this causes issues with code that relies on the original object.
oldHCloudMachine = oldHCloudMachine.DeepCopy()
newHCloudMachine = newHCloudMachine.DeepCopy()

oldHCloudMachine.ManagedFields = nil
newHCloudMachine.ManagedFields = nil

oldHCloudMachine.ResourceVersion = ""
newHCloudMachine.ResourceVersion = ""

oldHCloudMachine.Status = infrav1.HCloudMachineStatus{}
newHCloudMachine.Status = infrav1.HCloudMachineStatus{}

if reflect.DeepEqual(oldHCloudMachine, newHCloudMachine) {
// Only insignificant fields changed, no need to reconcile
return false
}

log.V(1).Info("HCloudMachine event")
return true
},
CreateFunc: func(_ event.CreateEvent) bool { return true },
DeleteFunc: func(_ event.DeleteEvent) bool { return true },
GenericFunc: func(_ event.GenericEvent) bool { return true },
}
}
4 changes: 2 additions & 2 deletions controllers/hcloudmachine_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ var _ = Describe("HCloudMachine validation", func() {
})
})

var _ = Describe("IgnoreHetznerClusterConditionUpdates Predicate", func() {
var _ = Describe("IgnoreInsignificantHetznerClusterUpdates Predicate", func() {
var (
predicate predicate.Predicate

Expand All @@ -636,7 +636,7 @@ var _ = Describe("IgnoreHetznerClusterConditionUpdates Predicate", func() {
)

BeforeEach(func() {
predicate = IgnoreHetznerClusterConditionUpdates(klog.Background())
predicate = IgnoreInsignificantHetznerClusterUpdates(klog.Background())

oldCluster = &infrav1.HetznerCluster{
ObjectMeta: metav1.ObjectMeta{Name: "test-predicate", ResourceVersion: "1"},
Expand Down
110 changes: 110 additions & 0 deletions controllers/hetznercluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@ import (
"context"
"errors"
"fmt"
"reflect"
"strconv"
"strings"
"sync"
"time"

"github.com/go-logr/logr"
certificatesv1 "k8s.io/api/certificates/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
Expand All @@ -46,9 +48,11 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client/apiutil"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"

Expand Down Expand Up @@ -732,6 +736,7 @@ func (r *HetznerClusterReconciler) SetupWithManager(ctx context.Context, mgr ctr
For(&infrav1.HetznerCluster{}).
WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(log, r.WatchFilterValue)).
WithEventFilter(predicates.ResourceIsNotExternallyManaged(log)).
WithEventFilter(IgnoreInsignificantHetznerClusterStatusUpdates(log)).
Owns(&corev1.Secret{}).
Build(r)
if err != nil {
Expand Down Expand Up @@ -777,5 +782,110 @@ func (r *HetznerClusterReconciler) SetupWithManager(ctx context.Context, mgr ctr
},
}
}),
IgnoreInsignificantClusterStatusUpdates(log),
)
}

// IgnoreInsignificantClusterStatusUpdates is a predicate used for ignoring insignificant HetznerCluster.Status updates.
func IgnoreInsignificantClusterStatusUpdates(logger logr.Logger) predicate.Funcs {
return predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
log := logger.WithValues(
"predicate", "IgnoreInsignificantClusterStatusUpdates",
"type", "update",
"namespace", e.ObjectNew.GetNamespace(),
"kind", strings.ToLower(e.ObjectNew.GetObjectKind().GroupVersionKind().Kind),
"name", e.ObjectNew.GetName(),
)

var oldCluster, newCluster *clusterv1.Cluster
var ok bool
// This predicate only looks at Cluster objects
if oldCluster, ok = e.ObjectOld.(*clusterv1.Cluster); !ok {
return true
}
if newCluster, ok = e.ObjectNew.(*clusterv1.Cluster); !ok {
// Something weird happened, and we received two different kinds of objects
return true
}

// We should not modify the original objects, this causes issues with code that relies on the original object.
oldCluster = oldCluster.DeepCopy()
newCluster = newCluster.DeepCopy()

// Set fields we do not care about to nil

oldCluster.ManagedFields = nil
newCluster.ManagedFields = nil

oldCluster.ResourceVersion = ""
newCluster.ResourceVersion = ""

oldCluster.Status = clusterv1.ClusterStatus{}
newCluster.Status = clusterv1.ClusterStatus{}

if reflect.DeepEqual(oldCluster, newCluster) {
// Only insignificant fields changed, no need to reconcile
return false
}
// There is a noteworthy diff, so we should reconcile
log.V(1).Info("Cluster -> HetznerCluster")
return true
},
CreateFunc: func(_ event.CreateEvent) bool { return true },
DeleteFunc: func(_ event.DeleteEvent) bool { return true },
GenericFunc: func(_ event.GenericEvent) bool { return true },
}
}

// IgnoreInsignificantHetznerClusterStatusUpdates is a predicate used for ignoring insignificant HetznerHetznerCluster.Status updates.
func IgnoreInsignificantHetznerClusterStatusUpdates(logger logr.Logger) predicate.Funcs {
return predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
log := logger.WithValues(
"predicate", "IgnoreInsignificantHetznerClusterStatusUpdates",
"namespace", e.ObjectNew.GetNamespace(),
"kind", strings.ToLower(e.ObjectNew.GetObjectKind().GroupVersionKind().Kind),
"name", e.ObjectNew.GetName(),
)

var oldHetznerCluster, newHetznerCluster *infrav1.HetznerCluster
var ok bool
// This predicate only looks at HetznerCluster objects
if oldHetznerCluster, ok = e.ObjectOld.(*infrav1.HetznerCluster); !ok {
return true
}
if newHetznerCluster, ok = e.ObjectNew.(*infrav1.HetznerCluster); !ok {
// Something weird happened, and we received two different kinds of objects
return true
}

// We should not modify the original objects, this causes issues with code that relies on the original object.
oldHetznerCluster = oldHetznerCluster.DeepCopy()
newHetznerCluster = newHetznerCluster.DeepCopy()

// Set fields we do not care about to nil

oldHetznerCluster.ManagedFields = nil
newHetznerCluster.ManagedFields = nil

oldHetznerCluster.ResourceVersion = ""
newHetznerCluster.ResourceVersion = ""

oldHetznerCluster.Status = infrav1.HetznerClusterStatus{}
newHetznerCluster.Status = infrav1.HetznerClusterStatus{}

if reflect.DeepEqual(oldHetznerCluster, newHetznerCluster) {
// Only insignificant fields changed, no need to reconcile
return false
}
// There is a noteworthy diff, so we should reconcile
log.V(1).Info("HetznerCluster Update")
return true
},
// We only care about Update events, anything else should be reconciled
CreateFunc: func(_ event.CreateEvent) bool { return true },
DeleteFunc: func(_ event.DeleteEvent) bool { return true },
GenericFunc: func(_ event.GenericEvent) bool { return true },
}
}
19 changes: 10 additions & 9 deletions pkg/services/hcloud/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,12 @@ func (s *Service) Reconcile(ctx context.Context) (res reconcile.Result, err erro
clusterv1.ConditionSeverityInfo,
"server is starting",
)
return reconcile.Result{RequeueAfter: 10 * time.Second}, nil
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
case hcloud.ServerStatusRunning: // do nothing
default:
// some temporary status
s.scope.SetReady(false)
return reconcile.Result{RequeueAfter: 2 * time.Second}, nil
return reconcile.Result{RequeueAfter: 10 * time.Second}, nil
}

// check whether server is attached to the network
Expand Down Expand Up @@ -296,7 +296,7 @@ func (s *Service) reconcileLoadBalancerAttachment(ctx context.Context, server *h
// we attach only nodes with kube-apiserver pod healthy to avoid downtime, skipped for the first node
if len(s.scope.HetznerCluster.Status.ControlPlaneLoadBalancer.Target) > 0 &&
!conditions.IsTrue(s.scope.Machine, controlplanev1.MachineAPIServerPodHealthyCondition) {
return reconcile.Result{RequeueAfter: 5 * time.Second}, nil
return reconcile.Result{RequeueAfter: 30 * time.Second}, nil
}

opts := hcloud.LoadBalancerAddServerTargetOpts{
Expand All @@ -318,8 +318,8 @@ func (s *Service) reconcileLoadBalancerAttachment(ctx context.Context, server *h
record.Eventf(
s.scope.HetznerCluster,
"AddedAsTargetToLoadBalancer",
"Added new server %s with ID %d to the loadbalancer %s with ID %d",
server.Name, server.ID, s.scope.HetznerCluster.Spec.ControlPlaneLoadBalancer.Name, s.scope.HetznerCluster.Status.ControlPlaneLoadBalancer.ID)
"Added new server %s with ID %d to the loadbalancer with ID %d",
server.Name, server.ID, s.scope.HetznerCluster.Status.ControlPlaneLoadBalancer.ID)

return reconcile.Result{}, nil
}
Expand Down Expand Up @@ -499,7 +499,7 @@ func (s *Service) getServerImage(ctx context.Context) (*hcloud.Image, error) {
infrav1.ServerCreateSucceededCondition,
infrav1.ServerTypeNotFoundReason,
clusterv1.ConditionSeverityError,
err.Error(),
"failed to get server type - nil type",
)
return nil, errServerCreateNotPossible
}
Expand Down Expand Up @@ -566,13 +566,14 @@ func (s *Service) handleServerStatusOff(ctx context.Context, server *hcloud.Serv
if serverAvailableCondition != nil &&
serverAvailableCondition.Status == corev1.ConditionFalse &&
serverAvailableCondition.Reason == infrav1.ServerOffReason {
s.scope.Info("Trigger power on again")
if time.Now().Before(serverAvailableCondition.LastTransitionTime.Time.Add(serverOffTimeout)) {
// Not yet timed out, try again to power on
if err := s.scope.HCloudClient.PowerOnServer(ctx, server); err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "PowerOnServer")
if hcloud.IsError(err, hcloud.ErrorCodeLocked) {
// if server is locked, we just retry again
return reconcile.Result{Requeue: true}, nil
return reconcile.Result{RequeueAfter: 30 * time.Second}, nil
}
return reconcile.Result{}, fmt.Errorf("failed to power on server: %w", err)
}
Expand All @@ -587,7 +588,7 @@ func (s *Service) handleServerStatusOff(ctx context.Context, server *hcloud.Serv
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "PowerOnServer")
if hcloud.IsError(err, hcloud.ErrorCodeLocked) {
// if server is locked, we just retry again
return reconcile.Result{Requeue: true}, nil
return reconcile.Result{RequeueAfter: 30 * time.Second}, nil
}
return reconcile.Result{}, fmt.Errorf("failed to power on server: %w", err)
}
Expand All @@ -609,7 +610,7 @@ func (s *Service) handleDeleteServerStatusRunning(ctx context.Context, server *h
// 1. The server has not yet been tried to shut down and still is marked as "ready".
// 2. The server has been tried to shut down without an effect and the timeout is not reached yet.

if s.scope.HasServerAvailableCondition() || (s.scope.HasServerTerminatedCondition() && !s.scope.HasShutdownTimedOut()) {
if s.scope.HasServerAvailableCondition() {
if err := s.scope.HCloudClient.ShutdownServer(ctx, server); err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "ShutdownServer")
return reconcile.Result{}, fmt.Errorf("failed to shutdown server: %w", err)
Expand Down

0 comments on commit 0cd4ab0

Please sign in to comment.