Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add healthz deep subpathes for all controllers #201

Merged
merged 1 commit into from
Apr 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions controllers/apps/deployment_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,17 @@ package apps
import (
"context"

"github.com/aws/amazon-vpc-resource-controller-k8s/controllers/core"
controllers "github.com/aws/amazon-vpc-resource-controller-k8s/controllers/core"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/condition"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config"
rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/node/manager"

"github.com/go-logr/logr"
appV1 "k8s.io/api/apps/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"
)

type DeploymentReconciler struct {
Expand Down Expand Up @@ -63,7 +65,13 @@ func (r *DeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{}, nil
}

func (r *DeploymentReconciler) SetupWithManager(mgr ctrl.Manager) error {
func (r *DeploymentReconciler) SetupWithManager(mgr ctrl.Manager, healthzHandler *rcHealthz.HealthzHandler) error {
// add health check on subpath for deployment controller
// TODO: this is a simple controller and unlikely hit blocking issue but we can revisit this after subpaths are released for a while
healthzHandler.AddControllersHealthCheckers(
map[string]healthz.Checker{"health-deploy-controller": rcHealthz.SimplePing("deployment controller", r.Log)},
)

return ctrl.NewControllerManagedBy(mgr).
For(&appV1.Deployment{}).
Complete(r)
Expand Down
16 changes: 15 additions & 1 deletion controllers/core/configmap_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (

"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/condition"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config"
rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/node/manager"
"github.com/go-logr/logr"
Expand All @@ -27,6 +28,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/healthz"
)

// ConfigMapReconciler reconciles a ConfigMap object
Expand All @@ -38,6 +40,7 @@ type ConfigMapReconciler struct {
K8sAPI k8s.K8sWrapper
Condition condition.Conditions
curWinIPAMEnabledCond bool
Context context.Context
}

//+kubebuilder:rbac:groups=core,resources=configmaps,namespace=kube-system,resourceNames=amazon-vpc-cni,verbs=get;list;watch
Expand Down Expand Up @@ -84,7 +87,12 @@ func (r *ConfigMapReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
}

// SetupWithManager sets up the controller with the Manager.
func (r *ConfigMapReconciler) SetupWithManager(mgr ctrl.Manager) error {
func (r *ConfigMapReconciler) SetupWithManager(mgr ctrl.Manager, healthzHandler *rcHealthz.HealthzHandler) error {
// add health check on subpath for CM controller
healthzHandler.AddControllersHealthCheckers(
map[string]healthz.Checker{"health-cm-controller": r.check()},
)

return ctrl.NewControllerManagedBy(mgr).
For(&corev1.ConfigMap{}).
Complete(r)
Expand All @@ -110,3 +118,9 @@ func UpdateNodesOnConfigMapChanges(k8sAPI k8s.K8sWrapper, nodeManager manager.Ma
}
return nil
}

func (r *ConfigMapReconciler) check() healthz.Checker {
r.Log.Info("ConfigMap controller's healthz subpath was added")
// We can revisit this to use PingWithTimeout() instead if we have concerns on this controller.
return rcHealthz.SimplePing("configmap controller", r.Log)
}
54 changes: 44 additions & 10 deletions controllers/core/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,22 @@ package controllers
import (
"context"
goErr "errors"
"net/http"

"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/condition"
rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/node/manager"
"github.com/google/uuid"

"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/healthz"
)

// MaxNodeConcurrentReconciles is the number of go routines that can invoke
Expand All @@ -43,7 +48,7 @@ type NodeReconciler struct {
Scheme *runtime.Scheme
Manager manager.Manager
Conditions condition.Conditions
// NodeEventCache *bigcache.BigCache
Context context.Context
}

// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch
Expand All @@ -68,6 +73,7 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.

if err := r.Client.Get(ctx, req.NamespacedName, node); err != nil {
if errors.IsNotFound(err) {
r.Log.V(1).Info("the requested node couldn't be found by k8s client", "Node", req.NamespacedName)
_, found := r.Manager.GetNode(req.Name)
// if cachedNode != nil && cachedNode.HasInstance() {
// // delete the not found node instance id from node event cache for housekeeping
Expand All @@ -80,7 +86,7 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
logger.Error(err, "failed to delete node from manager")
return ctrl.Result{}, nil
}
logger.Info("deleted the node from manager")
logger.V(1).Info("deleted the node from manager")
}
}
return ctrl.Result{}, client.IgnoreNotFound(err)
Expand All @@ -98,17 +104,45 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
return ctrl.Result{}, err
}

// func (r *NodeReconciler) deleteNodeFromNodeEventCache(nodeId string) {
// if err := r.NodeEventCache.Delete(nodeId); err != nil {
// r.Log.V(1).Info("node controller removing node from node event cache failed", "Error", err)
// } else {
// r.Log.V(1).Info("node controller removed the node from node event cache successfully", "InstanceId", nodeId)
// }
// }
func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager, healthzHandler *rcHealthz.HealthzHandler) error {
// add health check on subpath for node controller
healthzHandler.AddControllersHealthCheckers(
map[string]healthz.Checker{"health-node-controller": r.Check()},
)

func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&corev1.Node{}).
WithOptions(controller.Options{MaxConcurrentReconciles: MaxNodeConcurrentReconciles}).
Complete(r)
}

func (r *NodeReconciler) Check() healthz.Checker {
r.Log.Info("Node controller's healthz subpath was added")
return func(req *http.Request) error {
// if the reconciler is not ready, using the simple ping to test
// this can test the referenced cached pod datastore
if !r.Conditions.GetPodDataStoreSyncStatus() {
r.Log.V(1).Info("***** node controller healthz enpoint tested Simple Ping *****")
return nil
}

err := rcHealthz.PingWithTimeout(func(c chan<- error) {
// when the reconciler is ready, testing the reconciler with a fake node request
pingRequest := &ctrl.Request{
NamespacedName: types.NamespacedName{
Namespace: corev1.NamespaceDefault,
Name: uuid.New().String(),
},
}

// expecting to 'return ctrl.Result{}, client.IgnoreNotFound(err)'
// IgnoreNotFound returns nil on NotFound errors.
// this can test the pod cached datastore and node cached datastore
_, rErr := r.Reconcile(r.Context, *pingRequest)
r.Log.V(1).Info("***** node controller healthz endpoint tested Reconcile *****")
c <- rErr
}, r.Log)

return err
}
}
44 changes: 41 additions & 3 deletions controllers/core/pod_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,25 @@ package controllers

import (
"context"
"net/http"
"time"

"github.com/aws/amazon-vpc-resource-controller-k8s/controllers/custom"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/condition"
rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s/pod"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/node/manager"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/resource"
"github.com/google/uuid"

"github.com/go-logr/logr"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"
)

// +kubebuilder:rbac:groups="",resources=events,verbs=create;update;patch
Expand Down Expand Up @@ -71,7 +76,7 @@ func (r *PodReconciler) Reconcile(request custom.Request) (ctrl.Result, error) {
return ctrl.Result{}, nil
}
if !exists {
r.Log.Info("pod doesn't exists in the cache anymore",
r.Log.V(1).Info("pod doesn't exists in the cache anymore",
"namespace name", request.NamespacedName.String())
return ctrl.Result{}, nil
}
Expand Down Expand Up @@ -174,9 +179,10 @@ func getAggregateResources(pod *v1.Pod) map[string]int64 {
// list of runnable. After Manager acquire the lease the pod controller runnable
// will be started and the Pod events will be sent to Reconcile function
func (r *PodReconciler) SetupWithManager(ctx context.Context, manager ctrl.Manager,
clientSet *kubernetes.Clientset, pageLimit int, syncPeriod time.Duration) error {
clientSet *kubernetes.Clientset, pageLimit int, syncPeriod time.Duration, healthzHandler *rcHealthz.HealthzHandler) error {
r.Log.Info("The pod controller is using MaxConcurrentReconciles", "Routines", MaxPodConcurrentReconciles)
return custom.NewControllerManagedBy(ctx, manager).

customChecker, err := custom.NewControllerManagedBy(ctx, manager).
WithLogger(r.Log.WithName("custom pod controller")).
UsingDataStore(r.DataStore).
WithClientSet(clientSet).
Expand All @@ -188,4 +194,36 @@ func (r *PodReconciler) SetupWithManager(ctx context.Context, manager ctrl.Manag
ResyncPeriod: syncPeriod,
MaxConcurrentReconciles: MaxPodConcurrentReconciles,
}).UsingConditions(r.Condition).Complete(r)

// add health check on subpath for pod and pod customized controllers
healthzHandler.AddControllersHealthCheckers(
map[string]healthz.Checker{
"health-pod-controller": r.check(),
"health-custom-pod-controller": customChecker,
},
)

return err
}

func (r *PodReconciler) check() healthz.Checker {
r.Log.Info("Pod controller's healthz subpath was added")
// more meaningful ping
return func(req *http.Request) error {
err := rcHealthz.PingWithTimeout(func(c chan<- error) {
pingRequest := &custom.Request{
NamespacedName: types.NamespacedName{
Namespace: v1.NamespaceDefault,
Name: uuid.New().String(),
},
DeletedObject: nil,
}
// calling reconcile will test pod cache
_, rErr := r.Reconcile(*pingRequest)
r.Log.V(1).Info("***** pod controller healthz endpoint tested reconcile *****")
c <- rErr
}, r.Log)

return err
}
}
29 changes: 15 additions & 14 deletions controllers/custom/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/manager"
)

Expand Down Expand Up @@ -89,21 +90,21 @@ func NewControllerManagedBy(ctx context.Context, mgr manager.Manager) *Builder {

// Complete adds the controller to manager's Runnable. The Controller
// runnable will start when the manager starts
func (b *Builder) Complete(reconciler Reconciler) error {
func (b *Builder) Complete(reconciler Reconciler) (healthz.Checker, error) {
// Loggr is no longer an interface
// The suggestion is using LogSink to do nil check now
if b.log.GetSink() == nil {
return fmt.Errorf("need to set the logger")
return nil, fmt.Errorf("need to set the logger")
}
if b.converter == nil {
return fmt.Errorf("converter not provided, " +
return nil, fmt.Errorf("converter not provided, " +
"must use high level controller if conversion not required")
}
if b.clientSet == nil {
return fmt.Errorf("need to set kubernetes clienset")
return nil, fmt.Errorf("need to set kubernetes clienset")
}
if b.dataStore == nil {
return fmt.Errorf("need datastore to start the controller")
return nil, fmt.Errorf("need datastore to start the controller")
}

b.SetDefaults()
Expand Down Expand Up @@ -172,17 +173,17 @@ func (b *Builder) Complete(reconciler Reconciler) error {
},
}

controller := &CustomController{
log: b.log,
options: b.options,
config: config,
Do: reconciler,
workQueue: workQueue,
conditions: b.conditions,
}
controller := NewCustomController(
b.log,
b.options,
config,
reconciler,
workQueue,
b.conditions,
)

// Adds the controller to the manager's Runnable
return b.mgr.Add(controller)
return controller.checker, b.mgr.Add(controller)
}

// SetDefaults sets the default options for controller
Expand Down
Loading