Skip to content

Commit

Permalink
introduce healthchecking controller
Browse files Browse the repository at this point in the history
  • Loading branch information
enxebre committed Nov 13, 2018
1 parent b8e3c1e commit 7c12b59
Show file tree
Hide file tree
Showing 6 changed files with 556 additions and 0 deletions.
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ nodelink-controller:
@echo -e "\033[32mBuilding node link controller binary...\033[0m"
$(DOCKER_CMD) go build $(GOGCFLAGS) -o bin/nodelink-controller github.com/openshift/machine-api-operator/cmd/nodelink-controller

# Build the machine healthcheck binary from cmd/machine-healthcheck and write
# it to bin/machine-healthcheck. The go build command is run through
# $(DOCKER_CMD) with $(GOGCFLAGS), like the other binary targets in this file.
.PHONY: machine-healthcheck
machine-healthcheck:
@echo -e "\033[32mBuilding machine healthcheck binary...\033[0m"
$(DOCKER_CMD) go build $(GOGCFLAGS) -o bin/machine-healthcheck github.com/openshift/machine-api-operator/cmd/machine-healthcheck

.PHONY: build-e2e
build-e2e: ## Build end-to-end test binary
@echo -e "\033[32mBuilding e2e test binary...\033[0m"
Expand Down
55 changes: 55 additions & 0 deletions cmd/machine-healthcheck/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package main

import (
"flag"
"runtime"

"github.com/golang/glog"
"github.com/openshift/machine-api-operator/pkg/apis"
"github.com/openshift/machine-api-operator/pkg/controller"
sdkVersion "github.com/operator-framework/operator-sdk/version"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/runtime/signals"
)

// printVersion logs the Go runtime version, the OS/architecture pair reported
// by the runtime package, and the operator-sdk version constant.
func printVersion() {
	goVersion := runtime.Version()
	osName, arch := runtime.GOOS, runtime.GOARCH

	glog.Infof("Go Version: %s", goVersion)
	glog.Infof("Go OS/Arch: %s/%s", osName, arch)
	glog.Infof("operator-sdk Version: %v", sdkVersion.Version)
}

// main is the entry point of the machine-healthcheck binary. It parses the
// command-line flags, loads the apiserver client configuration, creates a
// controller-runtime manager, registers the project's API types and
// controllers with it, and then runs the manager with a signal handler as
// its stop channel.
//
// Every setup failure, and any error returned by mgr.Start, is fatal. A nil
// return from mgr.Start is treated as a normal shutdown: logs are flushed and
// main returns, instead of passing nil to glog.Fatal.
func main() {
	flag.Parse()
	printVersion()

	// Get a config to talk to the apiserver.
	cfg, err := config.GetConfig()
	if err != nil {
		glog.Fatal(err)
	}

	// Create the manager that provides shared dependencies and starts components.
	mgr, err := manager.New(cfg, manager.Options{})
	if err != nil {
		glog.Fatal(err)
	}

	glog.Info("Registering Components.")

	// Set up the scheme for all resources.
	if err := apis.AddToScheme(mgr.GetScheme()); err != nil {
		glog.Fatal(err)
	}

	// Set up all controllers.
	if err := controller.AddToManager(mgr); err != nil {
		glog.Fatal(err)
	}

	glog.Info("Starting the Cmd.")

	// Only a non-nil error from Start is fatal. Previously the result was
	// handed to glog.Fatal unconditionally, so a clean stop still terminated
	// the process through the fatal path.
	if err := mgr.Start(signals.SetupSignalHandler()); err != nil {
		glog.Fatal(err)
	}

	// glog buffers output; flush it explicitly because the non-fatal exit
	// path does not do so on its own.
	glog.Flush()
}
10 changes: 10 additions & 0 deletions pkg/controller/add_machinehealthcheck.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package controller

import (
"github.com/openshift/machine-api-operator/pkg/controller/machinehealthcheck"
)

func init() {
// AddToManagerFuncs is a list of functions to create controllers and add them to a manager.
AddToManagerFuncs = append(AddToManagerFuncs, machinehealthcheck.Add)
}
18 changes: 18 additions & 0 deletions pkg/controller/controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package controller

import (
"sigs.k8s.io/controller-runtime/pkg/manager"
)

// AddToManagerFuncs is a list of functions to add all Controllers to the Manager.
// Each entry takes the Manager and returns an error. AddToManager invokes the
// entries in slice order and stops at the first one that fails.
var AddToManagerFuncs []func(manager.Manager) error

// AddToManager calls every function in AddToManagerFuncs with m, in slice
// order. It returns the first non-nil error without calling the remaining
// functions, or nil when all of them succeed.
func AddToManager(m manager.Manager) error {
	addFuncs := AddToManagerFuncs
	for i := range addFuncs {
		err := addFuncs[i](m)
		if err != nil {
			return err
		}
	}
	return nil
}
185 changes: 185 additions & 0 deletions pkg/controller/machinehealthcheck/machinehealthcheck_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
package machinehealthcheck

import (
"context"

"github.com/golang/glog"
healthcheckingv1alpha1 "github.com/openshift/machine-api-operator/pkg/apis/healthchecking/v1alpha1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/cache"
capiv1 "sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"
)

const (
// machineAnnotationKey is the Node annotation that Reconcile looks up to find
// the Machine a node is annotated for. Reconcile parses the annotation value
// with cache.SplitMetaNamespaceKey, so the value is expected to be a
// namespace/name style key.
machineAnnotationKey = "machine"
)

// Add builds a ReconcileMachineHealthCheck from mgr and registers a controller
// that uses it with mgr. It returns whatever error the registration reports.
func Add(mgr manager.Manager) error {
	reconciler := newReconciler(mgr)
	return add(mgr, reconciler)
}

// newReconciler returns a ReconcileMachineHealthCheck wired with the client
// and scheme obtained from mgr.
func newReconciler(mgr manager.Manager) reconcile.Reconciler {
	rec := &ReconcileMachineHealthCheck{}
	rec.client = mgr.GetClient()
	rec.scheme = mgr.GetScheme()
	return rec
}

// add creates a controller named "machinehealthcheck-controller" on mgr with
// r as its reconciler and makes it watch Node objects, enqueueing a request
// for the Node that triggered each event. It returns the first error from
// creating the controller or setting up the watch.
func add(mgr manager.Manager, r reconcile.Reconciler) error {
	opts := controller.Options{Reconciler: r}
	ctrl, err := controller.New("machinehealthcheck-controller", mgr, opts)
	if err != nil {
		return err
	}

	// Watch Nodes; the request is keyed by the Node object itself.
	nodeSource := &source.Kind{Type: &corev1.Node{}}
	return ctrl.Watch(nodeSource, &handler.EnqueueRequestForObject{})
}

// Compile-time assertion that *ReconcileMachineHealthCheck implements
// reconcile.Reconciler.
var _ reconcile.Reconciler = &ReconcileMachineHealthCheck{}

// ReconcileMachineHealthCheck is the reconciler used by the
// machinehealthcheck controller. Its Reconcile method is driven by Node
// events and checks the node's Machine against MachineHealthCheck objects.
type ReconcileMachineHealthCheck struct {
// client is set from mgr.GetClient() in newReconciler and is used to get
// Nodes and Machines and to list MachineHealthChecks.
// NOTE(review): presumably a split client that reads objects from the cache
// and writes to the apiserver — confirm against the controller-runtime
// version in use.
client client.Client
// scheme is set from mgr.GetScheme() in newReconciler.
scheme *runtime.Scheme
}

// Reconcile handles a request triggered by a Node. It:
//
//  1. fetches the Node named by the request; a missing node is ignored;
//  2. reads the machineAnnotationKey annotation; a node without it is ignored;
//  3. parses the annotation value as a namespace/name key and fetches that
//     Machine; a missing machine is logged and ignored;
//  4. lists the MachineHealthCheck objects and calls remediate(node) for every
//     one whose selector matches the machine's labels.
//
// Note:
// The Controller will requeue the Request to be processed again if the returned error is non-nil or
// Result.Requeue is true, otherwise upon completion it will remove the work from the queue.
// This method never sets Result.Requeue; it only returns errors from parsing
// the annotation and from client Get/List calls other than NotFound.
func (r *ReconcileMachineHealthCheck) Reconcile(request reconcile.Request) (reconcile.Result, error) {
	glog.Infof("Reconciling MachineHealthCheck triggered by %s/%s\n", request.Namespace, request.Name)

	node := &corev1.Node{}
	if err := r.client.Get(context.TODO(), request.NamespacedName, node); err != nil {
		if errors.IsNotFound(err) {
			// The node could have been deleted after the reconcile request
			// was queued. Return and don't requeue.
			return reconcile.Result{}, nil
		}
		// Error reading the object - requeue the request.
		return reconcile.Result{}, err
	}
	// Logged only after a successful Get so node.Name is actually populated.
	glog.V(4).Infof("Reconciling, getting node %v", node.Name)

	machineKey, ok := node.Annotations[machineAnnotationKey]
	if !ok {
		// Node is not annotated with a machine; nothing to check.
		return reconcile.Result{}, nil
	}
	glog.Infof("Node %s is annotated for machine %s", node.Name, machineKey)

	namespace, machineName, err := cache.SplitMetaNamespaceKey(machineKey)
	if err != nil {
		return reconcile.Result{}, err
	}
	key := types.NamespacedName{
		Namespace: namespace,
		Name:      machineName,
	}

	machine := &capiv1.Machine{}
	if err := r.client.Get(context.TODO(), key, machine); err != nil {
		if errors.IsNotFound(err) {
			// The annotation points at a machine that does not exist.
			// Return and don't requeue.
			glog.Warningf("machine %s not found", machineKey)
			return reconcile.Result{}, nil
		}
		// Error reading the object - requeue the request.
		glog.Errorf("error getting machine %s, requeuing: %v", machineKey, err)
		return reconcile.Result{}, err
	}

	// Check whether the machine matches any existing MachineHealthCheck.
	allMachineHealthChecks := &healthcheckingv1alpha1.MachineHealthCheckList{}
	if err := r.client.List(context.TODO(), getMachineHealthCheckListOptions(), allMachineHealthChecks); err != nil {
		glog.Errorf("failed to list MachineHealthChecks, %v", err)
		return reconcile.Result{}, err
	}

	// Index into Items rather than taking the address of the range variable.
	for i := range allMachineHealthChecks.Items {
		hc := &allMachineHealthChecks.Items[i]
		if hasMatchingLabels(hc, machine) {
			glog.V(4).Infof("Machine %s has a matching machineHealthCheck: %s", machineKey, hc.Name)
			remediate(node)
		}
	}

	return reconcile.Result{}, nil
}

// getMachineHealthCheckListOptions returns list options whose raw TypeMeta
// names the MachineHealthCheck kind in healthchecking.openshift.io/v1alpha1.
// The TypeMeta is set explicitly so the fake client can be used in unit
// tests. See:
// https://github.com/kubernetes-sigs/controller-runtime/issues/168
func getMachineHealthCheckListOptions() *client.ListOptions {
	typeMeta := metav1.TypeMeta{
		APIVersion: "healthchecking.openshift.io/v1alpha1",
		Kind:       "MachineHealthCheck",
	}

	opts := &client.ListOptions{}
	opts.Raw = &metav1.ListOptions{TypeMeta: typeMeta}
	return opts
}

// remediate is the hook Reconcile calls for a node whose machine matches a
// MachineHealthCheck. Remediation itself is not implemented yet: a healthy
// node is left alone, and an unhealthy node is only reported in the log so
// the result of the health check is no longer silently discarded.
func remediate(node *corev1.Node) {
	// TODO(alberto): implement Remediate logic via hash or CRD
	if isHealthy(node) {
		return
	}
	glog.Warningf("Node %s is unhealthy, remediation is not implemented yet", node.Name)
}

// isHealthy reports whether node has a Ready condition whose status is
// ConditionTrue. A node with no Ready condition at all is reported as not
// healthy; getNodeCondition returns nil in that case, which previously caused
// a nil pointer dereference here.
func isHealthy(node *corev1.Node) bool {
	nodeReady := getNodeCondition(node, corev1.NodeReady)
	return nodeReady != nil && nodeReady.Status == corev1.ConditionTrue
}

// getNodeCondition returns a pointer to a copy of the first condition in
// node.Status.Conditions whose Type equals conditionType, or nil when the
// node has no such condition.
func getNodeCondition(node *corev1.Node, conditionType corev1.NodeConditionType) *corev1.NodeCondition {
	conditions := node.Status.Conditions
	for i := range conditions {
		if conditions[i].Type != conditionType {
			continue
		}
		// Copy the element so the caller does not alias the node's slice.
		match := conditions[i]
		return &match
	}
	return nil
}

// hasMatchingLabels reports whether the label selector in machineHealthCheck's
// spec matches the labels of machine. It returns false, after logging the
// reason, when the selector cannot be converted, when the selector is empty
// (an empty selector is treated as matching nothing, not everything), or when
// the machine's labels do not satisfy it.
func hasMatchingLabels(machineHealthCheck *healthcheckingv1alpha1.MachineHealthCheck, machine *capiv1.Machine) bool {
	selector, err := metav1.LabelSelectorAsSelector(&machineHealthCheck.Spec.Selector)
	switch {
	case err != nil:
		glog.Warningf("unable to convert selector: %v", err)
		return false
	case selector.Empty():
		glog.V(2).Infof("%v machineHealthCheck has empty selector", machineHealthCheck.Name)
		return false
	case !selector.Matches(labels.Set(machine.Labels)):
		glog.V(4).Infof("%v machine has mismatch labels", machine.Name)
		return false
	}
	return true
}
Loading

0 comments on commit 7c12b59

Please sign in to comment.