diff --git a/Makefile b/Makefile index fbcc9ab02b..51c606da2f 100644 --- a/Makefile +++ b/Makefile @@ -34,6 +34,11 @@ nodelink-controller: @echo -e "\033[32mBuilding node link controller binary...\033[0m" $(DOCKER_CMD) go build $(GOGCFLAGS) -o bin/nodelink-controller github.com/openshift/machine-api-operator/cmd/nodelink-controller +.PHONY: machine-healthcheck +machine-healthcheck: + @echo -e "\033[32mBuilding machine healthcheck binary...\033[0m" + $(DOCKER_CMD) go build $(GOGCFLAGS) -o bin/machine-healthcheck github.com/openshift/machine-api-operator/cmd/machine-healthcheck + .PHONY: build-e2e build-e2e: ## Build end-to-end test binary @echo -e "\033[32mBuilding e2e test binary...\033[0m" diff --git a/cmd/machine-healthcheck/main.go b/cmd/machine-healthcheck/main.go new file mode 100644 index 0000000000..819f904311 --- /dev/null +++ b/cmd/machine-healthcheck/main.go @@ -0,0 +1,55 @@ +package main + +import ( + "flag" + "runtime" + + "github.com/golang/glog" + "github.com/openshift/machine-api-operator/pkg/apis" + "github.com/openshift/machine-api-operator/pkg/controller" + sdkVersion "github.com/operator-framework/operator-sdk/version" + _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" + "sigs.k8s.io/controller-runtime/pkg/client/config" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/runtime/signals" +) + +func printVersion() { + glog.Infof("Go Version: %s", runtime.Version()) + glog.Infof("Go OS/Arch: %s/%s", runtime.GOOS, runtime.GOARCH) + glog.Infof("operator-sdk Version: %v", sdkVersion.Version) +} + +func main() { + flag.Parse() + printVersion() + + // Get a config to talk to the apiserver + cfg, err := config.GetConfig() + if err != nil { + glog.Fatal(err) + } + + // Create a new Cmd to provide shared dependencies and start components + mgr, err := manager.New(cfg, manager.Options{}) + if err != nil { + glog.Fatal(err) + } + + glog.Infof("Registering Components.") + + // Setup Scheme for all resources + if err := 
apis.AddToScheme(mgr.GetScheme()); err != nil { + glog.Fatal(err) + } + + // Setup all Controllers + if err := controller.AddToManager(mgr); err != nil { + glog.Fatal(err) + } + + glog.Info("Starting the Cmd.") + + // Start the Cmd + glog.Fatal(mgr.Start(signals.SetupSignalHandler())) +} diff --git a/pkg/controller/add_machinehealthcheck.go b/pkg/controller/add_machinehealthcheck.go new file mode 100644 index 0000000000..7ea6c097c2 --- /dev/null +++ b/pkg/controller/add_machinehealthcheck.go @@ -0,0 +1,10 @@ +package controller + +import ( + "github.com/openshift/machine-api-operator/pkg/controller/machinehealthcheck" +) + +func init() { + // AddToManagerFuncs is a list of functions to create controllers and add them to a manager. + AddToManagerFuncs = append(AddToManagerFuncs, machinehealthcheck.Add) +} diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go new file mode 100644 index 0000000000..7c069f3ee6 --- /dev/null +++ b/pkg/controller/controller.go @@ -0,0 +1,18 @@ +package controller + +import ( + "sigs.k8s.io/controller-runtime/pkg/manager" +) + +// AddToManagerFuncs is a list of functions to add all Controllers to the Manager +var AddToManagerFuncs []func(manager.Manager) error + +// AddToManager adds all Controllers to the Manager +func AddToManager(m manager.Manager) error { + for _, f := range AddToManagerFuncs { + if err := f(m); err != nil { + return err + } + } + return nil +} diff --git a/pkg/controller/machinehealthcheck/machinehealthcheck_controller.go b/pkg/controller/machinehealthcheck/machinehealthcheck_controller.go new file mode 100644 index 0000000000..654b1f6b8c --- /dev/null +++ b/pkg/controller/machinehealthcheck/machinehealthcheck_controller.go @@ -0,0 +1,180 @@ +package machinehealthcheck + +import ( + "context" + + "github.com/golang/glog" + healthcheckingv1alpha1 "github.com/openshift/machine-api-operator/pkg/apis/healthchecking/v1alpha1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/cache" + capiv1 "sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" +) + +const ( + machineAnnotationKey = "machine" +) + +// Add creates a new MachineHealthCheck Controller and adds it to the Manager. The Manager will set fields on the Controller +// and start it when the Manager is started. +func Add(mgr manager.Manager) error { + return add(mgr, newReconciler(mgr)) +} + +// newReconciler returns a new reconcile.Reconciler +func newReconciler(mgr manager.Manager) reconcile.Reconciler { + return &ReconcileMachineHealthCheck{client: mgr.GetClient(), scheme: mgr.GetScheme()} +} + +// add adds a new Controller to mgr with r as the reconcile.Reconciler +func add(mgr manager.Manager, r reconcile.Reconciler) error { + // Create a new controller + c, err := controller.New("machinehealthcheck-controller", mgr, controller.Options{Reconciler: r}) + if err != nil { + return err + } + return c.Watch(&source.Kind{Type: &corev1.Node{}}, &handler.EnqueueRequestForObject{}) +} + +var _ reconcile.Reconciler = &ReconcileMachineHealthCheck{} + +// ReconcileMachineHealthCheck reconciles a MachineHealthCheck object +type ReconcileMachineHealthCheck struct { + // This client, initialized using mgr.Client() above, is a split client + // that reads objects from the cache and writes to the apiserver + client client.Client + scheme *runtime.Scheme +} + +// Reconcile reads that state of the cluster for MachineHealthCheck, machine and nodes objects and makes changes based on the state read +// and what is in the MachineHealthCheck.Spec +// Note: 
+// The Controller will requeue the Request to be processed again if the returned error is non-nil or +// Result.Requeue is true, otherwise upon completion it will remove the work from the queue. +func (r *ReconcileMachineHealthCheck) Reconcile(request reconcile.Request) (reconcile.Result, error) { + glog.Infof("Reconciling MachineHealthCheck triggered by %s/%s\n", request.Namespace, request.Name) + + node := &corev1.Node{} + err := r.client.Get(context.TODO(), request.NamespacedName, node) + glog.V(4).Infof("Reconciling, getting node %v", node.Name) + if err != nil { + if errors.IsNotFound(err) { + // Request object not found, could have been deleted after reconcile request. + // Owned objects are automatically garbage collected. For additional cleanup logic use finalizers. + // Return and don't requeue + return reconcile.Result{}, nil + } + // Error reading the object - requeue the request. + return reconcile.Result{}, err + } + + machineKey, ok := node.Annotations[machineAnnotationKey] + if !ok { + glog.Infof("No machine annotation for node %s", node.Name) + return reconcile.Result{}, nil + } + + glog.Infof("Node %s is annotated for machine %s", node.Name, machineKey) + machine := &capiv1.Machine{} + namespace, machineName, err := cache.SplitMetaNamespaceKey(machineKey) + if err != nil { + return reconcile.Result{}, err + } + key := &types.NamespacedName{ + Namespace: namespace, + Name: machineName, + } + + err = r.client.Get(context.TODO(), *key, machine) + if err != nil { + if errors.IsNotFound(err) { + glog.Warningf("machine %s not found", machineKey) + // Request object not found, could have been deleted after reconcile request. + // Owned objects are automatically garbage collected. For additional cleanup logic use finalizers. + // Return and don't requeue + return reconcile.Result{}, nil + } + // Error reading the object - requeue the request. 
+ glog.Errorf("error getting machine %s, requeuing", machineKey) + return reconcile.Result{}, err + } + + // If the current machine matches any existing MachineHealthCheck CRD + allMachineHealthChecks := &healthcheckingv1alpha1.MachineHealthCheckList{} + err = r.client.List(context.Background(), getMachineHealthCheckListOptions(), allMachineHealthChecks) + if err != nil { + glog.Errorf("failed to list MachineHealthChecks, %v", err) + return reconcile.Result{}, err + } + + for _, hc := range allMachineHealthChecks.Items { + if hasMatchingLabels(&hc, machine) { + glog.V(4).Infof("Machine %s has a matching machineHealthCheck: %s", machineKey, hc.Name) + remediate(node) + } + } + + return reconcile.Result{}, nil +} + +// This is set so the fake client can be used for unit test. See: +// https://github.com/kubernetes-sigs/controller-runtime/issues/168 +func getMachineHealthCheckListOptions() *client.ListOptions { + return &client.ListOptions{ + Raw: &metav1.ListOptions{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "healthchecking.openshift.io/v1alpha1", + Kind: "MachineHealthCheck", + }, + }, + } +} + +func remediate(node *corev1.Node) { + // TODO(alberto): implement Remediate logic via hash or CRD + if !isHealthy(node) { + } + return +} + +func isHealthy(node *corev1.Node) bool { + nodeReady := getNodeCondition(node, corev1.NodeReady) + return nodeReady != nil && nodeReady.Status == corev1.ConditionTrue +} + +func getNodeCondition(node *corev1.Node, conditionType corev1.NodeConditionType) *corev1.NodeCondition { + for _, c := range node.Status.Conditions { + if c.Type == conditionType { + return &c + } + } + return nil +} + +func hasMatchingLabels(machineHealthCheck *healthcheckingv1alpha1.MachineHealthCheck, machine *capiv1.Machine) bool { + selector, err := metav1.LabelSelectorAsSelector(&machineHealthCheck.Spec.Selector) + if err != nil { + glog.Warningf("unable to convert selector: %v", err) + return false + } + // If a deployment with a nil or empty selector creeps in, it should match 
nothing, not everything. + if selector.Empty() { + glog.V(2).Infof("%v machineHealthCheck has empty selector", machineHealthCheck.Name) + return false + } + if !selector.Matches(labels.Set(machine.Labels)) { + glog.V(4).Infof("%v machine has mismatch labels", machine.Name) + return false + } + return true +} diff --git a/pkg/controller/machinehealthcheck/machinehealthcheck_controller_test.go b/pkg/controller/machinehealthcheck/machinehealthcheck_controller_test.go new file mode 100644 index 0000000000..e8bf9e1e70 --- /dev/null +++ b/pkg/controller/machinehealthcheck/machinehealthcheck_controller_test.go @@ -0,0 +1,283 @@ +package machinehealthcheck + +import ( + "fmt" + healthcheckingapis "github.com/openshift/machine-api-operator/pkg/apis" + healthcheckingv1alpha1 "github.com/openshift/machine-api-operator/pkg/apis/healthchecking/v1alpha1" + "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" + "reflect" + capiv1alpha1 "sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "testing" +) + +const ( + namespace = "openshift-cluster-api" +) + +func init() { + // Add types to scheme + capiv1alpha1.AddToScheme(scheme.Scheme) + healthcheckingapis.AddToScheme(scheme.Scheme) +} + +func node(name string, ready bool) *v1.Node { + nodeReadyStatus := corev1.ConditionTrue + if !ready { + nodeReadyStatus = corev1.ConditionFalse + } + + return &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: metav1.NamespaceNone, + Annotations: map[string]string{ + "machine": fmt.Sprintf("%s/%s", namespace, "fakeMachine"), + }, + }, + TypeMeta: metav1.TypeMeta{ + Kind: "Node", + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: corev1.NodeReady, + Status: nodeReadyStatus, + }, + }, + }, + } +} 
+ +func machine(name string) *capiv1alpha1.Machine { + return &capiv1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: map[string]string{ + "foo": "a", + "bar": "b", + }, + }, + TypeMeta: metav1.TypeMeta{ + Kind: "Machine", + }, + Spec: capiv1alpha1.MachineSpec{}, + } +} + +func machineHealthCheck(name string) *healthcheckingv1alpha1.MachineHealthCheck { + return &healthcheckingv1alpha1.MachineHealthCheck{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + TypeMeta: metav1.TypeMeta{ + Kind: "MachineHealthCheck", + }, + Spec: healthcheckingv1alpha1.MachineHealthCheckSpec{ + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "foo": "a", + "bar": "b", + }, + }, + }, + Status: healthcheckingv1alpha1.MachineHealthCheckStatus{}, + } +} + +func TestHasMatchingLabels(t *testing.T) { + machine := machine("machine") + testsCases := []struct { + machine *capiv1alpha1.Machine + machineHealthCheck *healthcheckingv1alpha1.MachineHealthCheck + expected bool + }{ + { + machine: machine, + machineHealthCheck: &healthcheckingv1alpha1.MachineHealthCheck{ + ObjectMeta: metav1.ObjectMeta{ + Name: "MatchingLabels", + Namespace: namespace, + }, + TypeMeta: metav1.TypeMeta{ + Kind: "MachineHealthCheck", + }, + Spec: healthcheckingv1alpha1.MachineHealthCheckSpec{ + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "foo": "a", + "bar": "b", + }, + }, + }, + Status: healthcheckingv1alpha1.MachineHealthCheckStatus{}, + }, + expected: true, + }, + { + machine: machine, + machineHealthCheck: &healthcheckingv1alpha1.MachineHealthCheck{ + ObjectMeta: metav1.ObjectMeta{ + Name: "NoMatchingLabels", + Namespace: namespace, + }, + TypeMeta: metav1.TypeMeta{ + Kind: "MachineHealthCheck", + }, + Spec: healthcheckingv1alpha1.MachineHealthCheckSpec{ + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "no": "match", + }, + }, + }, + Status: 
healthcheckingv1alpha1.MachineHealthCheckStatus{}, + }, + expected: false, + }, + } + + for _, tc := range testsCases { + if got := hasMatchingLabels(tc.machineHealthCheck, tc.machine); got != tc.expected { + t.Errorf("Expected %t, got %t", tc.expected, got) + } + } +} + +func TestIsHealthy(t *testing.T) { + nodeHealthy := node("healthy", true) + nodeUnhealthy := node("unhealthy", false) + + if health := isHealthy(nodeHealthy); !health { + t.Errorf("Expected true, got %t", health) + } + if health := isHealthy(nodeUnhealthy); health { + t.Errorf("Expected false, got %t", health) + } +} + +func TestGetNodeCondition(t *testing.T) { + testsCases := []struct { + node *corev1.Node + condition *corev1.NodeCondition + expected *corev1.NodeCondition + }{ + { + node: node("hasCondition", true), + condition: &corev1.NodeCondition{ + Type: corev1.NodeReady, + Status: corev1.ConditionTrue, + }, + expected: &corev1.NodeCondition{ + Type: corev1.NodeReady, + Status: corev1.ConditionTrue, + }, + }, + { + node: node("doesNotHaveCondition", true), + condition: &corev1.NodeCondition{ + Type: corev1.NodeOutOfDisk, + Status: corev1.ConditionTrue, + }, + expected: nil, + }, + } + + for _, tc := range testsCases { + got := getNodeCondition(tc.node, tc.condition.Type) + if !reflect.DeepEqual(got, tc.expected) { + t.Errorf("Expected %v, got %v", tc.expected, got) + } + } + +} + +type expectedReconcile struct { + result reconcile.Result + error bool +} + +func TestReconcile(t *testing.T) { + nodeHealthy := node("healthy", true) + nodeUnhealthy := node("unhealthy", false) + nodeWithNoMachineAnnotation := node("noAnnotated", true) + nodeWithNoMachineAnnotation.Annotations = map[string]string{} + nodeAnnotatedWithNoExistentMachine := node("noExistentMachine", true) + nodeAnnotatedWithNoExistentMachine.Annotations[machineAnnotationKey] = "noExistentMachine" + fakeMachine := machine("fakeMachine") + + testsCases := []struct { + node *v1.Node + expected expectedReconcile + }{ + { + node: 
nodeHealthy, + expected: expectedReconcile{ + result: reconcile.Result{}, + error: false, + }, + }, + { + node: nodeUnhealthy, + expected: expectedReconcile{ + result: reconcile.Result{}, + error: false, + }, + }, + { + node: nodeWithNoMachineAnnotation, + expected: expectedReconcile{ + result: reconcile.Result{}, + error: false, + }, + }, + { + node: nodeAnnotatedWithNoExistentMachine, + expected: expectedReconcile{ + result: reconcile.Result{}, + error: false, + }, + }, + } + + machineHealthCheck := machineHealthCheck("machineHealthCheck") + allMachineHealthChecks := &healthcheckingv1alpha1.MachineHealthCheckList{ + Items: []healthcheckingv1alpha1.MachineHealthCheck{ + *machineHealthCheck, + }, + } + + r := newFakeReconciler(nodeHealthy, nodeUnhealthy, fakeMachine, allMachineHealthChecks) + for _, tc := range testsCases { + request := reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: "", + Name: tc.node.Name, + }, + } + result, err := r.Reconcile(request) + if result != tc.expected.result { + t.Errorf("Expected %v, got: %v", tc.expected.result, result) + } + if tc.expected.error != (err != nil) { + t.Errorf("Expected error, got %v", err) + } + } +} + +// newFakeReconciler returns a new reconcile.Reconciler with a fake client +func newFakeReconciler(initObjects ...runtime.Object) reconcile.Reconciler { + fakeClient := fake.NewFakeClient(initObjects...) + return &ReconcileMachineHealthCheck{ + client: fakeClient, + scheme: scheme.Scheme, + } +}