Skip to content

Commit

Permalink
UPSTREAM: <carry>: openshift: Machine controller: drain node before machine deletion
Browse files Browse the repository at this point in the history

The node draining code itself is imported from github.com/openshift/kubernetes-drain.

At the same time it's currently impossible to use the controller-runtime client for node draining
due to missing Patch operation (kubernetes-sigs/controller-runtime#235).
Thus, the machine controller needs to initialize kubeclient as well in order to
implement the node draining logic. Once the Patch operation is implemented,
the draining logic can be updated to replace kube client with controller runtime client.

Also, initialize event recorder to generate node draining event.
  • Loading branch information
ingvagabund committed Jun 3, 2019
1 parent 80999f7 commit 78611c8
Show file tree
Hide file tree
Showing 12 changed files with 1,106 additions and 4 deletions.
21 changes: 21 additions & 0 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pkg/controller/machine/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,15 @@ go_library(
"//pkg/apis/cluster/v1alpha1:go_default_library",
"//pkg/controller/error:go_default_library",
"//pkg/util:go_default_library",
"//vendor/github.com/go-log/log/info:go_default_library",
"//vendor/github.com/openshift/kubernetes-drain:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/sigs.k8s.io/controller-runtime/pkg/client:go_default_library",
"//vendor/sigs.k8s.io/controller-runtime/pkg/controller:go_default_library",
Expand Down
70 changes: 66 additions & 4 deletions pkg/controller/machine/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,19 @@ package machine

import (
"context"
"fmt"
"os"
"time"

"github.com/go-log/log/info"
kubedrain "github.com/openshift/kubernetes-drain"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
"k8s.io/klog"
clusterv1 "sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1"
controllerError "sigs.k8s.io/cluster-api/pkg/controller/error"
Expand All @@ -38,6 +45,9 @@ import (

const (
	// NodeNameEnvVar is the environment variable from which the controller
	// reads the name of the node it is running on (consumed via os.Getenv
	// when constructing the reconciler).
	NodeNameEnvVar = "NODE_NAME"

	// ExcludeNodeDrainingAnnotation annotation explicitly skips node draining if set
	ExcludeNodeDrainingAnnotation = "cluster.k8s.io/exclude-node-draining"
)

var DefaultActuator Actuator
Expand All @@ -49,10 +59,12 @@ func AddWithActuator(mgr manager.Manager, actuator Actuator) error {
// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager, actuator Actuator) reconcile.Reconciler {
r := &ReconcileMachine{
Client: mgr.GetClient(),
scheme: mgr.GetScheme(),
nodeName: os.Getenv(NodeNameEnvVar),
actuator: actuator,
Client: mgr.GetClient(),
eventRecorder: mgr.GetRecorder("machine-controller"),
config: mgr.GetConfig(),
scheme: mgr.GetScheme(),
nodeName: os.Getenv(NodeNameEnvVar),
actuator: actuator,
}

if r.nodeName == "" {
Expand Down Expand Up @@ -80,8 +92,11 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
// ReconcileMachine reconciles a Machine object
type ReconcileMachine struct {
client.Client
config *rest.Config
scheme *runtime.Scheme

eventRecorder record.EventRecorder

actuator Actuator

// nodeName is the name of the node on which the machine controller is running, if not present, it is loaded from NODE_NAME.
Expand Down Expand Up @@ -168,6 +183,18 @@ func (r *ReconcileMachine) Reconcile(request reconcile.Request) (reconcile.Resul
}

klog.Infof("Reconciling machine %q triggers delete", name)

// Drain node before deletion
// If a machine is not linked to a node, just delete the machine. Since a node
// can be unlinked from a machine when the node goes NotReady and is removed
// by cloud controller manager. In that case some machines would never get
// deleted without a manual intervention.
if _, exists := m.ObjectMeta.Annotations[ExcludeNodeDrainingAnnotation]; !exists && m.Status.NodeRef != nil {
if err := r.drainNode(m); err != nil {
return reconcile.Result{}, err
}
}

if err := r.actuator.Delete(ctx, cluster, m); err != nil {
if requeueErr, ok := err.(*controllerError.RequeueAfterError); ok {
klog.Infof("Actuator returned requeue-after error: %v", requeueErr)
Expand Down Expand Up @@ -233,6 +260,41 @@ func (r *ReconcileMachine) Reconcile(request reconcile.Request) (reconcile.Resul
return reconcile.Result{}, nil
}

// drainNode evicts the workloads from the node referenced by the machine's
// NodeRef before the machine itself is deleted. On a drain failure it returns
// a RequeueAfterError so the eviction is retried on a later reconcile instead
// of blocking other machines; on success it emits a "Deleted" event.
func (r *ReconcileMachine) drainNode(machine *clusterv1.Machine) error {
	// The controller-runtime client lacks the Patch operation required for
	// draining, so a plain client-go clientset is built from the rest config.
	clientset, err := kubernetes.NewForConfig(r.config)
	if err != nil {
		return fmt.Errorf("unable to build kube client: %v", err)
	}

	nodeName := machine.Status.NodeRef.Name
	targetNode, err := clientset.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("unable to get node %q: %v", nodeName, err)
	}

	opts := &kubedrain.DrainOptions{
		Force:              true,
		IgnoreDaemonsets:   true,
		DeleteLocalData:    true,
		GracePeriodSeconds: -1,
		Logger:             info.New(klog.V(0)),
		// If a pod is not evicted within 20 seconds, retry the eviction next
		// time the machine gets reconciled (to allow other machines to be
		// reconciled in the meantime).
		Timeout: 20 * time.Second,
	}

	if drainErr := kubedrain.Drain(clientset, []*corev1.Node{targetNode}, opts); drainErr != nil {
		// Machine still tries to terminate after drain failure
		klog.Warningf("drain failed for machine %q: %v", machine.Name, drainErr)
		return &controllerError.RequeueAfterError{RequeueAfter: 20 * time.Second}
	}

	klog.Infof("drain successful for machine %q", machine.Name)
	r.eventRecorder.Eventf(machine, corev1.EventTypeNormal, "Deleted", "Node %q drained", targetNode.Name)

	return nil
}

func (r *ReconcileMachine) getCluster(ctx context.Context, machine *clusterv1.Machine) (*clusterv1.Cluster, error) {
if machine.Labels[clusterv1.MachineClusterLabelName] == "" {
klog.Infof("Machine %q in namespace %q doesn't specify %q label, assuming nil cluster", machine.Name, clusterv1.MachineClusterLabelName, machine.Namespace)
Expand Down
9 changes: 9 additions & 0 deletions vendor/github.com/go-log/log/BUILD.bazel

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions vendor/github.com/go-log/log/LICENSE

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

92 changes: 92 additions & 0 deletions vendor/github.com/go-log/log/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions vendor/github.com/go-log/log/info/BUILD.bazel

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions vendor/github.com/go-log/log/info/info.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions vendor/github.com/go-log/log/log.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 78611c8

Please sign in to comment.