node-controller: Support an annotation to hold/prioritize updates
Today the MCO arbitrarily chooses a node to update from the candidates.
We want to allow admins to avoid specific nodes entirely (for as long
as they want) as well as guide upgrade ordering.

This replaces the defunct etcd-specific code with support for a generic
annotation `machineconfiguration.openshift.io/update-order` that allows
an external controller (and/or human) to do both of these.

Setting it to `0` will entirely skip that node for updates.  Otherwise,
higher values are preferred.

Closes: openshift#2059
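For illustration (the node names here are hypothetical), an admin or an external controller would set this with an ordinary annotation, e.g.:

    oc annotate node node-a machineconfiguration.openshift.io/update-order=0   # hold this node indefinitely
    oc annotate node node-b machineconfiguration.openshift.io/update-order=10  # update before lower-valued peers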
cgwalters committed Oct 15, 2020
1 parent 86c8766 commit 35ffe71
Showing 4 changed files with 62 additions and 32 deletions.
65 changes: 33 additions & 32 deletions pkg/controller/node/node_controller.go
@@ -5,6 +5,8 @@ import (
 	"encoding/json"
 	"fmt"
 	"reflect"
+	"sort"
+	"strconv"
 	"time"

 	"github.com/golang/glog"
@@ -866,7 +868,7 @@ func getAllCandidateMachines(pool *mcfgv1.MachineConfigPool, nodesInPool []*corev1.Node
 	}
 	capacity -= failingThisConfig

-	return nodes, uint(capacity)
+	return prioritizeCandidateNodes(nodes), uint(capacity)
 }

 // getCandidateMachines returns the maximum subset of nodes which can be updated to the target config given availability constraints.
@@ -878,46 +880,45 @@ func getCandidateMachines(pool *mcfgv1.MachineConfigPool, nodesInPool []*corev1.Node
 	return nodes[:capacity]
 }

-// getCurrentEtcdLeader is not yet implemented
-func (ctrl *Controller) getCurrentEtcdLeader(candidates []*corev1.Node) (*corev1.Node, error) {
-	return nil, nil
+type nodeOrdering struct {
+	nodes []*corev1.Node
+	order map[*corev1.Node]int
 }

-// filterControlPlaneCandidateNodes adjusts the candidates and capacity specifically
-// for the control plane, e.g. based on which node is the etcd leader at the time.
-// nolint:unparam
-func (ctrl *Controller) filterControlPlaneCandidateNodes(pool *mcfgv1.MachineConfigPool, candidates []*corev1.Node, capacity uint) ([]*corev1.Node, uint, error) {
-	if len(candidates) <= 1 {
-		return candidates, capacity, nil
-	}
-	etcdLeader, err := ctrl.getCurrentEtcdLeader(candidates)
-	if err != nil {
-		glog.Warningf("Failed to find current etcd leader (continuing anyways): %v", err)
-	}
-	var newCandidates []*corev1.Node
+func (p nodeOrdering) Len() int           { return len(p.nodes) }
+func (p nodeOrdering) Less(i, j int) bool { return p.order[p.nodes[i]] > p.order[p.nodes[j]] }
+func (p nodeOrdering) Swap(i, j int)      { p.nodes[i], p.nodes[j] = p.nodes[j], p.nodes[i] }
+
+// prioritizeCandidateNodes chooses the update ordering
+func prioritizeCandidateNodes(candidates []*corev1.Node) []*corev1.Node {
+	var newCandidates nodeOrdering
+	newCandidates.order = make(map[*corev1.Node]int)
 	for _, node := range candidates {
-		if node == etcdLeader {
-			// For now make this an event so we know it's working, even though it's more of a non-event
-			ctrl.eventRecorder.Eventf(pool, corev1.EventTypeNormal, "DeferringEtcdLeaderUpdate", "Deferring update of etcd leader %s", node.Name)
-			glog.Infof("Deferring update of etcd leader: %s", node.Name)
-			continue
+		orderVal, ok := node.Annotations[daemonconsts.MachineUpdateOrderingAnnotationKey]
+		if ok {
+			order, err := strconv.Atoi(orderVal)
+			if err != nil {
+				glog.Warningf("Failed to parse %s %s: %v", node.Name, daemonconsts.MachineUpdateOrderingAnnotationKey, err)
+				continue
+			}
+			// order 0 means "skip this node"
+			if order == 0 {
+				continue
+			}
+			if order < 0 {
+				glog.Warningf("Invalid negative ordering %s for node %s", orderVal, node.Name)
+				continue
+			}
+			newCandidates.order[node] = order
 		}
-		newCandidates = append(newCandidates, node)
+		newCandidates.nodes = append(newCandidates.nodes, node)
 	}
-	return newCandidates, capacity, nil
+	sort.Sort(newCandidates)
+	return newCandidates.nodes
 }

 // updateCandidateMachines sets the desiredConfig annotation the candidate machines
 func (ctrl *Controller) updateCandidateMachines(pool *mcfgv1.MachineConfigPool, candidates []*corev1.Node, capacity uint) error {
-	if pool.Name == masterPoolName {
-		var err error
-		candidates, capacity, err = ctrl.filterControlPlaneCandidateNodes(pool, candidates, capacity)
-		if err != nil {
-			return err
-		}
-		// In practice right now these counts will be 1 but let's stay general to support 5 etcd nodes in the future
-		ctrl.logPool(pool, "filtered to %d candidate nodes for update, capacity: %d", len(candidates), capacity)
-	}
 	if capacity < uint(len(candidates)) {
 		// Arbitrarily pick the first N candidates; no attempt at sorting.
 		// Perhaps later we allow admins to weight somehow, or do something more intelligent.
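A minimal standalone sketch of the ordering semantics above (plain strings stand in for *corev1.Node, and the data is invented): because Less compares order values with ">", sort.Sort yields a descending sort, and nodes missing from the map (i.e. unannotated ones, whose lookup returns the zero value 0) land at the end.

package main

import (
	"fmt"
	"sort"
)

type nodeOrdering struct {
	nodes []string
	order map[string]int
}

func (p nodeOrdering) Len() int           { return len(p.nodes) }
func (p nodeOrdering) Less(i, j int) bool { return p.order[p.nodes[i]] > p.order[p.nodes[j]] }
func (p nodeOrdering) Swap(i, j int)      { p.nodes[i], p.nodes[j] = p.nodes[j], p.nodes[i] }

func main() {
	c := nodeOrdering{
		nodes: []string{"node-6", "node-4", "node-5"},
		// node-6 carries no annotation, so it has no entry here.
		order: map[string]int{"node-4": 5, "node-5": 10},
	}
	sort.Sort(c)
	fmt.Println(c.nodes) // [node-5 node-4 node-6]
}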
17 changes: 17 additions & 0 deletions pkg/controller/node/node_controller_test.go
@@ -632,6 +632,23 @@ func TestGetCandidateMachines(t *testing.T) {
 		expected:        []string{"node-3", "node-4"},
 		otherCandidates: []string{"node-5", "node-6"},
 		capacity:        2,
+	}, {
+		// A test with more nodes in mixed order
+		progress: 4,
+		nodes: []*corev1.Node{
+			newNodeWithReady("node-0", "v1", "v1", corev1.ConditionTrue),
+			newNodeWithReady("node-1", "v1", "v1", corev1.ConditionFalse),
+			newNodeWithReady("node-2", "v0", "v1", corev1.ConditionTrue),
+			newNodeWithReadyAndOrder("node-3", "v0", "v0", 0, corev1.ConditionTrue),
+			newNodeWithReadyAndOrder("node-4", "v0", "v0", 5, corev1.ConditionTrue),
+			newNodeWithReadyAndOrder("node-5", "v0", "v0", 10, corev1.ConditionTrue),
+			newNodeWithReady("node-6", "v0", "v0", corev1.ConditionTrue),
+			newNodeWithReady("node-7", "v1", "v1", corev1.ConditionTrue),
+			newNodeWithReady("node-8", "v1", "v1", corev1.ConditionTrue),
+		},
+		expected:        []string{"node-5", "node-4"},
+		otherCandidates: []string{"node-6"},
+		capacity:        2,
 	}}

 	for idx, test := range tests {
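In the added case, node-3 is annotated with order 0 and is therefore held back entirely; node-5 (order 10) and node-4 (order 5) fill the two available capacity slots in descending order; and the unannotated node-6 sorts last, remaining as the only other candidate.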
10 changes: 10 additions & 0 deletions pkg/controller/node/status_test.go
@@ -108,6 +108,16 @@ func newNodeWithReady(name string, currentConfig, desiredConfig string, status corev1.ConditionStatus) *corev1.Node {
 	return node
 }

+func newNodeWithReadyAndOrder(name string, currentConfig, desiredConfig string, order int, status corev1.ConditionStatus) *corev1.Node {
+	node := newNode(name, currentConfig, desiredConfig)
+	node.Status = corev1.NodeStatus{Conditions: []corev1.NodeCondition{{Type: corev1.NodeReady, Status: status}}}
+	if node.Annotations == nil {
+		node.Annotations = map[string]string{}
+	}
+	node.Annotations[daemonconsts.MachineUpdateOrderingAnnotationKey] = fmt.Sprintf("%d", order)
+	return node
+}
+
 func newNodeWithReadyAndDaemonState(name string, currentConfig, desiredConfig string, status corev1.ConditionStatus, dstate string) *corev1.Node {
 	node := newNode(name, currentConfig, desiredConfig)
 	node.Status = corev1.NodeStatus{Conditions: []corev1.NodeCondition{{Type: corev1.NodeReady, Status: status}}}
2 changes: 2 additions & 0 deletions pkg/daemon/constants/constants.go
@@ -14,6 +14,8 @@ const (
 	DesiredMachineConfigAnnotationKey = "machineconfiguration.openshift.io/desiredConfig"
 	// MachineConfigDaemonStateAnnotationKey is used to fetch the state of the daemon on the machine.
 	MachineConfigDaemonStateAnnotationKey = "machineconfiguration.openshift.io/state"
+	// MachineUpdateOrderingAnnotationKey is used to specify the desired ordering for an update
+	MachineUpdateOrderingAnnotationKey = "machineconfiguration.openshift.io/update-order"
 	// OpenShiftOperatorManagedLabel is used to filter out kube objects that don't need to be synced by the MCO
 	OpenShiftOperatorManagedLabel = "openshift.io/operator-managed"
 	// MachineConfigDaemonStateWorking is set by daemon when it is applying an update.
