Render osImageURL, handle "bootstrap" case in MCD
Have the MCC take `osImageURL` as provided by the cluster update/release payload
and generate a `00-{master,worker}-osimageurl` MC from it, which ensures
the MCD will update the node to it.

However, we need special handling for the *initial* case where we boot
into a target config, but we may be using an old OS image.  Currently
the MCD would treat this as "config drift" and go degraded.

Today we write the node annotations to a file in `/etc` as part of the
rendered Ignition.  Use that as a "bootstrap may be required" flag,
and handle it specially: if we need to pivot, do *just* that and
reboot.

We also clean things up by unlinking that node annotation file; after
that, if the `osImageURL` drifts from the expected config, we'll go
degraded, just like if someone modified a file.

Closes: openshift#183
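
For illustration, the derived fragment rendered for the worker pool comes
out roughly like this (a sketch: the role label key is the one the
controller applies elsewhere, and the image pullspec is a placeholder):

    // Sketch of the rendered fragment; only the name, the role label,
    // and Spec.OSImageURL matter here.
    mc := &mcfgv1.MachineConfig{
        ObjectMeta: metav1.ObjectMeta{
            Name: "00-worker-osimageurl",
            Labels: map[string]string{
                "machineconfiguration.openshift.io/role": "worker",
            },
        },
        Spec: mcfgv1.MachineConfigSpec{
            OSImageURL: "registry.example.com/rhcos@sha256:...",
        },
    }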
cgwalters committed Jan 21, 2019
1 parent aa7f288 commit f9054b9
Showing 5 changed files with 196 additions and 52 deletions.
63 changes: 53 additions & 10 deletions pkg/controller/template/render.go
@@ -91,21 +91,16 @@ func generateMachineConfigs(config *RenderConfig, templateDir string) ([]*mcfgv1
return cfgs, nil
}

// GenerateMachineConfigsForRole is part of generateMachineConfigs; it operates
// on a specific role which has a set of builtin templates.
func GenerateMachineConfigsForRole(config *RenderConfig, role string, path string) ([]*mcfgv1.MachineConfig, error) {
cfgs := []*mcfgv1.MachineConfig{}

// Add our built-in templates
infos, err := ioutil.ReadDir(path)
if err != nil {
return nil, fmt.Errorf("failed to read dir %q: %v", path, err)
}

for _, info := range infos {
if !info.IsDir() {
@@ -121,9 +116,57 @@ func GenerateMachineConfigsForRole(config *RenderConfig, role string, path strin
cfgs = append(cfgs, nameConfig)
}

// And derived configs
derivedCfgs, err := generateDerivedMachineConfigs(config, role)
if err != nil {
return nil, err
}
cfgs = append(cfgs, derivedCfgs...)

return cfgs, nil
}

// machineConfigForOSImageURL generates a MC fragment that just includes the target OSImageURL.
func machineConfigForOSImageURL(role string, url string) *mcfgv1.MachineConfig {
labels := map[string]string{
machineConfigRoleLabelKey: role,
}
return &mcfgv1.MachineConfig{
ObjectMeta: metav1.ObjectMeta{
Labels: labels,
Name: "00-" + role + "-osimageurl",
},
Spec: mcfgv1.MachineConfigSpec{
OSImageURL: url,
},
}
}

// generateDerivedMachineConfigs is part of generateMachineConfigsForRole. It
// takes care of generating MachineConfig objects which are derived from other
// components of the cluster configuration. Currently, that's:
//
// - SSH keys from the install configuration
// - OSImageURL from the machine-config-osimageurl configmap (which comes from the CVO)
func generateDerivedMachineConfigs(config *RenderConfig, role string) ([]*mcfgv1.MachineConfig, error) {
cfgs := []*mcfgv1.MachineConfig{}

// for each role a machine config is created containing the SSH authorized keys to allow for SSH access
// ex: role = worker -> machine config "00-worker-ssh" created containing user core and SSH key
var tempIgnConfig ignv2_2types.Config
tempUser := ignv2_2types.PasswdUser{Name: "core", SSHAuthorizedKeys: []ignv2_2types.SSHAuthorizedKey{ignv2_2types.SSHAuthorizedKey(config.SSHKey)}}
tempIgnConfig.Passwd.Users = append(tempIgnConfig.Passwd.Users, tempUser)
sshConfigName := "00-" + role + "-ssh"
cfgs = append(cfgs, MachineConfigFromIgnConfig(role, sshConfigName, &tempIgnConfig))

if config.OSImageURL != "" {
cfgs = append(cfgs, machineConfigForOSImageURL(role, config.OSImageURL))
}

return cfgs, nil
}

// generateMachineConfigForName is part of the implementation of generateMachineConfigsForRole
func generateMachineConfigForName(config *RenderConfig, role, name, path string) (*mcfgv1.MachineConfig, error) {
platformDirs := []string{}
for _, dir := range []string{"_base", config.Platform} {
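Where `RenderConfig.OSImageURL` comes from is outside this file: per the doc
comment on generateDerivedMachineConfigs, the operator reads it from the
`machine-config-osimageurl` ConfigMap managed by the CVO. A minimal sketch of
that lookup, assuming client-go; the `osImageURL` data key is an assumption:

    // getOSImageURL sketches how the operator side might fetch the
    // CVO-provided pullspec before building a RenderConfig.
    func getOSImageURL(client kubernetes.Interface) (string, error) {
        cm, err := client.CoreV1().ConfigMaps("openshift-machine-config-operator").Get("machine-config-osimageurl", metav1.GetOptions{})
        if err != nil {
            return "", err
        }
        // An empty value simply means no 00-<role>-osimageurl fragment
        // gets rendered.
        return cm.Data["osImageURL"], nil
    }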
47 changes: 37 additions & 10 deletions pkg/daemon/daemon.go
@@ -357,21 +357,25 @@ func (dn *Daemon) EnterDegradedState(err error) {
//
// If any of the object names are the same, they will be pointer-equal.
type stateAndConfigs struct {
bootstrapping bool
state string
currentConfig *mcfgv1.MachineConfig
pendingConfig *mcfgv1.MachineConfig
desiredConfig *mcfgv1.MachineConfig
}

func (dn *Daemon) getStateAndConfigs(pendingConfigName string) (*stateAndConfigs, error) {
_, err := os.Lstat(InitialNodeAnnotationsFilePath)
bootstrapping := false
if err != nil {
if !os.IsNotExist(err) {
return nil, err
}
// The node annotation file (laid down by the MCS)
// doesn't exist, we must not be bootstrapping
} else {
bootstrapping = true
glog.Infof("In bootstrap mode")
}

currentConfigName, err := getNodeAnnotation(dn.kubeClient.CoreV1().Nodes(), dn.name, CurrentMachineConfigAnnotationKey)
@@ -386,6 +390,16 @@ func (dn *Daemon) getStateAndConfigs(pendingConfigName string) (*stateAndConfigs
if err != nil {
return nil, err
}
state, err := getNodeAnnotationExt(dn.kubeClient.CoreV1().Nodes(), dn.name, MachineConfigDaemonStateAnnotationKey, true)
if err != nil {
return nil, err
}
// Temporary hack: the MCS used to not write the state=done annotation
// key. If it's unset, let's write it now.
if state == "" {
state = MachineConfigDaemonStateDone
}

var desiredConfig *mcfgv1.MachineConfig
if currentConfigName == desiredConfigName {
desiredConfig = currentConfig
@@ -415,6 +429,7 @@ func (dn *Daemon) getStateAndConfigs(pendingConfigName string) (*stateAndConfigs
}

return &stateAndConfigs{
bootstrapping: bootstrapping,
currentConfig: currentConfig,
pendingConfig: pendingConfig,
desiredConfig: desiredConfig,
@@ -540,6 +555,22 @@ func (dn *Daemon) CheckStateOnBoot() error {
select {}
}

if state.bootstrapping {
if !dn.checkOS(state.currentConfig.Spec.OSImageURL) {
glog.Infof("Bootstrap pivot required")
// This only returns on error
return dn.updateOSAndReboot(state.currentConfig)
} else {
glog.Infof("No bootstrap pivot required; unlinking bootstrap node annotations")
// Delete the bootstrap node annotations; the
// currentConfig's osImageURL should now be *truth*.
// In other words if it drifts somehow, we go degraded.
if err := os.Remove(InitialNodeAnnotationsFilePath); err != nil {
return errors.Wrapf(err, "Removing initial node annotations file")
}
}
}

// Validate the on-disk state against what we *expect*.
//
// In the case where we're booting a node for the first time, or the MCD
@@ -742,10 +773,6 @@ func (dn *Daemon) completeUpdate(desiredConfigName string) error {
// triggerUpdateWithMachineConfig starts the update. It queries the cluster for
// the current and desired config if they weren't passed.
func (dn *Daemon) triggerUpdateWithMachineConfig(currentConfig *mcfgv1.MachineConfig, desiredConfig *mcfgv1.MachineConfig) error {
if currentConfig == nil {
ccAnnotation, err := getNodeAnnotation(dn.kubeClient.CoreV1().Nodes(), dn.name, CurrentMachineConfigAnnotationKey)
if err != nil {
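`checkOS` itself is untouched by this commit; it boils down to comparing the
config's target pullspec with the image the host actually booted. A sketch,
under the assumption that the daemon caches the booted image (e.g. from
`rpm-ostree status`) at startup; the field name is assumed:

    // checkOS reports whether the booted OS image already matches the
    // config's OSImageURL; during bootstrap a mismatch triggers the
    // pivot-and-reboot path above.
    func (dn *Daemon) checkOS(osImageURL string) bool {
        return dn.bootedOSImageURL == osImageURL
    }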
74 changes: 43 additions & 31 deletions pkg/daemon/update.go
@@ -65,41 +65,16 @@ func (dn *Daemon) writePendingState(desiredConfig *mcfgv1.MachineConfig) error {
return replaceFileContentsAtomically(pathStateJSON, b)
}

// updateOSAndReboot is the last step in an update(), and it can also
// be called as a special case for the "bootstrap pivot".
func (dn *Daemon) updateOSAndReboot(newConfig *mcfgv1.MachineConfig) error {
var err error

if err = dn.updateOS(newConfig); err != nil {
return err
}

// Skip draining of the node when we're not cluster driven
if dn.onceFrom == "" {
glog.Info("Update prepared; draining the node")

@@ -122,12 +97,49 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) error {
glog.V(2).Infof("Node successfully drained")
}

// reboot. this function shouldn't actually return.
return dn.reboot(fmt.Sprintf("Node will reboot into config %v", newConfig.GetName()))
}

// update the node to the provided node configuration.
func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) error {
var err error

if dn.nodeWriter != nil {
if err = dn.nodeWriter.SetUpdateWorking(dn.kubeClient.CoreV1().Nodes(), dn.name); err != nil {
return err
}
}

oldConfigName := oldConfig.GetName()
newConfigName := newConfig.GetName()
glog.Infof("Checking reconcilable for config %v to %v", oldConfigName, newConfigName)
// make sure we can actually reconcile this state
reconcilableError := dn.reconcilable(oldConfig, newConfig)

if reconcilableError != nil {
msg := fmt.Sprintf("Can't reconcile config %v with %v: %v", oldConfigName, newConfigName, *reconcilableError)
if dn.recorder != nil {
dn.recorder.Eventf(newConfig, corev1.EventTypeWarning, "FailedToReconcile", msg)
}
dn.logSystem(msg)
return fmt.Errorf("%s", msg)
}

// update files on disk that need updating
if err = dn.updateFiles(oldConfig, newConfig); err != nil {
return err
}

if err = dn.updateSSHKeys(newConfig.Spec.Config.Passwd.Users); err != nil {
return err
}

if err = dn.writePendingState(newConfig); err != nil {
return errors.Wrapf(err, "writing pending state")
}

return dn.updateOSAndReboot(newConfig)
}

// reconcilable checks the configs to make sure that the only changes requested
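The OS swap inside updateOSAndReboot is delegated to `updateOS` (unchanged
here), which hands the pullspec to the host's pivot tooling. An illustrative
sketch only, not the repo's actual client API; it assumes a `pivot` binary on
the host that accepts an image pullspec (imports `os` and `os/exec`):

    // pivotOS shells out to the host "pivot" tool with the target image;
    // the real daemon goes through its NodeUpdaterClient abstraction.
    func pivotOS(osImageURL string) error {
        cmd := exec.Command("pivot", osImageURL)
        cmd.Stdout = os.Stdout
        cmd.Stderr = os.Stderr
        return cmd.Run()
    }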
4 changes: 4 additions & 0 deletions test/e2e/main_test.go
@@ -5,6 +5,10 @@ import (
"testing"
)

const (
namespace = "openshift-machine-config-operator"
)

func TestMain(m *testing.M) {
os.Exit(m.Run())
}
60 changes: 59 additions & 1 deletion test/e2e/sanity_test.go
@@ -10,10 +10,16 @@ import (
"k8s.io/client-go/informers"
"k8s.io/apimachinery/pkg/labels"

mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
"github.com/openshift/machine-config-operator/cmd/common"
"github.com/openshift/machine-config-operator/pkg/daemon"
)


var (
controllerKind = mcfgv1.SchemeGroupVersion.WithKind("MachineConfigPool")
)

// Test case for https://github.com/openshift/machine-config-operator/pull/288/commits/44d5c5215b5450fca32806f796b50a3372daddc2
func TestOperatorLabel(t *testing.T) {
cb, err := common.NewClientBuilder("")
@@ -22,7 +28,7 @@ func TestOperatorLabel(t *testing.T) {
}
k := cb.KubeClientOrDie("sanity-test")

d, err := k.AppsV1().DaemonSets("openshift-machine-config-operator").Get("machine-config-daemon", metav1.GetOptions{})
d, err := k.AppsV1().DaemonSets(namespace).Get("machine-config-daemon", metav1.GetOptions{})
if err != nil {
t.Errorf("%#v", err)
}
Expand Down Expand Up @@ -73,3 +79,55 @@ func TestNoDegraded(t *testing.T) {
t.Errorf("%d degraded nodes found", len(degraded))
}
}

// func getRenderedMachineConfigs(mcfgs []mcfgv1.MachineConfig) ([]*mcfgv1.MachineConfig, []*mcfgv1.MachineConfig) {
// var masters []*mcfgv1.MachineConfig
// var workers []*mcfgv1.MachineConfig
// for _, mcfg := range mcfgs {
// if controllerRef := metav1.GetControllerOf(&mcfg); controllerRef != nil {
// if controllerRef.Kind != controllerKind.Kind {
// continue
// }
// if strings.HasPrefix(mcfg.Name, "master-") {
// masters = append(masters, &mcfg)
// } else if strings.HasPrefix(mcfg.Name, "worker-") {
// workers = append(workers, &mcfg)
// }
// }
// }
// return masters, workers
// }

func TestMachineConfigsOSImageURL(t *testing.T) {
cb, err := common.NewClientBuilder("")
if err != nil {
t.Fatalf("%#v", err)
}
mcClient := cb.MachineConfigClientOrDie("mc-test")

masterMCP, err := mcClient.MachineconfigurationV1().MachineConfigPools().Get("master", metav1.GetOptions{})
if err != nil {
t.Fatalf("Getting master MCP: %v", err)
}
workerMCP, err := mcClient.MachineconfigurationV1().MachineConfigPools().Get("worker", metav1.GetOptions{})
if err != nil {
t.Fatalf("Getting worker MCP: %v", err)
}

masterMC, err := mcClient.MachineconfigurationV1().MachineConfigs().Get(masterMCP.Status.Configuration.Name, metav1.GetOptions{})
if err != nil {
t.Fatalf("Getting MC: %v", err)
}
if masterMC.Spec.OSImageURL == "" {
t.Fatalf("master has no OSImageURL")
}

workerMC, err := mcClient.MachineconfigurationV1().MachineConfigs().Get(workerMCP.Status.Configuration.Name, metav1.GetOptions{})
if err != nil {
t.Fatalf("Getting MC: %v", err)
}
if workerMC.Spec.OSImageURL == "" {
t.Fatalf("master has no OSImageURL")
}
}
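
The new test only asserts that the rendered configs carry an OSImageURL. A
natural follow-on (not part of this commit) would check that nodes actually
converged to the pool's rendered config, for example via the currentConfig
annotation the MCD maintains on each node; a sketch reusing the client
patterns above and the exported key from pkg/daemon:

    // checkNodesConverged is a sketch: every node should report the
    // pool's rendered config via the MCD's currentConfig annotation.
    func checkNodesConverged(t *testing.T, k kubernetes.Interface, rendered string) {
        nodes, err := k.CoreV1().Nodes().List(metav1.ListOptions{})
        if err != nil {
            t.Fatalf("Listing nodes: %v", err)
        }
        for _, node := range nodes.Items {
            if cur := node.Annotations[daemon.CurrentMachineConfigAnnotationKey]; cur != rendered {
                t.Errorf("node %s at config %q, want %q", node.Name, cur, rendered)
            }
        }
    }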
