Skip to content

Commit

Permalink
daemon: Make switchKernel less stateful
Browse files Browse the repository at this point in the history
This is prep for fixing RHEL9 upgrades while maintaining `kernel-rt`.

Previously the `switchKernel` logic tried to carefully handle
all 4 cases (default -> default, default -> rt, rt -> default, rt -> rt).

But, the last one (rt -> rt) was not quite right because
the previous `rpm-ostree rebase` command already preserved the previous
kernel.  In fact it was pretty expensive to do things this way
because we'd e.g. regenerate the initramfs *twice*.

To say this another way: when doing a RHEL9 update, it's actually
the first `rpm-ostree rebase` command which fails before we
even get to `switchKernel`.

And the reason is due to the introduction of a new `-core` subpackage;
xref https://issues.redhat.com/browse/OCPBUGS-8113

So here's the new logic to handle this:

- Before we do the `rebase` operation to the new OS, we detect
  any previous overrides of any packages starting with `kernel-rt`
  and we remove them.  Notably this avoids hardcoding any specific
  kernel subpackages; we just remove *everything* starting with
  `kernel-rt` which should be more robust to subpackage changes
  in the future.
- Consequently the `rebase` operation will hence start out by deploying the
  stock image i.e. with throughput kernel (though note we *are*
  carefully preserving other local overrides)
- The `switchKernel` function now longer needs to take the *previous*
  machineconfig state into account (except for logging).
  Instead, we just detect if the target is RT, and if so we then we
  apply the latest packages.

This significantly simplifies the logic in `switchKernel`, and will
help fix RHEL9 upgrades.
  • Loading branch information
cgwalters committed Mar 8, 2023
1 parent cae67a6 commit 8ac5bee
Showing 1 changed file with 68 additions and 26 deletions.
94 changes: 68 additions & 26 deletions pkg/daemon/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -1161,8 +1161,9 @@ func (dn *CoreOSDaemon) switchKernel(oldConfig, newConfig *mcfgv1.MachineConfig)
oldKtype := canonicalizeKernelType(oldConfig.Spec.KernelType)
newKtype := canonicalizeKernelType(newConfig.Spec.KernelType)

// Do nothing if both old and new KernelType are of type default
if oldKtype == ctrlcommon.KernelTypeDefault && newKtype == ctrlcommon.KernelTypeDefault {
// In the OS update path, we removed overrides for kernel-rt. So if the target (new) config
// is also default (i.e. throughput) then we have nothing to do.
if newKtype == ctrlcommon.KernelTypeDefault {
return nil
}

Expand All @@ -1173,41 +1174,23 @@ func (dn *CoreOSDaemon) switchKernel(oldConfig, newConfig *mcfgv1.MachineConfig)
// kernel-rt also has a split off kernel-rt-kvm subpackage because it's in a separate subscription in RHEL.
realtimeKernel := []string{"kernel-rt-core", "kernel-rt-modules", "kernel-rt-modules-extra", "kernel-rt-kvm"}

dn.logSystem("Initiating switch from kernel %s to %s", oldKtype, newKtype)

switchingToThroughput := oldKtype == ctrlcommon.KernelTypeRealtime && newKtype == ctrlcommon.KernelTypeDefault
if switchingToThroughput {
args := []string{"override", "reset"}
args = append(args, defaultKernel...)
for _, pkg := range realtimeKernel {
args = append(args, "--uninstall", pkg)
}
dn.logSystem("Switching to kernelType=%s, invoking rpm-ostree %+q", newConfig.Spec.KernelType, args)
return runRpmOstree(args...)
if oldKtype != newKtype {
dn.logSystem("Initiating switch to kernel %s", newKtype)
} else {
dn.logSystem("Re-applying kernel type %s", newKtype)
}

switchingToRealtime := oldKtype == ctrlcommon.KernelTypeDefault && newKtype == ctrlcommon.KernelTypeRealtime
if switchingToRealtime {
if newKtype == ctrlcommon.KernelTypeRealtime {
// Switch to RT kernel
args := []string{"override", "remove"}
args = append(args, defaultKernel...)
for _, pkg := range realtimeKernel {
args = append(args, "--install", pkg)
}

dn.logSystem("Switching to kernelType=%s, invoking rpm-ostree %+q", newConfig.Spec.KernelType, args)
return runRpmOstree(args...)
}

if oldKtype == ctrlcommon.KernelTypeRealtime && newKtype == ctrlcommon.KernelTypeRealtime {
if oldConfig.Spec.OSImageURL != newConfig.Spec.OSImageURL {
args := []string{"update"}
dn.logSystem("Updating rt-kernel packages on host: %+q", args)
return runRpmOstree(args...)
}
}

return nil
return fmt.Errorf("Unhandled kernel type %s", newKtype)
}

// updateFiles writes files specified by the nodeconfig to disk. it also writes
Expand Down Expand Up @@ -1772,6 +1755,56 @@ func (dn *Daemon) InplaceUpdateViaNewContainer(target string) error {
return nil
}

// queueRevertRTKernel undoes the layering of the RT kernel
func (dn *Daemon) queueRevertRTKernel() error {
booted, _, err := dn.NodeUpdaterClient.GetBootedAndStagedDeployment()
if err != nil {
return err
}

// Before we attempt to do an OS update, we must remove the kernel-rt switch
// because in the case of updating from RHEL8 to RHEL9, the kernel packages are
// OS version dependent. See also https://github.com/coreos/rpm-ostree/issues/2542
// (Now really what we want to do here is something more like rpm-ostree override reset --kernel
// i.e. the inverse of https://github.com/coreos/rpm-ostree/pull/4322 so that
// we're again not hardcoding even the prefix of kernel packages)
kernelOverrides := []string{}
kernelRtLayers := []string{}
for _, removal := range booted.RequestedBaseRemovals {
if removal == "kernel" || strings.HasPrefix(removal, "kernel-") {
kernelOverrides = append(kernelOverrides, removal)
}
}
for _, pkg := range booted.RequestedPackages {
if strings.HasPrefix(pkg, "kernel-rt-") {
kernelRtLayers = append(kernelRtLayers, pkg)
}
}
// We *only* do this switch if the node has done a switch from kernel -> kernel-rt.
// We don't want to override any machine-local hotfixes for the kernel package.
// Implicitly in this we don't really support machine-local hotfixes for kernel-rt.
// The only sane way to handle that is declarative drop-ins, but really we want to
// just go to deploying pre-built images and not doing per-node mutation with rpm-ostree
// at all.
if len(kernelOverrides) > 0 && len(kernelRtLayers) > 0 {
args := []string{"override", "reset"}
args = append(args, kernelOverrides...)
for _, pkg := range kernelRtLayers {
args = append(args, "--uninstall", pkg)
}
err := runRpmOstree(args...)
if err != nil {
return err
}
} else if len(kernelOverrides) > 0 || len(kernelRtLayers) > 0 {
glog.Infof("notice: detected %d overrides and %d kernel-rt layers", len(kernelOverrides), len(kernelRtLayers))
} else {
glog.Infof("No kernel overrides or replacement detected")
}

return nil
}

// updateLayeredOS updates the system OS to the one specified in newConfig
func (dn *Daemon) updateLayeredOS(config *mcfgv1.MachineConfig) error {
newURL := config.Spec.OSImageURL
Expand Down Expand Up @@ -2024,6 +2057,15 @@ func (dn *CoreOSDaemon) applyLayeredOSChanges(mcDiff machineConfigDiff, oldConfi
defer os.Remove(extensionsRepo)
}

// If we have an OS update *or* a kernel type change, then we must undo the RT kernel
// enablement.
if mcDiff.osUpdate || mcDiff.kernelType {
if err := dn.queueRevertRTKernel(); err != nil {
mcdPivotErr.Inc()
return err
}
}

// Update OS
if mcDiff.osUpdate {
if err := dn.updateLayeredOS(newConfig); err != nil {
Expand Down

0 comments on commit 8ac5bee

Please sign in to comment.