From c8242ba8b4f97665c5ad9f6640666425e72329e2 Mon Sep 17 00:00:00 2001
From: jbtrystram
Date: Thu, 4 Jul 2024 17:59:23 +0200
Subject: [PATCH] tests: add a remote kdump test

This test sets up two machines to test if kdump successfully exports
vmcore to a SSH destination.

Fixes https://github.com/coreos/fedora-coreos-tracker/issues/1753
---
 mantle/kola/tests/ignition/kdump.go | 253 ++++++++++++++++++++++++++++
 1 file changed, 253 insertions(+)
 create mode 100644 mantle/kola/tests/ignition/kdump.go

diff --git a/mantle/kola/tests/ignition/kdump.go b/mantle/kola/tests/ignition/kdump.go
new file mode 100644
index 0000000000..148690a0de
--- /dev/null
+++ b/mantle/kola/tests/ignition/kdump.go
@@ -0,0 +1,253 @@
+package ignition
+
+import (
+	"fmt"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/coreos/coreos-assembler/mantle/kola"
+	"github.com/coreos/coreos-assembler/mantle/kola/cluster"
+	"github.com/coreos/coreos-assembler/mantle/kola/register"
+	"github.com/coreos/coreos-assembler/mantle/platform"
+	"github.com/coreos/coreos-assembler/mantle/platform/conf"
+	"github.com/coreos/coreos-assembler/mantle/platform/machine/qemu"
+	"github.com/coreos/coreos-assembler/mantle/util"
+)
+
+// Test kdump to remote hosts
+
+const (
+	// kdumpPollInterval is the delay between two polling attempts.
+	kdumpPollInterval = 15 * time.Second
+	// kdumpPollAttempts is the maximum number of attempts per polling loop.
+	kdumpPollAttempts = 12
+	// butaneKeyIndent indents the private key so that it lines up with the
+	// "inline: |" block scalar in the Butane config built by kdumpSSHTest.
+	butaneKeyIndent = "          "
+)
+
+func init() {
+	// Create 0 cluster size to allow starting and setup ssh server as needed for the test
+	// See: https://github.com/coreos/coreos-assembler/pull/1310#discussion_r401908836
+	register.RegisterTest(&register.Test{
+		Run:         kdumpSSHTest,
+		ClusterSize: 0,
+		Name:        `kdump.crash.ssh`,
+		Description: "Verifies kdump logs are exported to SSH destination",
+		Flags:       []register.Flag{},
+		Distros:     []string{"rhcos", "fcos"},
+		Tags:        []string{"kdump", kola.NeedsInternetTag, "reprovision"},
+	})
+}
+
+// SshServer describes the machine that receives the vmcore over SSH and the
+// key pair the crashing machine uses to log into it.
+type SshServer struct {
+	// Machine is the VM running the SSH server.
+	Machine platform.Machine
+	// MachineAddress is the address the crashing machine connects to.
+	MachineAddress string
+	// SSHPort is the port of the SSH server, as a decimal string.
+	SSHPort string
+	// PrivSSH is the private key, indented with butaneKeyIndent.
+	PrivSSH string
+	// PubSSH is the public key authorized for the core user.
+	PubSSH string
+}
+
+// setupSSHMachine boots the VM used as SSH dump target and generates the key
+// pair authorized for its core user. Failures abort the test via c.Fatal.
+func setupSSHMachine(c cluster.TestCluster) SshServer {
+	var (
+		m       platform.Machine
+		address string
+		port    string
+	)
+
+	options := platform.QemuMachineOptions{
+		HostForwardPorts: []platform.HostForwardPort{
+			{Service: "ssh", HostPort: 0, GuestPort: 22},
+		},
+	}
+
+	// temp dir to store SSH keys; both keys are read into memory below, so
+	// it can be removed when this function returns
+	tmpd, err := os.MkdirTemp("", "kola-kdump-crash-ssh")
+	if err != nil {
+		c.Fatalf("Error creating tempdir: %v", err)
+	}
+	defer os.RemoveAll(tmpd)
+
+	// generate an ssh key pair we'll use for authentication
+	pubkeyBuf, privkeyPath, err := util.CreateSSHAuthorizedKey(tmpd)
+	if err != nil {
+		c.Fatalf("Error creating ssh keys: %v", err)
+	}
+
+	// load the private key as well
+	privKeyBuf, err := os.ReadFile(privkeyPath)
+	if err != nil {
+		c.Fatalf("Error reading private key: %v", err)
+	}
+
+	// Inject the public key previously created as an
+	// authorized key
+	ignition := conf.Ignition(fmt.Sprintf(`{
+		"ignition": { "version": "3.4.0" },
+		"passwd":{
+			"users":[
+				{
+					"name":"core",
+					"sshAuthorizedKeys":["%s"]
+				}
+			]
+		}
+	}`, strings.TrimSpace(string(pubkeyBuf))))
+
+	// get machine address
+	switch pc := c.Cluster.(type) {
+	// These cases have to be separated because when put together to the same case statement
+	// the golang compiler no longer checks that the individual types in the case have the
+	// NewMachineWithQemuOptions function, but rather whether platform.Cluster
+	// does which fails
+	case *qemu.Cluster:
+		m, err = pc.NewMachineWithQemuOptions(ignition, options)
+		if err != nil {
+			c.Fatal(err)
+		}
+		for _, hfp := range options.HostForwardPorts {
+			if hfp.Service == "ssh" {
+				// 10.0.2.2 is the host as seen from a guest on QEMU's
+				// default user-mode network.
+				// NOTE(review): HostPort is 0 in options above; this
+				// relies on the allocated port being written back into
+				// the slice by NewMachineWithQemuOptions - confirm.
+				address = "10.0.2.2"
+				port = fmt.Sprintf("%d", hfp.HostPort)
+			}
+		}
+	default:
+		m, err = pc.NewMachine(ignition)
+		if err != nil {
+			c.Fatal(err)
+		}
+		address = m.IP()
+		port = "22"
+	}
+
+	// insert indentation in front of every line of the private key
+	// to avoid errors in the butane file
+	var padded strings.Builder
+	for _, line := range strings.Split(strings.TrimSuffix(string(privKeyBuf), "\n"), "\n") {
+		padded.WriteString(butaneKeyIndent)
+		padded.WriteString(line)
+		padded.WriteString("\n")
+	}
+
+	return SshServer{
+		Machine:        m,
+		MachineAddress: address,
+		SSHPort:        port,
+		PubSSH:         string(pubkeyBuf),
+		PrivSSH:        padded.String(),
+	}
+}
+
+// pollSSH runs cmd on m until it succeeds with a trimmed output accepted by
+// done. It sleeps kdumpPollInterval between attempts and returns an error
+// describing the last attempt once kdumpPollAttempts attempts have failed.
+func pollSSH(c cluster.TestCluster, m platform.Machine, cmd string, done func(out string) bool) error {
+	var lastOut string
+	var lastErr error
+	for attempt := 0; attempt < kdumpPollAttempts; attempt++ {
+		if attempt > 0 {
+			time.Sleep(kdumpPollInterval)
+		}
+		out, err := c.SSH(m, cmd)
+		lastOut, lastErr = strings.TrimSpace(string(out)), err
+		if err == nil && done(lastOut) {
+			return nil
+		}
+	}
+	if lastErr != nil {
+		return fmt.Errorf("running %q: %w", cmd, lastErr)
+	}
+	return fmt.Errorf("running %q: unexpected output %q", cmd, lastOut)
+}
+
+// kdumpSSHTest configures a machine to send its kdump vmcore to the SSH
+// server, crashes its kernel and checks that a vmcore shows up on the server.
+func kdumpSSHTest(c cluster.TestCluster) {
+	sshHost := setupSSHMachine(c)
+	butane := conf.Butane(fmt.Sprintf(`variant: fcos
+version: 1.5.0
+storage:
+  files:
+    - path: /root/.ssh/id_ssh_kdump.pub
+      mode: 0600
+      contents:
+        inline: |
+          %s
+    - path: /root/.ssh/id_ssh_kdump
+      mode: 0600
+      contents:
+        inline: |
+%s
+    - path: /root/.ssh/config
+      mode: 0644
+      overwrite: true
+      contents:
+        inline: |
+          Host %s
+              StrictHostKeyChecking no
+              Port %s
+    - path: /etc/kdump.conf
+      overwrite: true
+      contents:
+        inline: |
+          ssh core@%s
+          sshkey /root/.ssh/id_ssh_kdump
+          path /home/core/crash
+          core_collector makedumpfile -F -l --message-level 1 -d 31
+systemd:
+  units:
+    - name: kdump.service
+      enabled: true
+kernel_arguments:
+  should_exist:
+    - crashkernel=512M`,
+		sshHost.PubSSH, sshHost.PrivSSH, sshHost.MachineAddress, sshHost.SSHPort, sshHost.MachineAddress))
+
+	opts := platform.MachineOptions{
+		MinMemory: 2048,
+	}
+
+	kdumpMachine, err := c.NewMachineWithOptions(butane, opts)
+	if err != nil {
+		c.Fatalf("Unable to create test machine: %v", err)
+	}
+
+	// make sure kdump service is active; it may still be starting right
+	// after boot, so poll instead of sleeping for a fixed time
+	err = pollSSH(c, kdumpMachine, "systemctl is-active kdump.service", func(out string) bool {
+		return out == "active"
+	})
+	if err != nil {
+		c.Fatalf("Kdump.service is not ready: %v", err)
+	}
+
+	// crash the kernel; the kernel panics while the command is still
+	// running, so its result is deliberately ignored
+	_, _ = c.SSH(kdumpMachine, "sudo su -c \"echo 'c' > /proc/sysrq-trigger\"")
+
+	// Look for the crash files created on the SSH machine. The pattern is
+	// quoted so that it reaches find instead of being expanded by the
+	// remote shell.
+	err = pollSSH(c, sshHost.Machine, "find /home/core/crash -type f -name 'vmcore*'", func(out string) bool {
+		return out != ""
+	})
+	if err != nil {
+		c.Fatalf("No vmcore created on remote SSH host: %v", err)
+	}
+}