Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

qemu: reduce monitor socket path #13971

Merged
merged 3 commits into from
Aug 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/13971.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
qemu: use shorter socket file names to reduce the chance of hitting the max path length
```
87 changes: 38 additions & 49 deletions drivers/qemu/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
"strings"
"time"

"github.com/coreos/go-semver/semver"
hclog "github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/taskenv"
"github.com/hashicorp/nomad/drivers/shared/eventer"
Expand All @@ -39,14 +38,13 @@ const (

// Represents an ACPI shutdown request to the VM (emulates pressing a physical power button)
// Reference: https://en.wikibooks.org/wiki/QEMU/Monitor
// Use a short file name since socket paths have a maximum length.
qemuGracefulShutdownMsg = "system_powerdown\n"
qemuMonitorSocketName = "qemu-monitor.sock"
qemuMonitorSocketName = "qm.sock"

// Socket file enabling communication with the Qemu Guest Agent (if enabled and running)
qemuGuestAgentSocketName = "qemu-guest-agent.sock"

// Maximum socket path length prior to qemu 2.10.1
qemuLegacyMaxMonitorPathLen = 108
// Use a short file name since socket paths have a maximum length.
qemuGuestAgentSocketName = "qa.sock"

// taskHandleVersion is the version of task handle which this driver sets
// and understands how to decode driver state
Expand All @@ -70,14 +68,6 @@ var (

versionRegex = regexp.MustCompile(`version (\d[\.\d+]+)`)

// Prior to qemu 2.10.1, monitor socket paths are truncated to 108 bytes.
// We should consider this if driver.qemu.version is < 2.10.1 and the
// generated monitor path is too long.
//
// Relevant fix is here:
// https://github.com/qemu/qemu/commit/ad9579aaa16d5b385922d49edac2c96c79bcfb6
qemuVersionLongSocketPathFix = semver.New("2.10.1")

// pluginInfo is the response returned for the PluginInfo RPC
pluginInfo = &base.PluginInfoResponse{
Type: base.PluginTypeDriver,
Expand Down Expand Up @@ -307,11 +297,19 @@ func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {

// Try to restore monitor socket path.
taskDir := filepath.Join(handle.Config.AllocDir, handle.Config.Name)
monitorPath := filepath.Join(taskDir, qemuMonitorSocketName)
if _, err := os.Stat(monitorPath); err == nil {
d.logger.Debug("found existing monitor socket", "monitor", monitorPath)
} else {
monitorPath = ""
possiblePaths := []string{
filepath.Join(taskDir, qemuMonitorSocketName),
// Support restoring tasks that used the old socket name.
filepath.Join(taskDir, "qemu-monitor.sock"),
}

var monitorPath string
for _, path := range possiblePaths {
if _, err := os.Stat(path); err == nil {
monitorPath = path
d.logger.Debug("found existing monitor socket", "monitor", monitorPath)
break
}
}

h := &taskHandle{
Expand Down Expand Up @@ -468,21 +466,17 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drive
}
}

taskDir := filepath.Join(cfg.AllocDir, cfg.Name)

var monitorPath string
if driverConfig.GracefulShutdown {
if runtime.GOOS == "windows" {
return nil, nil, errors.New("QEMU graceful shutdown is unsupported on the Windows platform")
}
// This socket will be used to manage the virtual machine (for example,
// to perform graceful shutdowns)
taskDir := filepath.Join(cfg.AllocDir, cfg.Name)
fingerPrint := d.buildFingerprint()
if fingerPrint.Attributes == nil {
return nil, nil, fmt.Errorf("unable to get qemu driver version from fingerprinted attributes")
}
monitorPath, err = d.getMonitorPath(taskDir, fingerPrint)
if err != nil {
d.logger.Debug("could not get qemu monitor path", "error", err)
monitorPath = filepath.Join(taskDir, qemuMonitorSocketName)
if err := validateSocketPath(monitorPath); err != nil {
return nil, nil, err
}
d.logger.Debug("got monitor path", "monitorPath", monitorPath)
Expand All @@ -494,8 +488,12 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drive
return nil, nil, errors.New("QEMU Guest Agent socket is unsupported on the Windows platform")
}
// This socket will be used to communicate with the Guest Agent (if it's running)
taskDir := filepath.Join(cfg.AllocDir, cfg.Name)
args = append(args, "-chardev", fmt.Sprintf("socket,path=%s/%s,server,nowait,id=qga0", taskDir, qemuGuestAgentSocketName))
agentSocketPath := filepath.Join(taskDir, qemuGuestAgentSocketName)
if err := validateSocketPath(agentSocketPath); err != nil {
return nil, nil, err
}

args = append(args, "-chardev", fmt.Sprintf("socket,path=%s,server,nowait,id=qga0", agentSocketPath))
args = append(args, "-device", "virtio-serial")
args = append(args, "-device", "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0")
}
Expand Down Expand Up @@ -647,6 +645,8 @@ func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) e
if err := sendQemuShutdown(d.logger, handle.monitorPath, handle.pid); err != nil {
d.logger.Debug("error sending graceful shutdown ", "pid", handle.pid, "error", err)
}
} else {
d.logger.Debug("monitor socket is empty, forcing shutdown")
}

// TODO(preetha) we are calling shutdown on the executor here
Expand Down Expand Up @@ -748,27 +748,16 @@ func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *dr
}
}

// getMonitorPath is used to determine whether a qemu monitor socket can be
// safely created and accessed in the task directory by the version of qemu
// present on the host. If it is safe to use, the socket's full path is
// returned along with a nil error. Otherwise, an empty string is returned
// along with a descriptive error.
func (d *Driver) getMonitorPath(dir string, fingerPrint *drivers.Fingerprint) (string, error) {
var longPathSupport bool
currentQemuVer := fingerPrint.Attributes[driverVersionAttr]
currentQemuSemver := semver.New(currentQemuVer.GoString())
if currentQemuSemver.LessThan(*qemuVersionLongSocketPathFix) {
longPathSupport = false
d.logger.Debug("long socket paths are not available in this version of QEMU", "version", currentQemuVer)
} else {
longPathSupport = true
d.logger.Debug("long socket paths available in this version of QEMU", "version", currentQemuVer)
}
fullSocketPath := fmt.Sprintf("%s/%s", dir, qemuMonitorSocketName)
if len(fullSocketPath) > qemuLegacyMaxMonitorPathLen && !longPathSupport {
return "", fmt.Errorf("monitor path is too long for this version of qemu")
// validateSocketPath provides best effort validation of socket paths since
// some rules may be platform-dependant.
func validateSocketPath(path string) error {
if maxSocketPathLen > 0 && len(path) > maxSocketPathLen {
return fmt.Errorf(
"socket path %s is longer than the maximum length allowed (%d), try to reduce the task name or Nomad's data_dir if possible.",
path, maxSocketPathLen)
}
return fullSocketPath, nil

return nil
}

// sendQemuShutdown attempts to issue an ACPI power-off command via the qemu
Expand Down
12 changes: 12 additions & 0 deletions drivers/qemu/driver_bsd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
//go:build darwin || freebsd || netbsd || openbsd
// +build darwin freebsd netbsd openbsd

package qemu

const (
// https://man.openbsd.org/unix.4#ADDRESSING
// https://www.freebsd.org/cgi/man.cgi?query=unix
// https://github.com/apple/darwin-xnu/blob/main/bsd/man/man4/unix.4#L72
// https://man.netbsd.org/unix.4#ADDRESSING
maxSocketPathLen = 104
)
9 changes: 9 additions & 0 deletions drivers/qemu/driver_fallback.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
//go:build !linux && !darwin && !freebsd && !netbsd && !openbsd
// +build !linux,!darwin,!freebsd,!netbsd,!openbsd

package qemu

const (
// Don't enforce any path limit.
maxSocketPathLen = 0
)
9 changes: 9 additions & 0 deletions drivers/qemu/driver_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
//go:build linux
// +build linux

package qemu

const (
// https://man7.org/linux/man-pages/man7/unix.7.html
maxSocketPathLen = 108
)
117 changes: 1 addition & 116 deletions drivers/qemu/driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"io"
"os"
"path/filepath"
"strings"
"testing"
"time"

Expand All @@ -17,7 +16,6 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers"
dtestutil "github.com/hashicorp/nomad/plugins/drivers/testutils"
pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -88,119 +86,6 @@ func TestQemuDriver_Start_Wait_Stop(t *testing.T) {

}

// Verifies monitor socket path for old qemu
func TestQemuDriver_GetMonitorPathOldQemu(t *testing.T) {
ci.Parallel(t)
ctestutil.QemuCompatible(t)

require := require.New(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

d := NewQemuDriver(ctx, testlog.HCLogger(t))
harness := dtestutil.NewDriverHarness(t, d)

task := &drivers.TaskConfig{
ID: uuid.Generate(),
Name: "linux",
Resources: &drivers.Resources{
NomadResources: &structs.AllocatedTaskResources{
Memory: structs.AllocatedMemoryResources{
MemoryMB: 512,
},
Cpu: structs.AllocatedCpuResources{
CpuShares: 100,
},
Networks: []*structs.NetworkResource{
{
ReservedPorts: []structs.Port{{Label: "main", Value: 22000}, {Label: "web", Value: 80}},
},
},
},
},
}

cleanup := harness.MkAllocDir(task, true)
defer cleanup()

fingerPrint := &drivers.Fingerprint{
Attributes: map[string]*pstructs.Attribute{
driverVersionAttr: pstructs.NewStringAttribute("2.0.0"),
},
}
shortPath := strings.Repeat("x", 10)
qemuDriver := d.(*Driver)
_, err := qemuDriver.getMonitorPath(shortPath, fingerPrint)
require.Nil(err)

longPath := strings.Repeat("x", qemuLegacyMaxMonitorPathLen+100)
_, err = qemuDriver.getMonitorPath(longPath, fingerPrint)
require.NotNil(err)

// Max length includes the '/' separator and socket name
maxLengthCount := qemuLegacyMaxMonitorPathLen - len(qemuMonitorSocketName) - 1
maxLengthLegacyPath := strings.Repeat("x", maxLengthCount)
_, err = qemuDriver.getMonitorPath(maxLengthLegacyPath, fingerPrint)
require.Nil(err)
}

// Verifies monitor socket path for new qemu version
func TestQemuDriver_GetMonitorPathNewQemu(t *testing.T) {
ci.Parallel(t)
ctestutil.QemuCompatible(t)

require := require.New(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

d := NewQemuDriver(ctx, testlog.HCLogger(t))
harness := dtestutil.NewDriverHarness(t, d)

task := &drivers.TaskConfig{
ID: uuid.Generate(),
Name: "linux",
Resources: &drivers.Resources{
NomadResources: &structs.AllocatedTaskResources{
Memory: structs.AllocatedMemoryResources{
MemoryMB: 512,
},
Cpu: structs.AllocatedCpuResources{
CpuShares: 100,
},
Networks: []*structs.NetworkResource{
{
ReservedPorts: []structs.Port{{Label: "main", Value: 22000}, {Label: "web", Value: 80}},
},
},
},
},
}

cleanup := harness.MkAllocDir(task, true)
defer cleanup()

fingerPrint := &drivers.Fingerprint{
Attributes: map[string]*pstructs.Attribute{
driverVersionAttr: pstructs.NewStringAttribute("2.99.99"),
},
}
shortPath := strings.Repeat("x", 10)
qemuDriver := d.(*Driver)
_, err := qemuDriver.getMonitorPath(shortPath, fingerPrint)
require.Nil(err)

// Should not return an error in this qemu version
longPath := strings.Repeat("x", qemuLegacyMaxMonitorPathLen+100)
_, err = qemuDriver.getMonitorPath(longPath, fingerPrint)
require.Nil(err)

// Max length includes the '/' separator and socket name
maxLengthCount := qemuLegacyMaxMonitorPathLen - len(qemuMonitorSocketName) - 1
maxLengthLegacyPath := strings.Repeat("x", maxLengthCount)
_, err = qemuDriver.getMonitorPath(maxLengthLegacyPath, fingerPrint)
require.Nil(err)
}

// copyFile moves an existing file to the destination
func copyFile(src, dst string, t *testing.T) {
in, err := os.Open(src)
Expand Down Expand Up @@ -464,7 +349,7 @@ func TestIsAllowedImagePath(t *testing.T) {

func TestArgsAllowList(t *testing.T) {
ci.Parallel(t)

pluginConfigAllowList := []string{"-drive", "-net", "-snapshot"}

validArgs := [][]string{
Expand Down
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ require (
github.com/containernetworking/cni v1.0.1
github.com/containernetworking/plugins v1.0.1
github.com/coreos/go-iptables v0.6.0
github.com/coreos/go-semver v0.3.0
github.com/creack/pty v1.1.18
github.com/docker/cli v20.10.3-0.20220113150236-6e2838e18645+incompatible
github.com/docker/distribution v2.8.1+incompatible
Expand Down
1 change: 0 additions & 1 deletion go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,6 @@ github.com/coreos/go-iptables v0.6.0 h1:is9qnZMPYjLd8LYqmm/qlE+wwEgJIkTYdhV3rfZo
github.com/coreos/go-iptables v0.6.0/go.mod h1:Qe8Bv2Xik5FyTXwgIbLAnv2sWSBmvWdFETJConOQ//Q=
github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc=
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM=
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-systemd v0.0.0-20161114122254-48702e0da86b/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
Expand Down
26 changes: 10 additions & 16 deletions website/content/docs/drivers/qemu.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -55,22 +55,16 @@ The `qemu` driver supports the following configuration in the job spec:
signal to virtual machines rather than simply terminating them. This emulates
a physical power button press, and gives instances a chance to shut down
cleanly. If the VM is still running after `kill_timeout`, it will be
forcefully terminated. (Note that
[prior to qemu 2.10.1](https://github.com/qemu/qemu/commit/ad9579aaa16d5b385922d49edac2c96c79bcfb6),
the monitor socket path is limited to 108 characters. Graceful shutdown will
be disabled if QEMU is < 2.10.1 and the generated monitor path exceeds this
length. You may encounter this issue if you set long
[data_dir](/docs/configuration#data_dir)
or
[alloc_dir](/docs/configuration/client#alloc_dir)
paths.) This feature is currently not supported on Windows.

- `guest_agent` `(bool: false)` - Enable support for the [QEMU Guest
Agent](https://wiki.qemu.org/Features/GuestAgent) for this virtual machine. This
will add the necessary virtual hardware and create a `qemu-guest-agent.sock`
file in the task's working directory for interacting with the agent. The QEMU Guest
Agent must be running in the guest VM. This feature is currently not supported
on Windows.
forcefully terminated. This feature uses a Unix socket that is placed within
the task directory and operating systems may impose a limit on how long these
paths can be. This feature is currently not supported on Windows.

- `guest_agent` `(bool: false)` - Enable support for the [QEMU Guest
Agent](https://wiki.qemu.org/Features/GuestAgent) for this virtual machine.
This will add the necessary virtual hardware and create a `qa.sock` file in
the task's working directory for interacting with the agent. The QEMU Guest
Agent must be running in the guest VM. This feature is currently not
supported on Windows.

- `port_map` - (Optional) A key-value map of port labels.

Expand Down