Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lxc: cleanup partially configured containers after errors in Start #3773

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 33 additions & 12 deletions client/driver/lxc.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,20 @@ func (d *LxcDriver) Prestart(*ExecContext, *structs.Task) (*PrestartResponse, er

// Start starts the LXC Driver
func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, error) {
sresp, err, errCleanup := d.StartWithCleanup(ctx, task)
if err != nil {
if cleanupErr := errCleanup(); cleanupErr != nil {
d.logger.Printf("[ERR] error occured:\n%v\nwhile cleaning up from error in Start: %v", cleanupErr, err)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the newline and err from this message since err will be logged later anyway.

}
}
return sresp, err
}

func (d *LxcDriver) StartWithCleanup(ctx *ExecContext, task *structs.Task) (*StartResponse, error, func() error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor nitpick, but let's lowercase Start so this method isn't exported as it shouldn't be called directly.

noCleanup := func() error { return nil }
var driverConfig LxcDriverConfig
if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
return nil, err
return nil, err, noCleanup
}
lxcPath := lxc.DefaultConfigPath()
if path := d.config.Read("driver.lxc.path"); path != "" {
Expand All @@ -222,7 +233,7 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse,
containerName := fmt.Sprintf("%s-%s", task.Name, d.DriverContext.allocID)
c, err := lxc.NewContainer(containerName, lxcPath)
if err != nil {
return nil, fmt.Errorf("unable to initialize container: %v", err)
return nil, fmt.Errorf("unable to initialize container: %v", err), noCleanup
}

var verbosity lxc.Verbosity
Expand All @@ -232,7 +243,7 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse,
case "", "quiet":
verbosity = lxc.Quiet
default:
return nil, fmt.Errorf("lxc driver config 'verbosity' can only be either quiet or verbose")
return nil, fmt.Errorf("lxc driver config 'verbosity' can only be either quiet or verbose"), noCleanup
}
c.SetVerbosity(verbosity)

Expand All @@ -249,7 +260,7 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse,
case "", "error":
logLevel = lxc.ERROR
default:
return nil, fmt.Errorf("lxc driver config 'log_level' can only be trace, debug, info, warn or error")
return nil, fmt.Errorf("lxc driver config 'log_level' can only be trace, debug, info, warn or error"), noCleanup
}
c.SetLogLevel(logLevel)

Expand All @@ -267,12 +278,12 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse,
}

if err := c.Create(options); err != nil {
return nil, fmt.Errorf("unable to create container: %v", err)
return nil, fmt.Errorf("unable to create container: %v", err), noCleanup
}

// Set the network type to none
if err := c.SetConfigItem("lxc.network.type", "none"); err != nil {
return nil, fmt.Errorf("error setting network type configuration: %v", err)
return nil, fmt.Errorf("error setting network type configuration: %v", err), c.Destroy
}

// Bind mount the shared alloc dir and task local dir in the container
Expand All @@ -290,7 +301,7 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse,

if filepath.IsAbs(paths[0]) {
if !volumesEnabled {
return nil, fmt.Errorf("absolute bind-mount volume in config but '%v' is false", lxcVolumesConfigOption)
return nil, fmt.Errorf("absolute bind-mount volume in config but '%v' is false", lxcVolumesConfigOption), c.Destroy
}
} else {
// Relative source paths are treated as relative to alloc dir
Expand All @@ -302,21 +313,31 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse,

for _, mnt := range mounts {
if err := c.SetConfigItem("lxc.mount.entry", mnt); err != nil {
return nil, fmt.Errorf("error setting bind mount %q error: %v", mnt, err)
return nil, fmt.Errorf("error setting bind mount %q error: %v", mnt, err), c.Destroy
}
}

// Start the container
if err := c.Start(); err != nil {
return nil, fmt.Errorf("unable to start container: %v", err)
return nil, fmt.Errorf("unable to start container: %v", err), c.Destroy
}

stopAndDestroyCleanup := func() error {
if err := c.Stop(); err != nil {
return err
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we attempt to Destroy the container even if Stop fails, or will Destroy never succeed if Stop errors? I'm unfamiliar with lxc's behavior in this circumstance and the Go library's docs aren't very useful.

If it's safe to call Destroy if Stop errors, I'd suggest just ignoring Destroy's return value and returning the original Stop effort (so Destroy is just a best-effort at cleaning up).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the only reasons that Stop might fail would also cause Destroy to fail. I think returning without trying Destroy is the right thing here.

}
if err := c.Destroy(); err != nil {
return err
}
return nil
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This last few lines can be simplified to return c.Destroy()

}

// Set the resource limits
if err := c.SetMemoryLimit(lxc.ByteSize(task.Resources.MemoryMB) * lxc.MB); err != nil {
return nil, fmt.Errorf("unable to set memory limits: %v", err)
return nil, fmt.Errorf("unable to set memory limits: %v", err), stopAndDestroyCleanup
}
if err := c.SetCgroupItem("cpu.shares", strconv.Itoa(task.Resources.CPU)); err != nil {
return nil, fmt.Errorf("unable to set cpu shares: %v", err)
return nil, fmt.Errorf("unable to set cpu shares: %v", err), stopAndDestroyCleanup
}

h := lxcDriverHandle{
Expand All @@ -335,7 +356,7 @@ func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse,

go h.run()

return &StartResponse{Handle: &h}, nil
return &StartResponse{Handle: &h}, nil, noCleanup
}

func (d *LxcDriver) Cleanup(*ExecContext, *CreatedResources) error { return nil }
Expand Down
12 changes: 12 additions & 0 deletions client/driver/lxc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,15 +310,27 @@ func TestLxcDriver_Start_NoVolumes(t *testing.T) {
ctx := testDriverContexts(t, task)
defer ctx.AllocDir.Destroy()

// set lxcVolumesConfigOption to false to disallow absolute paths as the source for the bind mount
ctx.DriverCtx.config.Options = map[string]string{lxcVolumesConfigOption: "false"}

d := NewLxcDriver(ctx.DriverCtx)

if _, err := d.Prestart(ctx.ExecCtx, task); err != nil {
t.Fatalf("prestart err: %v", err)
}

// expect the "absolute bind-mount volume in config.. " error
_, err := d.Start(ctx.ExecCtx, task)
if err == nil {
t.Fatalf("expected error in start, got nil.")
}

// Because the container was created but not started before
// the expected error, we can test that the destroy-only
// cleanup is done here.
containerName := fmt.Sprintf("%s-%s", task.Name, ctx.DriverCtx.allocID)
if err := exec.Command("bash", "-c", fmt.Sprintf("lxc-ls -1 | grep -q %s", containerName)).Run(); err == nil {
t.Fatalf("error, container '%s' is still around", containerName)
}

}