-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Move chroot building into TaskRunner #2132
Changes from all commits
de7351b
e7f6986
d87dcce
3011df1
957adf2
e25274b
86db194
12915c3
6235122
59c501f
9bfa8dc
ed8b3cd
a087a8b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,6 @@ import ( | |
"github.com/hashicorp/go-multierror" | ||
"github.com/hashicorp/nomad/client/allocdir" | ||
"github.com/hashicorp/nomad/client/config" | ||
"github.com/hashicorp/nomad/client/driver" | ||
"github.com/hashicorp/nomad/client/vaultclient" | ||
"github.com/hashicorp/nomad/nomad/structs" | ||
|
||
|
@@ -48,8 +47,9 @@ type AllocRunner struct { | |
|
||
dirtyCh chan struct{} | ||
|
||
ctx *driver.ExecContext | ||
ctxLock sync.Mutex | ||
allocDir *allocdir.AllocDir | ||
allocDirLock sync.Mutex | ||
|
||
tasks map[string]*TaskRunner | ||
taskStates map[string]*structs.TaskState | ||
restored map[string]struct{} | ||
|
@@ -76,9 +76,22 @@ type AllocRunner struct { | |
type allocRunnerState struct { | ||
Version string | ||
Alloc *structs.Allocation | ||
AllocDir *allocdir.AllocDir | ||
AllocClientStatus string | ||
AllocClientDescription string | ||
Context *driver.ExecContext | ||
|
||
// COMPAT: Remove in 0.7.0: removing will break upgrading directly from | ||
// 0.5.2, so don't remove in the 0.6 series. | ||
// Context is deprecated and only used to migrate from older releases. | ||
// It will be removed in the future. | ||
Context *struct { | ||
AllocID string // unused; included for completeness | ||
AllocDir struct { | ||
AllocDir string | ||
SharedDir string // unused; included for completeness | ||
TaskDirs map[string]string | ||
} | ||
} `json:"Context,omitempty"` | ||
} | ||
|
||
// NewAllocRunner is used to create a new allocation context | ||
|
@@ -117,18 +130,28 @@ func (r *AllocRunner) RestoreState() error { | |
return err | ||
} | ||
|
||
// #2132 Upgrade path: if snap.AllocDir is nil, try to convert old | ||
// Context struct to new AllocDir struct | ||
if snap.AllocDir == nil && snap.Context != nil { | ||
r.logger.Printf("[DEBUG] client: migrating state snapshot for alloc %q", r.alloc.ID) | ||
snap.AllocDir = allocdir.NewAllocDir(r.logger, snap.Context.AllocDir.AllocDir) | ||
for taskName := range snap.Context.AllocDir.TaskDirs { | ||
snap.AllocDir.NewTaskDir(taskName) | ||
} | ||
} | ||
|
||
// Restore fields | ||
r.alloc = snap.Alloc | ||
r.ctx = snap.Context | ||
r.allocDir = snap.AllocDir | ||
r.allocClientStatus = snap.AllocClientStatus | ||
r.allocClientDescription = snap.AllocClientDescription | ||
|
||
var snapshotErrors multierror.Error | ||
if r.alloc == nil { | ||
snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil allocation")) | ||
} | ||
if r.ctx == nil { | ||
snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil context")) | ||
if r.allocDir == nil { | ||
snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil alloc dir")) | ||
} | ||
if e := snapshotErrors.ErrorOrNil(); e != nil { | ||
return e | ||
|
@@ -142,9 +165,16 @@ func (r *AllocRunner) RestoreState() error { | |
// Mark the task as restored. | ||
r.restored[name] = struct{}{} | ||
|
||
td, ok := r.allocDir.TaskDirs[name] | ||
if !ok { | ||
err := fmt.Errorf("failed to find task dir metadata for alloc %q task %q", | ||
r.alloc.ID, name) | ||
r.logger.Printf("[ERR] client: %v", err) | ||
return err | ||
} | ||
|
||
task := &structs.Task{Name: name} | ||
tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, r.Alloc(), | ||
task, r.vaultClient) | ||
tr := NewTaskRunner(r.logger, r.config, r.setTaskState, td, r.Alloc(), task, r.vaultClient) | ||
r.tasks[name] = tr | ||
|
||
// Skip tasks in terminal states. | ||
|
@@ -166,10 +196,7 @@ func (r *AllocRunner) RestoreState() error { | |
|
||
// GetAllocDir returns the alloc dir for the alloc runner | ||
func (r *AllocRunner) GetAllocDir() *allocdir.AllocDir { | ||
if r.ctx == nil { | ||
return nil | ||
} | ||
return r.ctx.AllocDir | ||
return r.allocDir | ||
} | ||
|
||
// SaveState is used to snapshot the state of the alloc runner | ||
|
@@ -204,14 +231,14 @@ func (r *AllocRunner) saveAllocRunnerState() error { | |
allocClientDescription := r.allocClientDescription | ||
r.allocLock.Unlock() | ||
|
||
r.ctxLock.Lock() | ||
ctx := r.ctx | ||
r.ctxLock.Unlock() | ||
r.allocDirLock.Lock() | ||
allocDir := r.allocDir | ||
r.allocDirLock.Unlock() | ||
|
||
snap := allocRunnerState{ | ||
Version: r.config.Version, | ||
Alloc: alloc, | ||
Context: ctx, | ||
AllocDir: allocDir, | ||
AllocClientStatus: allocClientStatus, | ||
AllocClientDescription: allocClientDescription, | ||
} | ||
|
@@ -233,7 +260,7 @@ func (r *AllocRunner) DestroyState() error { | |
|
||
// DestroyContext is used to destroy the context | ||
func (r *AllocRunner) DestroyContext() error { | ||
return r.ctx.AllocDir.Destroy() | ||
return r.allocDir.Destroy() | ||
} | ||
|
||
// copyTaskStates returns a copy of the passed task states. | ||
|
@@ -409,26 +436,27 @@ func (r *AllocRunner) Run() { | |
} | ||
|
||
// Create the execution context | ||
r.ctxLock.Lock() | ||
if r.ctx == nil { | ||
allocDir := allocdir.NewAllocDir(filepath.Join(r.config.AllocDir, r.alloc.ID)) | ||
if err := allocDir.Build(tg.Tasks); err != nil { | ||
r.allocDirLock.Lock() | ||
if r.allocDir == nil { | ||
// Build allocation directory | ||
r.allocDir = allocdir.NewAllocDir(r.logger, filepath.Join(r.config.AllocDir, r.alloc.ID)) | ||
if err := r.allocDir.Build(); err != nil { | ||
r.logger.Printf("[WARN] client: failed to build task directories: %v", err) | ||
r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup)) | ||
r.ctxLock.Unlock() | ||
r.allocDirLock.Unlock() | ||
return | ||
} | ||
r.ctx = driver.NewExecContext(allocDir, r.alloc.ID) | ||
|
||
if r.otherAllocDir != nil { | ||
if err := allocDir.Move(r.otherAllocDir, tg.Tasks); err != nil { | ||
if err := r.allocDir.Move(r.otherAllocDir, tg.Tasks); err != nil { | ||
r.logger.Printf("[ERROR] client: failed to move alloc dir into alloc %q: %v", r.alloc.ID, err) | ||
} | ||
if err := r.otherAllocDir.Destroy(); err != nil { | ||
r.logger.Printf("[ERROR] client: error destroying allocdir %v", r.otherAllocDir.AllocDir, err) | ||
} | ||
} | ||
} | ||
r.ctxLock.Unlock() | ||
r.allocDirLock.Unlock() | ||
|
||
// Check if the allocation is in a terminal status. In this case, we don't | ||
// start any of the task runners and directly wait for the destroy signal to | ||
|
@@ -448,7 +476,11 @@ func (r *AllocRunner) Run() { | |
continue | ||
} | ||
|
||
tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, r.Alloc(), task.Copy(), r.vaultClient) | ||
r.allocDirLock.Lock() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It might be cleaner if AllocDir maintains it's own locks internally so that the consumers of the API doesn't have to ensure they are locked appropriately. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sadly we'd still need this lock because it guards the setting and checking the allocDir field on the AllocRunner. We might be able to skip acquiring it on this particular line if AllocDir had an internal lock, but I'd rather always guard accesses to a field than try to figure out safe places to skip synchronization and confuse future readers of the code. |
||
taskdir := r.allocDir.NewTaskDir(task.Name) | ||
r.allocDirLock.Unlock() | ||
|
||
tr := NewTaskRunner(r.logger, r.config, r.setTaskState, taskdir, r.Alloc(), task.Copy(), r.vaultClient) | ||
r.tasks[task.Name] = tr | ||
tr.MarkReceived() | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We use the following format for these:
// COMPAT: Remove in 0.X.0: <reason for compatibility code and when it can be removed