Skip to content

Commit

Permalink
Merge pull request #4010 from hashicorp/f-drain-rebased2
Browse files Browse the repository at this point in the history
Drain v2: add controlled draining
  • Loading branch information
schmichael committed Mar 22, 2018
2 parents 48c5f0e + e10883c commit 40db0af
Show file tree
Hide file tree
Showing 75 changed files with 6,318 additions and 702 deletions.
15 changes: 15 additions & 0 deletions api/allocations.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ type Allocation struct {
Metrics *AllocationMetric
DesiredStatus string
DesiredDescription string
DesiredTransition DesiredTransition
ClientStatus string
ClientDescription string
TaskStates map[string]*TaskState
Expand Down Expand Up @@ -205,3 +206,17 @@ type RescheduleEvent struct {
// PrevNodeID is the node ID of the previous allocation
PrevNodeID string
}

// DesiredTransition is used to mark an allocation as having a desired state
// transition. This information can be used by the scheduler to make the
// correct decision.
type DesiredTransition struct {
// Migrate is used to indicate that this allocation should be stopped and
// migrated to another node.
Migrate *bool
}

// ShouldMigrate returns whether the transition object dictates a migration.
func (d DesiredTransition) ShouldMigrate() bool {
return d.Migrate != nil && *d.Migrate
}
7 changes: 7 additions & 0 deletions api/allocations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,3 +239,10 @@ func TestAllocations_RescheduleInfo(t *testing.T) {
}

}

func TestAllocations_ShouldMigrate(t *testing.T) {
t.Parallel()
require.True(t, DesiredTransition{Migrate: helper.BoolToPtr(true)}.ShouldMigrate())
require.False(t, DesiredTransition{}.ShouldMigrate())
require.False(t, DesiredTransition{Migrate: helper.BoolToPtr(false)}.ShouldMigrate())
}
1 change: 1 addition & 0 deletions api/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,7 @@ type Job struct {
ParameterizedJob *ParameterizedJobConfig
Payload []byte
Reschedule *ReschedulePolicy
Migrate *MigrateStrategy
Meta map[string]string
VaultToken *string `mapstructure:"vault_token"`
Status *string
Expand Down
32 changes: 15 additions & 17 deletions api/jobs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,41 +12,34 @@ import (
"github.com/hashicorp/nomad/testutil"
"github.com/kr/pretty"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestJobs_Register(t *testing.T) {
t.Parallel()
require := require.New(t)

c, s := makeClient(t, nil, nil)
defer s.Stop()
jobs := c.Jobs()

// Listing jobs before registering returns nothing
resp, qm, err := jobs.List(nil)
if err != nil {
t.Fatalf("err: %s", err)
}
require.Nil(err)
assertQueryMeta(t, qm)
if n := len(resp); n != 0 {
t.Fatalf("expected 0 jobs, got: %d", n)
}
require.Emptyf(resp, "expected 0 jobs, got: %d", len(resp))

// Create a job and attempt to register it
job := testJob()
resp2, wm, err := jobs.Register(job, nil)
if err != nil {
t.Fatalf("err: %s", err)
}
if resp2 == nil || resp2.EvalID == "" {
t.Fatalf("missing eval id")
}
require.Nil(err)
require.NotNil(resp2)
require.NotEmpty(resp2.EvalID)
assertWriteMeta(t, wm)

// Query the jobs back out again
resp, qm, err = jobs.List(nil)
if err != nil {
t.Fatalf("err: %s", err)
}
assertQueryMeta(t, qm)
resp, _, err = jobs.List(nil)
require.Nil(err)

// Check that we got the expected response
if len(resp) != 1 || resp[0].ID != *job.ID {
Expand Down Expand Up @@ -141,6 +134,7 @@ func TestJobs_Canonicalize(t *testing.T) {
MaxDelay: helper.TimeToPtr(1 * time.Hour),
Unlimited: helper.BoolToPtr(true),
},
Migrate: DefaultMigrateStrategy(),
Tasks: []*Task{
{
KillTimeout: helper.TimeToPtr(5 * time.Second),
Expand Down Expand Up @@ -211,6 +205,7 @@ func TestJobs_Canonicalize(t *testing.T) {
MaxDelay: helper.TimeToPtr(1 * time.Hour),
Unlimited: helper.BoolToPtr(true),
},
Migrate: DefaultMigrateStrategy(),
Tasks: []*Task{
{
Name: "task1",
Expand Down Expand Up @@ -363,6 +358,7 @@ func TestJobs_Canonicalize(t *testing.T) {
AutoRevert: helper.BoolToPtr(false),
Canary: helper.IntToPtr(0),
},
Migrate: DefaultMigrateStrategy(),
Tasks: []*Task{
{
Name: "redis",
Expand Down Expand Up @@ -576,6 +572,7 @@ func TestJobs_Canonicalize(t *testing.T) {
AutoRevert: helper.BoolToPtr(true),
Canary: helper.IntToPtr(1),
},
Migrate: DefaultMigrateStrategy(),
Tasks: []*Task{
{
Name: "task1",
Expand Down Expand Up @@ -616,6 +613,7 @@ func TestJobs_Canonicalize(t *testing.T) {
AutoRevert: helper.BoolToPtr(false),
Canary: helper.IntToPtr(0),
},
Migrate: DefaultMigrateStrategy(),
Tasks: []*Task{
{
Name: "task1",
Expand Down
142 changes: 107 additions & 35 deletions api/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ package api
import (
"fmt"
"sort"
"strconv"
"time"

"github.com/hashicorp/nomad/nomad/structs"
)

// Nodes is used to query node-related API endpoints
Expand Down Expand Up @@ -42,10 +43,57 @@ func (n *Nodes) Info(nodeID string, q *QueryOptions) (*Node, *QueryMeta, error)
return &resp, qm, nil
}

// ToggleDrain is used to toggle drain mode on/off for a given node.
func (n *Nodes) ToggleDrain(nodeID string, drain bool, q *WriteOptions) (*WriteMeta, error) {
drainArg := strconv.FormatBool(drain)
wm, err := n.client.write("/v1/node/"+nodeID+"/drain?enable="+drainArg, nil, nil, q)
// NodeUpdateDrainRequest is used to update the drain specification for a node.
type NodeUpdateDrainRequest struct {
// NodeID is the node to update the drain specification for.
NodeID string

// DrainSpec is the drain specification to set for the node. A nil DrainSpec
// will disable draining.
DrainSpec *DrainSpec

// MarkEligible marks the node as eligible for scheduling if removing
// the drain strategy.
MarkEligible bool
}

// UpdateDrain is used to update the drain strategy for a given node. If
// markEligible is true and the drain is being removed, the node will be marked
// as having its scheduling being elibile
func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, markEligible bool, q *WriteOptions) (*WriteMeta, error) {
req := &NodeUpdateDrainRequest{
NodeID: nodeID,
DrainSpec: spec,
MarkEligible: markEligible,
}

wm, err := n.client.write("/v1/node/"+nodeID+"/drain", req, nil, q)
if err != nil {
return nil, err
}
return wm, nil
}

// NodeUpdateEligibilityRequest is used to update the drain specification for a node.
type NodeUpdateEligibilityRequest struct {
// NodeID is the node to update the drain specification for.
NodeID string
Eligibility string
}

// ToggleEligibility is used to update the scheduling eligibility of the node
func (n *Nodes) ToggleEligibility(nodeID string, eligible bool, q *WriteOptions) (*WriteMeta, error) {
e := structs.NodeSchedulingEligible
if !eligible {
e = structs.NodeSchedulingIneligible
}

req := &NodeUpdateEligibilityRequest{
NodeID: nodeID,
Eligibility: e,
}

wm, err := n.client.write("/v1/node/"+nodeID+"/eligibility", req, nil, q)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -108,25 +156,48 @@ type DriverInfo struct {

// Node is used to deserialize a node entry.
type Node struct {
ID string
Datacenter string
Name string
HTTPAddr string
TLSEnabled bool
Attributes map[string]string
Resources *Resources
Reserved *Resources
Links map[string]string
Meta map[string]string
NodeClass string
Drain bool
Status string
StatusDescription string
StatusUpdatedAt int64
Events []*NodeEvent
Drivers map[string]*DriverInfo
CreateIndex uint64
ModifyIndex uint64
ID string
Datacenter string
Name string
HTTPAddr string
TLSEnabled bool
Attributes map[string]string
Resources *Resources
Reserved *Resources
Links map[string]string
Meta map[string]string
NodeClass string
Drain bool
DrainStrategy *DrainStrategy
SchedulingEligibility string
Status string
StatusDescription string
StatusUpdatedAt int64
Events []*NodeEvent
Drivers map[string]*DriverInfo
CreateIndex uint64
ModifyIndex uint64
}

// DrainStrategy describes a Node's drain behavior.
type DrainStrategy struct {
// DrainSpec is the user declared drain specification
DrainSpec

// ForceDeadline is the deadline time for the drain after which drains will
// be forced
ForceDeadline time.Time
}

// DrainSpec describes a Node's drain behavior.
type DrainSpec struct {
// Deadline is the duration after StartTime when the remaining
// allocations on a draining Node should be told to stop.
Deadline time.Duration

// IgnoreSystemJobs allows systems jobs to remain on the node even though it
// has been marked for draining.
IgnoreSystemJobs bool
}

const (
Expand Down Expand Up @@ -181,17 +252,18 @@ type HostDiskStats struct {
// NodeListStub is a subset of information returned during
// node list operations.
type NodeListStub struct {
Address string
ID string
Datacenter string
Name string
NodeClass string
Version string
Drain bool
Status string
StatusDescription string
CreateIndex uint64
ModifyIndex uint64
Address string
ID string
Datacenter string
Name string
NodeClass string
Version string
Drain bool
SchedulingEligibility string
Status string
StatusDescription string
CreateIndex uint64
ModifyIndex uint64
}

// NodeIndexSort reverse sorts nodes by CreateIndex
Expand Down
Loading

0 comments on commit 40db0af

Please sign in to comment.