Skip to content

Commit

Permalink
Merge pull request #1410 from hashicorp/f-client-lost-state
Browse files Browse the repository at this point in the history
Changing the state of an allocation to lost if the node on which it w…
  • Loading branch information
diptanu authored Jul 12, 2016
2 parents 1f8d70c + 76b913c commit 5798a23
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 1 deletion.
9 changes: 9 additions & 0 deletions nomad/node_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUp
return fmt.Errorf("invalid status for node")
}

// Set the timestamp when the node is registered
args.Node.StatusUpdatedAt = time.Now().Unix()

// Compute the node class
if err := args.Node.ComputeClass(); err != nil {
return fmt.Errorf("failed to computed node class: %v", err)
Expand Down Expand Up @@ -218,6 +221,9 @@ func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *struct
return fmt.Errorf("node not found")
}

// Update the timestamp of when the node status was updated
node.StatusUpdatedAt = time.Now().Unix()

// Commit this update via Raft
var index uint64
if node.Status != args.Status {
Expand Down Expand Up @@ -291,6 +297,9 @@ func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest,
return fmt.Errorf("node not found")
}

// Update the timestamp to
node.StatusUpdatedAt = time.Now().Unix()

// Commit this update via Raft
var index uint64
if node.Drain != args.Drain {
Expand Down
19 changes: 19 additions & 0 deletions nomad/state/state_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,25 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error
return fmt.Errorf("index update failed: %v", err)
}

// Update the state of the allocations which are in running state to lost
if status == structs.NodeStatusDown {
allocs, err := s.AllocsByNode(nodeID)
if err != nil {
return fmt.Errorf("error retrieving any allocations for the node: %v", nodeID)
}
for _, alloc := range allocs {
copyAlloc := new(structs.Allocation)
*copyAlloc = *alloc
if alloc.ClientStatus == structs.AllocClientStatusPending ||
alloc.ClientStatus == structs.AllocClientStatusRunning {
copyAlloc.ClientStatus = structs.AllocClientStatusLost
if err := txn.Insert("allocs", copyAlloc); err != nil {
return fmt.Errorf("alloc insert failed: %v", err)
}
}
}
}

txn.Defer(func() { s.watch.notify(watcher) })
txn.Commit()
return nil
Expand Down
41 changes: 41 additions & 0 deletions nomad/state/state_store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,47 @@ func TestStateStore_UpdateNodeStatus_Node(t *testing.T) {
t.Fatalf("bad: %d", index)
}

alloc := mock.Alloc()
alloc1 := mock.Alloc()
alloc2 := mock.Alloc()
alloc.NodeID = node.ID
alloc1.NodeID = node.ID
alloc2.NodeID = node.ID
alloc.ClientStatus = structs.AllocClientStatusRunning
alloc1.ClientStatus = structs.AllocClientStatusFailed
alloc2.ClientStatus = structs.AllocClientStatusPending

if err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc1, alloc2}); err != nil {
t.Fatalf("err: %v", err)
}
if err = state.UpdateNodeStatus(1003, node.ID, structs.NodeStatusDown); err != nil {
t.Fatalf("err: %v", err)
}

allocOut, err := state.AllocByID(alloc.ID)
if err != nil {
t.Fatalf("err: %v", err)
}
if allocOut.ClientStatus != structs.AllocClientStatusLost {
t.Fatalf("expected alloc status: %v, actual: %v", structs.AllocClientStatusLost, allocOut.ClientStatus)
}

alloc1Out, err := state.AllocByID(alloc1.ID)
if err != nil {
t.Fatalf("err: %v", err)
}
if alloc1Out.ClientStatus != structs.AllocClientStatusFailed {
t.Fatalf("expected alloc status: %v, actual: %v", structs.AllocClientStatusFailed, alloc1Out.ClientStatus)
}

alloc2Out, err := state.AllocByID(alloc2.ID)
if err != nil {
t.Fatalf("err: %v", err)
}
if alloc2Out.ClientStatus != structs.AllocClientStatusLost {
t.Fatalf("expected alloc status: %v, actual: %v", structs.AllocClientStatusLost, alloc2Out.ClientStatus)
}

notify.verify(t)
}

Expand Down
7 changes: 6 additions & 1 deletion nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,10 @@ type Node struct {
// StatusDescription is meant to provide more human useful information
StatusDescription string

// StatusUpdatedAt is the time stamp at which the state of the node was
// updated
StatusUpdatedAt int64

// Raft Indexes
CreateIndex uint64
ModifyIndex uint64
Expand Down Expand Up @@ -2304,6 +2308,7 @@ const (
AllocClientStatusRunning = "running"
AllocClientStatusComplete = "complete"
AllocClientStatusFailed = "failed"
AllocClientStatusLost = "lost"
)

// Allocation is used to allocate the placement of a task group to a node.
Expand Down Expand Up @@ -2410,7 +2415,7 @@ func (a *Allocation) TerminalStatus() bool {
}

switch a.ClientStatus {
case AllocClientStatusComplete, AllocClientStatusFailed:
case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost:
return true
default:
return false
Expand Down

0 comments on commit 5798a23

Please sign in to comment.