hashicorp · preetapan · May 21, 2018 · May 8, 2018 · May 9, 2018 · May 9, 2018
diff --git a/api/jobs.go b/api/jobs.go
@@ -224,9 +224,14 @@ func (j *Jobs) Deregister(jobID string, purge bool, q *WriteOptions) (string, *W
 }
 
 // ForceEvaluate is used to force-evaluate an existing job.
-func (j *Jobs) ForceEvaluate(jobID string, q *WriteOptions) (string, *WriteMeta, error) {
+func (j *Jobs) ForceEvaluate(jobID string, opts EvalOptions, q *WriteOptions) (string, *WriteMeta, error) {
+	req := &JobEvaluateRequest{
+		JobID:       jobID,
+		EvalOptions: opts,
+	}
+
 	var resp JobRegisterResponse
-	wm, err := j.client.write("/v1/job/"+jobID+"/evaluate", nil, &resp, q)
+	wm, err := j.client.write("/v1/job/"+jobID+"/evaluate", req, &resp, q)
 	if err != nil {
 		return "", nil, err
 	}
@@ -1032,3 +1037,15 @@ type JobStabilityResponse struct {
 	JobModifyIndex uint64
 	WriteMeta
 }
+
+// JobEvaluateRequest is used when we just need to re-evaluate a target job
+type JobEvaluateRequest struct {
+	JobID       string
+	EvalOptions EvalOptions
+	WriteRequest
+}
+
+// EvalOptions is used to encapsulate options when forcing a job evaluation
+type EvalOptions struct {
+	ForceReschedule bool
+}
diff --git a/api/jobs_test.go b/api/jobs_test.go
@@ -1049,7 +1049,7 @@ func TestJobs_ForceEvaluate(t *testing.T) {
 	jobs := c.Jobs()
 
 	// Force-eval on a non-existent job fails
-	_, _, err := jobs.ForceEvaluate("job1", nil)
+	_, _, err := jobs.ForceEvaluate("job1", EvalOptions{}, nil)
 	if err == nil || !strings.Contains(err.Error(), "not found") {
 		t.Fatalf("expected not found error, got: %#v", err)
 	}
@@ -1062,7 +1062,7 @@ func TestJobs_ForceEvaluate(t *testing.T) {
 	assertWriteMeta(t, wm)
 
 	// Try force-eval again
-	evalID, wm, err := jobs.ForceEvaluate("job1", nil)
+	evalID, wm, err := jobs.ForceEvaluate("job1", EvalOptions{}, nil)
 	if err != nil {
 		t.Fatalf("err: %s", err)
 	}

diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go
@@ -90,8 +90,25 @@ func (s *HTTPServer) jobForceEvaluate(resp http.ResponseWriter, req *http.Reques
 	if req.Method != "PUT" && req.Method != "POST" {
 		return nil, CodedError(405, ErrInvalidMethod)
 	}
-	args := structs.JobEvaluateRequest{
-		JobID: jobName,
+	var args structs.JobEvaluateRequest
+
+	// TODO(preetha) remove in 0.9
+	// For backwards compatibility allow using this endpoint without a payload
+	if req.ContentLength == 0 {
+		args = structs.JobEvaluateRequest{
+			JobID: jobName,
+		}
+	} else {
+		if err := decodeBody(req, &args); err != nil {
+			return nil, CodedError(400, err.Error())
+		}
+		if args.JobID == "" {
+			return nil, CodedError(400, "Job ID must be specified")
+		}
+
+		if jobName != "" && args.JobID != jobName {
+			return nil, CodedError(400, "JobID not same as job name")
+		}
 	}
 	s.parseWriteRequest(req, &args.WriteRequest)
 

diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go
@@ -609,6 +609,57 @@ func TestHTTP_JobForceEvaluate(t *testing.T) {
 	})
 }
 
+func TestHTTP_JobEvaluate_ForceReschedule(t *testing.T) {
+	t.Parallel()
+	httpTest(t, nil, func(s *TestAgent) {
+		// Create the job
+		job := mock.Job()
+		args := structs.JobRegisterRequest{
+			Job: job,
+			WriteRequest: structs.WriteRequest{
+				Region:    "global",
+				Namespace: structs.DefaultNamespace,
+			},
+		}
+		var resp structs.JobRegisterResponse
+		if err := s.Agent.RPC("Job.Register", &args, &resp); err != nil {
+			t.Fatalf("err: %v", err)
+		}
+		jobEvalReq := api.JobEvaluateRequest{
+			JobID: job.ID,
+			EvalOptions: api.EvalOptions{
+				ForceReschedule: true,
+			},
+		}
+
+		buf := encodeReq(jobEvalReq)
+
+		// Make the HTTP request
+		req, err := http.NewRequest("POST", "/v1/job/"+job.ID+"/evaluate", buf)
+		if err != nil {
+			t.Fatalf("err: %v", err)
+		}
+		respW := httptest.NewRecorder()
+
+		// Make the request
+		obj, err := s.Server.JobSpecificRequest(respW, req)
+		if err != nil {
+			t.Fatalf("err: %v", err)
+		}
+
+		// Check the response
+		reg := obj.(structs.JobRegisterResponse)
+		if reg.EvalID == "" {
+			t.Fatalf("bad: %v", reg)
+		}
+
+		// Check for the index
+		if respW.HeaderMap.Get("X-Nomad-Index") == "" {
+			t.Fatalf("missing index")
+		}
+	})
+}
+
 func TestHTTP_JobEvaluations(t *testing.T) {
 	t.Parallel()
 	httpTest(t, nil, func(s *TestAgent) {

diff --git a/command/commands.go b/command/commands.go
@@ -270,6 +270,11 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory {
 				Meta: meta,
 			}, nil
 		},
+		"job eval": func() (cli.Command, error) {
+			return &JobEvalCommand{
+				Meta: meta,
+			}, nil
+		},
 		"job history": func() (cli.Command, error) {
 			return &JobHistoryCommand{
 				Meta: meta,

diff --git a/command/job_eval.go b/command/job_eval.go
@@ -0,0 +1,106 @@
+package command
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/hashicorp/nomad/api"
+	"github.com/hashicorp/nomad/api/contexts"
+	"github.com/posener/complete"
+)
+
+type JobEvalCommand struct {
+	Meta
+	forceRescheduling bool
+}
+
+func (c *JobEvalCommand) Help() string {
+	helpText := `
+Usage: nomad job eval [options] <job_id>
+
+  Force an evaluation of the provided job id
+
+General Options:
+
+  ` + generalOptionsUsage() + `
+
+Eval Options:
+
+  -force-reschedule
+    Force reschedule any failed allocations even if they are not currently
+    eligible for rescheduling
+`
+	return strings.TrimSpace(helpText)
+}
+
+func (c *JobEvalCommand) Synopsis() string {
+	return "Force evaluating a job using its job id"
+}
+
+func (c *JobEvalCommand) AutocompleteFlags() complete.Flags {
+	return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
+		complete.Flags{
+			"-force-reschedule": complete.PredictNothing,
+		})
+}
+
+func (c *JobEvalCommand) AutocompleteArgs() complete.Predictor {
+	return complete.PredictFunc(func(a complete.Args) []string {
+		client, err := c.Meta.Client()
+		if err != nil {
+			return nil
+		}
+
+		resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Jobs, nil)
+		if err != nil {
+			return []string{}
+		}
+		return resp.Matches[contexts.Jobs]
+	})
+}
+
+func (c *JobEvalCommand) Name() string { return "eval" }
+
+func (c *JobEvalCommand) Run(args []string) int {
+	flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
+	flags.Usage = func() { c.Ui.Output(c.Help()) }
+	flags.BoolVar(&c.forceRescheduling, "force-reschedule", false, "")
+
+	if err := flags.Parse(args); err != nil {
+		return 1
+	}
+
+	// Check that we either got no jobs or exactly one.
+	args = flags.Args()
+	if len(args) > 1 {
+		c.Ui.Error("This command takes either no arguments or one: <job>")
+		c.Ui.Error(commandErrorText(c))
+		return 1
+	}
+
+	// Get the HTTP client
+	client, err := c.Meta.Client()
+	if err != nil {
+		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
+		return 1
+	}
+
+	if len(args) == 0 {
+		c.Ui.Error("Must provide a job ID")
+		return 1
+	}
+
+	// Call eval end point
+	jobID := args[0]
+
+	opts := api.EvalOptions{
+		ForceReschedule: c.forceRescheduling,
+	}
+	evalId, _, err := client.Jobs().ForceEvaluate(jobID, opts, nil)
+	if err != nil {
+		c.Ui.Error(fmt.Sprintf("Error evaluating job: %s", err))
+		return 1
+	}
+	c.Ui.Output(fmt.Sprintf("Created eval ID: %q ", evalId))
+	return 0
+}
diff --git a/command/job_eval_test.go b/command/job_eval_test.go
@@ -0,0 +1,65 @@
+package command
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/hashicorp/nomad/nomad/mock"
+	"github.com/mitchellh/cli"
+	"github.com/posener/complete"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestJobEvalCommand_Implements(t *testing.T) {
+	t.Parallel()
+	var _ cli.Command = &JobEvalCommand{}
+}
+
+func TestJobEvalCommand_Fails(t *testing.T) {
+	t.Parallel()
+	ui := new(cli.MockUi)
+	cmd := &JobEvalCommand{Meta: Meta{Ui: ui}}
+
+	// Fails on misuse
+	if code := cmd.Run([]string{"some", "bad", "args"}); code != 1 {
+		t.Fatalf("expected exit code 1, got: %d", code)
+	}
+	if out := ui.ErrorWriter.String(); !strings.Contains(out, commandErrorText(cmd)) {
+		t.Fatalf("expected help output, got: %s", out)
+	}
+	ui.ErrorWriter.Reset()
+
+	// Fails when job ID is not specified
+	if code := cmd.Run([]string{}); code != 1 {
+		t.Fatalf("expect exit 1, got: %d", code)
+	}
+	if out := ui.ErrorWriter.String(); !strings.Contains(out, "Must provide a job ID") {
+		t.Fatalf("unexpected error: %v", out)
+	}
+	ui.ErrorWriter.Reset()
+
+}
+
+func TestJobEvalCommand_AutocompleteArgs(t *testing.T) {
+	assert := assert.New(t)
+	t.Parallel()
+
+	srv, _, url := testServer(t, true, nil)
+	defer srv.Shutdown()
+
+	ui := new(cli.MockUi)
+	cmd := &JobEvalCommand{Meta: Meta{Ui: ui, flagAddress: url}}
+
+	// Create a fake job
+	state := srv.Agent.Server().State()
+	j := mock.Job()
+	assert.Nil(state.UpsertJob(1000, j))
+
+	prefix := j.ID[:len(j.ID)-5]
+	args := complete.Args{Last: prefix}
+	predictor := cmd.AutocompleteArgs()
+
+	res := predictor.Predict(args)
+	assert.Equal(1, len(res))
+	assert.Equal(j.ID, res[0])
+}
diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go
@@ -39,6 +39,13 @@ var (
 		RTarget: ">= 0.6.1",
 		Operand: structs.ConstraintVersion,
 	}
+
+	// allowRescheduleTransition is the transition that allows failed
+	// allocations to be force rescheduled. We create a one off
+	// variable to avoid creating a new object for every request.
+	allowForceRescheduleTransition = &structs.DesiredTransition{
+		ForceReschedule: helper.BoolToPtr(true),
+	}
 )
 
 // Job endpoint is used for job interactions
@@ -538,6 +545,28 @@ func (j *Job) Evaluate(args *structs.JobEvaluateRequest, reply *structs.JobRegis
 		return fmt.Errorf("can't evaluate parameterized job")
 	}
 
+	forceRescheduleAllocs := make(map[string]*structs.DesiredTransition)
+
+	if args.EvalOptions.ForceReschedule {
+		// Find any failed allocs that could be force rescheduled
+		allocs, err := snap.AllocsByJob(ws, args.RequestNamespace(), args.JobID, false)
+		if err != nil {
+			return err
+		}
+
+		for _, alloc := range allocs {
+			taskGroup := job.LookupTaskGroup(alloc.TaskGroup)
+			// Forcing rescheduling is only allowed if task group has rescheduling enabled
+			if taskGroup == nil || taskGroup.ReschedulePolicy == nil || !taskGroup.ReschedulePolicy.Enabled() {
+				continue
+			}
+
+			if alloc.NextAllocation == "" && alloc.ClientStatus == structs.AllocClientStatusFailed {
+				forceRescheduleAllocs[alloc.ID] = allowForceRescheduleTransition
+			}
+		}
+	}
+
 	// Create a new evaluation
 	eval := &structs.Evaluation{
 		ID:             uuid.Generate(),
@@ -549,13 +578,14 @@ func (j *Job) Evaluate(args *structs.JobEvaluateRequest, reply *structs.JobRegis
 		JobModifyIndex: job.ModifyIndex,
 		Status:         structs.EvalStatusPending,
 	}
-	update := &structs.EvalUpdateRequest{
-		Evals:        []*structs.Evaluation{eval},
-		WriteRequest: structs.WriteRequest{Region: args.Region},
+
+	// Create a AllocUpdateDesiredTransitionRequest request with the eval and any forced rescheduled allocs
+	updateTransitionReq := &structs.AllocUpdateDesiredTransitionRequest{
+		Allocs: forceRescheduleAllocs,
+		Evals:  []*structs.Evaluation{eval},
 	}
+	_, evalIndex, err := j.srv.raftApply(structs.AllocUpdateDesiredTransitionRequestType, updateTransitionReq)
 
-	// Commit this evaluation via Raft
-	_, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update)
 	if err != nil {
 		j.srv.logger.Printf("[ERR] nomad.job: Eval create failed: %v", err)
 		return err