core: prevent malformed plans from crashing leader

The Plan.Submit endpoint assumed PlanRequest.Plan was never nil. While there is no evidence it ever has been nil, we should not panic if a nil plan is ever submitted because that would crash the leader.
hashicorp · Jan 31, 2022 · 1e39c32 · 1e39c32
1 parent 6c51333
commit 1e39c32
Show file tree

Hide file tree

Showing 3 changed files with 86 additions and 1 deletion.
diff --git a/nomad/plan_endpoint.go b/nomad/plan_endpoint.go
@@ -1,6 +1,7 @@
 package nomad
 
 import (
+	"fmt"
 	"time"
 
 	metrics "github.com/armon/go-metrics"
@@ -22,6 +23,10 @@ func (p *Plan) Submit(args *structs.PlanRequest, reply *structs.PlanResponse) er
 	}
 	defer metrics.MeasureSince([]string{"nomad", "plan", "submit"}, time.Now())
 
+	if args.Plan == nil {
+		return fmt.Errorf("cannot submit nil plan")
+	}
+
 	// Pause the Nack timer for the eval as it is making progress as long as it
 	// is in the plan queue. We resume immediately after we get a result to
 	// handle the case that the receiving worker dies.

diff --git a/nomad/plan_endpoint_test.go b/nomad/plan_endpoint_test.go
@@ -8,6 +8,7 @@ import (
 	"github.com/hashicorp/nomad/nomad/mock"
 	"github.com/hashicorp/nomad/nomad/structs"
 	"github.com/hashicorp/nomad/testutil"
+	"github.com/stretchr/testify/require"
 )
 
 func TestPlanEndpoint_Submit(t *testing.T) {
@@ -49,3 +50,80 @@ func TestPlanEndpoint_Submit(t *testing.T) {
 		t.Fatalf("missing result")
 	}
 }
+
+// TestPlanEndpoint_Submit_Bad asserts that the Plan.Submit endpoint rejects
+// bad data with an error instead of panicking.
+func TestPlanEndpoint_Submit_Bad(t *testing.T) {
+	t.Parallel()
+
+	s1, cleanupS1 := TestServer(t, func(c *Config) {
+		c.NumSchedulers = 0
+	})
+	defer cleanupS1()
+	codec := rpcClient(t, s1)
+	testutil.WaitForLeader(t, s1.RPC)
+
+	// Mock a valid eval being dequeued by a worker
+	eval := mock.Eval()
+	s1.evalBroker.Enqueue(eval)
+
+	evalOut, _, err := s1.evalBroker.Dequeue([]string{eval.Type}, time.Second)
+	require.NoError(t, err)
+	require.Equal(t, eval, evalOut)
+
+	cases := []struct {
+		Name string
+		Plan *structs.Plan
+		Err  string
+	}{
+		{
+			Name: "Nil",
+			Plan: nil,
+			Err:  "cannot submit nil plan",
+		},
+		{
+			Name: "Empty",
+			Plan: &structs.Plan{},
+			Err:  "evaluation is not outstanding",
+		},
+		{
+			Name: "BadEvalID",
+			Plan: &structs.Plan{
+				EvalID: "1234", // does not exist
+			},
+			Err: "evaluation is not outstanding",
+		},
+		{
+			Name: "MissingToken",
+			Plan: &structs.Plan{
+				EvalID: eval.ID,
+			},
+			Err: "evaluation token does not match",
+		},
+		{
+			Name: "InvalidToken",
+			Plan: &structs.Plan{
+				EvalID:    eval.ID,
+				EvalToken: "1234", // invalid
+			},
+			Err: "evaluation token does not match",
+		},
+	}
+
+	for i := range cases {
+		tc := cases[i]
+		t.Run(tc.Name, func(t *testing.T) {
+			req := &structs.PlanRequest{
+				Plan:         tc.Plan,
+				WriteRequest: structs.WriteRequest{Region: "global"},
+			}
+			var resp structs.PlanResponse
+			err := msgpackrpc.CallWithCodec(codec, "Plan.Submit", req, &resp)
+			require.EqualError(t, err, tc.Err)
+			require.Nil(t, resp.Result)
+		})
+	}
+
+	// Ensure no plans were enqueued
+	require.Zero(t, s1.planner.planQueue.Stats().Depth)
+}
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
@@ -10803,7 +10803,9 @@ type Plan struct {
 
 func (p *Plan) GoString() string {
 	out := fmt.Sprintf("(eval %s", p.EvalID[:8])
-	out += fmt.Sprintf(", job %s", p.Job.ID)
+	if p.Job != nil {
+		out += fmt.Sprintf(", job %s", p.Job.ID)
+	}
 	if p.Deployment != nil {
 		out += fmt.Sprintf(", deploy %s", p.Deployment.ID[:8])
 	}