Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

api: add related evals to eval details #12305

Merged
merged 3 commits into from
Mar 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/12305.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
api: Add `related` query parameter to the Evaluation details endpoint
```
24 changes: 24 additions & 0 deletions api/evaluations.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ type Evaluation struct {
NextEval string
PreviousEval string
BlockedEval string
RelatedEvals []*EvaluationStub
FailedTGAllocs map[string]*AllocationMetric
ClassEligibility map[string]bool
EscapedComputedClass bool
Expand All @@ -84,6 +85,29 @@ type Evaluation struct {
ModifyTime int64
}

// EvaluationStub is used to serialize parts of an evaluation returned in the
// RelatedEvals field of an Evaluation.
type EvaluationStub struct {
ID string
Priority int
Type string
TriggeredBy string
Namespace string
JobID string
NodeID string
DeploymentID string
Status string
StatusDescription string
WaitUntil time.Time
NextEval string
PreviousEval string
BlockedEval string
CreateIndex uint64
ModifyIndex uint64
CreateTime int64
ModifyTime int64
}

// EvalIndexSort is a wrapper to sort evaluations by CreateIndex.
// We reverse the test so that we get the highest index first.
type EvalIndexSort []*Evaluation
Expand Down
19 changes: 19 additions & 0 deletions api/evaluations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,25 @@ func TestEvaluations_Info(t *testing.T) {
// Check that we got the right result
require.NotNil(t, result)
require.Equal(t, resp.EvalID, result.ID)

// Register the job again to get a related eval
resp, wm, err = jobs.Register(job, nil)
evals, _, err := e.List(nil)
require.NoError(t, err)

// Find an eval that should have related evals
for _, eval := range evals {
if eval.NextEval != "" || eval.PreviousEval != "" || eval.BlockedEval != "" {
result, qm, err := e.Info(eval.ID, &QueryOptions{
Params: map[string]string{
"related": "true",
},
})
require.NoError(t, err)
assertQueryMeta(t, qm)
require.NotNil(t, result.RelatedEvals)
}
}
}

func TestEvaluations_Allocations(t *testing.T) {
Expand Down
3 changes: 3 additions & 0 deletions command/agent/eval_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ func (s *HTTPServer) evalQuery(resp http.ResponseWriter, req *http.Request, eval
return nil, nil
}

query := req.URL.Query()
args.IncludeRelated = query.Get("related") == "true"

var out structs.SingleEvalResponse
if err := s.agent.RPC("Eval.GetEval", &args, &out); err != nil {
return nil, err
Expand Down
41 changes: 41 additions & 0 deletions command/agent/eval_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,3 +200,44 @@ func TestHTTP_EvalQuery(t *testing.T) {
}
})
}

func TestHTTP_EvalQueryWithRelated(t *testing.T) {
t.Parallel()
httpTest(t, nil, func(s *TestAgent) {
// Directly manipulate the state
state := s.Agent.server.State()
eval1 := mock.Eval()
eval2 := mock.Eval()

// Link related evals
eval1.NextEval = eval2.ID
eval2.PreviousEval = eval1.ID

err := state.UpsertEvals(structs.MsgTypeTestSetup, 1000, []*structs.Evaluation{eval1, eval2})
require.NoError(t, err)

// Make the HTTP request
req, err := http.NewRequest("GET", fmt.Sprintf("/v1/evaluation/%s?related=true", eval1.ID), nil)
require.NoError(t, err)
respW := httptest.NewRecorder()

// Make the request
obj, err := s.Server.EvalSpecificRequest(respW, req)
require.NoError(t, err)

// Check for the index
require.NotEmpty(t, respW.Result().Header.Get("X-Nomad-Index"))
require.NotEmpty(t, respW.Result().Header.Get("X-Nomad-KnownLeader"))
require.NotEmpty(t, respW.Result().Header.Get("X-Nomad-LastContact"))

// Check the eval
e := obj.(*structs.Evaluation)
require.Equal(t, eval1.ID, e.ID)

// Check for the related evals
expected := []*structs.EvaluationStub{
eval2.Stub(),
}
require.Equal(t, expected, e.RelatedEvals)
})
}
34 changes: 25 additions & 9 deletions nomad/eval_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,23 +53,39 @@ func (e *Eval) GetEval(args *structs.EvalSpecificRequest,
queryOpts: &args.QueryOptions,
queryMeta: &reply.QueryMeta,
run: func(ws memdb.WatchSet, state *state.StateStore) error {
// Look for the job
out, err := state.EvalByID(ws, args.EvalID)
var related []*structs.EvaluationStub

// Look for the eval
eval, err := state.EvalByID(ws, args.EvalID)
if err != nil {
return err
return fmt.Errorf("failed to lookup eval: %v", err)
}

// Setup the output
reply.Eval = out
if out != nil {
if eval != nil {
// Re-check namespace in case it differs from request.
if !allowNsOp(aclObj, out.Namespace) {
if !allowNsOp(aclObj, eval.Namespace) {
return structs.ErrPermissionDenied
}

reply.Index = out.ModifyIndex
// Lookup related evals if requested.
if args.IncludeRelated {
related, err = state.EvalsRelatedToID(ws, eval.ID)
if err != nil {
return fmt.Errorf("failed to lookup related evals: %v", err)
}

// Use a copy to avoid modifying the original eval.
eval = eval.Copy()
eval.RelatedEvals = related
}
}

// Setup the output.
reply.Eval = eval
if eval != nil {
reply.Index = eval.ModifyIndex
} else {
// Use the last index that affected the nodes table
// Use the last index that affected the evals table
index, err := state.Index("evals")
if err != nil {
return err
Expand Down
78 changes: 51 additions & 27 deletions nomad/eval_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,36 +30,60 @@ func TestEvalEndpoint_GetEval(t *testing.T) {

// Create the register request
eval1 := mock.Eval()
s1.fsm.State().UpsertEvals(structs.MsgTypeTestSetup, 1000, []*structs.Evaluation{eval1})
eval2 := mock.Eval()

// Lookup the eval
get := &structs.EvalSpecificRequest{
EvalID: eval1.ID,
QueryOptions: structs.QueryOptions{Region: "global"},
}
var resp structs.SingleEvalResponse
if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", get, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if resp.Index != 1000 {
t.Fatalf("Bad index: %d %d", resp.Index, 1000)
}
// Link the evals
eval1.NextEval = eval2.ID
eval2.PreviousEval = eval1.ID

if !reflect.DeepEqual(eval1, resp.Eval) {
t.Fatalf("bad: %#v %#v", eval1, resp.Eval)
}
err := s1.fsm.State().UpsertEvals(structs.MsgTypeTestSetup, 1000, []*structs.Evaluation{eval1, eval2})
require.NoError(t, err)

// Lookup non-existing node
get.EvalID = uuid.Generate()
if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", get, &resp); err != nil {
t.Fatalf("err: %v", err)
}
if resp.Index != 1000 {
t.Fatalf("Bad index: %d %d", resp.Index, 1000)
}
if resp.Eval != nil {
t.Fatalf("unexpected eval")
}
t.Run("lookup eval", func(t *testing.T) {
get := &structs.EvalSpecificRequest{
EvalID: eval1.ID,
QueryOptions: structs.QueryOptions{Region: "global"},
}
var resp structs.SingleEvalResponse
err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", get, &resp)
require.NoError(t, err)
require.EqualValues(t, 1000, resp.Index, "bad index")
require.Equal(t, eval1, resp.Eval)
})

t.Run("lookup non-existing eval", func(t *testing.T) {
get := &structs.EvalSpecificRequest{
EvalID: uuid.Generate(),
QueryOptions: structs.QueryOptions{Region: "global"},
}
var resp structs.SingleEvalResponse
err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", get, &resp)
require.NoError(t, err)
require.EqualValues(t, 1000, resp.Index, "bad index")
require.Nil(t, resp.Eval, "unexpected eval")
})

t.Run("lookup related evals", func(t *testing.T) {
get := &structs.EvalSpecificRequest{
EvalID: eval1.ID,
QueryOptions: structs.QueryOptions{Region: "global"},
IncludeRelated: true,
}
var resp structs.SingleEvalResponse
err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", get, &resp)
require.NoError(t, err)
require.EqualValues(t, 1000, resp.Index, "bad index")
require.Equal(t, eval1.ID, resp.Eval.ID)

// Make sure we didn't modify the eval on a read request.
require.Nil(t, eval1.RelatedEvals)

// Check for the related evals
expected := []*structs.EvaluationStub{
eval2.Stub(),
}
require.Equal(t, expected, resp.Eval.RelatedEvals)
})
}

func TestEvalEndpoint_GetEval_ACL(t *testing.T) {
Expand Down
49 changes: 49 additions & 0 deletions nomad/state/state_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -3177,6 +3177,55 @@ func (s *StateStore) EvalByID(ws memdb.WatchSet, id string) (*structs.Evaluation
return nil, nil
}

// EvalsRelatedToID is used to retrieve the evals that are related (next,
// previous, or blocked) to the provided eval ID.
func (s *StateStore) EvalsRelatedToID(ws memdb.WatchSet, id string) ([]*structs.EvaluationStub, error) {
txn := s.db.ReadTxn()

raw, err := txn.First("evals", "id", id)
if err != nil {
return nil, fmt.Errorf("eval lookup failed: %v", err)
}
if raw == nil {
return nil, nil
}
eval := raw.(*structs.Evaluation)

relatedEvals := []*structs.EvaluationStub{}
todo := eval.RelatedIDs()
done := map[string]bool{
eval.ID: true, // don't place the requested eval in the related list.
}

for len(todo) > 0 {
// Pop the first value from the todo list.
current := todo[0]
todo = todo[1:]
if current == "" {
continue
}

// Skip value if we already have it in the results.
if done[current] {
continue
}

eval, err := s.EvalByID(ws, current)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This call will add the eval watch channel to the watchset, which means we can have multiple channels in ws. I don't know if there are any performance implications in this approach.

A alternative would be to set the watchset using the job ID prefix. This would use a single watch channel, but may fire in situations where an eval for another with the same prefix changes.

if err != nil {
return nil, err
}
if eval == nil {
continue
}

todo = append(todo, eval.RelatedIDs()...)
relatedEvals = append(relatedEvals, eval.Stub())
done[eval.ID] = true
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently the only way out of this loop is to scan all related evals (or experience an error). Is there a way to limit the scanning we do, like on the size of the accumulated relatedEvals or even just a hard stop at like 20?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hum...good question. I think the problem with setting an upper bound is that the response then becomes incomplete, and the idea of this flag is to give all the data needed quickly. But guarding against potential infinite loops, or slow requests sounds like a good idea. I don't know what a good number would be, so perhaps we can have a time limit, like 1s?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe the most correct thing to do would be to plumb the Request.Context all the way down here. (You could make the same argument for all calls into the state store, it's just that this one feels less strongly bounded than the others)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah good point. Is this the channel I should be listening for connection close? https://pkg.go.dev/github.com/hashicorp/yamux#Session.CloseChan

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we want the http request context

https://pkg.go.dev/net/http#Request.Context

stemming from the request handler http.Request

https://github.com/hashicorp/nomad/blob/main/command/agent/eval_endpoint.go#L10

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah ok. Unfortunately that's currently not possible 😬
#7119

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whelp so much for that! It's probably fine as-is anyway, I mean how many related evals could there be anyway ... 🤔

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fwiw, another endpoint returns all the evaluations for a job in one go, so this is strictly less than that... can't be too bad, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah, good point. One of the differences here is that we have a (potentially) infinite loop if the todo list never gets to zero. But I think we're safe because we always pop and check if eval has been processed already.


return relatedEvals, nil
}

// EvalsByIDPrefix is used to lookup evaluations by prefix in a particular
// namespace
func (s *StateStore) EvalsByIDPrefix(ws memdb.WatchSet, namespace, id string, sort SortOption) (memdb.ResultIterator, error) {
Expand Down
Loading