Skip to content

Commit

Permalink
api: acl bootstrap errors aren't 500
Browse files Browse the repository at this point in the history
Noticed that ACL endpoints return 500 status code for user errors.  This
is confusing and can lead to false monitoring alerts.

Here, I introduce a concept of RPCCoded errors to be returned by RPC
that signal a code in addition to error message.  Codes for now match
HTTP codes to ease reasoning.

```
$ nomad acl bootstrap
Error bootstrapping: Unexpected response code: 500 (ACL bootstrap already done (reset index: 9))

$ nomad acl bootstrap
Error bootstrapping: Unexpected response code: 400 (ACL bootstrap already done (reset index: 9))
```
  • Loading branch information
Mahmood Ali committed Oct 4, 2019
1 parent 550e1b1 commit c1b3668
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 18 deletions.
3 changes: 3 additions & 0 deletions command/agent/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,9 @@ func (s *HTTPServer) wrap(handler func(resp http.ResponseWriter, req *http.Reque
errMsg := err.Error()
if http, ok := err.(HTTPCodedError); ok {
code = http.Code()
} else if ecode, emsg, ok := structs.CodeFromRPCCodedErr(err); ok {
code = ecode
errMsg = emsg
} else {
// RPC errors get wrapped, so manually unwrap by only looking at their suffix
if strings.HasSuffix(errMsg, structs.ErrPermissionDenied.Error()) {
Expand Down
36 changes: 18 additions & 18 deletions nomad/acl_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (

var (
// aclDisabled is returned when an ACL endpoint is hit but ACLs are not enabled
aclDisabled = fmt.Errorf("ACL support disabled")
aclDisabled = structs.NewErrRPCCoded(400, "ACL support disabled")
)

const (
Expand Down Expand Up @@ -55,13 +55,13 @@ func (a *ACL) UpsertPolicies(args *structs.ACLPolicyUpsertRequest, reply *struct

// Validate non-zero set of policies
if len(args.Policies) == 0 {
return fmt.Errorf("must specify as least one policy")
return structs.NewErrRPCCoded(400, "must specify as least one policy")
}

// Validate each policy, compute hash
for idx, policy := range args.Policies {
if err := policy.Validate(); err != nil {
return fmt.Errorf("policy %d invalid: %v", idx, err)
return structs.NewErrRPCCodedf(404, "policy %d invalid: %v", idx, err)
}
policy.SetHash()
}
Expand Down Expand Up @@ -99,7 +99,7 @@ func (a *ACL) DeletePolicies(args *structs.ACLPolicyDeleteRequest, reply *struct

// Validate non-zero set of policies
if len(args.Names) == 0 {
return fmt.Errorf("must specify as least one policy")
return structs.NewErrRPCCoded(400, "must specify as least one policy")
}

// Update via Raft
Expand Down Expand Up @@ -370,9 +370,9 @@ func (a *ACL) Bootstrap(args *structs.ACLTokenBootstrapRequest, reply *structs.A
// Check if there is a reset index specified
specifiedIndex := a.fileBootstrapResetIndex()
if specifiedIndex == 0 {
return fmt.Errorf("ACL bootstrap already done (reset index: %d)", resetIdx)
return structs.NewErrRPCCodedf(400, "ACL bootstrap already done (reset index: %d)", resetIdx)
} else if specifiedIndex != resetIdx {
return fmt.Errorf("Invalid bootstrap reset index (specified %d, reset index: %d)", specifiedIndex, resetIdx)
return structs.NewErrRPCCodedf(400, "Invalid bootstrap reset index (specified %d, reset index: %d)", specifiedIndex, resetIdx)
}

// Setup the reset index to allow bootstrapping again
Expand Down Expand Up @@ -404,7 +404,7 @@ func (a *ACL) Bootstrap(args *structs.ACLTokenBootstrapRequest, reply *structs.A
}
out, err := state.ACLTokenByAccessorID(nil, args.Token.AccessorID)
if err != nil {
return fmt.Errorf("token lookup failed: %v", err)
return structs.NewErrRPCCodedf(400, "token lookup failed: %v", err)
}
reply.Tokens = append(reply.Tokens, out)

Expand Down Expand Up @@ -448,7 +448,7 @@ func (a *ACL) UpsertTokens(args *structs.ACLTokenUpsertRequest, reply *structs.A

// Validate non-zero set of tokens
if len(args.Tokens) == 0 {
return fmt.Errorf("must specify as least one token")
return structs.NewErrRPCCoded(400, "must specify as least one token")
}

// Force the request to the authoritative region if we are creating global tokens
Expand All @@ -466,7 +466,7 @@ func (a *ACL) UpsertTokens(args *structs.ACLTokenUpsertRequest, reply *structs.A
// the entire request as a single batch.
if hasGlobal {
if !allGlobal {
return fmt.Errorf("cannot upsert mixed global and non-global tokens")
return structs.NewErrRPCCoded(400, "cannot upsert mixed global and non-global tokens")
}

// Force the request to the authoritative region if it has global
Expand Down Expand Up @@ -494,7 +494,7 @@ func (a *ACL) UpsertTokens(args *structs.ACLTokenUpsertRequest, reply *structs.A
// Validate each token
for idx, token := range args.Tokens {
if err := token.Validate(); err != nil {
return fmt.Errorf("token %d invalid: %v", idx, err)
return structs.NewErrRPCCodedf(400, "token %d invalid: %v", idx, err)
}

// Generate an accessor and secret ID if new
Expand All @@ -507,15 +507,15 @@ func (a *ACL) UpsertTokens(args *structs.ACLTokenUpsertRequest, reply *structs.A
// Verify the token exists
out, err := state.ACLTokenByAccessorID(nil, token.AccessorID)
if err != nil {
return fmt.Errorf("token lookup failed: %v", err)
return structs.NewErrRPCCodedf(400, "token lookup failed: %v", err)
}
if out == nil {
return fmt.Errorf("cannot find token %s", token.AccessorID)
return structs.NewErrRPCCodedf(400, "cannot find token %s", token.AccessorID)
}

// Cannot toggle the "Global" mode
if token.Global != out.Global {
return fmt.Errorf("cannot toggle global mode of %s", token.AccessorID)
return structs.NewErrRPCCodedf(400, "cannot toggle global mode of %s", token.AccessorID)
}
}

Expand All @@ -538,7 +538,7 @@ func (a *ACL) UpsertTokens(args *structs.ACLTokenUpsertRequest, reply *structs.A
for _, token := range args.Tokens {
out, err := state.ACLTokenByAccessorID(nil, token.AccessorID)
if err != nil {
return fmt.Errorf("token lookup failed: %v", err)
return structs.NewErrRPCCodedf(400, "token lookup failed: %v", err)
}
reply.Tokens = append(reply.Tokens, out)
}
Expand All @@ -557,7 +557,7 @@ func (a *ACL) DeleteTokens(args *structs.ACLTokenDeleteRequest, reply *structs.G

// Validate non-zero set of tokens
if len(args.AccessorIDs) == 0 {
return fmt.Errorf("must specify as least one token")
return structs.NewErrRPCCoded(400, "must specify as least one token")
}

if done, err := a.srv.forward("ACL.DeleteTokens", args, args, reply); done {
Expand Down Expand Up @@ -585,7 +585,7 @@ func (a *ACL) DeleteTokens(args *structs.ACLTokenDeleteRequest, reply *structs.G
for _, accessor := range args.AccessorIDs {
token, err := state.ACLTokenByAccessorID(nil, accessor)
if err != nil {
return fmt.Errorf("token lookup failed: %v", err)
return structs.NewErrRPCCodedf(400, "token lookup failed: %v", err)
}
if token == nil {
nonexistentTokens = append(nonexistentTokens, accessor)
Expand All @@ -599,14 +599,14 @@ func (a *ACL) DeleteTokens(args *structs.ACLTokenDeleteRequest, reply *structs.G
}

if len(nonexistentTokens) != 0 {
return fmt.Errorf("Cannot delete nonexistent tokens: %v", strings.Join(nonexistentTokens, ", "))
return structs.NewErrRPCCodedf(400, "Cannot delete nonexistent tokens: %v", strings.Join(nonexistentTokens, ", "))
}

// Disallow mixed requests with global and non-global tokens since we forward
// the entire request as a single batch.
if hasGlobal {
if !allGlobal {
return fmt.Errorf("cannot delete mixed global and non-global tokens")
return structs.NewErrRPCCoded(400, "cannot delete mixed global and non-global tokens")
}

// Force the request to the authoritative region if it has global
Expand Down
31 changes: 31 additions & 0 deletions nomad/structs/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package structs
import (
"errors"
"fmt"
"strconv"
"strings"
)

Expand All @@ -24,6 +25,8 @@ const (
ErrUnknownJobPrefix = "Unknown job"
ErrUnknownEvaluationPrefix = "Unknown evaluation"
ErrUnknownDeploymentPrefix = "Unknown deployment"

errRPCCodedErrorPrefix = "RPC_ERROR::"
)

var (
Expand Down Expand Up @@ -142,3 +145,31 @@ func IsErrUnknownNomadVersion(err error) bool {
func IsErrNodeLacksRpc(err error) bool {
return err != nil && strings.Contains(err.Error(), errNodeLacksRpc)
}

func NewErrRPCCoded(code int, msg string) error {
return fmt.Errorf("%s%d,%s", errRPCCodedErrorPrefix, code, msg)
}

func NewErrRPCCodedf(code int, format string, args ...interface{}) error {
msg := fmt.Sprintf(format, args...)
return fmt.Errorf("%s%d,%s", errRPCCodedErrorPrefix, code, msg)
}

func CodeFromRPCCodedErr(err error) (code int, msg string, ok bool) {
if err == nil || !strings.HasPrefix(err.Error(), errRPCCodedErrorPrefix) {
return 0, "", false
}

headerLen := len(errRPCCodedErrorPrefix)
parts := strings.SplitN(err.Error()[headerLen:], ",", 2)
if len(parts) != 2 {
return 0, "", false
}

code, err = strconv.Atoi(parts[0])
if err != nil {
return 0, "", false
}

return code, parts[1], true
}
49 changes: 49 additions & 0 deletions nomad/structs/errors_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package structs

import (
"errors"
"testing"

"github.com/stretchr/testify/assert"
)

func TestRPCCodedErrors(t *testing.T) {
cases := []struct {
err error
code int
message string
}{
{
NewErrRPCCoded(400, "a test message,here"),
400,
"a test message,here",
},
{
NewErrRPCCodedf(500, "a test message,here %s %s", "and,here%s", "second"),
500,
"a test message,here and,here%s second",
},
}

for _, c := range cases {
t.Run(c.err.Error(), func(t *testing.T) {
code, msg, ok := CodeFromRPCCodedErr(c.err)
assert.True(t, ok)
assert.Equal(t, c.code, code)
assert.Equal(t, c.message, msg)
})
}

negativeCases := []string{
"random error",
errRPCCodedErrorPrefix,
errRPCCodedErrorPrefix + "123",
errRPCCodedErrorPrefix + "qwer,asdf",
}
for _, c := range negativeCases {
t.Run(c, func(t *testing.T) {
_, _, ok := CodeFromRPCCodedErr(errors.New(c))
assert.False(t, ok)
})
}
}

0 comments on commit c1b3668

Please sign in to comment.