Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

acl: correctly resolve ACL roles within client cache. #14922

Merged
merged 3 commits into from
Oct 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changelog/14922.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
```release-note:bug
client: Resolve ACL roles within client ACL cache
```

```release-note:bug
client: Check ACL token expiry when resolving token within ACL cache
```
148 changes: 146 additions & 2 deletions client/acl.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ const (
// tokenCacheSize is the number of ACL tokens to keep cached. Tokens have a fetching cost,
// so we keep the hot tokens cached to reduce the lookups.
tokenCacheSize = 64

// roleCacheSize is the number of ACL roles to keep cached. Looking up
// roles requires an RPC call, so we keep the hot roles cached to reduce
// the number of lookups.
roleCacheSize = 64
)

// clientACLResolver holds the state required for client resolution
Expand All @@ -34,6 +39,10 @@ type clientACLResolver struct {

// tokenCache is used to maintain the fetched token objects
tokenCache *lru.TwoQueueCache

// roleCache is used to maintain a cache of the fetched ACL roles. Each
// entry is keyed by the role ID.
roleCache *lru.TwoQueueCache
}

// init is used to setup the client resolver state
Expand All @@ -52,13 +61,19 @@ func (c *clientACLResolver) init() error {
if err != nil {
return err
}
c.roleCache, err = lru.New2Q(roleCacheSize)
if err != nil {
return err
}
return nil
}

// cachedACLValue is used to manage ACL Token or Policy TTLs
// cachedACLValue is used to manage ACL Token, Policy, or Role cache entries
// and their TTLs.
type cachedACLValue struct {
Token *structs.ACLToken
Policy *structs.ACLPolicy
Role *structs.ACLRole
CacheTime time.Time
}

Expand Down Expand Up @@ -95,13 +110,29 @@ func (c *Client) resolveTokenAndACL(secretID string) (*acl.ACL, *structs.ACLToke
return nil, nil, structs.ErrTokenNotFound
}

// Give the token expiry some slight leeway in case the client and server
// clocks are skewed.
if token.IsExpired(time.Now().Add(2 * time.Second)) {
return nil, nil, structs.ErrTokenExpired
}

// Check if this is a management token
if token.Type == structs.ACLManagementToken {
return acl.ManagementACL, token, nil
}

// Resolve the policy links within the token ACL roles.
policyNames, err := c.resolveTokenACLRoles(secretID, token.Roles)
if err != nil {
return nil, nil, err
}

// Generate a slice of all policy names included within the token, taken
// from both the ACL roles and the direct assignments.
policyNames = append(policyNames, token.Policies...)

// Resolve the policies
policies, err := c.resolvePolicies(token.SecretID, token.Policies)
policies, err := c.resolvePolicies(token.SecretID, policyNames)
if err != nil {
return nil, nil, err
}
Expand Down Expand Up @@ -227,3 +258,116 @@ func (c *Client) resolvePolicies(secretID string, policies []string) ([]*structs
// Return the valid policies
return out, nil
}

// resolveTokenACLRoles is used to unpack an ACL roles and their policy
// assignments into a list of ACL policy names. This can then be used to
// compile an ACL object.
//
// When roles need to be looked up from state via server RPC, we may use the
// expired cache version. This can only occur if we can fully resolve the role
// via the cache.
func (c *Client) resolveTokenACLRoles(secretID string, roleLinks []*structs.ACLTokenRoleLink) ([]string, error) {

var (
// policyNames tracks the resolved ACL policies which are linked to the
// role. This is the output object and represents the authorisation
// this role provides token bearers.
policyNames []string

// missingRoleIDs are the roles linked which are not found within our
// cache. These must be looked up from the server via and RPC, so we
// can correctly identify the policy links.
missingRoleIDs []string

// expiredRoleIDs are the roles linked which have been found within our
// cache, but are expired. These must be looked up from the server via
// and RPC, so we can correctly identify the policy links.
expiredRoleIDs []string
)

for _, roleLink := range roleLinks {

// Look within the cache to see if the role is already present. If we
// do not find it, add the ID to our tracking, so we look this up via
// RPC.
raw, ok := c.roleCache.Get(roleLink.ID)
if !ok {
missingRoleIDs = append(missingRoleIDs, roleLink.ID)
continue
}

// If the cached value is expired, add the ID to our tracking, so we
// look this up via RPC. Otherwise, iterate the policy links and add
// each policy name to our return object tracking.
cached := raw.(*cachedACLValue)
if cached.Age() <= c.GetConfig().ACLRoleTTL {
for _, policyLink := range cached.Role.Policies {
policyNames = append(policyNames, policyLink.Name)
}
} else {
expiredRoleIDs = append(expiredRoleIDs, cached.Role.ID)
}
}

// Hot-path: we were able to resolve all ACL roles via the cache and
// generate a list of linked policy names. Therefore, we can avoid making
// any RPC calls.
if len(missingRoleIDs)+len(expiredRoleIDs) == 0 {
return policyNames, nil
}

// Created a combined list of role IDs that we need to lookup from server
// state.
roleIDsToFetch := missingRoleIDs
roleIDsToFetch = append(roleIDsToFetch, expiredRoleIDs...)

// Generate an RPC request to detail all the ACL roles that we did not find
// or were expired within the cache.
roleByIDReq := structs.ACLRolesByIDRequest{
ACLRoleIDs: roleIDsToFetch,
QueryOptions: structs.QueryOptions{
Region: c.Region(),
AuthToken: secretID,
AllowStale: true,
},
}

var roleByIDResp structs.ACLRolesByIDResponse

// Perform the RPC call to detail the required ACL roles. If the RPC call
// fails, and we are only updating expired cache entries, use the expired
// entries. This allows use to handle intermittent failures.
err := c.RPC(structs.ACLGetRolesByIDRPCMethod, &roleByIDReq, &roleByIDResp)
tgross marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
if len(missingRoleIDs) == 0 {
c.logger.Warn("failed to resolve ACL roles, using expired cached value", "error", err)
for _, aclRole := range roleByIDResp.ACLRoles {
for _, rolePolicyLink := range aclRole.Policies {
policyNames = append(policyNames, rolePolicyLink.Name)
}
}
return policyNames, nil
}
return nil, err
}

// Generate a timestamp for the cache entry. We do not need to use a
// timestamp per ACL role response integration.
now := time.Now()

for _, aclRole := range roleByIDResp.ACLRoles {
jrasell marked this conversation as resolved.
Show resolved Hide resolved

// Add an entry to the cache using the generated timestamp for future
// expiry calculations. Any existing, expired entry will be
// overwritten.
c.roleCache.Add(aclRole.ID, &cachedACLValue{Role: aclRole, CacheTime: now})

// Iterate the role policy links, extracting the name and adding this
// to our return response tracking.
for _, rolePolicyLink := range aclRole.Policies {
policyNames = append(policyNames, rolePolicyLink.Name)
}
}

return policyNames, nil
}
63 changes: 63 additions & 0 deletions client/acl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,29 @@ package client

import (
"testing"
"time"

"github.com/hashicorp/nomad/acl"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/assert"
)

func Test_clientACLResolver_init(t *testing.T) {
resolver := &clientACLResolver{}
must.NoError(t, resolver.init())
must.NotNil(t, resolver.aclCache)
must.NotNil(t, resolver.policyCache)
must.NotNil(t, resolver.tokenCache)
must.NotNil(t, resolver.roleCache)
}

func TestClient_ACL_resolveTokenValue(t *testing.T) {
ci.Parallel(t)

Expand Down Expand Up @@ -106,6 +118,47 @@ func TestClient_ACL_resolvePolicies(t *testing.T) {
}
}

func TestClient_resolveTokenACLRoles(t *testing.T) {
ci.Parallel(t)

testServer, _, rootACLToken, testServerCleanupS1 := testACLServer(t, nil)
defer testServerCleanupS1()
testutil.WaitForLeader(t, testServer.RPC)

testClient, cleanup := TestClient(t, func(c *config.Config) {
c.RPCHandler = testServer
c.ACLEnabled = true
})
defer cleanup()

// Create an ACL Role and a client token which is linked to this.
mockACLRole := mock.ACLRole()

mockACLToken := mock.ACLToken()
mockACLToken.Policies = []string{}
mockACLToken.Roles = []*structs.ACLTokenRoleLink{{ID: mockACLRole.ID}}

err := testServer.State().UpsertACLRoles(structs.MsgTypeTestSetup, 10, []*structs.ACLRole{mockACLRole}, true)
must.NoError(t, err)
err = testServer.State().UpsertACLTokens(structs.MsgTypeTestSetup, 20, []*structs.ACLToken{mockACLToken})
must.NoError(t, err)

// Resolve the ACL policies linked via the role.
resolvedRoles1, err := testClient.resolveTokenACLRoles(rootACLToken.SecretID, mockACLToken.Roles)
must.NoError(t, err)
must.Len(t, 2, resolvedRoles1)

// Test the cache directly and check that the ACL role previously queried
// is now cached.
must.Eq(t, 1, testClient.roleCache.Len())
must.True(t, testClient.roleCache.Contains(mockACLRole.ID))

// Resolve the roles again to check we get the same results.
resolvedRoles2, err := testClient.resolveTokenACLRoles(rootACLToken.SecretID, mockACLToken.Roles)
must.NoError(t, err)
must.SliceContainsAll(t, resolvedRoles1, resolvedRoles2)
}

func TestClient_ACL_ResolveToken_Disabled(t *testing.T) {
ci.Parallel(t)

Expand Down Expand Up @@ -199,4 +252,14 @@ func TestClient_ACL_ResolveSecretToken(t *testing.T) {
assert.NotEmpty(t, respToken.AccessorID)
}

// Create and upsert a token which has just expired.
mockExpiredToken := mock.ACLToken()
mockExpiredToken.ExpirationTime = pointer.Of(time.Now().Add(-5 * time.Minute))

err = s1.State().UpsertACLTokens(structs.MsgTypeTestSetup, 120, []*structs.ACLToken{mockExpiredToken})
must.NoError(t, err)

expiredTokenResp, err := c1.ResolveSecretToken(mockExpiredToken.SecretID)
must.Nil(t, expiredTokenResp)
must.StrContains(t, err.Error(), "ACL token expired")
}
4 changes: 4 additions & 0 deletions client/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ type Config struct {
// ACLPolicyTTL is how long we cache policy values for
ACLPolicyTTL time.Duration

// ACLRoleTTL is how long we cache ACL role value for within each Nomad
// client.
ACLRoleTTL time.Duration

// DisableRemoteExec disables remote exec targeting tasks on this client
DisableRemoteExec bool

Expand Down
1 change: 1 addition & 0 deletions command/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,7 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
conf.ACLEnabled = agentConfig.ACL.Enabled
conf.ACLTokenTTL = agentConfig.ACL.TokenTTL
conf.ACLPolicyTTL = agentConfig.ACL.PolicyTTL
conf.ACLRoleTTL = agentConfig.ACL.RoleTTL

// Setup networking configuration
conf.CNIPath = agentConfig.Client.CNIPath
Expand Down
13 changes: 13 additions & 0 deletions command/agent/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,12 @@ type ACLConfig struct {
PolicyTTL time.Duration
PolicyTTLHCL string `hcl:"policy_ttl" json:"-"`

// RoleTTL controls how long we cache ACL roles. This controls how stale
// they can be when we are enforcing policies. Defaults to "30s".
// Reducing this impacts performance by forcing more frequent resolution.
RoleTTL time.Duration
RoleTTLHCL string `hcl:"role_ttl" json:"-"`

// ReplicationToken is used by servers to replicate tokens and policies
// from the authoritative region. This must be a valid management token
// within the authoritative region.
Expand Down Expand Up @@ -1250,6 +1256,7 @@ func DefaultConfig() *Config {
Enabled: false,
TokenTTL: 30 * time.Second,
PolicyTTL: 30 * time.Second,
RoleTTL: 30 * time.Second,
},
SyslogFacility: "LOCAL0",
Telemetry: &Telemetry{
Expand Down Expand Up @@ -1755,6 +1762,12 @@ func (a *ACLConfig) Merge(b *ACLConfig) *ACLConfig {
if b.PolicyTTLHCL != "" {
result.PolicyTTLHCL = b.PolicyTTLHCL
}
if b.RoleTTL != 0 {
result.RoleTTL = b.RoleTTL
}
if b.RoleTTLHCL != "" {
result.RoleTTLHCL = b.RoleTTLHCL
}
if b.TokenMinExpirationTTL != 0 {
result.TokenMinExpirationTTL = b.TokenMinExpirationTTL
}
Expand Down
1 change: 1 addition & 0 deletions command/agent/config_parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ func ParseConfigFile(path string) (*Config, error) {
{"gc_interval", &c.Client.GCInterval, &c.Client.GCIntervalHCL, nil},
{"acl.token_ttl", &c.ACL.TokenTTL, &c.ACL.TokenTTLHCL, nil},
{"acl.policy_ttl", &c.ACL.PolicyTTL, &c.ACL.PolicyTTLHCL, nil},
{"acl.policy_ttl", &c.ACL.RoleTTL, &c.ACL.RoleTTLHCL, nil},
{"acl.token_min_expiration_ttl", &c.ACL.TokenMinExpirationTTL, &c.ACL.TokenMinExpirationTTLHCL, nil},
{"acl.token_max_expiration_ttl", &c.ACL.TokenMaxExpirationTTL, &c.ACL.TokenMaxExpirationTTLHCL, nil},
{"client.server_join.retry_interval", &c.Client.ServerJoin.RetryInterval, &c.Client.ServerJoin.RetryIntervalHCL, nil},
Expand Down
2 changes: 2 additions & 0 deletions command/agent/config_parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ var basicConfig = &Config{
TokenTTLHCL: "60s",
PolicyTTL: 60 * time.Second,
PolicyTTLHCL: "60s",
RoleTTLHCL: "60s",
RoleTTL: 60 * time.Second,
TokenMinExpirationTTLHCL: "1h",
TokenMinExpirationTTL: 1 * time.Hour,
TokenMaxExpirationTTLHCL: "100h",
Expand Down
2 changes: 2 additions & 0 deletions command/agent/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ func TestConfig_Merge(t *testing.T) {
Enabled: true,
TokenTTL: 60 * time.Second,
PolicyTTL: 60 * time.Second,
RoleTTL: 60 * time.Second,
TokenMinExpirationTTL: 60 * time.Second,
TokenMaxExpirationTTL: 60 * time.Second,
ReplicationToken: "foo",
Expand Down Expand Up @@ -360,6 +361,7 @@ func TestConfig_Merge(t *testing.T) {
Enabled: true,
TokenTTL: 20 * time.Second,
PolicyTTL: 20 * time.Second,
RoleTTL: 20 * time.Second,
TokenMinExpirationTTL: 20 * time.Second,
TokenMaxExpirationTTL: 20 * time.Second,
ReplicationToken: "foobar",
Expand Down
1 change: 1 addition & 0 deletions command/agent/testdata/basic.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ acl {
enabled = true
token_ttl = "60s"
policy_ttl = "60s"
role_ttl = "60s"
token_min_expiration_ttl = "1h"
token_max_expiration_ttl = "100h"
replication_token = "foobar"
Expand Down
1 change: 1 addition & 0 deletions command/agent/testdata/basic.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"policy_ttl": "60s",
"replication_token": "foobar",
"token_ttl": "60s",
"role_ttl": "60s",
"token_min_expiration_ttl": "1h",
"token_max_expiration_ttl": "100h"
}
Expand Down
Loading