Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate Agent Configuration #11819

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions client/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ type Config struct {

// Network speed is the default speed of network interfaces if they can not
// be determined dynamically.
//
// Deprecated since 0.12: https://www.nomadproject.io/docs/upgrade/upgrade-specific#nomad-0-12-0
NetworkSpeed int

// CpuCompute is the default total CPU compute if they can not be determined
Expand Down
29 changes: 28 additions & 1 deletion command/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,14 @@ func NewAgent(config *Config, logger log.InterceptLogger, logOutput io.Writer, i
// Global logger should match internal logger as much as possible
golog.SetFlags(golog.LstdFlags | golog.Lmicroseconds)

// Validate config after logging is initialized but before starting any
// more agent components. This allows reporting validation issues to
// the user's configured logger at the expense of not being able to
// usefully validate logging parameters.
if err := a.validateConfig(); err != nil {
return nil, fmt.Errorf("Invalid agent configuration. Must fix configuration errors before agent can start: %w", err)
}

if err := a.setupConsul(config.Consul); err != nil {
return nil, fmt.Errorf("Failed to initialize Consul client: %v", err)
}
Expand All @@ -152,6 +160,25 @@ func NewAgent(config *Config, logger log.InterceptLogger, logOutput io.Writer, i
return a, nil
}

// validateConfig and return an error if the config is invalid. Errors and
// warnings are logged but only errors return a non-nil value.
func (a *Agent) validateConfig() error {
results := a.config.Validate()
if !results.Problems() {
// Valid! Short-circuit
return nil
}

for _, warning := range results.Warnings {
a.logger.Warn("Agent validation warning", "warning", warning)
}
for _, err := range results.Errors.Errors {
a.logger.Error("Agent validation error", "error", err)
}

return results.ErrSummary()
}

// convertServerConfig takes an agent config and log output and returns a Nomad
// Config. There may be missing fields that must be set by the agent. To do this
// call finalizeServerConfig
Expand Down Expand Up @@ -641,7 +668,7 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
if agentConfig.Client.Reserved.Cores != "" {
cores, err := cpuset.Parse(agentConfig.Client.Reserved.Cores)
if err != nil {
return nil, fmt.Errorf("failed to parse client > reserved > cores value %q: %v", agentConfig.Client.Reserved.Cores, err)
return nil, fmt.Errorf("failed to parse client.reserved.cores value %q: %v", agentConfig.Client.Reserved.Cores, err)
}
res.Cpu.ReservedCpuCores = cores.ToSlice()
}
Expand Down
173 changes: 173 additions & 0 deletions command/agent/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/hashicorp/go-sockaddr/template"
client "github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/lib/cpuset"
"github.com/hashicorp/nomad/nomad"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/structs/config"
Expand Down Expand Up @@ -321,6 +322,136 @@ type ClientConfig struct {
ExtraKeysHCL []string `hcl:",unusedKeys" json:"-"`
}

// Validate client agent configuration and append results to parameter.
func (c *ClientConfig) Validate(results *helper.ValidationResults) {
for i, s := range c.Servers {
_, _, err := net.SplitHostPort(s)
if err == nil {
continue
}
results.AppendErrorf("client.servers[%d] invalid: %v", i, err)
}

//TODO Validate NodeClass
//TODO Validate ChrootEnv

if n := c.NetworkSpeed; n < 0 {
results.AppendErrorf("client.network_speed must be >= 0 but found: %v", n)
}

if n := c.CpuCompute; n < 0 {
results.AppendErrorf("client.cpu_total_compute must be >= 0 but found: %v", n)
}

if n := c.MemoryMB; n < 0 {
results.AppendErrorf("client.memory_total_mb must be >= 0 but found: %v", n)
}

//TODO Validate ReserveableCores

if n, err := time.ParseDuration(c.MaxKillTimeout); err != nil {
results.AppendErrorf("client.max_kill_timeout is not a valid duration: %w", err)
} else if n < 0 {
results.AppendErrorf("client.max_kill_timeout must be >= 0 but found: %v", n)
}

if c.MaxDynamicPort > structs.MaxValidPort {
results.AppendErrorf("client.max_dynamic_port must be <= %d but found %d", structs.MaxValidPort, c.MaxDynamicPort)
}

if c.MaxDynamicPort <= 0 {
results.AppendErrorf("client.max_dynamic_port must be > 0 but found %d", c.MaxDynamicPort)
}

if c.MinDynamicPort > structs.MaxValidPort {
results.AppendErrorf("client.min_dynamic_port must be <= %d but found %d", structs.MaxValidPort, c.MinDynamicPort)
}

if c.MinDynamicPort <= 0 {
results.AppendErrorf("client.min_dynamic_port must be > 0, but found %d", c.MinDynamicPort)
}

if c.MinDynamicPort > c.MaxDynamicPort {
results.AppendErrorf("client.min_dynamic_port (%d) must be < client.max_dynamic_port (%d)", c.MinDynamicPort, c.MaxDynamicPort)
}

c.validateReserved(results, c.Reserved)

// GCIntervalHCL parsing in agent/config_parse.go
if c.GCInterval <= 0 {
results.AppendErrorf("client.gc_interval must be > 0 but found %s", c.GCInterval)
}

if c.GCParallelDestroys <= 0 {
results.AppendErrorf("client.gc_parallel_destroys must be > 0 but found %d", c.GCParallelDestroys)
}

if c.GCDiskUsageThreshold <= 0 {
results.AppendErrorf("client.gc_disk_usage_threshold must be > 0 but found %f", c.GCDiskUsageThreshold)
}

if c.GCInodeUsageThreshold <= 0 {
results.AppendErrorf("client.gc_inode_usage_threshold must be > 0 but found %f", c.GCInodeUsageThreshold)
}

if c.GCMaxAllocs <= 0 {
results.AppendErrorf("client.gc_max_allocs must be > 0 but found %d", c.GCMaxAllocs)
}

//TODO Validate TemplateConfig
//TODO Validate ServerJoin
//TODO Validate HostVolumes
//TODO Validate CNIPath
//TODO Validate CNIConfigDir
//TODO Validate BridgeNetworkName
//TODO Validate BridgeNetworkSubnet

for _, hn := range c.HostNetworks {
hn.Validate(results)
}

//TODO Validate CgroupParent
}

// validateReserved validates the reserved resources for client agents.
func (c *ClientConfig) validateReserved(results *helper.ValidationResults, r *Resources) {
if r == nil {
// Coding error; should always be set my DefaultConfig()
results.AppendErrorf("client.reserved must be initialized. Please report a bug")
return
}

if r.CPU < 0 {
results.AppendErrorf("reserved.cpu must be >= 0 but found: %d", r.CPU)
}

if c.CpuCompute > 0 && c.CpuCompute <= r.CPU {
results.AppendErrorf("reserved.cpu >= cpu_total_compute: node would be ineligible for scheduling")
}

if r.MemoryMB < 0 {
results.AppendErrorf("reserved.memory must be >= 0 but found: %d", r.MemoryMB)
}

if c.MemoryMB > 0 && c.MemoryMB <= r.MemoryMB {
results.AppendErrorf("reserved.memory >= memory_total_mb: node would be ineligible for scheduling")
}

if r.DiskMB < 0 {
results.AppendErrorf("reserved.disk must be >= 0 but found: %d", r.DiskMB)
}

if ports := r.ReservedPorts; ports != "" {
if _, err := structs.ParsePortRanges(ports); err != nil {
results.AppendErrorf("reserved.reserved_ports %s invalid: %w", ports, err)
}
}

if _, err := cpuset.Parse(r.Cores); err != nil {
results.AppendErrorf("failed to parse client.reserved.cores value %q: %w", r.Cores, err)
}
}

// ACLConfig is configuration specific to the ACL system
type ACLConfig struct {
// Enabled controls if we are enforce and manage ACLs
Expand Down Expand Up @@ -1208,6 +1339,48 @@ func (c *Config) Merge(b *Config) *Config {
return &result
}

// Validate the agent configuration. Required values such as region are a
// validation error if unset, so Validate should be called after
// Merge(DefaultConfig()).
//
// Logging is not validated as it is expected to already be configured.
//
// Addresses and Ports are validated during address normalization.
func (c *Config) Validate() *helper.ValidationResults {
results := helper.NewValidationResults()

if c.Region == "" {
results.AppendErrorf("Missing region")
} else {
//TODO Emit warning if region name is not a valid hostname
}

if c.Datacenter == "" {
results.AppendErrorf("Missing datacenter")
} else {
//TODO Emit warning if datacenter name is not a valid hostname
}

// TODO Validate NodeName (it is valid for it to be "" here)

c.Client.Validate(results)

//TODO Validate Server
//TODO Validate ACL
//TODO Validate Telemetry
//TODO Validate Consul
//TODO Validate Vault
//TODO Validate UI
//TODO Validate TLSConfig
//TODO Validate Sentinel
//TODO Validate Autopilot
//TODO Validate Limits
//TODO Validate Audit

return results

}

// normalizeAddrs normalizes Addresses and AdvertiseAddrs to always be
// initialized and have reasonable defaults.
func (c *Config) normalizeAddrs() error {
Expand Down
63 changes: 63 additions & 0 deletions command/agent/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/hashicorp/nomad/helper/freeport"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/structs/config"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -1441,3 +1442,65 @@ func TestParseMultipleIPTemplates(t *testing.T) {
})
}
}

// TestConfig_Validate_Default asserts the DefaultConfig validates
// successfully.
func TestConfig_Validate(t *testing.T) {
results := DefaultConfig().Validate()
assert.Empty(t, results.Warnings)
assert.Nil(t, results.Errors)
assert.NoError(t, results.Err())
assert.NoError(t, results.ErrSummary())
}

// TestConfig_Validate_Bad asserts a wide range of config validation failures.
func TestConfig_Validate_Bad(t *testing.T) {
config, err := ParseConfigFile("testdata/badagent.hcl")
require.NoError(t, err)

// Intentionally not merging in the DefaultConfig to test every bad
// case.

results := config.Validate()
assert.Empty(t, results.Warnings)
require.NotNil(t, results.Errors)

expected := map[string]struct{}{
"Missing region": {},
"Missing datacenter": {},
"client.cpu_total_compute must be >= 0 but found: -10": {},
"client.memory_total_mb must be >= 0 but found: -11": {},
`client.max_kill_timeout is not a valid duration: time: invalid duration ""`: {},
`client.max_dynamic_port must be <= 65536 but found 999998`: {},
`client.min_dynamic_port must be <= 65536 but found 999999`: {},
`client.min_dynamic_port (999999) must be < client.max_dynamic_port (999998)`: {},
`reserved.disk must be >= 0 but found: -3`: {},
`client.gc_max_allocs must be > 0 but found 0`: {},
`client.gc_inode_usage_threshold must be > 0 but found 0.000000`: {},
`client.gc_interval must be > 0 but found -5m0s`: {},
`client.gc_parallel_destroys must be > 0 but found -12`: {},
`client.gc_disk_usage_threshold must be > 0 but found 0.000000`: {},

`reserved.reserved_ports 1,10-99999999 invalid: port must be < 65536 but found 99999999`: {},
`host_network["bad"].reserved_ports "-10" invalid: strconv.ParseUint: parsing "": invalid syntax`: {},
}
unexpected := []string{}

for _, err := range results.Errors.Errors {
v := err.Error()
if _, ok := expected[v]; ok {
// Error found! Remove and continue
delete(expected, v)
continue
}

// Unexpected
unexpected = append(unexpected, v)
}

// All expected errors should be removed
assert.Len(t, expected, 0)

// There should be no unexpected errors
assert.Len(t, unexpected, 0, strings.Join(unexpected, "\n"))
}
23 changes: 23 additions & 0 deletions command/agent/testdata/badagent.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# See command/agent/config_test.go#TestConifg_Validate_Bad

client {
cpu_total_compute = -10
memory_total_mb = -11

max_dynamic_port = 999998
min_dynamic_port = 999999

gc_interval = "-5m"
gc_parallel_destroys = -12

reserved {
cpu = 1
memory = 2
disk = -3
reserved_ports = "1,10-99999999"
}

host_network "bad" {
reserved_ports = "-10"
}
}
Loading