diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2ac26f1ab5e7..98573c0ecf67 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ IMPROVEMENTS:
  * api/job: Ability to revert job to older versions [GH-2575]
  * client: Environment variables for client DC and Region [GH-2507]
  * client: Hash host ID so it's stable and well distributed [GH-2541]
+ * client: GC dead allocs if total allocs > `gc_max_allocs` tunable [GH-2636]
  * client: Persist state using bolt-db and more efficient write patterns [GH-2610]
  * client: Fingerprint all routable addresses on an interface including IPv6
diff --git a/client/client.go b/client/client.go
index 59460bf1199a..a4b751b24e47 100644
--- a/client/client.go
+++ b/client/client.go
@@ -240,13 +240,15 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
 
 	// Add the garbage collector
 	gcConfig := &GCConfig{
+		MaxAllocs:           cfg.GCMaxAllocs,
 		DiskUsageThreshold:  cfg.GCDiskUsageThreshold,
 		InodeUsageThreshold: cfg.GCInodeUsageThreshold,
 		Interval:            cfg.GCInterval,
 		ParallelDestroys:    cfg.GCParallelDestroys,
 		ReservedDiskMB:      cfg.Node.Reserved.DiskMB,
 	}
-	c.garbageCollector = NewAllocGarbageCollector(logger, statsCollector, gcConfig)
+	c.garbageCollector = NewAllocGarbageCollector(logger, statsCollector, c, gcConfig)
+	go c.garbageCollector.Run()
 
 	// Setup the node
 	if err := c.setupNode(); err != nil {
@@ -482,17 +484,13 @@ func (c *Client) RPC(method string, args interface{}, reply interface{}) error {
 // Stats is used to return statistics for debugging and insight
 // for various sub-systems
 func (c *Client) Stats() map[string]map[string]string {
-	c.allocLock.RLock()
-	numAllocs := len(c.allocs)
-	c.allocLock.RUnlock()
-
 	c.heartbeatLock.Lock()
 	defer c.heartbeatLock.Unlock()
 	stats := map[string]map[string]string{
 		"client": map[string]string{
 			"node_id":         c.Node().ID,
 			"known_servers":   c.servers.all().String(),
-			"num_allocations": strconv.Itoa(numAllocs),
+			"num_allocations": strconv.Itoa(c.NumAllocs()),
 			"last_heartbeat":  fmt.Sprintf("%v", time.Since(c.lastHeartbeat)),
 			"heartbeat_ttl":   fmt.Sprintf("%v", c.heartbeatTTL),
 		},
@@ -722,6 +720,21 @@ func (c *Client) getAllocRunners() map[string]*AllocRunner {
 	return runners
 }
 
+// NumAllocs returns the number of allocs this client has. Used to
+// fulfill the AllocCounter interface for the GC.
+func (c *Client) NumAllocs() int {
+	c.allocLock.RLock()
+	c.blockedAllocsLock.Lock()
+	c.migratingAllocsLock.Lock()
+	n := len(c.allocs)
+	n += len(c.blockedAllocations)
+	n += len(c.migratingAllocs)
+	c.migratingAllocsLock.Unlock()
+	c.blockedAllocsLock.Unlock()
+	c.allocLock.RUnlock()
+	return n
+}
+
 // nodeID restores, or generates if necessary, a unique node ID and SecretID.
 // The node ID is, if available, a persistent unique ID. The secret ID is a
 // high-entropy random UUID.
diff --git a/client/config/config.go b/client/config/config.go
index 24e115f35db1..db4d8233c755 100644
--- a/client/config/config.go
+++ b/client/config/config.go
@@ -171,6 +171,10 @@ type Config struct {
 	// beyond which the Nomad client triggers GC of the terminal allocations
 	GCInodeUsageThreshold float64
 
+	// GCMaxAllocs is the maximum number of allocations a node can have
+	// before garbage collection is triggered.
+	GCMaxAllocs int
+
 	// LogLevel is the level of the logs to put out
 	LogLevel string
 
@@ -205,6 +209,7 @@ func DefaultConfig() *Config {
 		GCParallelDestroys:    2,
 		GCDiskUsageThreshold:  80,
 		GCInodeUsageThreshold: 70,
+		GCMaxAllocs:           50,
 	}
 }
diff --git a/client/gc.go b/client/gc.go
index a07db1415a8e..d6b58d777829 100644
--- a/client/gc.go
+++ b/client/gc.go
@@ -18,6 +18,9 @@ const (
 
 // GCConfig allows changing the behaviour of the garbage collector
 type GCConfig struct {
+	// MaxAllocs is the maximum number of allocations to track before a GC
+	// is triggered.
+	MaxAllocs           int
 	DiskUsageThreshold  float64
 	InodeUsageThreshold float64
 	Interval            time.Duration
@@ -25,10 +28,17 @@ type GCConfig struct {
 	ParallelDestroys int
 }
 
+// AllocCounter is used by AllocGarbageCollector to discover how many
+// allocations a node has and is generally fulfilled by the Client.
+type AllocCounter interface {
+	NumAllocs() int
+}
+
 // AllocGarbageCollector garbage collects terminated allocations on a node
 type AllocGarbageCollector struct {
 	allocRunners   *IndexedGCAllocPQ
 	statsCollector stats.NodeStatsCollector
+	allocCounter   AllocCounter
 	config         *GCConfig
 	logger         *log.Logger
 	destroyCh      chan struct{}
@@ -36,8 +46,9 @@ type AllocGarbageCollector struct {
 }
 
 // NewAllocGarbageCollector returns a garbage collector for terminated
-// allocations on a node.
-func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStatsCollector, config *GCConfig) *AllocGarbageCollector {
+// allocations on a node. Must call Run() in a goroutine to enable periodic
+// garbage collection.
+func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStatsCollector, ac AllocCounter, config *GCConfig) *AllocGarbageCollector {
 	// Require at least 1 to make progress
 	if config.ParallelDestroys <= 0 {
 		logger.Printf("[WARN] client: garbage collector defaulting parallelism to 1 due to invalid input value of %d", config.ParallelDestroys)
@@ -47,17 +58,18 @@ func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStats
 	gc := &AllocGarbageCollector{
 		allocRunners:   NewIndexedGCAllocPQ(),
 		statsCollector: statsCollector,
+		allocCounter:   ac,
 		config:         config,
 		logger:         logger,
 		destroyCh:      make(chan struct{}, config.ParallelDestroys),
 		shutdownCh:     make(chan struct{}),
 	}
-	go gc.run()
 
 	return gc
 }
 
-func (a *AllocGarbageCollector) run() {
+// Run the periodic garbage collector.
+func (a *AllocGarbageCollector) Run() {
 	ticker := time.NewTicker(a.config.Interval)
 	for {
 		select {
@@ -100,23 +112,33 @@ func (a *AllocGarbageCollector) keepUsageBelowThreshold() error {
 			break
 		}
 
-		if diskStats.UsedPercent <= a.config.DiskUsageThreshold &&
-			diskStats.InodesUsedPercent <= a.config.InodeUsageThreshold {
+		reason := ""
+
+		switch {
+		case diskStats.UsedPercent > a.config.DiskUsageThreshold:
+			reason = fmt.Sprintf("disk usage of %.0f is over gc threshold of %.0f",
+				diskStats.UsedPercent, a.config.DiskUsageThreshold)
+		case diskStats.InodesUsedPercent > a.config.InodeUsageThreshold:
+			reason = fmt.Sprintf("inode usage of %.0f is over gc threshold of %.0f",
+				diskStats.InodesUsedPercent, a.config.InodeUsageThreshold)
+		case a.numAllocs() > a.config.MaxAllocs:
+			reason = fmt.Sprintf("number of allocations is over the limit (%d)", a.config.MaxAllocs)
+		}
+
+		// No reason to gc, exit
+		if reason == "" {
 			break
 		}
 
 		// Collect an allocation
 		gcAlloc := a.allocRunners.Pop()
 		if gcAlloc == nil {
+			a.logger.Printf("[WARN] client: garbage collection due to %s skipped because no terminal allocations", reason)
 			break
 		}
 
-		ar := gcAlloc.allocRunner
-		alloc := ar.Alloc()
-		a.logger.Printf("[INFO] client: garbage collecting allocation %v", alloc.ID)
-
 		// Destroy the alloc runner and wait until it exits
-		a.destroyAllocRunner(ar)
+		a.destroyAllocRunner(gcAlloc.allocRunner, reason)
 	}
 	return nil
 }
@@ -124,7 +146,13 @@ func (a *AllocGarbageCollector) keepUsageBelowThreshold() error {
 // destroyAllocRunner is used to destroy an allocation runner. It will acquire a
 // lock to restrict parallelism and then destroy the alloc runner, returning
 // once the allocation has been destroyed.
-func (a *AllocGarbageCollector) destroyAllocRunner(ar *AllocRunner) {
+func (a *AllocGarbageCollector) destroyAllocRunner(ar *AllocRunner, reason string) {
+	id := ""
+	if alloc := ar.Alloc(); alloc != nil {
+		id = alloc.ID
+	}
+	a.logger.Printf("[INFO] client: garbage collecting allocation %s due to %s", id, reason)
+
 	// Acquire the destroy lock
 	select {
 	case <-a.shutdownCh:
@@ -155,11 +183,7 @@ func (a *AllocGarbageCollector) Collect(allocID string) error {
 	if err != nil {
 		return fmt.Errorf("unable to collect allocation %q: %v", allocID, err)
 	}
-
-	ar := gcAlloc.allocRunner
-	a.logger.Printf("[INFO] client: garbage collecting allocation %q", ar.Alloc().ID)
-
-	a.destroyAllocRunner(ar)
+	a.destroyAllocRunner(gcAlloc.allocRunner, "forced collection")
 	return nil
 }
 
@@ -177,9 +201,7 @@ func (a *AllocGarbageCollector) CollectAll() error {
 			break
 		}
 
-		ar := gcAlloc.allocRunner
-		a.logger.Printf("[INFO] client: garbage collecting alloc runner for alloc %q", ar.Alloc().ID)
-		go a.destroyAllocRunner(ar)
+		go a.destroyAllocRunner(gcAlloc.allocRunner, "forced full collection")
 	}
 	return nil
 }
@@ -187,6 +209,26 @@ func (a *AllocGarbageCollector) CollectAll() error {
 // MakeRoomFor garbage collects enough allocations in the terminal
 // state to make room for new allocations
 func (a *AllocGarbageCollector) MakeRoomFor(allocations []*structs.Allocation) error {
+	// GC allocs until below the max limit + the new allocations
+	max := a.config.MaxAllocs - len(allocations)
+	for a.numAllocs() > max {
+		select {
+		case <-a.shutdownCh:
+			return nil
+		default:
+		}
+
+		gcAlloc := a.allocRunners.Pop()
+		if gcAlloc == nil {
+			// It's fine if we can't lower below the limit here as
+			// we'll keep trying to drop below the limit with each
+			// periodic gc
+			break
+		}
+
+		// Destroy the alloc runner and wait until it exits
+		a.destroyAllocRunner(gcAlloc.allocRunner, "new allocations")
+	}
 	totalResource := &structs.Resources{}
 	for _, alloc := range allocations {
 		if err := totalResource.Add(alloc.Resources); err != nil {
@@ -244,10 +286,9 @@ func (a *AllocGarbageCollector) MakeRoomFor(allocations []*structs.Allocation) e
 
 		ar := gcAlloc.allocRunner
 		alloc := ar.Alloc()
-		a.logger.Printf("[INFO] client: garbage collecting allocation %v", alloc.ID)
 
 		// Destroy the alloc runner and wait until it exits
-		a.destroyAllocRunner(ar)
+		a.destroyAllocRunner(ar, fmt.Sprintf("freeing %d MB for new allocations", alloc.Resources.DiskMB))
 
 		// Call stats collect again
 		diskCleared += alloc.Resources.DiskMB
@@ -261,8 +302,7 @@ func (a *AllocGarbageCollector) MarkForCollection(ar *AllocRunner) error {
 		return fmt.Errorf("nil allocation runner inserted for garbage collection")
 	}
 	if ar.Alloc() == nil {
-		a.logger.Printf("[INFO] client: alloc is nil, so garbage collecting")
-		a.destroyAllocRunner(ar)
+		a.destroyAllocRunner(ar, "alloc is nil")
 	}
 
 	a.logger.Printf("[INFO] client: marking allocation %v for GC", ar.Alloc().ID)
@@ -281,6 +321,12 @@ func (a *AllocGarbageCollector) Remove(ar *AllocRunner) {
 	}
 }
 
+// numAllocs returns the total number of allocs tracked by the client as well
+// as those marked for GC.
+func (a *AllocGarbageCollector) numAllocs() int {
+	return a.allocRunners.Length() + a.allocCounter.NumAllocs()
+}
+
 // GCAlloc wraps an allocation runner and an index enabling it to be used within
 // a PQ
 type GCAlloc struct {
diff --git a/client/gc_test.go b/client/gc_test.go
index f4fdedfbfcc8..e132a8e9f8b7 100644
--- a/client/gc_test.go
+++ b/client/gc_test.go
@@ -1,8 +1,6 @@
 package client
 
 import (
-	"log"
-	"os"
 	"testing"
 	"time"
 
@@ -11,11 +9,14 @@ import (
 	"github.com/hashicorp/nomad/nomad/structs"
 )
 
-var gcConfig = GCConfig{
-	DiskUsageThreshold:  80,
-	InodeUsageThreshold: 70,
-	Interval:            1 * time.Minute,
-	ReservedDiskMB:      0,
+func gcConfig() *GCConfig {
+	return &GCConfig{
+		DiskUsageThreshold:  80,
+		InodeUsageThreshold: 70,
+		Interval:            1 * time.Minute,
+		ReservedDiskMB:      0,
+		MaxAllocs:           100,
+	}
 }
 
 func TestIndexedGCAllocPQ(t *testing.T) {
@@ -57,6 +58,15 @@ func TestIndexedGCAllocPQ(t *testing.T) {
 	}
 }
 
+// MockAllocCounter implements AllocCounter interface.
+type MockAllocCounter struct {
+	allocs int
+}
+
+func (m *MockAllocCounter) NumAllocs() int {
+	return m.allocs
+}
+
 type MockStatsCollector struct {
 	availableValues []uint64
 	usedPercents    []float64
@@ -90,8 +100,8 @@ func (m *MockStatsCollector) Stats() *stats.HostStats {
 }
 
 func TestAllocGarbageCollector_MarkForCollection(t *testing.T) {
-	logger := log.New(os.Stdout, "", 0)
-	gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &gcConfig)
+	logger := testLogger()
+	gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &MockAllocCounter{}, gcConfig())
 
 	_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
 	if err := gc.MarkForCollection(ar1); err != nil {
@@ -105,8 +115,8 @@ func TestAllocGarbageCollector_MarkForCollection(t *testing.T) {
 }
 
 func TestAllocGarbageCollector_Collect(t *testing.T) {
-	logger := log.New(os.Stdout, "", 0)
-	gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &gcConfig)
+	logger := testLogger()
+	gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &MockAllocCounter{}, gcConfig())
 
 	_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
 	_, ar2 := testAllocRunnerFromAlloc(mock.Alloc(), false)
@@ -131,8 +141,8 @@ func TestAllocGarbageCollector_Collect(t *testing.T) {
 }
 
 func TestAllocGarbageCollector_CollectAll(t *testing.T) {
-	logger := log.New(os.Stdout, "", 0)
-	gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &gcConfig)
+	logger := testLogger()
+	gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &MockAllocCounter{}, gcConfig())
 
 	_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
 	_, ar2 := testAllocRunnerFromAlloc(mock.Alloc(), false)
@@ -153,10 +163,11 @@ func TestAllocGarbageCollector_CollectAll(t *testing.T) {
 }
 
 func TestAllocGarbageCollector_MakeRoomForAllocations_EnoughSpace(t *testing.T) {
-	logger := log.New(os.Stdout, "", 0)
+	logger := testLogger()
 	statsCollector := &MockStatsCollector{}
-	gcConfig.ReservedDiskMB = 20
-	gc := NewAllocGarbageCollector(logger, statsCollector, &gcConfig)
+	conf := gcConfig()
+	conf.ReservedDiskMB = 20
+	gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)
 
 	_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
 	close(ar1.waitCh)
@@ -190,10 +201,11 @@ func TestAllocGarbageCollector_MakeRoomForAllocations_EnoughSpace(t *testing.T)
 }
 
 func TestAllocGarbageCollector_MakeRoomForAllocations_GC_Partial(t *testing.T) {
-	logger := log.New(os.Stdout, "", 0)
+	logger := testLogger()
 	statsCollector := &MockStatsCollector{}
-	gcConfig.ReservedDiskMB = 20
-	gc := NewAllocGarbageCollector(logger, statsCollector, &gcConfig)
+	conf := gcConfig()
+	conf.ReservedDiskMB = 20
+	gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)
 
 	_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
 	close(ar1.waitCh)
@@ -228,10 +240,11 @@ func TestAllocGarbageCollector_MakeRoomForAllocations_GC_Partial(t *testing.T) {
 }
 
 func TestAllocGarbageCollector_MakeRoomForAllocations_GC_All(t *testing.T) {
-	logger := log.New(os.Stdout, "", 0)
+	logger := testLogger()
 	statsCollector := &MockStatsCollector{}
-	gcConfig.ReservedDiskMB = 20
-	gc := NewAllocGarbageCollector(logger, statsCollector, &gcConfig)
+	conf := gcConfig()
+	conf.ReservedDiskMB = 20
+	gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)
 
 	_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
 	close(ar1.waitCh)
@@ -262,10 +275,11 @@ func TestAllocGarbageCollector_MakeRoomForAllocations_GC_All(t *testing.T) {
 }
 
 func TestAllocGarbageCollector_MakeRoomForAllocations_GC_Fallback(t *testing.T) {
-	logger := log.New(os.Stdout, "", 0)
+	logger := testLogger()
 	statsCollector := &MockStatsCollector{}
-	gcConfig.ReservedDiskMB = 20
-	gc := NewAllocGarbageCollector(logger, statsCollector, &gcConfig)
+	conf := gcConfig()
+	conf.ReservedDiskMB = 20
+	gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)
 
 	_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
 	close(ar1.waitCh)
@@ -294,11 +308,49 @@ func TestAllocGarbageCollector_MakeRoomForAllocations_GC_Fallback(t *testing.T)
 	}
 }
 
+func TestAllocGarbageCollector_MakeRoomForAllocations_MaxAllocs(t *testing.T) {
+	const (
+		liveAllocs   = 3
+		maxAllocs    = 6
+		gcAllocs     = 4
+		gcAllocsLeft = 1
+	)
+
+	logger := testLogger()
+	statsCollector := &MockStatsCollector{
+		availableValues: []uint64{10 * 1024 * MB},
+		usedPercents:    []float64{0},
+		inodePercents:   []float64{0},
+	}
+	allocCounter := &MockAllocCounter{allocs: liveAllocs}
+	conf := gcConfig()
+	conf.MaxAllocs = maxAllocs
+	gc := NewAllocGarbageCollector(logger, statsCollector, allocCounter, conf)
+
+	for i := 0; i < gcAllocs; i++ {
+		_, ar := testAllocRunnerFromAlloc(mock.Alloc(), false)
+		close(ar.waitCh)
+		if err := gc.MarkForCollection(ar); err != nil {
+			t.Fatalf("error marking alloc for gc: %v", err)
+		}
+	}
+
+	if err := gc.MakeRoomFor([]*structs.Allocation{mock.Alloc(), mock.Alloc()}); err != nil {
+		t.Fatalf("error making room for 2 new allocs: %v", err)
+	}
+
+	// There should be gcAllocsLeft alloc runners left to be collected
+	if n := len(gc.allocRunners.index); n != gcAllocsLeft {
+		t.Fatalf("expected %d remaining GC-able alloc runners but found %d", gcAllocsLeft, n)
+	}
+}
+
 func TestAllocGarbageCollector_UsageBelowThreshold(t *testing.T) {
-	logger := log.New(os.Stdout, "", 0)
+	logger := testLogger()
 	statsCollector := &MockStatsCollector{}
-	gcConfig.ReservedDiskMB = 20
-	gc := NewAllocGarbageCollector(logger, statsCollector, &gcConfig)
+	conf := gcConfig()
+	conf.ReservedDiskMB = 20
+	gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)
 
 	_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
 	close(ar1.waitCh)
@@ -329,10 +381,11 @@ func TestAllocGarbageCollector_UsageBelowThreshold(t *testing.T) {
 }
 
 func TestAllocGarbageCollector_UsedPercentThreshold(t *testing.T) {
-	logger := log.New(os.Stdout, "", 0)
+	logger := testLogger()
 	statsCollector := &MockStatsCollector{}
-	gcConfig.ReservedDiskMB = 20
-	gc := NewAllocGarbageCollector(logger, statsCollector, &gcConfig)
+	conf := gcConfig()
+	conf.ReservedDiskMB = 20
+	gc := NewAllocGarbageCollector(logger, statsCollector, &MockAllocCounter{}, conf)
 
 	_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
 	close(ar1.waitCh)
@@ -363,3 +416,40 @@ func TestAllocGarbageCollector_UsedPercentThreshold(t *testing.T) {
 		t.Fatalf("gcAlloc: %v", gcAlloc)
 	}
 }
+
+func TestAllocGarbageCollector_MaxAllocsThreshold(t *testing.T) {
+	const (
+		liveAllocs   = 3
+		maxAllocs    = 4
+		gcAllocs     = 4
+		gcAllocsLeft = 1
+	)
+
+	logger := testLogger()
+	statsCollector := &MockStatsCollector{
+		availableValues: []uint64{1000},
+		usedPercents:    []float64{0},
+		inodePercents:   []float64{0},
+	}
+	allocCounter := &MockAllocCounter{allocs: liveAllocs}
+	conf := gcConfig()
+	conf.MaxAllocs = maxAllocs
+	gc := NewAllocGarbageCollector(logger, statsCollector, allocCounter, conf)
+
+	for i := 0; i < gcAllocs; i++ {
+		_, ar := testAllocRunnerFromAlloc(mock.Alloc(), false)
+		close(ar.waitCh)
+		if err := gc.MarkForCollection(ar); err != nil {
t.Fatalf("error marking alloc for gc: %v", err) + } + } + + if err := gc.keepUsageBelowThreshold(); err != nil { + t.Fatalf("error gc'ing: %v", err) + } + + // We should have gc'd down to MaxAllocs + if n := len(gc.allocRunners.index); n != gcAllocsLeft { + t.Fatalf("expected remaining gc allocs (%d) to equal %d", n, gcAllocsLeft) + } +} diff --git a/client/task_runner_test.go b/client/task_runner_test.go index cfac5ea4256b..51117d8d0f72 100644 --- a/client/task_runner_test.go +++ b/client/task_runner_test.go @@ -31,7 +31,10 @@ func testLogger() *log.Logger { } func prefixedTestLogger(prefix string) *log.Logger { - return log.New(os.Stderr, prefix, log.LstdFlags) + if testing.Verbose() { + return log.New(os.Stderr, prefix, log.LstdFlags) + } + return log.New(ioutil.Discard, "", 0) } type MockTaskStateUpdater struct { diff --git a/command/agent/agent.go b/command/agent/agent.go index d09ee4147b91..45912712fc32 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -321,6 +321,7 @@ func (a *Agent) clientConfig() (*clientconfig.Config, error) { conf.GCParallelDestroys = a.config.Client.GCParallelDestroys conf.GCDiskUsageThreshold = a.config.Client.GCDiskUsageThreshold conf.GCInodeUsageThreshold = a.config.Client.GCInodeUsageThreshold + conf.GCMaxAllocs = a.config.Client.GCMaxAllocs conf.NoHostUUID = a.config.Client.NoHostUUID return conf, nil diff --git a/command/agent/config-test-fixtures/basic.hcl b/command/agent/config-test-fixtures/basic.hcl index 8d4880a7d27e..006a4340c2ce 100644 --- a/command/agent/config-test-fixtures/basic.hcl +++ b/command/agent/config-test-fixtures/basic.hcl @@ -58,6 +58,7 @@ client { gc_parallel_destroys = 6 gc_disk_usage_threshold = 82 gc_inode_usage_threshold = 91 + gc_max_allocs = 50 no_host_uuid = true } server { diff --git a/command/agent/config.go b/command/agent/config.go index cccd095f1010..a96202d3dd4c 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -209,14 +209,18 @@ type ClientConfig struct { // collector will allow. GCParallelDestroys int `mapstructure:"gc_parallel_destroys"` - // GCInodeUsageThreshold is the inode usage threshold beyond which the Nomad - // client triggers GC of the terminal allocations + // GCDiskUsageThreshold is the disk usage threshold given as a percent + // beyond which the Nomad client triggers GC of terminal allocations GCDiskUsageThreshold float64 `mapstructure:"gc_disk_usage_threshold"` // GCInodeUsageThreshold is the inode usage threshold beyond which the Nomad // client triggers GC of the terminal allocations GCInodeUsageThreshold float64 `mapstructure:"gc_inode_usage_threshold"` + // GCMaxAllocs is the maximum number of allocations a node can have + // before garbage collection is triggered. + GCMaxAllocs int `mapstructure:"gc_max_allocs"` + // NoHostUUID disables using the host's UUID and will force generation of a // random UUID. 
 	NoHostUUID bool `mapstructure:"no_host_uuid"`
 
@@ -503,6 +507,7 @@ func DevConfig() *Config {
 	conf.Client.GCInterval = 10 * time.Minute
 	conf.Client.GCDiskUsageThreshold = 99
 	conf.Client.GCInodeUsageThreshold = 99
+	conf.Client.GCMaxAllocs = 50
 	return conf
 }
 
@@ -532,8 +537,9 @@ func DefaultConfig() *Config {
 			Reserved:           &Resources{},
 			GCInterval:         1 * time.Minute,
 			GCParallelDestroys: 2,
-			GCInodeUsageThreshold: 70,
 			GCDiskUsageThreshold:  80,
+			GCInodeUsageThreshold: 70,
+			GCMaxAllocs:           50,
 		},
 		Server: &ServerConfig{
 			Enabled: false,
@@ -949,6 +955,9 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig {
 	if b.GCInodeUsageThreshold != 0 {
 		result.GCInodeUsageThreshold = b.GCInodeUsageThreshold
 	}
+	if b.GCMaxAllocs != 0 {
+		result.GCMaxAllocs = b.GCMaxAllocs
+	}
 	if b.NoHostUUID {
 		result.NoHostUUID = b.NoHostUUID
 	}
diff --git a/command/agent/config_parse.go b/command/agent/config_parse.go
index 403f5b75b5ca..1d0f387bf273 100644
--- a/command/agent/config_parse.go
+++ b/command/agent/config_parse.go
@@ -346,6 +346,7 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error {
 		"gc_disk_usage_threshold",
 		"gc_inode_usage_threshold",
 		"gc_parallel_destroys",
+		"gc_max_allocs",
 		"no_host_uuid",
 	}
 	if err := checkHCLKeys(listVal, valid); err != nil {
diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go
index 9774b0db3fde..adff7035b720 100644
--- a/command/agent/config_parse_test.go
+++ b/command/agent/config_parse_test.go
@@ -3,10 +3,12 @@ package agent
 import (
 	"path/filepath"
 	"reflect"
+	"strings"
 	"testing"
 	"time"
 
 	"github.com/hashicorp/nomad/nomad/structs/config"
+	"github.com/kr/pretty"
 )
 
 func TestConfig_Parse(t *testing.T) {
@@ -75,6 +77,7 @@ func TestConfig_Parse(t *testing.T) {
 				GCParallelDestroys:    6,
 				GCDiskUsageThreshold:  82,
 				GCInodeUsageThreshold: 91,
+				GCMaxAllocs:           50,
 				NoHostUUID:            true,
 			},
 			Server: &ServerConfig{
@@ -165,22 +168,20 @@ func TestConfig_Parse(t *testing.T) {
 	}
 
 	for _, tc := range cases {
-		t.Logf("Testing parse: %s", tc.File)
+		t.Run(tc.File, func(t *testing.T) {
+			path, err := filepath.Abs(filepath.Join("./config-test-fixtures", tc.File))
+			if err != nil {
+				t.Fatalf("file: %s\n\n%s", tc.File, err)
+			}
 
-		path, err := filepath.Abs(filepath.Join("./config-test-fixtures", tc.File))
-		if err != nil {
-			t.Fatalf("file: %s\n\n%s", tc.File, err)
-			continue
-		}
+			actual, err := ParseConfigFile(path)
+			if (err != nil) != tc.Err {
+				t.Fatalf("file: %s\n\n%s", tc.File, err)
+			}
 
-		actual, err := ParseConfigFile(path)
-		if (err != nil) != tc.Err {
-			t.Fatalf("file: %s\n\n%s", tc.File, err)
-			continue
-		}
-
-		if !reflect.DeepEqual(actual, tc.Result) {
-			t.Fatalf("file: %s\n\n%#v\n\n%#v", tc.File, actual, tc.Result)
-		}
+			if !reflect.DeepEqual(actual, tc.Result) {
+				t.Errorf("file: %s diff: (actual vs expected)\n\n%s", tc.File, strings.Join(pretty.Diff(actual, tc.Result), "\n"))
+			}
+		})
 	}
 }
diff --git a/website/source/docs/agent/configuration/client.html.md b/website/source/docs/agent/configuration/client.html.md
index 1ce6fe77fbe3..b817c154b040 100644
--- a/website/source/docs/agent/configuration/client.html.md
+++ b/website/source/docs/agent/configuration/client.html.md
@@ -100,6 +100,12 @@ client {
 - `gc_inode_usage_threshold` `(float: 70)` - Specifies the inode usage percent
   which Nomad tries to maintain by garbage collecting terminal allocations.
 
+- `gc_max_allocs` `(int: 50)` - Specifies the maximum number of allocations
+  which a client will track before triggering a garbage collection of terminal
+  allocations. This does *not* limit the number of allocations a node can run
+  at a time; however, once a node exceeds `gc_max_allocs`, each new allocation
+  causes terminal allocations to be garbage collected.
+
 - `gc_parallel_destroys` `(int: 2)` - Specifies the maximum number of parallel
   destroys allowed by the garbage collector. This value should be relatively
   low to avoid high resource usage during garbage collections.
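One wiring detail worth calling out: `NewAllocGarbageCollector` no longer spawns its own goroutine, so the caller must start the loop explicitly, which `NewClient` now does via `go c.garbageCollector.Run()`. Below is a minimal sketch of that contract; it is illustrative only and not part of the patch. `staticAllocCounter` and `wireGC` are hypothetical names, and in Nomad the `*Client` itself satisfies `AllocCounter` through its `NumAllocs` method.

```go
package client

import (
	"log"
	"os"
	"time"

	"github.com/hashicorp/nomad/client/stats"
)

// staticAllocCounter is a hypothetical AllocCounter used only for this
// sketch; the real implementation is Client.NumAllocs, which sums tracked,
// blocked, and migrating allocations under their respective locks.
type staticAllocCounter struct{ n int }

func (s *staticAllocCounter) NumAllocs() int { return s.n }

// wireGC shows the caller-managed lifecycle introduced by this change:
// construct the collector, then explicitly start its periodic loop.
func wireGC(sc stats.NodeStatsCollector) *AllocGarbageCollector {
	logger := log.New(os.Stderr, "", log.LstdFlags)
	cfg := &GCConfig{
		MaxAllocs:           50, // mirrors the gc_max_allocs default
		DiskUsageThreshold:  80,
		InodeUsageThreshold: 70,
		Interval:            1 * time.Minute,
		ParallelDestroys:    2,
	}
	gc := NewAllocGarbageCollector(logger, sc, &staticAllocCounter{n: 10}, cfg)

	// The constructor no longer calls `go gc.run()`; the caller must start
	// the loop, exactly as NewClient does with `go c.garbageCollector.Run()`.
	go gc.Run()
	return gc
}
```

Any one of the three conditions can start a collection pass on a tick: disk usage over `DiskUsageThreshold`, inode usage over `InodeUsageThreshold`, or `numAllocs()` (terminal allocations queued for GC plus the counter's live allocations) exceeding `MaxAllocs`.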