Skip to content

Commit

Permalink
Merge branch 'master' into f-vendor-serf-memberlist
Browse files Browse the repository at this point in the history
  • Loading branch information
dadgar authored Feb 2, 2017
2 parents 45c7863 + efc0602 commit 91ae993
Show file tree
Hide file tree
Showing 62 changed files with 6,564 additions and 848 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
## 0.5.4 (January 31, 2017)

IMPROVEMENTS:
* client: Made the GC related tunables configurable via client configuration
[GH-2261]

BUG FIXES:
* client: Fix panic when upgrading to 0.5.3 [GH-2256]

## 0.5.3 (January 30, 2017)

IMPROVEMENTS:
Expand Down
10 changes: 6 additions & 4 deletions Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,14 @@ else
tar -xf go${GO_VERSION}.linux-${ARCH}.tar.gz
sudo mv go $SRCROOT
sudo chmod 775 $SRCROOT
sudo chown ubuntu:ubuntu $SRCROOT
sudo chown vagrant:vagrant $SRCROOT
fi
# Setup the GOPATH; even though the shared folder spec gives the working
# directory the right user/group, we need to set it properly on the
# parent path to allow subsequent "go get" commands to work.
sudo mkdir -p $SRCPATH
sudo chown -R ubuntu:ubuntu $SRCPATH 2>/dev/null || true
sudo chown -R vagrant:vagrant $SRCPATH 2>/dev/null || true
# ^^ silencing errors here because we expect this to fail for the shared folder
cat <<EOF >/tmp/gopath.sh
Expand All @@ -70,8 +70,8 @@ sudo DEBIAN_FRONTEND=noninteractive apt-get install -y docker-engine
# Restart docker to make sure we get the latest version of the daemon if there is an upgrade
sudo service docker restart
# Make sure we can actually use docker as the ubuntu user
sudo usermod -aG docker ubuntu
# Make sure we can actually use docker as the vagrant user
sudo usermod -aG docker vagrant
# Setup Nomad for development
cd /opt/gopath/src/github.com/hashicorp/nomad && make bootstrap
Expand All @@ -93,6 +93,7 @@ def configureVM(vmCfg, vmParams={
numCPUs: DEFAULT_CPU_COUNT,
}
)
# When updating make sure to use a box that supports VMWare and VirtualBox
vmCfg.vm.box = "bento/ubuntu-16.04" # 16.04 LTS

vmCfg.vm.provision "shell", inline: $script, privileged: false
Expand All @@ -116,6 +117,7 @@ def configureVM(vmCfg, vmParams={

["vmware_fusion", "vmware_workstation"].each do |p|
vmCfg.vm.provider p do |v|
v.enable_vmrun_ip_lookup = false
v.gui = false
v.memory = memory
v.cpus = cpus
Expand Down
12 changes: 10 additions & 2 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,10 +225,18 @@ func NewClient(cfg *config.Config, consulSyncer *consul.Syncer, logger *log.Logg
return nil, fmt.Errorf("failed to initialize client: %v", err)
}

// Add the stats collector and the garbage collector
// Add the stats collector
statsCollector := stats.NewHostStatsCollector(logger, c.config.AllocDir)
c.hostStatsCollector = statsCollector
c.garbageCollector = NewAllocGarbageCollector(logger, statsCollector, cfg.Node.Reserved.DiskMB)

// Add the garbage collector
gcConfig := &GCConfig{
DiskUsageThreshold: cfg.GCDiskUsageThreshold,
InodeUsageThreshold: cfg.GCInodeUsageThreshold,
Interval: cfg.GCInterval,
ReservedDiskMB: cfg.Node.Reserved.DiskMB,
}
c.garbageCollector = NewAllocGarbageCollector(logger, statsCollector, gcConfig)

// Setup the node
if err := c.setupNode(); err != nil {
Expand Down
15 changes: 15 additions & 0 deletions client/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,18 @@ type Config struct {
// TLSConfig holds various TLS related configurations
TLSConfig *config.TLSConfig

// GCInterval is the time interval at which the client triggers garbage
// collection
GCInterval time.Duration

// GCDiskUsageThreshold is the disk usage threshold beyond which the Nomad
// client triggers GC of terminal allocations
GCDiskUsageThreshold float64

// GCInodeUsageThreshold is the inode usage threshold beyond which the Nomad
// client triggers GC of the terminal allocations
GCInodeUsageThreshold float64

// LogLevel is the level of the logs to putout
LogLevel string
}
Expand All @@ -177,6 +189,9 @@ func DefaultConfig() *Config {
StatsCollectionInterval: 1 * time.Second,
TLSConfig: &config.TLSConfig{},
LogLevel: "DEBUG",
GCInterval: 1 * time.Minute,
GCDiskUsageThreshold: 80,
GCInodeUsageThreshold: 70,
}
}

Expand Down
7 changes: 6 additions & 1 deletion client/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ func (r *CreatedResources) Remove(k, needle string) bool {

// Copy returns a new deep copy of CreatedResrouces.
func (r *CreatedResources) Copy() *CreatedResources {
if r == nil {
return nil
}

newr := CreatedResources{
Resources: make(map[string][]string, len(r.Resources)),
}
Expand Down Expand Up @@ -165,7 +169,8 @@ type Driver interface {
Open(ctx *ExecContext, handleID string) (DriverHandle, error)

// Cleanup is called to remove resources which were created for a task
// and no longer needed.
// and no longer needed. Cleanup is not called if CreatedResources is
// nil.
//
// If Cleanup returns a recoverable error it may be retried. On retry
// it will be passed the same CreatedResources, so all successfully
Expand Down
4 changes: 4 additions & 0 deletions client/driver/mock_driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ func (m *MockDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
// Config.Options["cleanup_fail_num"] times. For failures it will return a
// recoverable error.
func (m *MockDriver) Cleanup(ctx *ExecContext, res *CreatedResources) error {
if res == nil {
panic("Cleanup should not be called with nil *CreatedResources")
}

var err error
failn, _ := strconv.Atoi(m.config.Options["cleanup_fail_num"])
failk := m.config.Options["cleanup_fail_on"]
Expand Down
36 changes: 16 additions & 20 deletions client/gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,6 @@ import (
)

const (
// diskUsageThreshold is the percent of used disk space beyond which Nomad
// GCs terminated allocations
diskUsageThreshold = 80

// gcInterval is the interval at which Nomad runs the garbage collector
gcInterval = 1 * time.Minute

// inodeUsageThreshold is the percent of inode usage that Nomad tries to
// maintain, whenever we are over it we will attempt to GC terminal
// allocations
inodeUsageThreshold = 70

// MB is a constant which converts values in bytes to MB
MB = 1024 * 1024
)
Expand Down Expand Up @@ -134,22 +122,30 @@ func (i *IndexedGCAllocPQ) Length() int {
return len(i.heap)
}

// GCConfig allows changing the behaviour of the garbage collector
type GCConfig struct {
DiskUsageThreshold float64
InodeUsageThreshold float64
Interval time.Duration
ReservedDiskMB int
}

// AllocGarbageCollector garbage collects terminated allocations on a node
type AllocGarbageCollector struct {
allocRunners *IndexedGCAllocPQ
statsCollector stats.NodeStatsCollector
reservedDiskMB int
config *GCConfig
logger *log.Logger
shutdownCh chan struct{}
}

// NewAllocGarbageCollector returns a garbage collector for terminated
// allocations on a node.
func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStatsCollector, reservedDiskMB int) *AllocGarbageCollector {
func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStatsCollector, config *GCConfig) *AllocGarbageCollector {
gc := &AllocGarbageCollector{
allocRunners: NewIndexedGCAllocPQ(),
statsCollector: statsCollector,
reservedDiskMB: reservedDiskMB,
config: config,
logger: logger,
shutdownCh: make(chan struct{}),
}
Expand All @@ -159,7 +155,7 @@ func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStats
}

func (a *AllocGarbageCollector) run() {
ticker := time.NewTicker(gcInterval)
ticker := time.NewTicker(a.config.Interval)
for {
select {
case <-ticker.C:
Expand Down Expand Up @@ -195,8 +191,8 @@ func (a *AllocGarbageCollector) keepUsageBelowThreshold() error {
break
}

if diskStats.UsedPercent <= diskUsageThreshold &&
diskStats.InodesUsedPercent <= inodeUsageThreshold {
if diskStats.UsedPercent <= a.config.DiskUsageThreshold &&
diskStats.InodesUsedPercent <= a.config.InodeUsageThreshold {
break
}

Expand Down Expand Up @@ -266,10 +262,10 @@ func (a *AllocGarbageCollector) MakeRoomFor(allocations []*structs.Allocation) e
// we don't need to garbage collect terminated allocations
if hostStats := a.statsCollector.Stats(); hostStats != nil {
var availableForAllocations uint64
if hostStats.AllocDirStats.Available < uint64(a.reservedDiskMB*MB) {
if hostStats.AllocDirStats.Available < uint64(a.config.ReservedDiskMB*MB) {
availableForAllocations = 0
} else {
availableForAllocations = hostStats.AllocDirStats.Available - uint64(a.reservedDiskMB*MB)
availableForAllocations = hostStats.AllocDirStats.Available - uint64(a.config.ReservedDiskMB*MB)
}
if uint64(totalResource.DiskMB*MB) < availableForAllocations {
return nil
Expand Down
32 changes: 23 additions & 9 deletions client/gc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,20 @@ import (
"log"
"os"
"testing"
"time"

"github.com/hashicorp/nomad/client/stats"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
)

var gcConfig = GCConfig{
DiskUsageThreshold: 80,
InodeUsageThreshold: 70,
Interval: 1 * time.Minute,
ReservedDiskMB: 0,
}

func TestIndexedGCAllocPQ(t *testing.T) {
pq := NewIndexedGCAllocPQ()

Expand Down Expand Up @@ -83,7 +91,7 @@ func (m *MockStatsCollector) Stats() *stats.HostStats {

func TestAllocGarbageCollector_MarkForCollection(t *testing.T) {
logger := log.New(os.Stdout, "", 0)
gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, 0)
gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &gcConfig)

_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
if err := gc.MarkForCollection(ar1); err != nil {
Expand All @@ -98,7 +106,7 @@ func TestAllocGarbageCollector_MarkForCollection(t *testing.T) {

func TestAllocGarbageCollector_Collect(t *testing.T) {
logger := log.New(os.Stdout, "", 0)
gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, 0)
gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &gcConfig)

_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
_, ar2 := testAllocRunnerFromAlloc(mock.Alloc(), false)
Expand All @@ -120,7 +128,7 @@ func TestAllocGarbageCollector_Collect(t *testing.T) {

func TestAllocGarbageCollector_CollectAll(t *testing.T) {
logger := log.New(os.Stdout, "", 0)
gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, 0)
gc := NewAllocGarbageCollector(logger, &MockStatsCollector{}, &gcConfig)

_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
_, ar2 := testAllocRunnerFromAlloc(mock.Alloc(), false)
Expand All @@ -143,7 +151,8 @@ func TestAllocGarbageCollector_CollectAll(t *testing.T) {
func TestAllocGarbageCollector_MakeRoomForAllocations_EnoughSpace(t *testing.T) {
logger := log.New(os.Stdout, "", 0)
statsCollector := &MockStatsCollector{}
gc := NewAllocGarbageCollector(logger, statsCollector, 20)
gcConfig.ReservedDiskMB = 20
gc := NewAllocGarbageCollector(logger, statsCollector, &gcConfig)

_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
close(ar1.waitCh)
Expand Down Expand Up @@ -179,7 +188,8 @@ func TestAllocGarbageCollector_MakeRoomForAllocations_EnoughSpace(t *testing.T)
func TestAllocGarbageCollector_MakeRoomForAllocations_GC_Partial(t *testing.T) {
logger := log.New(os.Stdout, "", 0)
statsCollector := &MockStatsCollector{}
gc := NewAllocGarbageCollector(logger, statsCollector, 20)
gcConfig.ReservedDiskMB = 20
gc := NewAllocGarbageCollector(logger, statsCollector, &gcConfig)

_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
close(ar1.waitCh)
Expand Down Expand Up @@ -216,7 +226,8 @@ func TestAllocGarbageCollector_MakeRoomForAllocations_GC_Partial(t *testing.T) {
func TestAllocGarbageCollector_MakeRoomForAllocations_GC_All(t *testing.T) {
logger := log.New(os.Stdout, "", 0)
statsCollector := &MockStatsCollector{}
gc := NewAllocGarbageCollector(logger, statsCollector, 20)
gcConfig.ReservedDiskMB = 20
gc := NewAllocGarbageCollector(logger, statsCollector, &gcConfig)

_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
close(ar1.waitCh)
Expand Down Expand Up @@ -249,7 +260,8 @@ func TestAllocGarbageCollector_MakeRoomForAllocations_GC_All(t *testing.T) {
func TestAllocGarbageCollector_MakeRoomForAllocations_GC_Fallback(t *testing.T) {
logger := log.New(os.Stdout, "", 0)
statsCollector := &MockStatsCollector{}
gc := NewAllocGarbageCollector(logger, statsCollector, 20)
gcConfig.ReservedDiskMB = 20
gc := NewAllocGarbageCollector(logger, statsCollector, &gcConfig)

_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
close(ar1.waitCh)
Expand Down Expand Up @@ -281,7 +293,8 @@ func TestAllocGarbageCollector_MakeRoomForAllocations_GC_Fallback(t *testing.T)
func TestAllocGarbageCollector_UsageBelowThreshold(t *testing.T) {
logger := log.New(os.Stdout, "", 0)
statsCollector := &MockStatsCollector{}
gc := NewAllocGarbageCollector(logger, statsCollector, 20)
gcConfig.ReservedDiskMB = 20
gc := NewAllocGarbageCollector(logger, statsCollector, &gcConfig)

_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
close(ar1.waitCh)
Expand Down Expand Up @@ -314,7 +327,8 @@ func TestAllocGarbageCollector_UsageBelowThreshold(t *testing.T) {
func TestAllocGarbageCollector_UsedPercentThreshold(t *testing.T) {
logger := log.New(os.Stdout, "", 0)
statsCollector := &MockStatsCollector{}
gc := NewAllocGarbageCollector(logger, statsCollector, 20)
gcConfig.ReservedDiskMB = 20
gc := NewAllocGarbageCollector(logger, statsCollector, &gcConfig)

_, ar1 := testAllocRunnerFromAlloc(mock.Alloc(), false)
close(ar1.waitCh)
Expand Down
5 changes: 5 additions & 0 deletions client/task_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -1033,6 +1033,11 @@ func (r *TaskRunner) cleanup() {
res := r.createdResources.Copy()
r.createdResourcesLock.Unlock()

if res == nil {
// No created resources to cleanup
return
}

ctx := driver.NewExecContext(r.taskDir, r.alloc.ID)
attempts := 1
var cleanupErr error
Expand Down
21 changes: 21 additions & 0 deletions client/task_runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1285,6 +1285,27 @@ func TestTaskRunner_SimpleRun_Dispatch(t *testing.T) {
}
}

// TestTaskRunner_CleanupNil ensures TaskRunner doesn't call Driver.Cleanup if
// no resources were created.
func TestTaskRunner_CleanupNil(t *testing.T) {
alloc := mock.Alloc()
task := alloc.Job.TaskGroups[0].Tasks[0]
task.Driver = "mock_driver"

ctx := testTaskRunnerFromAlloc(t, false, alloc)
ctx.tr.MarkReceived()

ctx.tr.createdResources = nil

defer ctx.Cleanup()
ctx.tr.Run()

// Since we only failed once, createdResources should be empty
if ctx.tr.createdResources != nil {
t.Fatalf("createdResources should still be nil: %v", ctx.tr.createdResources)
}
}

func TestTaskRunner_CleanupOK(t *testing.T) {
alloc := mock.Alloc()
task := alloc.Job.TaskGroups[0].Tasks[0]
Expand Down
5 changes: 5 additions & 0 deletions command/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,11 @@ func (a *Agent) clientConfig() (*clientconfig.Config, error) {
conf.TLSConfig = a.config.TLSConfig
conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP

// Set the GC related configs
conf.GCInterval = a.config.Client.GCInterval
conf.GCDiskUsageThreshold = a.config.Client.GCDiskUsageThreshold
conf.GCInodeUsageThreshold = a.config.Client.GCInodeUsageThreshold

return conf, nil
}

Expand Down
3 changes: 3 additions & 0 deletions command/agent/config-test-fixtures/basic.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ client {
data_points = 35
collection_interval = "5s"
}
gc_interval = "6s"
gc_disk_usage_threshold = 82
gc_inode_usage_threshold = 91
}
server {
enabled = true
Expand Down
Loading

0 comments on commit 91ae993

Please sign in to comment.