Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scaleutils: add post scale in task to optionally purge nodes. #258

Merged
merged 2 commits into from
Sep 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ require (
github.com/hashicorp/go-multierror v1.0.0
github.com/hashicorp/go-plugin v1.0.1
github.com/hashicorp/hcl/v2 v2.3.0
github.com/hashicorp/nomad/api v0.0.0-20200812215312-956c3a426dbc
github.com/hashicorp/nomad/api v0.0.0-20200904210342-cfe4f8314ff7
github.com/kr/pretty v0.2.0 // indirect
github.com/mattn/go-isatty v0.0.12 // indirect
github.com/mitchellh/cli v1.0.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@ github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCO
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/hcl/v2 v2.3.0 h1:iRly8YaMwTBAKhn1Ybk7VSdzbnopghktCD031P8ggUE=
github.com/hashicorp/hcl/v2 v2.3.0/go.mod h1:d+FwDBbOLvpAM3Z6J7gPj/VoAGkNe/gm352ZhjJ/Zv8=
github.com/hashicorp/nomad/api v0.0.0-20200812215312-956c3a426dbc h1:b3Q1n+vuKmCLOa1H6cuet8xJT5LDaMiGJet55KnxW6w=
github.com/hashicorp/nomad/api v0.0.0-20200812215312-956c3a426dbc/go.mod h1:DCi2k47yuUDzf2qWAK8E1RVmWgz/lc0jZQeEnICTxmY=
github.com/hashicorp/nomad/api v0.0.0-20200904210342-cfe4f8314ff7 h1:xVUfoEZ6PAu+NZPZY/aRlG5FG/VosFS6kw1FjjEy30A=
github.com/hashicorp/nomad/api v0.0.0-20200904210342-cfe4f8314ff7/go.mod h1:DCi2k47yuUDzf2qWAK8E1RVmWgz/lc0jZQeEnICTxmY=
github.com/hashicorp/yamux v0.0.0-20180604194846-3520598351bb h1:b5rjCoWHc7eqmAS4/qyk21ZsHyb6Mxv/jykxvNTkU4M=
github.com/hashicorp/yamux v0.0.0-20180604194846-3520598351bb/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM=
Expand Down
49 changes: 49 additions & 0 deletions helper/scaleutils/nomad.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ import (
"errors"
"fmt"
"os"
"strconv"
"sync"
"time"

hclog "github.com/hashicorp/go-hclog"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad-autoscaler/plugins/target"
"github.com/hashicorp/nomad/api"
)

Expand Down Expand Up @@ -83,6 +85,53 @@ func (si *ScaleIn) RunPreScaleInTasks(ctx context.Context, req *ScaleInReq) ([]N
return nodeIDMap, nil
}

// RunPostScaleInTasks runs any tasks that need to occur after a remote node
// provider has completed its work. It handles any user configuration so that
// the plugin does not need to perform this work.
//
// The only post scale-in task currently performed is the optional purging of
// the supplied nodes from Nomad. Purging is controlled by the
// target.ConfigKeyNodePurge entry within cfg and is skipped when the key is
// absent, when its value parses to false, or when its value cannot be parsed
// as a bool. In the last case the parse error is logged rather than returned.
//
// Every node in nodes is attempted, even if an earlier purge fails. The
// returned error is nil when all purges succeed; otherwise it aggregates one
// error per failed node, each annotated with the Nomad node ID.
func (si *ScaleIn) RunPostScaleInTasks(cfg map[string]string, nodes []NodeID) error {

	// Node purging is opt-in: if the operator has not set the config
	// parameter, the default stance is that purging is disabled.
	val, ok := cfg[target.ConfigKeyNodePurge]
	if !ok {
		return nil
	}

	// Parse the string as a bool. The operator has attempted to configure
	// this value, but a malformed value is not worth breaking the whole
	// pipeline for. Therefore log the error and return without purging, as
	// Nomad will eventually perform this work.
	purgeEnabled, parseErr := strconv.ParseBool(val)
	if parseErr != nil {
		si.log.Error("failed to parse node_purge config param", "error", parseErr)
		return nil
	}

	// If the operator has explicitly disabled node purging, exit.
	if !purgeEnabled {
		return nil
	}

	// Use a multierror to collect errors from any and all node purge calls
	// that fail. It stays nil when no purge fails, and ErrorOrNil is safe to
	// call on a nil *multierror.Error.
	var mErr *multierror.Error

	// Iterate the node list and perform a purge on each node. A failure does
	// not stop the loop; it is recorded along with the node ID so the
	// aggregated error identifies exactly which nodes were not purged.
	for _, node := range nodes {
		resp, _, err := si.nomad.Nodes().Purge(node.NomadID, nil)
		if err != nil {
			mErr = multierror.Append(mErr,
				fmt.Errorf("failed to purge Nomad node %s: %w", node.NomadID, err))
			continue
		}

		si.log.Info("successfully purged Nomad node",
			"node_id", node.NomadID, "nomad_evals", resp.EvalIDs)
	}

	return mErr.ErrorOrNil()
}

// identifyTargets filters the current Nomad cluster node list and then sorts
// and selects nodes for removal based on the specified strategy. It is
// possible the list does not contain as many nodes as requested. In this case,
Expand Down
7 changes: 6 additions & 1 deletion plugins/builtin/target/aws-asg/plugin/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ func (t *TargetPlugin) scaleIn(ctx context.Context, asg *autoscaling.AutoScaling

ids, err := t.scaleInUtils.RunPreScaleInTasks(ctx, scaleReq)
if err != nil {
return fmt.Errorf("failed to perform Nomad scale in tasks: %v", err)
return fmt.Errorf("failed to perform pre-scale Nomad scale in tasks: %v", err)
}

// Grab the instanceIDs once as it is used multiple times throughout the
Expand Down Expand Up @@ -139,6 +139,11 @@ func (t *TargetPlugin) scaleIn(ctx context.Context, asg *autoscaling.AutoScaling
log.Info("successfully terminated EC2 instances")
eWriter.write(ctx, scalingEventTerminate)

// Run any post scale in tasks that are desired.
if err := t.scaleInUtils.RunPostScaleInTasks(config, ids); err != nil {
return fmt.Errorf("failed to perform post-scale Nomad scale in tasks: %v", err)
}

return nil
}

Expand Down
1 change: 1 addition & 0 deletions plugins/target/target.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ const (
ConfigKeyTaskGroup = "Group"
ConfigKeyClass = "node_class"
ConfigKeyDrainDeadline = "node_drain_deadline"
ConfigKeyNodePurge = "node_purge"
)

// RPC is a plugin implementation that talks over net/rpc
Expand Down