From 4675f46c5b09100855ad63857d1f361b262373e2 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Mon, 22 Feb 2021 09:21:01 +0100 Subject: [PATCH 1/9] scaleutils: deprecate old scaleutils functionality. --- .../{filter.go => deprecated_filter.go} | 21 ++++---- ...lter_test.go => deprecated_filter_test.go} | 29 ---------- .../{nomad.go => deprecated_nomad.go} | 54 ++++--------------- .../{request.go => deprecated_request.go} | 8 ++- ...est_test.go => deprecated_request_test.go} | 0 .../{status.go => deprecated_status.go} | 0 6 files changed, 27 insertions(+), 85 deletions(-) rename sdk/helper/scaleutils/{filter.go => deprecated_filter.go} (95%) rename sdk/helper/scaleutils/{filter_test.go => deprecated_filter_test.go} (92%) rename sdk/helper/scaleutils/{nomad.go => deprecated_nomad.go} (89%) rename sdk/helper/scaleutils/{request.go => deprecated_request.go} (87%) rename sdk/helper/scaleutils/{request_test.go => deprecated_request_test.go} (100%) rename sdk/helper/scaleutils/{status.go => deprecated_status.go} (100%) diff --git a/sdk/helper/scaleutils/filter.go b/sdk/helper/scaleutils/deprecated_filter.go similarity index 95% rename from sdk/helper/scaleutils/filter.go rename to sdk/helper/scaleutils/deprecated_filter.go index 3c2e3581..3529f1ec 100644 --- a/sdk/helper/scaleutils/filter.go +++ b/sdk/helper/scaleutils/deprecated_filter.go @@ -8,12 +8,16 @@ import ( "github.com/hashicorp/nomad/api" ) +// Deprecated. Please use NodeResourceID. +// // NodeID provides a mapping between the Nomad ID of a node and its remote // infrastructure provider specific ID. type NodeID struct { NomadID, RemoteID string } +// Deprecated. Please use nodepool.ClusterNodePoolIdentifier. +// // PoolIdentifier is the information used to identify nodes into pools of // resources. This then forms our scalable unit. type PoolIdentifier struct { @@ -42,6 +46,8 @@ func (p *PoolIdentifier) IdentifyNodes(n []*api.NodeListStub) ([]*api.NodeListSt } } +// Deprecated. Please use nodepool.ClusterNodePoolIdentifier. +// // IdentifierKey is the identifier to group nodes into a pool of resource and // thus forms the scalable object. type IdentifierKey string @@ -50,6 +56,8 @@ type IdentifierKey string // resource. This is the default. const IdentifierKeyClass IdentifierKey = "class" +// Deprecated. Pleas use ClusterNodeIDLookupFunc. +// // RemoteProvider is infrastructure provider which hosts and therefore manages // the Nomad client instances. This is used to understand how to translate the // Nomad NodeID to an ID that the provider understands. @@ -67,6 +75,8 @@ const RemoteProviderAzureInstanceID RemoteProvider = "azure_instance_id" const RemoteProviderGCEInstanceID RemoteProvider = "gce_instance_id" +// Deprecated. Please use ClusterScaleInNodeIDStrategy. +// // NodeIDStrategy is the strategy used to identify nodes for removal as part of // scaling in. type NodeIDStrategy string @@ -174,17 +184,6 @@ func filterByClass(n []*api.NodeListStub, id string) ([]*api.NodeListStub, error return out, nil } -// multiErrorFunc is a helper to convert the standard multierror output into -// something a little more friendly to consoles. This is currently only used by -// the node filter, but could be more useful elsewhere in the future. -func multiErrorFunc(err []error) string { - points := make([]string, len(err)) - for i, err := range err { - points[i] = err.Error() - } - return strings.Join(points, ", ") -} - // nodeIDMapFunc is the function signature used to find the Nomad node's remote // identifier. 
Specific implementations can be found below. type nodeIDMapFunc func(n *api.Node) (string, error) diff --git a/sdk/helper/scaleutils/filter_test.go b/sdk/helper/scaleutils/deprecated_filter_test.go similarity index 92% rename from sdk/helper/scaleutils/filter_test.go rename to sdk/helper/scaleutils/deprecated_filter_test.go index cac6438c..1195e860 100644 --- a/sdk/helper/scaleutils/filter_test.go +++ b/sdk/helper/scaleutils/deprecated_filter_test.go @@ -255,35 +255,6 @@ func Test_filterByClass(t *testing.T) { } } -func Test_multiErrorFunc(t *testing.T) { - testCases := []struct { - inputErr []error - expectedOutput string - name string - }{ - { - inputErr: []error{ - errors.New("hello"), - errors.New("is it me you're looking for"), - errors.New("cause I wonder where you are"), - }, - expectedOutput: "hello, is it me you're looking for, cause I wonder where you are", - name: "multiple input errors", - }, - { - inputErr: []error{errors.New("this is not an exciting test message")}, - expectedOutput: "this is not an exciting test message", - name: "single input error", - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - assert.Equal(t, tc.expectedOutput, multiErrorFunc(tc.inputErr), tc.name) - }) - } -} - func Test_awsNodeIDMap(t *testing.T) { testCases := []struct { inputNode *api.Node diff --git a/sdk/helper/scaleutils/nomad.go b/sdk/helper/scaleutils/deprecated_nomad.go similarity index 89% rename from sdk/helper/scaleutils/nomad.go rename to sdk/helper/scaleutils/deprecated_nomad.go index ef2f0d11..60a0a469 100644 --- a/sdk/helper/scaleutils/nomad.go +++ b/sdk/helper/scaleutils/deprecated_nomad.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "os" "strconv" "sync" "time" @@ -15,6 +14,7 @@ import ( "github.com/hashicorp/nomad/api" ) +// Deprecated. Please use ClusterScaleUtils. type ScaleIn struct { log hclog.Logger nomad *api.Client @@ -27,6 +27,8 @@ type ScaleIn struct { curNodeID string } +// Deprecated. Please use NewClusterScaleUtils. +// // NewScaleInUtils returns a new ScaleIn implementation which provides helper // functions for performing scaling in operations. func NewScaleInUtils(cfg *api.Config, log hclog.Logger) (*ScaleIn, error) { @@ -38,7 +40,7 @@ func NewScaleInUtils(cfg *api.Config, log hclog.Logger) (*ScaleIn, error) { // Identifying the node is best-effort and should not result in a terminal // error when setting up the utils. - id, err := identifyAutoscalerNodeID(client) + id, err := autoscalerNodeID(client) if err != nil { log.Error("failed to identify Nomad Autoscaler nodeID", "error", err) } @@ -293,7 +295,7 @@ func (si *ScaleIn) drainNode(ctx context.Context, nodeID string, spec *api.Drain // Monitor the drain so we output the log messages. An error here indicates // the drain failed to complete successfully. - if err := si.monitorNodeDrain(ctx, nodeID, resp.LastIndex, spec.IgnoreSystemJobs); err != nil { + if err := si.monitorNodeDrain(ctx, nodeID, resp.LastIndex); err != nil { return fmt.Errorf("context done while monitoring node drain: %v", err) } return nil @@ -301,8 +303,11 @@ func (si *ScaleIn) drainNode(ctx context.Context, nodeID string, spec *api.Drain // monitorNodeDrain follows the drain of a node, logging the messages we // receive to their appropriate level. 
-func (si *ScaleIn) monitorNodeDrain(ctx context.Context, nodeID string, index uint64, ignoreSys bool) error { - for msg := range si.nomad.Nodes().MonitorDrain(ctx, nodeID, index, ignoreSys) { +// +// TODO(jrasell): currently the ignoreSys param is hardcoded to false, we will +// probably want to expose this to operators in the future. +func (si *ScaleIn) monitorNodeDrain(ctx context.Context, nodeID string, index uint64) error { + for msg := range si.nomad.Nodes().MonitorDrain(ctx, nodeID, index, false) { switch msg.Level { case api.MonitorMsgLevelInfo: si.log.Info("received node drain message", "node_id", nodeID, "msg", msg.Message) @@ -316,42 +321,3 @@ func (si *ScaleIn) monitorNodeDrain(ctx context.Context, nodeID string, index ui } return ctx.Err() } - -// identifyAutoscalerNodeID identifies the NodeID which the autoscaler is -// running on. -// -// TODO(jrasell) this should be removed once the cluster targets and core -// autoscaler components are updated to handle reconciliation. -func identifyAutoscalerNodeID(client *api.Client) (string, error) { - - envVar := os.Getenv("NOMAD_ALLOC_ID") - if envVar == "" { - return "", nil - } - - allocInfo, _, err := client.Allocations().Info(envVar, nil) - if err != nil { - return "", fmt.Errorf("failed to call Nomad allocations info: %v", err) - } - - return allocInfo.NodeID, nil -} - -// TODO(jrasell) this should be removed once the cluster targets and core -// autoscaler components are updated to handle reconciliation. -func filterOutNodeID(n []*api.NodeListStub, id string) []*api.NodeListStub { - - if id == "" { - return n - } - - var out []*api.NodeListStub - - for _, node := range n { - if node.ID == id { - continue - } - out = append(out, node) - } - return out -} diff --git a/sdk/helper/scaleutils/request.go b/sdk/helper/scaleutils/deprecated_request.go similarity index 87% rename from sdk/helper/scaleutils/request.go rename to sdk/helper/scaleutils/deprecated_request.go index df3c4b0e..2f5dd666 100644 --- a/sdk/helper/scaleutils/request.go +++ b/sdk/helper/scaleutils/deprecated_request.go @@ -8,12 +8,18 @@ import ( ) const ( + // Deprecated. Please use defaultNodeDrainDeadline. + // // DefaultDrainDeadline is the drainSpec deadline used if one is not // specified by an operator. - DefaultDrainDeadline = 15 * time.Minute + DefaultDrainDeadline = 15 * time.Minute + + // Deprecated. Please use defaultNodeIgnoreSystemJobs. DefaultIgnoreSystemJobs = false ) +// Deprecated. Please use NewClusterScaleUtils. +// // ScaleInReq represents an individual cluster scaling request and encompasses // all the information needed to perform the pre-termination tasks. type ScaleInReq struct { diff --git a/sdk/helper/scaleutils/request_test.go b/sdk/helper/scaleutils/deprecated_request_test.go similarity index 100% rename from sdk/helper/scaleutils/request_test.go rename to sdk/helper/scaleutils/deprecated_request_test.go diff --git a/sdk/helper/scaleutils/status.go b/sdk/helper/scaleutils/deprecated_status.go similarity index 100% rename from sdk/helper/scaleutils/status.go rename to sdk/helper/scaleutils/deprecated_status.go From ca4751e0dd6217fbc438e1b0f07bf087dcba5c58 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Mon, 22 Feb 2021 09:45:29 +0100 Subject: [PATCH 2/9] scaleutils: add nodepool interface and node_class implementation. 
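The ClusterNodePoolIdentifier interface defines how nodes are classed into
pools (IsPoolMember, Key, Value), with node_class as the first
implementation. A rough sketch of the expected call pattern follows; the
class name and node stubs are illustrative only, not part of this change:

package main

import (
	"fmt"

	"github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils/nodepool"
	"github.com/hashicorp/nomad/api"
)

func main() {
	// Nodes with an empty NodeClass are matched by the reserved
	// "autoscaler-default-pool" value rather than by a named class.
	poolID := nodepool.NewNodeClassPoolIdentifier("high-memory")

	nodes := []*api.NodeListStub{
		{ID: "node-1", NodeClass: "high-memory"},
		{ID: "node-2", NodeClass: ""},
	}

	for _, n := range nodes {
		fmt.Println(n.ID, poolID.Key(), poolID.Value(), poolID.IsPoolMember(n))
	}
}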
--- sdk/helper/scaleutils/nodepool/class.go | 36 +++++++++++++ sdk/helper/scaleutils/nodepool/class_test.go | 54 ++++++++++++++++++++ sdk/helper/scaleutils/nodepool/nodepool.go | 20 ++++++++ 3 files changed, 110 insertions(+) create mode 100644 sdk/helper/scaleutils/nodepool/class.go create mode 100644 sdk/helper/scaleutils/nodepool/class_test.go create mode 100644 sdk/helper/scaleutils/nodepool/nodepool.go diff --git a/sdk/helper/scaleutils/nodepool/class.go b/sdk/helper/scaleutils/nodepool/class.go new file mode 100644 index 00000000..a3a82601 --- /dev/null +++ b/sdk/helper/scaleutils/nodepool/class.go @@ -0,0 +1,36 @@ +package nodepool + +import "github.com/hashicorp/nomad/api" + +// defaultClassIdentifier is the class value used when the nodes NodeClass is +// empty. +const defaultClassIdentifier = "autoscaler-default-pool" + +// nodeClassClusterPoolIdentifier is the NodeClass implementation of the +// ClusterNodePoolIdentifier interface and filters Nomad nodes by their +// Node.NodeClass parameter. +type nodeClassClusterPoolIdentifier struct { + id string +} + +// NewNodeClassPoolIdentifier returns a new nodeClassClusterPoolIdentifier +// implementation of the ClusterNodePoolIdentifier interface. +func NewNodeClassPoolIdentifier(id string) ClusterNodePoolIdentifier { + return &nodeClassClusterPoolIdentifier{ + id: id, + } +} + +// NodeIsPoolMember satisfies the NodeIsPoolMember function on the +// ClusterNodePoolIdentifier interface. +func (n nodeClassClusterPoolIdentifier) IsPoolMember(node *api.NodeListStub) bool { + return node.NodeClass != "" && node.NodeClass == n.id || + node.NodeClass == "" && n.id == defaultClassIdentifier +} + +// Key satisfies the Key function on the ClusterNodePoolIdentifier interface. +func (n nodeClassClusterPoolIdentifier) Key() string { return "node_class" } + +// Value satisfies the Value function on the ClusterNodePoolIdentifier +// interface. 
+func (n nodeClassClusterPoolIdentifier) Value() string { return n.id } diff --git a/sdk/helper/scaleutils/nodepool/class_test.go b/sdk/helper/scaleutils/nodepool/class_test.go new file mode 100644 index 00000000..5890093a --- /dev/null +++ b/sdk/helper/scaleutils/nodepool/class_test.go @@ -0,0 +1,54 @@ +package nodepool + +import ( + "testing" + + "github.com/hashicorp/nomad/api" + "github.com/stretchr/testify/assert" +) + +func TestNewNodeClassPoolIdentifier(t *testing.T) { + poolID := NewNodeClassPoolIdentifier("high-memory-ridiculous-price") + assert.Equal(t, "node_class", poolID.Key()) + assert.Equal(t, "high-memory-ridiculous-price", poolID.Value()) +} + +func TestNodeClassClusterPoolIdentifier_NodeIsPoolMember(t *testing.T) { + testCases := []struct { + inputPI ClusterNodePoolIdentifier + inputNode *api.NodeListStub + expectedOutput bool + name string + }{ + { + inputPI: NewNodeClassPoolIdentifier("foo"), + inputNode: &api.NodeListStub{NodeClass: ""}, + expectedOutput: false, + name: "non-matched empty class", + }, + { + inputPI: NewNodeClassPoolIdentifier("foo"), + inputNode: &api.NodeListStub{NodeClass: "bar"}, + expectedOutput: false, + name: "non-matched non-empty class", + }, + { + inputPI: NewNodeClassPoolIdentifier("autoscaler-default-pool"), + inputNode: &api.NodeListStub{NodeClass: ""}, + expectedOutput: true, + name: "matched default class", + }, + { + inputPI: NewNodeClassPoolIdentifier("foo"), + inputNode: &api.NodeListStub{NodeClass: "foo"}, + expectedOutput: true, + name: "matched class", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expectedOutput, tc.inputPI.IsPoolMember(tc.inputNode), tc.name) + }) + } +} diff --git a/sdk/helper/scaleutils/nodepool/nodepool.go b/sdk/helper/scaleutils/nodepool/nodepool.go new file mode 100644 index 00000000..847a3d1b --- /dev/null +++ b/sdk/helper/scaleutils/nodepool/nodepool.go @@ -0,0 +1,20 @@ +package nodepool + +import "github.com/hashicorp/nomad/api" + +// ClusterNodePoolIdentifier is the interface that defines how nodes are +// classed into pools of resources. +type ClusterNodePoolIdentifier interface { + + // IsPoolMember identifies whether the node is a member of the node pool. + // It is the responsibility of the implementation to read the node info if + // the required information is not within the stub struct. + IsPoolMember(*api.NodeListStub) bool + + // Key returns the string representation of the pool identifier. + Key() string + + // Value returns the pool identifier value that nodes are being filtered + // by. + Value() string +} From a4de66e9b7794c4a697bbf1dafb44d55afee7904 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Mon, 22 Feb 2021 09:47:18 +0100 Subject: [PATCH 3/9] scaleutils: add multierror format handling. --- sdk/helper/scaleutils/error.go | 28 ++++++++++++ sdk/helper/scaleutils/error_test.go | 67 +++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 sdk/helper/scaleutils/error.go create mode 100644 sdk/helper/scaleutils/error_test.go diff --git a/sdk/helper/scaleutils/error.go b/sdk/helper/scaleutils/error.go new file mode 100644 index 00000000..7dcaa42a --- /dev/null +++ b/sdk/helper/scaleutils/error.go @@ -0,0 +1,28 @@ +package scaleutils + +import ( + "strings" + + multierror "github.com/hashicorp/go-multierror" +) + +// multiErrorFunc is a helper to convert the standard multierror output into +// something a little more friendly to consoles. 
This is currently only used by +// the node filter, but could be more useful elsewhere in the future. +func multiErrorFunc(err []error) string { + points := make([]string, len(err)) + for i, err := range err { + points[i] = err.Error() + } + return strings.Join(points, ", ") +} + +// formattedMultiError wraps any non-nil multierrors with the multiErrorFunc. +// It is safe to call in cases where the err may or may not be nil and will +// overwrite the existing formatter. +func formattedMultiError(err *multierror.Error) error { + if err != nil { + err.ErrorFormat = multiErrorFunc + } + return err.ErrorOrNil() +} diff --git a/sdk/helper/scaleutils/error_test.go b/sdk/helper/scaleutils/error_test.go new file mode 100644 index 00000000..e94643a3 --- /dev/null +++ b/sdk/helper/scaleutils/error_test.go @@ -0,0 +1,67 @@ +package scaleutils + +import ( + "errors" + "testing" + + multierror "github.com/hashicorp/go-multierror" + "github.com/stretchr/testify/assert" +) + +func Test_multiErrorFunc(t *testing.T) { + testCases := []struct { + inputErr []error + expectedOutput string + name string + }{ + { + inputErr: []error{ + errors.New("hello"), + errors.New("is it me you're looking for"), + errors.New("cause I wonder where you are"), + }, + expectedOutput: "hello, is it me you're looking for, cause I wonder where you are", + name: "multiple input errors", + }, + { + inputErr: []error{errors.New("this is not an exciting test message")}, + expectedOutput: "this is not an exciting test message", + name: "single input error", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expectedOutput, multiErrorFunc(tc.inputErr), tc.name) + }) + } +} + +func Test_formattedMultiError(t *testing.T) { + testCases := []struct { + inputErr *multierror.Error + name string + }{ + { + inputErr: nil, + name: "nil input error", + }, + { + inputErr: &multierror.Error{ + Errors: []error{errors.New("some ambiguous error")}, + }, + name: "non-nil input error", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + output := formattedMultiError(tc.inputErr) + if tc.inputErr != nil { + assert.NotNil(t, output, tc.name) + } else { + assert.Nil(t, output, tc.name) + } + }) + } +} From 470db74561f15dc5a1417ae92475f838cd90ed78 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Mon, 22 Feb 2021 09:48:31 +0100 Subject: [PATCH 4/9] scaleutils: add refactored scaleutils for cluster scaling. 
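ClusterScaleUtils wraps the common pre/post scale-in work (node
identification, draining, optional node purge) plus a pool readiness check,
so target plugins only need to supply a ClusterNodeIDLookupFunc. A rough
sketch of how a plugin might wire this up; the attribute key and config
contents are illustrative, and the config map is expected to carry the
node_class and drain options:

package sketch

import (
	"context"

	hclog "github.com/hashicorp/go-hclog"
	"github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils"
	"github.com/hashicorp/nomad/api"
)

// scaleInSketch identifies and drains nodes, then hands their remote IDs
// back so the caller can terminate them in the remote provider.
func scaleInSketch(ctx context.Context, config map[string]string, num int) error {
	utils, err := scaleutils.NewClusterScaleUtils(api.DefaultConfig(), hclog.Default())
	if err != nil {
		return err
	}

	// Each target plugin supplies the translation from a Nomad node to the
	// remote resource ID its provider understands.
	utils.ClusterNodeIDLookupFunc = func(n *api.Node) (string, error) {
		return n.Attributes["unique.platform.aws.instance-id"], nil
	}

	// Select pool members, drain them, and collect their Nomad/remote IDs.
	ids, err := utils.RunPreScaleInTasks(ctx, config, num)
	if err != nil {
		return err
	}

	// ... terminate ids[i].RemoteResourceID in the remote provider ...

	// Optionally purge the drained nodes from Nomad afterwards.
	return utils.RunPostScaleInTasks(ctx, config, ids)
}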
--- sdk/helper/scaleutils/cluster.go | 296 +++++++++++++ sdk/helper/scaleutils/cluster_test.go | 63 +++ sdk/helper/scaleutils/node_drain.go | 153 +++++++ sdk/helper/scaleutils/node_drain_test.go | 89 ++++ sdk/helper/scaleutils/node_identifier.go | 127 ++++++ sdk/helper/scaleutils/node_identifier_test.go | 397 ++++++++++++++++++ 6 files changed, 1125 insertions(+) create mode 100644 sdk/helper/scaleutils/cluster.go create mode 100644 sdk/helper/scaleutils/cluster_test.go create mode 100644 sdk/helper/scaleutils/node_drain.go create mode 100644 sdk/helper/scaleutils/node_drain_test.go create mode 100644 sdk/helper/scaleutils/node_identifier.go create mode 100644 sdk/helper/scaleutils/node_identifier_test.go diff --git a/sdk/helper/scaleutils/cluster.go b/sdk/helper/scaleutils/cluster.go new file mode 100644 index 00000000..ac7dd90c --- /dev/null +++ b/sdk/helper/scaleutils/cluster.go @@ -0,0 +1,296 @@ +package scaleutils + +import ( + "context" + "errors" + "fmt" + "os" + "strconv" + + hclog "github.com/hashicorp/go-hclog" + multierror "github.com/hashicorp/go-multierror" + "github.com/hashicorp/nomad-autoscaler/sdk" + "github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils/nodepool" + "github.com/hashicorp/nomad/api" +) + +// ClusterScaleUtils provides common functionality when performing horizontal +// cluster scaling evaluations and actions. +type ClusterScaleUtils struct { + log hclog.Logger + client *api.Client + + // curNodeID is the ID of the node that the Nomad Autoscaler is currently + // running on. + // + // TODO(jrasell) this should be removed once the cluster targets and core + // autoscaler components are updated to handle reconciliation. + curNodeID string + + // ClusterNodeIDLookupFunc is the callback function used to translate a + // Nomad nodes ID to the remote resource ID used by the target platform. + ClusterNodeIDLookupFunc ClusterNodeIDLookupFunc +} + +// NewClusterScaleUtils instantiates a new ClusterScaleUtils object for use. +func NewClusterScaleUtils(cfg *api.Config, log hclog.Logger) (*ClusterScaleUtils, error) { + + client, err := api.NewClient(cfg) + if err != nil { + return nil, fmt.Errorf("failed to instantiate Nomad client: %v", err) + } + + // Identifying the node is best-effort and should not result in a terminal + // error when setting up the utils. + id, err := autoscalerNodeID(client) + if err != nil { + log.Error("failed to identify Nomad Autoscaler nodeID", "error", err) + } + + return &ClusterScaleUtils{ + log: log, + client: client, + curNodeID: id, + }, nil +} + +// RunPreScaleInTasks triggers all the tasks, including node identification and +// draining, required before terminating the nodes in the remote provider. +func (c *ClusterScaleUtils) RunPreScaleInTasks(ctx context.Context, cfg map[string]string, num int) ([]NodeResourceID, error) { + + // Check that the ClusterNodeIDLookupFunc has been set, otherwise we cannot + // attempt to identify nodes and their remote resource IDs. + if c.ClusterNodeIDLookupFunc == nil { + return nil, errors.New("required ClusterNodeIDLookupFunc not set") + } + + nodes, err := c.identifyScaleInNodes(cfg, num) + if err != nil { + return nil, err + } + + // Technically we do not need this information until after the nodes have + // been drained. However, this doesn't change cluster state and so do this + // first to make sure there are no issues in translating. + nodeResourceIDs, err := c.identifyScaleInRemoteIDs(nodes) + if err != nil { + return nil, err + } + + // Drain the nodes. 
+ // TODO(jrasell) we should try some reconciliation here, where we identify + // failed nodes and continue with nodes that drained successfully. + if err := c.drainNodes(ctx, cfg, nodeResourceIDs); err != nil { + return nil, err + } + c.log.Info("pre scale-in tasks now complete") + + return nodeResourceIDs, nil +} + +func (c *ClusterScaleUtils) identifyScaleInNodes(cfg map[string]string, num int) ([]*api.NodeListStub, error) { + + // The Nomad Autoscaler can only handle node class identifiers currently + // and therefore we just set that up. In the future if we wish to expand on + // this, it will need live within scaleutils otherwise it would be tied to + // an external plugin and will utilise filtering of the config keys. + poolID, err := classClusterPoolIdentifier(cfg) + if err != nil { + return nil, err + } + + // Pull a current list of Nomad nodes from the API. + nodes, _, err := c.client.Nodes().List(nil) + if err != nil { + return nil, fmt.Errorf("failed to list Nomad nodes from API: %v", err) + } + + // Filter our nodes to select only those within our identified pool. + filteredNodes, err := FilterNodes(nodes, poolID.IsPoolMember) + if err != nil { + return nil, err + } + + // Filter out the Nomad node ID where this autoscaler instance is running. + filteredNodes = filterOutNodeID(filteredNodes, c.curNodeID) + + // Ensure we have not filtered out all the available nodes. + if len(filteredNodes) == 0 { + return nil, fmt.Errorf("no nodes unfiltered for %s with value %s", poolID.Key(), poolID.Value()) + } + + // If the caller has requested more nodes than we have available once + // filtered, adjust the value. This shouldn't cause the whole scaling + // action to fail, but we should warn. + if num > len(filteredNodes) { + c.log.Warn("can only identify portion of requested nodes for removal", + "requested", num, "available", len(filteredNodes)) + num = len(filteredNodes) + } + + var out []*api.NodeListStub + + // We currently only support a single ClusterScaleInNodeIDStrategy. If we + // expand this in the future, this will likely be pulled from the config + // map. + strategy := NewestCreateIndexClusterScaleInNodeIDStrategy + + // Depending on which strategy has been chosen, sort the node listing. + switch strategy { + case NewestCreateIndexClusterScaleInNodeIDStrategy: + default: + return nil, fmt.Errorf("unsupported scale in node identification strategy: %s", strategy) + } + + // Iterate through our filtered and sorted list of nodes, selecting the + // required number of nodes to scale in. + for i := 0; i <= num-1; i++ { + c.log.Debug("identified Nomad node for removal", "node_id", filteredNodes[i].ID) + out = append(out, filteredNodes[i]) + } + + return out, nil +} + +func (c *ClusterScaleUtils) identifyScaleInRemoteIDs(nodes []*api.NodeListStub) ([]NodeResourceID, error) { + + var ( + out []NodeResourceID + mErr *multierror.Error + ) + + for _, node := range nodes { + + // Read the full node object from the API which will contain the full + // information required to identify the remote provider ID. If we get a + // single error here, its likely we won't be able to perform any of the + // API calls, therefore just exit rather than collect all the errors. + nodeInfo, _, err := c.client.Nodes().Info(node.ID, nil) + if err != nil { + return nil, err + } + + // Use the identification function to attempt to pull the remote + // provider ID information from the Node info. 
+ id, err := c.ClusterNodeIDLookupFunc(nodeInfo) + if err != nil { + mErr = multierror.Append(mErr, err) + continue + } + + // Add a nice log message for the operators so they can see the node + // that has been identified if they wish. + c.log.Debug("identified remote provider ID for node", "node_id", nodeInfo.ID, "remote_id", id) + out = append(out, NodeResourceID{NomadNodeID: node.ID, RemoteResourceID: id}) + } + + if mErr != nil { + return nil, formattedMultiError(mErr) + } + return out, nil +} + +// RunPostScaleInTasks triggers any tasks which should occur after the nodes +// have been terminated within the remote provider. +// +// The context is currently ignored on purpose, pending investigation into +// plugging this into the Nomad API query meta. +func (c *ClusterScaleUtils) RunPostScaleInTasks(_ context.Context, cfg map[string]string, ids []NodeResourceID) error { + + // Attempt to read of the node purge config parameter. If it has been set + // then check its value, otherwise the default stance is that node purging + // is disabled. + if val, ok := cfg[sdk.TargetConfigKeyNodePurge]; ok { + + // Parse the string as a bool. If we get an error return this as the + // operator has attempted to configure this value, but it's not worth + // breaking the whole pipeline for. Therefore log the error and return + // as Nomad will eventually perform this work. + boolVal, err := strconv.ParseBool(val) + if err != nil { + c.log.Error("failed to parse node_purge config param", "error", err) + return nil + } + + // If the operator has disabled node purging, exit. + if !boolVal { + return nil + } + } else { + return nil + } + + // Use a multierror to collect errors from any and all node purge calls + // that fail. + var mErr *multierror.Error + + // Iterate the node list and perform a purge on each node. In the event of + // an error, add this to the list. Otherwise log useful information. + for _, node := range ids { + + resp, _, err := c.client.Nodes().Purge(node.NomadNodeID, nil) + if err != nil { + mErr = multierror.Append(mErr, err) + } else { + c.log.Info("successfully purged Nomad node", "node_id", node.NomadNodeID, "nomad_evals", resp.EvalIDs) + } + } + + return formattedMultiError(mErr) +} + +// IsPoolReady provides a method for understanding whether the node pool is in +// a state that allows it to be safely scaled. This should be used by target +// plugins when providing their status response. A non-nil error indicates +// there was a problem performing the check. +func (c *ClusterScaleUtils) IsPoolReady(cfg map[string]string) (bool, error) { + + // The Nomad Autoscaler can only handle node class identifiers currently + // and therefore we just set that up. In the future if we wish to expand on + // this, it will need live within scaleutils otherwise it would be tied to + // an external plugin and will utilise filtering of the config keys. + poolID, err := classClusterPoolIdentifier(cfg) + if err != nil { + return false, err + } + + nodes, _, err := c.client.Nodes().List(nil) + if err != nil { + return false, fmt.Errorf("failed to list Nomad nodes: %v", err) + } + + if _, err := FilterNodes(nodes, poolID.IsPoolMember); err != nil { + c.log.Warn("node pool status readiness check failed", "error", err) + return false, nil + } + return true, nil +} + +// classClusterPoolIdentifier generates a new +// nodepool.ClusterNodePoolIdentifier based on the passed operator config. In +// the event the config key is not found, an error will be returned. 
+func classClusterPoolIdentifier(cfg map[string]string) (nodepool.ClusterNodePoolIdentifier, error) { + + class, ok := cfg[sdk.TargetConfigKeyClass] + if !ok || class == "" { + return nil, fmt.Errorf("required config param %q not set", sdk.TargetConfigKeyClass) + } + + return nodepool.NewNodeClassPoolIdentifier(class), nil +} + +// autoscalerNodeID identifies the NodeID which the Nomad Autoscaler is running +// on so that it can be protected from scaling in actions. +func autoscalerNodeID(client *api.Client) (string, error) { + + envVar := os.Getenv("NOMAD_ALLOC_ID") + if envVar == "" { + return "", nil + } + + allocInfo, _, err := client.Allocations().Info(envVar, nil) + if err != nil { + return "", fmt.Errorf("failed to call Nomad allocation info: %v", err) + } + return allocInfo.NodeID, nil +} diff --git a/sdk/helper/scaleutils/cluster_test.go b/sdk/helper/scaleutils/cluster_test.go new file mode 100644 index 00000000..609f9f53 --- /dev/null +++ b/sdk/helper/scaleutils/cluster_test.go @@ -0,0 +1,63 @@ +package scaleutils + +import ( + "errors" + "testing" + + "github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils/nodepool" + "github.com/stretchr/testify/assert" +) + +func Test_classClusterPoolIdentifier(t *testing.T) { + testCases := []struct { + inputCfg map[string]string + expectedOutputPI nodepool.ClusterNodePoolIdentifier + expectedOutputError error + name string + }{ + { + inputCfg: map[string]string{}, + expectedOutputPI: nil, + expectedOutputError: errors.New(`required config param "node_class" not set`), + name: "node_class cfg param not set", + }, + { + inputCfg: map[string]string{"node_class": "my_pet_server"}, + expectedOutputPI: nodepool.NewNodeClassPoolIdentifier("my_pet_server"), + expectedOutputError: nil, + name: "node_class cfg param set", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + actualPI, actualError := classClusterPoolIdentifier(tc.inputCfg) + assert.Equal(t, tc.expectedOutputPI, actualPI, tc.name) + assert.Equal(t, tc.expectedOutputError, actualError, tc.name) + }) + } +} + +func Test_autoscalerNodeID(t *testing.T) { + testCases := []struct { + envVar bool + expectedOutputString string + expectedOutputError error + name string + }{ + { + envVar: false, + expectedOutputString: "", + expectedOutputError: nil, + name: "no alloc ID found", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + actualString, actualError := autoscalerNodeID(nil) + assert.Equal(t, tc.expectedOutputString, actualString, tc.name) + assert.Equal(t, tc.expectedOutputError, actualError, tc.name) + }) + } +} diff --git a/sdk/helper/scaleutils/node_drain.go b/sdk/helper/scaleutils/node_drain.go new file mode 100644 index 00000000..9049a6a8 --- /dev/null +++ b/sdk/helper/scaleutils/node_drain.go @@ -0,0 +1,153 @@ +package scaleutils + +import ( + "context" + "fmt" + "strconv" + "sync" + "time" + + multierror "github.com/hashicorp/go-multierror" + "github.com/hashicorp/nomad-autoscaler/sdk" + "github.com/hashicorp/nomad/api" +) + +const ( + defaultNodeDrainDeadline = 15 * time.Minute + defaultNodeIgnoreSystemJobs = false +) + +// drainNodes iterates the provided nodeID list and performs a drain on each +// one. Each node drain is monitored and events logged until the context is +// closed or all drains reach a terminal state. 
+func (c *ClusterScaleUtils) drainNodes(ctx context.Context, cfg map[string]string, nodes []NodeResourceID) error { + + drainSpec, err := drainSpec(cfg) + if err != nil { + return fmt.Errorf("failed to generate node drainspec: %v", err) + } + + // Define a WaitGroup. This allows us to trigger each node drain in a go + // routine and then wait for them all to complete before exiting. + var wg sync.WaitGroup + wg.Add(len(nodes)) + + // Define an error to collect errors from each drain routine and a mutex to + // provide thread safety when calling multierror.Append. + var ( + result *multierror.Error + resultLock sync.Mutex + ) + + for _, node := range nodes { + + // Assign our node to a local variable as we are launching a go routine + // from within a for loop. + n := node + + // Launch a routine to drain the node. Append any error returned to the + // error. + go func() { + + // Ensure we call done on the WaitGroup to decrement the count remaining. + defer wg.Done() + + if err := c.drainNode(ctx, n.NomadNodeID, drainSpec); err != nil { + resultLock.Lock() + result = multierror.Append(result, err) + resultLock.Unlock() + } + c.log.Info("node drain complete", "node_id", n.NomadNodeID) + }() + } + + wg.Wait() + + return formattedMultiError(result) +} + +// drainSpec generates the Nomad API node drain specification based on the user +// configuration. Any options which have attempted to be configured, but are +// malformed are considered a terminal error. This allows operators to correct +// any mistakes and ensures we do not assume values. +func drainSpec(cfg map[string]string) (*api.DrainSpec, error) { + + // The configuration options that define the drain spec are optional and + // have sensible defaults. + deadline := defaultNodeDrainDeadline + ignoreSystemJobs := defaultNodeIgnoreSystemJobs + + // Use a multierror so we can report all errors in a single call. This + // allows for faster resolution and a nicer UX. + var mErr *multierror.Error + + // Attempt to read the operator defined deadline from the config. + if drainString, ok := cfg[sdk.TargetConfigKeyDrainDeadline]; ok { + d, err := time.ParseDuration(drainString) + if err != nil { + mErr = multierror.Append(mErr, err) + } else { + deadline = d + } + } + + // Attempt to read the operator defined ignore system jobs from the config. + if ignoreSystemJobsString, ok := cfg[sdk.TargetConfigKeyIgnoreSystemJobs]; ok { + isj, err := strconv.ParseBool(ignoreSystemJobsString) + if err != nil { + mErr = multierror.Append(mErr, err) + } else { + ignoreSystemJobs = isj + } + } + + // Check whether we have found errors, and return these in a nicely + // formatted way. + if mErr != nil { + return nil, formattedMultiError(mErr) + } + + return &api.DrainSpec{ + Deadline: deadline, + IgnoreSystemJobs: ignoreSystemJobs, + }, nil +} + +// drainNode triggers a drain on the supplied ID using the DrainSpec. The +// function handles monitoring the drain and reporting its terminal status to +// the caller. +func (c *ClusterScaleUtils) drainNode(ctx context.Context, nodeID string, spec *api.DrainSpec) error { + + c.log.Info("triggering drain on node", "node_id", nodeID, "deadline", spec.Deadline) + + // Update the drain on the node. + resp, err := c.client.Nodes().UpdateDrain(nodeID, spec, false, nil) + if err != nil { + return fmt.Errorf("failed to drain node: %v", err) + } + + // Monitor the drain so we output the log messages. An error here indicates + // the drain failed to complete successfully. 
+ if err := c.monitorNodeDrain(ctx, nodeID, resp.LastIndex, spec.IgnoreSystemJobs); err != nil { + return fmt.Errorf("context done while monitoring node drain: %v", err) + } + return nil +} + +// monitorNodeDrain follows the drain of a node, logging the messages we +// receive to their appropriate level. +func (c *ClusterScaleUtils) monitorNodeDrain(ctx context.Context, nodeID string, index uint64, ignoreSys bool) error { + for msg := range c.client.Nodes().MonitorDrain(ctx, nodeID, index, ignoreSys) { + switch msg.Level { + case api.MonitorMsgLevelInfo: + c.log.Info("received node drain message", "node_id", nodeID, "msg", msg.Message) + case api.MonitorMsgLevelWarn: + c.log.Warn("received node drain message", "node_id", nodeID, "msg", msg.Message) + case api.MonitorMsgLevelError: + return fmt.Errorf("received error while draining node: %s", msg.Message) + default: + c.log.Debug("received node drain message", "node_id", nodeID, "msg", msg.Message) + } + } + return ctx.Err() +} diff --git a/sdk/helper/scaleutils/node_drain_test.go b/sdk/helper/scaleutils/node_drain_test.go new file mode 100644 index 00000000..881ac7d1 --- /dev/null +++ b/sdk/helper/scaleutils/node_drain_test.go @@ -0,0 +1,89 @@ +package scaleutils + +import ( + "errors" + "testing" + "time" + + multierror "github.com/hashicorp/go-multierror" + "github.com/hashicorp/nomad/api" + "github.com/stretchr/testify/assert" +) + +func TestNewClusterScaleUtils_drainSpec(t *testing.T) { + testCases := []struct { + inputCfg map[string]string + expectedOutputSpec *api.DrainSpec + expectedOutputError *multierror.Error + name string + }{ + { + inputCfg: map[string]string{}, + expectedOutputSpec: &api.DrainSpec{ + Deadline: 15 * time.Minute, + IgnoreSystemJobs: false, + }, + expectedOutputError: nil, + name: "no user parameters set", + }, + { + inputCfg: map[string]string{ + "node_drain_deadline": "10m", + "node_drain_ignore_system_jobs": "true", + }, + expectedOutputSpec: &api.DrainSpec{ + Deadline: 10 * time.Minute, + IgnoreSystemJobs: true, + }, + expectedOutputError: nil, + name: "all parameters set in config", + }, + { + inputCfg: map[string]string{ + "node_drain_deadline": "10mm", + }, + expectedOutputSpec: nil, + expectedOutputError: &multierror.Error{ + Errors: []error{errors.New(`time: unknown unit "mm" in duration "10mm"`)}, + ErrorFormat: multiErrorFunc, + }, + name: "config deadline parse error", + }, + { + inputCfg: map[string]string{ + "node_drain_ignore_system_jobs": "maybe", + }, + expectedOutputSpec: nil, + expectedOutputError: &multierror.Error{ + Errors: []error{errors.New(`strconv.ParseBool: parsing "maybe": invalid syntax`)}, + ErrorFormat: multiErrorFunc, + }, + name: "config ignore system jobs parse error", + }, + { + inputCfg: map[string]string{ + "node_drain_deadline": "10mm", + "node_drain_ignore_system_jobs": "maybe", + }, + expectedOutputSpec: nil, + expectedOutputError: &multierror.Error{ + Errors: []error{ + errors.New(`time: unknown unit "mm" in duration "10mm"`), + errors.New(`strconv.ParseBool: parsing "maybe": invalid syntax`), + }, + ErrorFormat: multiErrorFunc, + }, + name: "multi config params parse error", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + actualDrainSpec, actualError := drainSpec(tc.inputCfg) + assert.Equal(t, tc.expectedOutputSpec, actualDrainSpec, tc.name) + if tc.expectedOutputError != nil { + assert.EqualError(t, tc.expectedOutputError, actualError.Error(), tc.name) + } + }) + } +} diff --git a/sdk/helper/scaleutils/node_identifier.go 
b/sdk/helper/scaleutils/node_identifier.go new file mode 100644 index 00000000..d801c3a8 --- /dev/null +++ b/sdk/helper/scaleutils/node_identifier.go @@ -0,0 +1,127 @@ +package scaleutils + +import ( + "fmt" + + multierror "github.com/hashicorp/go-multierror" + "github.com/hashicorp/nomad/api" +) + +// NodeResourceID maps a Nomad node ID to a remote resource ID. +type NodeResourceID struct { + + // NomadNodeID is the ID as seen within the Nomad api.Node object. + NomadNodeID string + + // RemoteResourceID is the remote resource ID of the server/instance such + // as the AWS EC2 instance ID. + RemoteResourceID string +} + +// ClusterNodeIDLookupFunc is the callback function signature used to identify +// a nodes remote ID from the api.Node object. This allows the function to be +// defined by external plugins. +type ClusterNodeIDLookupFunc func(*api.Node) (string, error) + +// ClusterScaleInNodeIDStrategy identifies the method in which nodes are +// selected from the node pool for removal during scale in actions. +type ClusterScaleInNodeIDStrategy string + +const ( + // NewestCreateIndexClusterScaleInNodeIDStrategy uses the Nomad + // Nodes().List() output in the order it is presented. This means we do not + // need additional sorting and thus it is fastest. In an environment that + // uses bin-packing this may also be preferable as nodes with older create + // indexes are expected to be most packed. + NewestCreateIndexClusterScaleInNodeIDStrategy ClusterScaleInNodeIDStrategy = "newest_create_index" +) + +// FilterNodes returns a filtered list of nodes which are active in the cluster +// and where they pass the match performed by the idFn. In the event that nodes +// are found within the pool in an unstable state, and thus indicating there is +// change occurring; an error will be returned. +func FilterNodes(n []*api.NodeListStub, idFn func(*api.NodeListStub) bool) ([]*api.NodeListStub, error) { + + // Create our output list object. + var out []*api.NodeListStub + + // Track upto 10 nodes which are deemed to cause an error to the + // autoscaler. It is possible to make an argument that the first error + // should be returned in order to improve speed. In a situation where two + // nodes are in an undesired state, it would require the operator to + // perform the same tidy and restart of the autoscaler loop twice, which + // seems worse than having some extra time within this function. + var err *multierror.Error + + for _, node := range n { + + // Track till 10 nodes in an unstable state so that we have some + // efficiency, whilst still responding with useful information. It also + // avoids error logs messages which are extremely long and potentially + // unsuitable for log aggregators. + if err != nil && err.Len() >= 10 { + return nil, formattedMultiError(err) + } + + // Filter out all nodes which do not match the target first. + if !idFn(node) { + continue + } + + // We should class an initializing node as an error, this is caused by + // node registration and could be sourced from scaling out. + if node.Status == api.NodeStatusInit { + err = multierror.Append(err, fmt.Errorf("node %s is initializing", node.ID)) + continue + } + + // Assuming a cluster has most, if not all nodes in a correct state for + // scheduling then this is the fastest route. Only append in the event + // we have not encountered any error to save some cycles. 
+ if node.SchedulingEligibility == api.NodeSchedulingEligible { + if err == nil { + out = append(out, node) + } + continue + } + + // This lifecycle phase relates to nodes that are being drained. + if node.Drain && node.Status == api.NodeStatusReady { + err = multierror.Append(err, fmt.Errorf("node %s is draining", node.ID)) + continue + } + + // This lifecycle phase relates to nodes that typically have had their + // drain completed, and now await removal from the cluster. + if !node.Drain && node.Status == api.NodeStatusReady { + err = multierror.Append(err, fmt.Errorf("node %s is ineligible", node.ID)) + } + } + + // Be choosy with our returns to avoid sending a large list to the caller + // that will just get ignored. + if err != nil { + return nil, formattedMultiError(err) + } + return out, nil +} + +// filterOutNodeID removes the node as specified by the ID function parameter +// from the list of nodes if it is found. +func filterOutNodeID(n []*api.NodeListStub, id string) []*api.NodeListStub { + + // Protect against instances where the ID is empty. + if id == "" { + return n + } + + // Iterate the node list, removing and returning the modified list if an ID + // match is found. + for i, node := range n { + if node.ID == id { + n[len(n)-1], n[i] = n[i], n[len(n)-1] + return n[:len(n)-1] + } + } + return n +} diff --git a/sdk/helper/scaleutils/node_identifier_test.go b/sdk/helper/scaleutils/node_identifier_test.go new file mode 100644 index 00000000..25932051 --- /dev/null +++ b/sdk/helper/scaleutils/node_identifier_test.go @@ -0,0 +1,397 @@ +package scaleutils + +import ( + "errors" + "testing" + + multierror "github.com/hashicorp/go-multierror" + "github.com/hashicorp/nomad/api" + "github.com/stretchr/testify/assert" +) + +func Test_FilterNodes(t *testing.T) { + testCases := []struct { + inputNodeList []*api.NodeListStub + inputIDCfg map[string]string + expectedOutputNodes []*api.NodeListStub + expectedOutputError error + name string + }{ + { + inputNodeList: []*api.NodeListStub{ + { + ID: "super-test-class-1", + NodeClass: "super-test-class", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + { + ID: "super-test-class-3", + NodeClass: "super-test-class", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + { + ID: "worst-test-class-1", + NodeClass: "worst-test-class", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + { + ID: "worst-test-class-2", + NodeClass: "worst-test-class", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + { + ID: "no-test-class-2", + NodeClass: "", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + }, + inputIDCfg: map[string]string{"node_class": "super-test-class"}, + expectedOutputNodes: []*api.NodeListStub{ + { + ID: "super-test-class-1", + NodeClass: "super-test-class", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + { + ID: "super-test-class-3", + NodeClass: "super-test-class", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + }, + expectedOutputError: nil, + name: "filter of multiclass input for named class without error", + }, + { + inputNodeList: []*api.NodeListStub{ + { + ID: "super-test-class-1", + NodeClass: "super-test-class", + Drain: false, + 
SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + { + ID: "super-test-class-2", + NodeClass: "super-test-class", + Drain: false, + SchedulingEligibility: api.NodeSchedulingIneligible, + Status: api.NodeStatusDown, + }, + { + ID: "super-test-class-3", + NodeClass: "super-test-class", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + { + ID: "worst-test-class-1", + NodeClass: "worst-test-class", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + { + ID: "worst-test-class-2", + NodeClass: "worst-test-class", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + { + ID: "no-test-class-1", + NodeClass: "", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + }, + inputIDCfg: map[string]string{"node_class": "autoscaler-default-pool"}, + expectedOutputNodes: []*api.NodeListStub{ + { + ID: "no-test-class-1", + NodeClass: "", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + }, + expectedOutputError: nil, + name: "filter of multiclass input for default class without error", + }, + { + inputNodeList: []*api.NodeListStub{ + { + ID: "node1", + NodeClass: "lionel", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + { + ID: "node2", + NodeClass: "lionel", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusInit, + }, + }, + inputIDCfg: map[string]string{"node_class": "lionel"}, + expectedOutputNodes: nil, + expectedOutputError: &multierror.Error{ + Errors: []error{errors.New("node node2 is initializing")}, + ErrorFormat: multiErrorFunc, + }, + name: "filter of single class input for named class with initializing error", + }, + { + inputNodeList: []*api.NodeListStub{ + { + ID: "node1", + NodeClass: "lionel", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + { + ID: "node2", + NodeClass: "lionel", + Drain: true, + SchedulingEligibility: api.NodeSchedulingIneligible, + Status: api.NodeStatusReady, + }, + }, + inputIDCfg: map[string]string{"node_class": "lionel"}, + expectedOutputNodes: nil, + expectedOutputError: &multierror.Error{ + Errors: []error{errors.New("node node2 is draining")}, + ErrorFormat: multiErrorFunc, + }, + name: "filter of single class input for named class with draining error", + }, + { + inputNodeList: []*api.NodeListStub{ + { + ID: "node1", + NodeClass: "lionel", + Drain: false, + SchedulingEligibility: api.NodeSchedulingEligible, + Status: api.NodeStatusReady, + }, + { + ID: "node2", + NodeClass: "lionel", + Drain: false, + SchedulingEligibility: api.NodeSchedulingIneligible, + Status: api.NodeStatusReady, + }, + }, + inputIDCfg: map[string]string{"node_class": "lionel"}, + expectedOutputNodes: nil, + expectedOutputError: &multierror.Error{ + Errors: []error{errors.New("node node2 is ineligible")}, + ErrorFormat: multiErrorFunc, + }, + name: "filter of single class input for named class with ineligible error", + }, + { + inputNodeList: []*api.NodeListStub{ + { + ID: "node1", + NodeClass: "lionel", + Drain: true, + SchedulingEligibility: api.NodeSchedulingIneligible, + Status: api.NodeStatusReady, + }, + { + ID: "node2", + NodeClass: "lionel", + Drain: true, + SchedulingEligibility: api.NodeSchedulingIneligible, + Status: 
api.NodeStatusReady, + }, + { + ID: "node3", + NodeClass: "lionel", + Drain: true, + SchedulingEligibility: api.NodeSchedulingIneligible, + Status: api.NodeStatusReady, + }, + { + ID: "node4", + NodeClass: "lionel", + Drain: true, + SchedulingEligibility: api.NodeSchedulingIneligible, + Status: api.NodeStatusReady, + }, + { + ID: "node5", + NodeClass: "lionel", + Drain: true, + SchedulingEligibility: api.NodeSchedulingIneligible, + Status: api.NodeStatusReady, + }, + { + ID: "node6", + NodeClass: "lionel", + Drain: true, + SchedulingEligibility: api.NodeSchedulingIneligible, + Status: api.NodeStatusReady, + }, + { + ID: "node7", + NodeClass: "lionel", + Drain: true, + SchedulingEligibility: api.NodeSchedulingIneligible, + Status: api.NodeStatusReady, + }, + { + ID: "node8", + NodeClass: "lionel", + Drain: true, + SchedulingEligibility: api.NodeSchedulingIneligible, + Status: api.NodeStatusReady, + }, + { + ID: "node9", + NodeClass: "lionel", + Drain: true, + SchedulingEligibility: api.NodeSchedulingIneligible, + Status: api.NodeStatusReady, + }, + { + ID: "node10", + NodeClass: "lionel", + Drain: true, + SchedulingEligibility: api.NodeSchedulingIneligible, + Status: api.NodeStatusReady, + }, + { + ID: "node11", + NodeClass: "lionel", + Drain: true, + SchedulingEligibility: api.NodeSchedulingIneligible, + Status: api.NodeStatusReady, + }, + }, + inputIDCfg: map[string]string{"node_class": "lionel"}, + expectedOutputNodes: nil, + expectedOutputError: &multierror.Error{ + Errors: []error{ + errors.New("node node1 is draining"), + errors.New("node node2 is draining"), + errors.New("node node3 is draining"), + errors.New("node node4 is draining"), + errors.New("node node5 is draining"), + errors.New("node node6 is draining"), + errors.New("node node7 is draining"), + errors.New("node node8 is draining"), + errors.New("node node9 is draining"), + errors.New("node node10 is draining"), + }, + ErrorFormat: multiErrorFunc, + }, + name: "limited error count return", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + + idFn, err := classClusterPoolIdentifier(tc.inputIDCfg) + assert.NotNil(t, idFn, tc.name) + assert.Nil(t, err, tc.name) + + actualNodes, actualError := FilterNodes(tc.inputNodeList, idFn.IsPoolMember) + assert.Equal(t, tc.expectedOutputNodes, actualNodes, tc.name) + + if tc.expectedOutputError != nil { + assert.EqualError(t, actualError, tc.expectedOutputError.Error(), tc.name) + } + }) + } +} + +func Test_filterOutNodeID(t *testing.T) { + testCases := []struct { + inputNodeList []*api.NodeListStub + inputID string + expectedOutput []*api.NodeListStub + name string + }{ + { + inputNodeList: []*api.NodeListStub{ + {ID: "foo1"}, + {ID: "foo2"}, + {ID: "foo3"}, + {ID: "foo4"}, + {ID: "foo5"}, + }, + inputID: "", + expectedOutput: []*api.NodeListStub{ + {ID: "foo1"}, + {ID: "foo2"}, + {ID: "foo3"}, + {ID: "foo4"}, + {ID: "foo5"}, + }, + name: "empty input ID", + }, + { + inputNodeList: []*api.NodeListStub{ + {ID: "foo1"}, + {ID: "foo2"}, + {ID: "foo3"}, + {ID: "foo4"}, + {ID: "foo5"}, + }, + inputID: "foo2", + expectedOutput: []*api.NodeListStub{ + {ID: "foo1"}, + {ID: "foo3"}, + {ID: "foo4"}, + {ID: "foo5"}, + }, + name: "input ID found", + }, + { + inputNodeList: []*api.NodeListStub{ + {ID: "foo1"}, + {ID: "foo2"}, + {ID: "foo3"}, + {ID: "foo4"}, + {ID: "foo5"}, + }, + inputID: "bar1", + expectedOutput: []*api.NodeListStub{ + {ID: "foo1"}, + {ID: "foo2"}, + {ID: "foo3"}, + {ID: "foo4"}, + {ID: "foo5"}, + }, + name: "input ID not found", + }, + } + + for _, 
tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + actualOutput := filterOutNodeID(tc.inputNodeList, tc.inputID) + assert.ElementsMatch(t, tc.expectedOutput, actualOutput, tc.name) + }) + } +} From 1d0437c7554681501c34c2bd1e314705b4c34ae3 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Mon, 22 Feb 2021 09:49:59 +0100 Subject: [PATCH 5/9] plugins/target/aws-asg: use refactored scaleutils. --- plugins/builtin/target/aws-asg/plugin/aws.go | 74 ++++---------- .../builtin/target/aws-asg/plugin/aws_test.go | 96 ++++--------------- .../builtin/target/aws-asg/plugin/plugin.go | 30 +++--- 3 files changed, 51 insertions(+), 149 deletions(-) diff --git a/plugins/builtin/target/aws-asg/plugin/aws.go b/plugins/builtin/target/aws-asg/plugin/aws.go index 275c5279..8f89bd3e 100644 --- a/plugins/builtin/target/aws-asg/plugin/aws.go +++ b/plugins/builtin/target/aws-asg/plugin/aws.go @@ -3,20 +3,19 @@ package plugin import ( "context" "fmt" - "strconv" "time" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/aws/external" "github.com/aws/aws-sdk-go-v2/service/autoscaling" "github.com/aws/aws-sdk-go-v2/service/ec2" - "github.com/hashicorp/nomad-autoscaler/sdk" - "github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils" + "github.com/hashicorp/nomad/api" ) const ( - defaultRetryInterval = 10 * time.Second - defaultRetryLimit = 15 + defaultRetryInterval = 10 * time.Second + defaultRetryLimit = 15 + nodeAttrAWSInstanceID = "unique.platform.aws.instance-id" ) // setupAWSClients takes the passed config mapping and instantiates the @@ -94,12 +93,7 @@ func (t *TargetPlugin) scaleOut(ctx context.Context, asg *autoscaling.AutoScalin func (t *TargetPlugin) scaleIn(ctx context.Context, asg *autoscaling.AutoScalingGroup, num int64, config map[string]string) error { - scaleReq, err := t.generateScaleReq(num, config) - if err != nil { - return fmt.Errorf("failed to generate scale in request: %v", err) - } - - ids, err := t.scaleInUtils.RunPreScaleInTasks(ctx, scaleReq) + ids, err := t.clusterUtils.RunPreScaleInTasks(ctx, config, int(num)) if err != nil { return fmt.Errorf("failed to perform pre-scale Nomad scale in tasks: %v", err) } @@ -109,7 +103,7 @@ func (t *TargetPlugin) scaleIn(ctx context.Context, asg *autoscaling.AutoScaling var instanceIDs []string for _, node := range ids { - instanceIDs = append(instanceIDs, node.RemoteID) + instanceIDs = append(instanceIDs, node.RemoteResourceID) } // Create the event writer and write that the drain event has been @@ -141,57 +135,13 @@ func (t *TargetPlugin) scaleIn(ctx context.Context, asg *autoscaling.AutoScaling eWriter.write(ctx, scalingEventTerminate) // Run any post scale in tasks that are desired. - if err := t.scaleInUtils.RunPostScaleInTasks(config, ids); err != nil { + if err := t.clusterUtils.RunPostScaleInTasks(ctx, config, ids); err != nil { return fmt.Errorf("failed to perform post-scale Nomad scale in tasks: %v", err) } return nil } -func (t *TargetPlugin) generateScaleReq(num int64, config map[string]string) (*scaleutils.ScaleInReq, error) { - - // Pull the class key from the config mapping. This is a required value and - // we cannot scale without this. - class, ok := config[sdk.TargetConfigKeyClass] - if !ok { - return nil, fmt.Errorf("required config param %q not found", sdk.TargetConfigKeyClass) - } - - // The drain_deadline is an optional parameter so define out default and - // then attempt to find an operator specified value. 
- drain := scaleutils.DefaultDrainDeadline - ignoreSystemJobs := scaleutils.DefaultIgnoreSystemJobs - - if drainString, ok := config[sdk.TargetConfigKeyDrainDeadline]; ok { - d, err := time.ParseDuration(drainString) - if err != nil { - return nil, fmt.Errorf("failed to parse %q as time duration", drainString) - } - drain = d - } - - if ignoreSystemJobsString, ok := config[sdk.TargetConfigKeyIgnoreSystemJobs]; ok { - isj, err := strconv.ParseBool(ignoreSystemJobsString) - if err != nil { - return nil, fmt.Errorf("failed to parse %q as boolean", ignoreSystemJobsString) - } - ignoreSystemJobs = isj - } - - return &scaleutils.ScaleInReq{ - Num: int(num), - DrainDeadline: drain, - IgnoreSystemJobs: ignoreSystemJobs, - - PoolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: class, - }, - RemoteProvider: scaleutils.RemoteProviderAWSInstanceID, - NodeIDStrategy: scaleutils.IDStrategyNewestCreateIndex, - }, nil -} - func (t *TargetPlugin) detachInstances(ctx context.Context, asgName *string, instanceIDs []string) error { asgInput := autoscaling.DetachInstancesInput{ @@ -363,3 +313,13 @@ func (t *TargetPlugin) ensureASGInstancesCount(ctx context.Context, desired int6 return retry(ctx, defaultRetryInterval, defaultRetryLimit, f) } + +// awsNodeIDMap is used to identify the AWS InstanceID of a Nomad node using +// the relevant attribute value. +func awsNodeIDMap(n *api.Node) (string, error) { + val, ok := n.Attributes[nodeAttrAWSInstanceID] + if !ok || val == "" { + return "", fmt.Errorf("attribute %q not found", nodeAttrAWSInstanceID) + } + return val, nil +} diff --git a/plugins/builtin/target/aws-asg/plugin/aws_test.go b/plugins/builtin/target/aws-asg/plugin/aws_test.go index 975caca0..dfabc966 100644 --- a/plugins/builtin/target/aws-asg/plugin/aws_test.go +++ b/plugins/builtin/target/aws-asg/plugin/aws_test.go @@ -3,104 +3,48 @@ package plugin import ( "errors" "testing" - "time" - "github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils" + "github.com/hashicorp/nomad/api" "github.com/stretchr/testify/assert" ) -func TestTargetPlugin_generateScaleReq(t *testing.T) { +func Test_awsNodeIDMap(t *testing.T) { testCases := []struct { - inputNum int64 - inputConfig map[string]string - expectedOutputReq *scaleutils.ScaleInReq + inputNode *api.Node + expectedOutputID string expectedOutputError error name string }{ { - inputNum: 2, - inputConfig: map[string]string{ - "node_class": "high-memory", - "node_drain_deadline": "5m", - }, - expectedOutputReq: &scaleutils.ScaleInReq{ - Num: 2, - DrainDeadline: 5 * time.Minute, - IgnoreSystemJobs: false, - PoolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: "high-memory", - }, - RemoteProvider: scaleutils.RemoteProviderAWSInstanceID, - NodeIDStrategy: scaleutils.IDStrategyNewestCreateIndex, + inputNode: &api.Node{ + Attributes: map[string]string{"unique.platform.aws.instance-id": "i-1234567890abcdef0"}, }, + expectedOutputID: "i-1234567890abcdef0", expectedOutputError: nil, - name: "valid request with drain_deadline in config", + name: "required attribute found", }, { - inputNum: 2, - inputConfig: map[string]string{ - "node_class": "high-memory", - "node_drain_ignore_system_jobs": "true", - }, - expectedOutputReq: &scaleutils.ScaleInReq{ - Num: 2, - IgnoreSystemJobs: true, - DrainDeadline: 15 * time.Minute, - PoolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: "high-memory", - }, - RemoteProvider: 
scaleutils.RemoteProviderAWSInstanceID, - NodeIDStrategy: scaleutils.IDStrategyNewestCreateIndex, + inputNode: &api.Node{ + Attributes: map[string]string{}, }, - expectedOutputError: nil, - name: "valid request with node_drain_ignore_system_jobs in config", + expectedOutputID: "", + expectedOutputError: errors.New(`attribute "unique.platform.aws.instance-id" not found`), + name: "required attribute not found", }, { - inputNum: 2, - inputConfig: map[string]string{}, - expectedOutputReq: nil, - expectedOutputError: errors.New("required config param \"node_class\" not found"), - name: "no class key found in config", - }, - { - inputNum: 2, - inputConfig: map[string]string{ - "node_class": "high-memory", - }, - expectedOutputReq: &scaleutils.ScaleInReq{ - Num: 2, - IgnoreSystemJobs: false, - DrainDeadline: 15 * time.Minute, - PoolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: "high-memory", - }, - RemoteProvider: scaleutils.RemoteProviderAWSInstanceID, - NodeIDStrategy: scaleutils.IDStrategyNewestCreateIndex, + inputNode: &api.Node{ + Attributes: map[string]string{"unique.platform.aws.instance-id": ""}, }, - expectedOutputError: nil, - name: "drain_deadline not specified within config", - }, - { - inputNum: 2, - inputConfig: map[string]string{ - "node_class": "high-memory", - "node_drain_deadline": "time to make a cuppa", - }, - expectedOutputReq: nil, - expectedOutputError: errors.New("failed to parse \"time to make a cuppa\" as time duration"), - name: "malformed drain_deadline config value", + expectedOutputID: "", + expectedOutputError: errors.New(`attribute "unique.platform.aws.instance-id" not found`), + name: "required attribute found but empty", }, } - tp := TargetPlugin{} - for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - actualReq, actualErr := tp.generateScaleReq(tc.inputNum, tc.inputConfig) - assert.Equal(t, tc.expectedOutputReq, actualReq, tc.name) + actualID, actualErr := awsNodeIDMap(tc.inputNode) + assert.Equal(t, tc.expectedOutputID, actualID, tc.name) assert.Equal(t, tc.expectedOutputError, actualErr, tc.name) }) } diff --git a/plugins/builtin/target/aws-asg/plugin/plugin.go b/plugins/builtin/target/aws-asg/plugin/plugin.go index af5cd18f..203d88fe 100644 --- a/plugins/builtin/target/aws-asg/plugin/plugin.go +++ b/plugins/builtin/target/aws-asg/plugin/plugin.go @@ -49,11 +49,14 @@ var _ target.Target = (*TargetPlugin)(nil) // TargetPlugin is the AWS ASG implementation of the target.Target interface. type TargetPlugin struct { - config map[string]string - logger hclog.Logger - asg *autoscaling.Client - ec2 *ec2.Client - scaleInUtils *scaleutils.ScaleIn + config map[string]string + logger hclog.Logger + asg *autoscaling.Client + ec2 *ec2.Client + + // clusterUtils provides general cluster scaling utilities for querying the + // state of nodes pools and performing scaling tasks. + clusterUtils *scaleutils.ClusterScaleUtils } // NewAWSASGPlugin returns the AWS ASG implementation of the target.Target @@ -73,11 +76,14 @@ func (t *TargetPlugin) SetConfig(config map[string]string) error { return err } - utils, err := scaleutils.NewScaleInUtils(nomad.ConfigFromNamespacedMap(config), t.logger) + clusterUtils, err := scaleutils.NewClusterScaleUtils(nomad.ConfigFromNamespacedMap(config), t.logger) if err != nil { return err } - t.scaleInUtils = utils + + // Store and set the remote ID callback function. 
+ t.clusterUtils = clusterUtils + t.clusterUtils.ClusterNodeIDLookupFunc = awsNodeIDMap return nil } @@ -138,18 +144,10 @@ func (t *TargetPlugin) Scale(action sdk.ScalingAction, config map[string]string) // Status satisfies the Status function on the target.Target interface. func (t *TargetPlugin) Status(config map[string]string) (*sdk.TargetStatus, error) { - class, ok := config[sdk.TargetConfigKeyClass] - if !ok { - return nil, fmt.Errorf("required config param %q not found", sdk.TargetConfigKeyClass) - } - // Perform our check of the Nomad node pool. If the pool is not ready, we // can exit here and avoid calling the AWS API as it won't affect the // outcome. - ready, err := t.scaleInUtils.Ready(scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: class, - }) + ready, err := t.clusterUtils.IsPoolReady(config) if err != nil { return nil, fmt.Errorf("failed to run Nomad node readiness check: %v", err) } From e974105b992c45942ad9047cfbc87cacca5626e2 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Mon, 22 Feb 2021 09:50:35 +0100 Subject: [PATCH 6/9] plugins/target/azure-vmss: use refactored scaleutils. --- .../builtin/target/azure-vmss/plugin/azure.go | 68 ++++--------- .../target/azure-vmss/plugin/azure_test.go | 95 ++++--------------- .../target/azure-vmss/plugin/plugin.go | 22 ++--- 3 files changed, 47 insertions(+), 138 deletions(-) diff --git a/plugins/builtin/target/azure-vmss/plugin/azure.go b/plugins/builtin/target/azure-vmss/plugin/azure.go index a9a8eb1a..70c5f798 100644 --- a/plugins/builtin/target/azure-vmss/plugin/azure.go +++ b/plugins/builtin/target/azure-vmss/plugin/azure.go @@ -5,18 +5,17 @@ import ( "errors" "fmt" "os" - "strconv" "strings" - "time" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2020-06-01/compute" "github.com/Azure/go-autorest/autorest" "github.com/Azure/go-autorest/autorest/azure/auth" - "github.com/hashicorp/nomad-autoscaler/sdk" "github.com/hashicorp/nomad-autoscaler/sdk/helper/ptr" - "github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils" + "github.com/hashicorp/nomad/api" ) +const nodeAttrAzureInstanceID = "unique.platform.azure.name" + // argsOrEnv allows you to pick an environmental variable for a setting if the arg is not set func argsOrEnv(args map[string]string, key, env string) string { if value, ok := args[key]; ok { @@ -89,14 +88,9 @@ func (t *TargetPlugin) scaleOut(ctx context.Context, resourceGroup string, vmSca // scaleIn drain and delete Scale Set instances to match the Autoscaler has deemed required. func (t *TargetPlugin) scaleIn(ctx context.Context, resourceGroup string, vmScaleSet string, num int64, config map[string]string) error { - scaleReq, err := t.generateScaleReq(num, config) - if err != nil { - return fmt.Errorf("failed to generate scale in request: %v", err) - } - - ids, err := t.scaleInUtils.RunPreScaleInTasks(ctx, scaleReq) + ids, err := t.clusterUtils.RunPreScaleInTasks(ctx, config, int(num)) if err != nil { - return fmt.Errorf("failed to perform Nomad scale in tasks: %v", err) + return fmt.Errorf("failed to perform pre-scale Nomad scale in tasks: %v", err) } // Grab the instanceIDs once as it is used multiple times throughout the @@ -107,8 +101,8 @@ func (t *TargetPlugin) scaleIn(ctx context.Context, resourceGroup string, vmScal // RemoteID should be in the format of "{scale-set-name}_{instance-id}" // If RemoteID doesn't start vmScaleSet then assume its not part of this scale set. 
// https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-instance-ids#scale-set-vm-names - if idx := strings.LastIndex(node.RemoteID, "_"); idx != -1 && strings.EqualFold(node.RemoteID[0:idx], vmScaleSet) { - instanceIDs = append(instanceIDs, node.RemoteID[idx+1:]) + if idx := strings.LastIndex(node.RemoteResourceID, "_"); idx != -1 && strings.EqualFold(node.RemoteResourceID[0:idx], vmScaleSet) { + instanceIDs = append(instanceIDs, node.RemoteResourceID[idx+1:]) } else { return errors.New("failed to get instance-id from remoteid") } @@ -137,52 +131,24 @@ func (t *TargetPlugin) scaleIn(ctx context.Context, resourceGroup string, vmScal log.Info("successfully deleted Azure ScaleSet instances") // Run any post scale in tasks that are desired. - if err := t.scaleInUtils.RunPostScaleInTasks(config, ids); err != nil { + if err := t.clusterUtils.RunPostScaleInTasks(ctx, config, ids); err != nil { return fmt.Errorf("failed to perform post-scale Nomad scale in tasks: %v", err) } return nil } -func (t *TargetPlugin) generateScaleReq(num int64, config map[string]string) (*scaleutils.ScaleInReq, error) { - - // Pull the class key from the config mapping. This is a required value and - // we cannot scale without this. - class, ok := config[sdk.TargetConfigKeyClass] - if !ok { - return nil, fmt.Errorf("required config param %q not found", sdk.TargetConfigKeyClass) +// azureNodeIDMap is used to identify the Azure InstanceID of a Nomad node using +// the relevant attribute value. +func azureNodeIDMap(n *api.Node) (string, error) { + if val, ok := n.Attributes[nodeAttrAzureInstanceID]; ok { + return val, nil } - // The drain_deadline is an optional parameter so define out default and - // then attempt to find an operator specified value. - drain := scaleutils.DefaultDrainDeadline - ignoreSystemJobs := scaleutils.DefaultIgnoreSystemJobs - - if drainString, ok := config[sdk.TargetConfigKeyDrainDeadline]; ok { - d, err := time.ParseDuration(drainString) - if err != nil { - return nil, fmt.Errorf("failed to parse %q as time duration", drainString) - } - drain = d - } - - if ignoreSystemJobsString, ok := config[sdk.TargetConfigKeyIgnoreSystemJobs]; ok { - isj, err := strconv.ParseBool(ignoreSystemJobsString) - if err != nil { - return nil, fmt.Errorf("failed to parse %q as boolean", ignoreSystemJobsString) - } - ignoreSystemJobs = isj + // Fallback to meta tag. 
+ if val, ok := n.Meta[nodeAttrAzureInstanceID]; ok { + return val, nil } - return &scaleutils.ScaleInReq{ - Num: int(num), - DrainDeadline: drain, - IgnoreSystemJobs: ignoreSystemJobs, - PoolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: class, - }, - RemoteProvider: scaleutils.RemoteProviderAzureInstanceID, - NodeIDStrategy: scaleutils.IDStrategyNewestCreateIndex, - }, nil + return "", fmt.Errorf("attribute %q not found", nodeAttrAzureInstanceID) } diff --git a/plugins/builtin/target/azure-vmss/plugin/azure_test.go b/plugins/builtin/target/azure-vmss/plugin/azure_test.go index 88615910..52e491fc 100644 --- a/plugins/builtin/target/azure-vmss/plugin/azure_test.go +++ b/plugins/builtin/target/azure-vmss/plugin/azure_test.go @@ -3,104 +3,49 @@ package plugin import ( "errors" "testing" - "time" - "github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils" + "github.com/hashicorp/nomad/api" "github.com/stretchr/testify/assert" ) -func TestTargetPlugin_generateScaleReq(t *testing.T) { +func Test_azureNodeIDMap(t *testing.T) { testCases := []struct { - inputNum int64 - inputConfig map[string]string - expectedOutputReq *scaleutils.ScaleInReq + inputNode *api.Node + expectedOutputID string expectedOutputError error name string }{ { - inputNum: 2, - inputConfig: map[string]string{ - "node_class": "high-memory", - "node_drain_deadline": "5m", - }, - expectedOutputReq: &scaleutils.ScaleInReq{ - Num: 2, - DrainDeadline: 5 * time.Minute, - IgnoreSystemJobs: false, - PoolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: "high-memory", - }, - RemoteProvider: scaleutils.RemoteProviderAzureInstanceID, - NodeIDStrategy: scaleutils.IDStrategyNewestCreateIndex, - }, - expectedOutputError: nil, - name: "valid request with drain_deadline in config", - }, - { - inputNum: 2, - inputConfig: map[string]string{ - "node_class": "high-memory", - "node_drain_ignore_system_jobs": "true", - }, - expectedOutputReq: &scaleutils.ScaleInReq{ - Num: 2, - DrainDeadline: 15 * time.Minute, - IgnoreSystemJobs: true, - PoolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: "high-memory", - }, - RemoteProvider: scaleutils.RemoteProviderAzureInstanceID, - NodeIDStrategy: scaleutils.IDStrategyNewestCreateIndex, + inputNode: &api.Node{ + Attributes: map[string]string{"unique.platform.azure.name": "13f56399-bd52-4150-9748-7190aae1ff21"}, }, + expectedOutputID: "13f56399-bd52-4150-9748-7190aae1ff21", expectedOutputError: nil, - name: "valid request with drain_ignore_system_jobs in config", - }, - { - inputNum: 2, - inputConfig: map[string]string{}, - expectedOutputReq: nil, - expectedOutputError: errors.New("required config param \"node_class\" not found"), - name: "no class key found in config", + name: "required attribute found", }, { - inputNum: 2, - inputConfig: map[string]string{ - "node_class": "high-memory", - }, - expectedOutputReq: &scaleutils.ScaleInReq{ - Num: 2, - DrainDeadline: 15 * time.Minute, - IgnoreSystemJobs: false, - PoolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: "high-memory", - }, - RemoteProvider: scaleutils.RemoteProviderAzureInstanceID, - NodeIDStrategy: scaleutils.IDStrategyNewestCreateIndex, + inputNode: &api.Node{ + Attributes: map[string]string{}, + Meta: map[string]string{"unique.platform.azure.name": "13f56399-bd52-4150-9748-7190aae1ff21"}, }, + expectedOutputID: "13f56399-bd52-4150-9748-7190aae1ff21", 
expectedOutputError: nil, - name: "drain_deadline not specified within config", + name: "required fallback meta found", }, { - inputNum: 2, - inputConfig: map[string]string{ - "node_class": "high-memory", - "node_drain_deadline": "time to make a cuppa", + inputNode: &api.Node{ + Attributes: map[string]string{}, }, - expectedOutputReq: nil, - expectedOutputError: errors.New("failed to parse \"time to make a cuppa\" as time duration"), - name: "malformed drain_deadline config value", + expectedOutputID: "", + expectedOutputError: errors.New(`attribute "unique.platform.azure.name" not found`), + name: "required attribute not found", }, } - tp := TargetPlugin{} - for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - actualReq, actualErr := tp.generateScaleReq(tc.inputNum, tc.inputConfig) - assert.Equal(t, tc.expectedOutputReq, actualReq, tc.name) + actualID, actualErr := azureNodeIDMap(tc.inputNode) + assert.Equal(t, tc.expectedOutputID, actualID, tc.name) assert.Equal(t, tc.expectedOutputError, actualErr, tc.name) }) } diff --git a/plugins/builtin/target/azure-vmss/plugin/plugin.go b/plugins/builtin/target/azure-vmss/plugin/plugin.go index e88bec73..9d845b37 100644 --- a/plugins/builtin/target/azure-vmss/plugin/plugin.go +++ b/plugins/builtin/target/azure-vmss/plugin/plugin.go @@ -50,7 +50,10 @@ type TargetPlugin struct { config map[string]string logger hclog.Logger vmss compute.VirtualMachineScaleSetsClient - scaleInUtils *scaleutils.ScaleIn + + // clusterUtils provides general cluster scaling utilities for querying the + // state of nodes pools and performing scaling tasks. + clusterUtils *scaleutils.ClusterScaleUtils } // NewAzureVMSSPlugin returns the Azure VMSS implementation of the target.Target @@ -70,11 +73,14 @@ func (t *TargetPlugin) SetConfig(config map[string]string) error { return err } - utils, err := scaleutils.NewScaleInUtils(nomad.ConfigFromNamespacedMap(config), t.logger) + clusterUtils, err := scaleutils.NewClusterScaleUtils(nomad.ConfigFromNamespacedMap(config), t.logger) if err != nil { return err } - t.scaleInUtils = utils + + // Store and set the remote ID callback function. + t.clusterUtils = clusterUtils + t.clusterUtils.ClusterNodeIDLookupFunc = azureNodeIDMap return nil } @@ -136,18 +142,10 @@ func (t *TargetPlugin) Scale(action sdk.ScalingAction, config map[string]string) // Status satisfies the Status function on the target.Target interface. func (t *TargetPlugin) Status(config map[string]string) (*sdk.TargetStatus, error) { - class, ok := config[sdk.TargetConfigKeyClass] - if !ok { - return nil, fmt.Errorf("required config param %q not found", sdk.TargetConfigKeyClass) - } - // Perform our check of the Nomad node pool. If the pool is not ready, we // can exit here and avoid calling the Azure API as it won't affect the // outcome. - ready, err := t.scaleInUtils.Ready(scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: class, - }) + ready, err := t.clusterUtils.IsPoolReady(config) if err != nil { return nil, fmt.Errorf("failed to run Nomad node readiness check: %v", err) } From a8f2509b3fe9eb3b24cc5bb5dcb9c6f735d91505 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Mon, 22 Feb 2021 09:51:08 +0100 Subject: [PATCH 7/9] plugins/target/gce-mig: use refactored scaleutils. 
--- plugins/builtin/target/gce-mig/plugin/gce.go | 84 ++++++--------- .../builtin/target/gce-mig/plugin/gce_test.go | 100 ++++++------------ .../builtin/target/gce-mig/plugin/plugin.go | 27 +++-- 3 files changed, 73 insertions(+), 138 deletions(-) diff --git a/plugins/builtin/target/gce-mig/plugin/gce.go b/plugins/builtin/target/gce-mig/plugin/gce.go index f4e2e3ce..0ad481ad 100644 --- a/plugins/builtin/target/gce-mig/plugin/gce.go +++ b/plugins/builtin/target/gce-mig/plugin/gce.go @@ -5,11 +5,10 @@ import ( "fmt" "io/ioutil" "os" - "strconv" + "strings" "time" - "github.com/hashicorp/nomad-autoscaler/sdk" - "github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils" + "github.com/hashicorp/nomad/api" "github.com/mitchellh/go-homedir" "google.golang.org/api/compute/v1" "google.golang.org/api/option" @@ -18,6 +17,14 @@ import ( const ( defaultRetryInterval = 10 * time.Second defaultRetryLimit = 15 + + // nodeAttrGCEHostname is the node attribute to use when identifying the + // GCE hostname of a node. + nodeAttrGCEHostname = "unique.platform.gce.hostname" + + // nodeAttrGCEZone is the node attribute to use when identifying the GCE + // zone of a node. + nodeAttrGCEZone = "platform.gce.zone" ) func (t *TargetPlugin) setupGCEClients(config map[string]string) error { @@ -62,12 +69,8 @@ func (t *TargetPlugin) scaleOut(ctx context.Context, ig instanceGroup, num int64 } func (t *TargetPlugin) scaleIn(ctx context.Context, group instanceGroup, num int64, config map[string]string) error { - scaleReq, err := t.generateScaleReq(num, config) - if err != nil { - return fmt.Errorf("failed to generate scale in request: %v", err) - } - ids, err := t.scaleInUtils.RunPreScaleInTasks(ctx, scaleReq) + ids, err := t.clusterUtils.RunPreScaleInTasks(ctx, config, int(num)) if err != nil { return fmt.Errorf("failed to perform pre-scale Nomad scale in tasks: %v", err) } @@ -76,7 +79,7 @@ func (t *TargetPlugin) scaleIn(ctx context.Context, group instanceGroup, num int var instanceIDs []string for _, node := range ids { - instanceIDs = append(instanceIDs, node.RemoteID) + instanceIDs = append(instanceIDs, node.RemoteResourceID) } // Create a logger for this action to pre-populate useful information we @@ -100,7 +103,7 @@ func (t *TargetPlugin) scaleIn(ctx context.Context, group instanceGroup, num int log.Debug("scale in GCE MIG confirmed") // Run any post scale in tasks that are desired. - if err := t.scaleInUtils.RunPostScaleInTasks(config, ids); err != nil { + if err := t.clusterUtils.RunPostScaleInTasks(ctx, config, ids); err != nil { return fmt.Errorf("failed to perform post-scale Nomad scale in tasks: %v", err) } @@ -121,49 +124,6 @@ func (t *TargetPlugin) ensureInstanceGroupIsStable(ctx context.Context, group in return retry(ctx, defaultRetryInterval, defaultRetryLimit, f) } -func (t *TargetPlugin) generateScaleReq(num int64, config map[string]string) (*scaleutils.ScaleInReq, error) { - - // Pull the class key from the config mapping. This is a required value and - // we cannot scale without this. - class, ok := config[sdk.TargetConfigKeyClass] - if !ok { - return nil, fmt.Errorf("required config param %q not found", sdk.TargetConfigKeyClass) - } - - // The drain_deadline is an optional parameter so define our default and - // then attempt to find an operator specified value. 
- drain := scaleutils.DefaultDrainDeadline - ignoreSystemJobs := scaleutils.DefaultIgnoreSystemJobs - - if drainString, ok := config[sdk.TargetConfigKeyDrainDeadline]; ok { - d, err := time.ParseDuration(drainString) - if err != nil { - return nil, fmt.Errorf("failed to parse %q as time duration", drainString) - } - drain = d - } - - if ignoreSystemJobsSting, ok := config[sdk.TargetConfigKeyIgnoreSystemJobs]; ok { - isj, err := strconv.ParseBool(ignoreSystemJobsSting) - if err != nil { - return nil, fmt.Errorf("failed to parse %q as boolean", ignoreSystemJobsSting) - } - ignoreSystemJobs = isj - } - - return &scaleutils.ScaleInReq{ - Num: int(num), - DrainDeadline: drain, - IgnoreSystemJobs: ignoreSystemJobs, - PoolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: class, - }, - RemoteProvider: scaleutils.RemoteProviderGCEInstanceID, - NodeIDStrategy: scaleutils.IDStrategyNewestCreateIndex, - }, nil -} - func pathOrContents(poc string) (string, error) { if len(poc) == 0 { return poc, nil @@ -188,3 +148,21 @@ func pathOrContents(poc string) (string, error) { return poc, nil } + +// gceNodeIDMap is used to identify the GCE Instance of a Nomad node using the +// relevant attribute value. +func gceNodeIDMap(n *api.Node) (string, error) { + zone, ok := n.Attributes[nodeAttrGCEZone] + if !ok { + return "", fmt.Errorf("attribute %q not found", nodeAttrGCEZone) + } + hostname, ok := n.Attributes[nodeAttrGCEHostname] + if !ok { + return "", fmt.Errorf("attribute %q not found", nodeAttrGCEHostname) + } + if idx := strings.Index(hostname, "."); idx != -1 { + return fmt.Sprintf("zones/%s/instances/%s", zone, hostname[0:idx]), nil + } else { + return fmt.Sprintf("zones/%s/instances/%s", zone, hostname), nil + } +} diff --git a/plugins/builtin/target/gce-mig/plugin/gce_test.go b/plugins/builtin/target/gce-mig/plugin/gce_test.go index 314e3225..162ea7a8 100644 --- a/plugins/builtin/target/gce-mig/plugin/gce_test.go +++ b/plugins/builtin/target/gce-mig/plugin/gce_test.go @@ -3,104 +3,64 @@ package plugin import ( "errors" "testing" - "time" - "github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils" + "github.com/hashicorp/nomad/api" "github.com/stretchr/testify/assert" ) -func TestTargetPlugin_generateScaleReq(t *testing.T) { +func Test_gceNodeIDMap(t *testing.T) { testCases := []struct { - inputNum int64 - inputConfig map[string]string - expectedOutputReq *scaleutils.ScaleInReq + inputNode *api.Node + expectedOutputID string expectedOutputError error name string }{ { - inputNum: 2, - inputConfig: map[string]string{ - "node_class": "high-memory", - "node_drain_deadline": "5m", - }, - expectedOutputReq: &scaleutils.ScaleInReq{ - Num: 2, - DrainDeadline: 5 * time.Minute, - IgnoreSystemJobs: false, - PoolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: "high-memory", + inputNode: &api.Node{ + Attributes: map[string]string{ + "platform.gce.zone": "us-central1-f", + "unique.platform.gce.hostname": "instance-1.c.project.internal", }, - RemoteProvider: scaleutils.RemoteProviderGCEInstanceID, - NodeIDStrategy: scaleutils.IDStrategyNewestCreateIndex, }, + expectedOutputID: "zones/us-central1-f/instances/instance-1", expectedOutputError: nil, - name: "valid request with drain_deadline in config", + name: "required attributes found", }, { - inputNum: 2, - inputConfig: map[string]string{ - "node_class": "high-memory", - "node_drain_ignore_system_jobs": "true", - }, - expectedOutputReq: &scaleutils.ScaleInReq{ - Num: 2, 
- DrainDeadline: 15 * time.Minute, - IgnoreSystemJobs: true, - PoolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: "high-memory", + inputNode: &api.Node{ + Attributes: map[string]string{ + "platform.gce.zone": "us-central1-f", + "unique.platform.gce.hostname": "instance-1", }, - RemoteProvider: scaleutils.RemoteProviderGCEInstanceID, - NodeIDStrategy: scaleutils.IDStrategyNewestCreateIndex, }, + expectedOutputID: "zones/us-central1-f/instances/instance-1", expectedOutputError: nil, - name: "valid request with node_drain_ignore_system_jobs in config", + name: "required attributes found with non-split hostname", }, { - inputNum: 2, - inputConfig: map[string]string{}, - expectedOutputReq: nil, - expectedOutputError: errors.New("required config param \"node_class\" not found"), - name: "no class key found in config", - }, - { - inputNum: 2, - inputConfig: map[string]string{ - "node_class": "high-memory", + inputNode: &api.Node{ + Attributes: map[string]string{}, }, - expectedOutputReq: &scaleutils.ScaleInReq{ - Num: 2, - DrainDeadline: 15 * time.Minute, - IgnoreSystemJobs: false, - PoolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: "high-memory", - }, - RemoteProvider: scaleutils.RemoteProviderGCEInstanceID, - NodeIDStrategy: scaleutils.IDStrategyNewestCreateIndex, - }, - expectedOutputError: nil, - name: "drain_deadline not specified within config", + expectedOutputID: "", + expectedOutputError: errors.New(`attribute "platform.gce.zone" not found`), + name: "required attribute zone not found", }, { - inputNum: 2, - inputConfig: map[string]string{ - "node_class": "high-memory", - "node_drain_deadline": "time to make a cuppa", + inputNode: &api.Node{ + Attributes: map[string]string{ + "platform.gce.zone": "us-central1-f", + }, }, - expectedOutputReq: nil, - expectedOutputError: errors.New("failed to parse \"time to make a cuppa\" as time duration"), - name: "malformed drain_deadline config value", + expectedOutputID: "", + expectedOutputError: errors.New(`attribute "unique.platform.gce.hostname" not found`), + name: "required attribute hostname not found", }, } - tp := TargetPlugin{} - for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - actualReq, actualErr := tp.generateScaleReq(tc.inputNum, tc.inputConfig) - assert.Equal(t, tc.expectedOutputReq, actualReq, tc.name) + actualID, actualErr := gceNodeIDMap(tc.inputNode) + assert.Equal(t, tc.expectedOutputID, actualID, tc.name) assert.Equal(t, tc.expectedOutputError, actualErr, tc.name) }) } diff --git a/plugins/builtin/target/gce-mig/plugin/plugin.go b/plugins/builtin/target/gce-mig/plugin/plugin.go index 956103c1..61e22b52 100644 --- a/plugins/builtin/target/gce-mig/plugin/plugin.go +++ b/plugins/builtin/target/gce-mig/plugin/plugin.go @@ -41,11 +41,13 @@ var _ target.Target = (*TargetPlugin)(nil) // TargetPlugin is the CGE MIG implementation of the target.Target interface. type TargetPlugin struct { - config map[string]string - logger hclog.Logger - scaleInUtils *scaleutils.ScaleIn - + config map[string]string + logger hclog.Logger service *compute.Service + + // clusterUtils provides general cluster scaling utilities for querying the + // state of nodes pools and performing scaling tasks. 
+ clusterUtils *scaleutils.ClusterScaleUtils } // NewGCEMIGPlugin returns the GCE MIG implementation of the target.Target @@ -65,11 +67,14 @@ func (t *TargetPlugin) SetConfig(config map[string]string) error { return err } - utils, err := scaleutils.NewScaleInUtils(nomad.ConfigFromNamespacedMap(config), t.logger) + clusterUtils, err := scaleutils.NewClusterScaleUtils(nomad.ConfigFromNamespacedMap(config), t.logger) if err != nil { return err } - t.scaleInUtils = utils + + // Store and set the remote ID callback function. + t.clusterUtils = clusterUtils + t.clusterUtils.ClusterNodeIDLookupFunc = gceNodeIDMap return nil } @@ -123,18 +128,10 @@ func (t *TargetPlugin) Scale(action sdk.ScalingAction, config map[string]string) // Status satisfies the Status function on the target.Target interface. func (t *TargetPlugin) Status(config map[string]string) (*sdk.TargetStatus, error) { - class, ok := config[sdk.TargetConfigKeyClass] - if !ok { - return nil, fmt.Errorf("required config param %q not found", sdk.TargetConfigKeyClass) - } - // Perform our check of the Nomad node pool. If the pool is not ready, we // can exit here and avoid calling the Google API as it won't affect the // outcome. - ready, err := t.scaleInUtils.Ready(scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKeyClass, - Value: class, - }) + ready, err := t.clusterUtils.IsPoolReady(config) if err != nil { return nil, fmt.Errorf("failed to run Nomad node readiness check: %v", err) } From 4699805849f8f85743a21318a99c5d47132b0b9a Mon Sep 17 00:00:00 2001 From: James Rasell Date: Mon, 22 Feb 2021 09:51:31 +0100 Subject: [PATCH 8/9] plugins/apm/nomad: use refactored scaleutils. --- plugins/builtin/apm/nomad/plugin/node.go | 12 +++++------ plugins/builtin/apm/nomad/plugin/node_test.go | 20 +++++++------------ 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/plugins/builtin/apm/nomad/plugin/node.go b/plugins/builtin/apm/nomad/plugin/node.go index ed689e53..d3941470 100644 --- a/plugins/builtin/apm/nomad/plugin/node.go +++ b/plugins/builtin/apm/nomad/plugin/node.go @@ -8,6 +8,7 @@ import ( "github.com/hashicorp/nomad-autoscaler/sdk" "github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils" + "github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils/nodepool" "github.com/hashicorp/nomad/api" ) @@ -15,7 +16,7 @@ import ( // all the information needed to perform a Nomad APM query for a node pool. type nodePoolQuery struct { metric string - poolIdentifier *scaleutils.PoolIdentifier + poolIdentifier nodepool.ClusterNodePoolIdentifier operation string } @@ -78,7 +79,7 @@ func (a *APMPlugin) queryNodePool(q string) (sdk.TimestampedMetrics, error) { // specified node pool. Any error in calling the Nomad API for details will // result in an error. This is because with missing data, we cannot reliably // make calculations. -func (a *APMPlugin) getPoolResources(id *scaleutils.PoolIdentifier) (*nodePoolResources, error) { +func (a *APMPlugin) getPoolResources(id nodepool.ClusterNodePoolIdentifier) (*nodePoolResources, error) { nodes, _, err := a.client.Nodes().List(nil) if err != nil { @@ -87,7 +88,7 @@ func (a *APMPlugin) getPoolResources(id *scaleutils.PoolIdentifier) (*nodePoolRe // Perform our node filtering so we are left with a list of nodes that form // our pool and that are in the correct state. 
- nodePoolList, err := id.IdentifyNodes(nodes) + nodePoolList, err := scaleutils.FilterNodes(nodes, id.IsPoolMember) if err != nil { return nil, fmt.Errorf("failed to identify nodes within pool: %v", err) } @@ -166,10 +167,7 @@ func parseNodePoolQuery(q string) (*nodePoolQuery, error) { } query := nodePoolQuery{ - poolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: scaleutils.IdentifierKey(mainParts[2]), - Value: mainParts[1], - }, + poolIdentifier: nodepool.NewNodeClassPoolIdentifier(mainParts[1]), } opMetricParts := strings.SplitN(mainParts[0], "_", 3) diff --git a/plugins/builtin/apm/nomad/plugin/node_test.go b/plugins/builtin/apm/nomad/plugin/node_test.go index c2783a5a..6ca2e463 100644 --- a/plugins/builtin/apm/nomad/plugin/node_test.go +++ b/plugins/builtin/apm/nomad/plugin/node_test.go @@ -4,7 +4,7 @@ import ( "errors" "testing" - "github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils" + "github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils/nodepool" "github.com/hashicorp/nomad/api" "github.com/stretchr/testify/assert" ) @@ -19,12 +19,9 @@ func Test_parseNodePoolQuery(t *testing.T) { { inputQuery: "node_percentage-allocated_memory/high-memory/class", expectedOutputQuery: &nodePoolQuery{ - metric: "memory", - poolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: "class", - Value: "high-memory", - }, - operation: "percentage-allocated", + metric: "memory", + poolIdentifier: nodepool.NewNodeClassPoolIdentifier("high-memory"), + operation: "percentage-allocated", }, expectError: nil, name: "node percentage-allocated memory", @@ -32,12 +29,9 @@ func Test_parseNodePoolQuery(t *testing.T) { { inputQuery: "node_percentage-allocated_cpu/high-compute/class", expectedOutputQuery: &nodePoolQuery{ - metric: "cpu", - poolIdentifier: &scaleutils.PoolIdentifier{ - IdentifierKey: "class", - Value: "high-compute", - }, - operation: "percentage-allocated", + metric: "cpu", + poolIdentifier: nodepool.NewNodeClassPoolIdentifier("high-compute"), + operation: "percentage-allocated", }, expectError: nil, name: "node percentage-allocated cpu", From fd8ac603b27d03414bbf5993de1b6754ae823b34 Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Mon, 22 Feb 2021 05:15:29 -0600 Subject: [PATCH 9/9] Update sdk/helper/scaleutils/cluster.go --- sdk/helper/scaleutils/cluster.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/helper/scaleutils/cluster.go b/sdk/helper/scaleutils/cluster.go index ac7dd90c..569a478f 100644 --- a/sdk/helper/scaleutils/cluster.go +++ b/sdk/helper/scaleutils/cluster.go @@ -163,7 +163,7 @@ func (c *ClusterScaleUtils) identifyScaleInRemoteIDs(nodes []*api.NodeListStub) // Read the full node object from the API which will contain the full // information required to identify the remote provider ID. If we get a - // single error here, its likely we won't be able to perform any of the + // single error here, it's likely we won't be able to perform any of the // API calls, therefore just exit rather than collect all the errors. nodeInfo, _, err := c.client.Nodes().Info(node.ID, nil) if err != nil {
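
Taken together, patches 5 through 7 leave the aws-asg, azure-vmss, and gce-mig targets sharing the same consumer pattern around the refactored scaleutils. The sketch below is a minimal illustration of that pattern only, not code from the series: examplePlugin, terminateInstances, and the "unique.platform.example.id" attribute are hypothetical stand-ins, while NewClusterScaleUtils, ClusterNodeIDLookupFunc, RunPreScaleInTasks, RunPostScaleInTasks, and IsPoolReady are used as they appear in the diffs above.

package plugin

import (
	"context"
	"fmt"

	hclog "github.com/hashicorp/go-hclog"
	"github.com/hashicorp/nomad-autoscaler/sdk/helper/scaleutils"
	"github.com/hashicorp/nomad/api"
)

// examplePlugin mirrors the structure the builtin targets use after this
// series: the deprecated ScaleIn helper is replaced by ClusterScaleUtils.
type examplePlugin struct {
	logger       hclog.Logger
	clusterUtils *scaleutils.ClusterScaleUtils
}

// setConfig wires up the cluster utils and registers the provider-specific
// remote ID lookup, as SetConfig does in each target plugin.
func (p *examplePlugin) setConfig(cfg *api.Config) error {
	utils, err := scaleutils.NewClusterScaleUtils(cfg, p.logger)
	if err != nil {
		return err
	}
	p.clusterUtils = utils

	// Hypothetical lookup: real plugins map a provider attribute such as
	// "unique.platform.aws.instance-id" to the remote resource ID.
	p.clusterUtils.ClusterNodeIDLookupFunc = func(n *api.Node) (string, error) {
		val, ok := n.Attributes["unique.platform.example.id"]
		if !ok || val == "" {
			return "", fmt.Errorf("attribute %q not found", "unique.platform.example.id")
		}
		return val, nil
	}
	return nil
}

// scaleIn shows the shared drain-then-terminate flow. terminateInstances is a
// hypothetical stand-in for the provider API call (ASG detach/terminate, VMSS
// delete instances, MIG delete instances).
func (p *examplePlugin) scaleIn(ctx context.Context, num int64, config map[string]string) error {
	ids, err := p.clusterUtils.RunPreScaleInTasks(ctx, config, int(num))
	if err != nil {
		return fmt.Errorf("failed to perform pre-scale Nomad scale in tasks: %v", err)
	}

	// Collect the remote provider IDs resolved via ClusterNodeIDLookupFunc.
	var instanceIDs []string
	for _, node := range ids {
		instanceIDs = append(instanceIDs, node.RemoteResourceID)
	}

	if err := terminateInstances(ctx, instanceIDs); err != nil {
		return fmt.Errorf("failed to terminate instances: %v", err)
	}

	// Run any post scale in tasks that are desired.
	if err := p.clusterUtils.RunPostScaleInTasks(ctx, config, ids); err != nil {
		return fmt.Errorf("failed to perform post-scale Nomad scale in tasks: %v", err)
	}
	return nil
}

// status shows the readiness check now driven directly from the target config.
func (p *examplePlugin) status(config map[string]string) (bool, error) {
	return p.clusterUtils.IsPoolReady(config)
}

// terminateInstances is a placeholder for the provider-specific removal call.
func terminateInstances(ctx context.Context, instanceIDs []string) error {
	_ = ctx
	_ = instanceIDs
	return nil
}

Confining the provider-specific pieces to the lookup function and the terminate call is what lets the node filtering, drain handling, and readiness checks live once in ClusterScaleUtils instead of in each plugin's generateScaleReq.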