Skip to content

Commit

Permalink
Merge pull request #324 from tydomitrovich/cpu-percent-used-of-allocated
Browse files Browse the repository at this point in the history
Use percent of allocated CPU for task group queries against the nomad-apm plugin
  • Loading branch information
lgfa29 authored Jan 5, 2021
2 parents e56e878 + 98a1a1d commit d601b21
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 23 deletions.
43 changes: 42 additions & 1 deletion plugins/builtin/apm/nomad/plugin/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,21 @@ func (a *APMPlugin) getTaskGroupResourceUsage(query *taskGroupQuery) ([]float64,
metricFunc = func(m *[]float64, ru *api.ResourceUsage) {
*m = append(*m, ru.CpuStats.Percent)
}
case queryMetricCPUAllocated:

// Since the Nomad API does not provide a metric for the percentage of CPU used
// out of amount allocated for taskgroups, the calculation must be done here.
// The total CPU allocated to the task group is retrieved once here since it
// does not vary between allocations.
allocatedCPU, err := a.getAllocatedCPUForTaskGroup(query.job, query.group)
if err != nil {
return nil, fmt.Errorf("failed to get total alloacted CPU for taskgroup: %v", err)
}

// Create the metric function now that the total allocated CPU is known
metricFunc = func(m *[]float64, ru *api.ResourceUsage) {
*m = append(*m, (ru.CpuStats.TotalTicks/float64(allocatedCPU))*100)
}
case queryMetricMem:
metricFunc = func(m *[]float64, ru *api.ResourceUsage) {
*m = append(*m, float64(ru.MemoryStats.Usage))
Expand Down Expand Up @@ -107,6 +122,28 @@ func (a *APMPlugin) getTaskGroupResourceUsage(query *taskGroupQuery) ([]float64,
return resp, nil
}

// getAllocatedCPUForTaskGroup calculates the total allocated CPU in MHz for a taskgroup
func (a *APMPlugin) getAllocatedCPUForTaskGroup(job, taskgroup string) (int, error) {
jobInfo, _, err := a.client.Jobs().Info(job, nil)
if err != nil {
return -1, fmt.Errorf("failed to get info for job: %v", err)
}

taskGroupConfig := jobInfo.LookupTaskGroup(taskgroup)
if taskGroupConfig == nil {
return -1, fmt.Errorf("specified taskgroup was not found in job config")
}

taskGroupAllocatedCPU := 0
for _, task := range taskGroupConfig.Tasks {
if task.Resources == nil || task.Resources.CPU == nil {
continue
}
taskGroupAllocatedCPU += *task.Resources.CPU
}
return taskGroupAllocatedCPU, nil
}

// calculateTaskGroupResult determines the query result based on the metrics
// and operation to perform.
func calculateTaskGroupResult(op string, metrics []float64) sdk.TimestampedMetrics {
Expand Down Expand Up @@ -169,7 +206,7 @@ func parseTaskGroupQuery(q string) (*taskGroupQuery, error) {
op := opMetricParts[1]
metric := opMetricParts[2]

if err := validateMetric(metric); err != nil {
if err := validateMetricTaskGroupQuery(metric); err != nil {
return nil, err
}
query.metric = metric
Expand All @@ -184,3 +221,7 @@ func parseTaskGroupQuery(q string) (*taskGroupQuery, error) {

return query, nil
}

func validateMetricTaskGroupQuery(metric string) error {
return validateMetric(metric, []string{queryMetricCPU, queryMetricCPUAllocated, queryMetricMem})
}
11 changes: 11 additions & 0 deletions plugins/builtin/apm/nomad/plugin/job_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,17 @@ func Test_parseTaskGroupQuery(t *testing.T) {
},
expectError: false,
},
{
name: "avg_cpu-allocated",
input: "taskgroup_avg_cpu-allocated/group/job",
expected: &taskGroupQuery{
metric: "cpu-allocated",
job: "job",
group: "group",
operation: "avg",
},
expectError: false,
},
{
name: "avg_memory",
input: "taskgroup_avg_memory/group/job",
Expand Down
24 changes: 17 additions & 7 deletions plugins/builtin/apm/nomad/plugin/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ const (
queryOpPercentageAllocated = "percentage-allocated"

// queryMetrics are the supported resources for querying.
queryMetricCPU = "cpu"
queryMetricMem = "memory"
queryMetricCPU = "cpu"
queryMetricCPUAllocated = "cpu-allocated"
queryMetricMem = "memory"
)

// Query satisfies the Query function on the apm.APM interface.
Expand Down Expand Up @@ -57,15 +58,24 @@ func (a *APMPlugin) QueryMultiple(q string, r sdk.TimeRange) ([]sdk.TimestampedM

// validateMetric helps ensure the desired metric within the query is able to
// be handled by the plugin.
func validateMetric(metric string) error {
func validateMetric(metric string, validMetrics []string) error {

var err error

switch metric {
case queryMetricCPU, queryMetricMem:
switch {
case contains(validMetrics, metric):
default:
err = fmt.Errorf(`invalid metric %q, allowed values are %s or %s`,
metric, queryMetricCPU, queryMetricMem)
err = fmt.Errorf(`invalid metric %q, allowed values are: %s`,
metric, strings.Join(validMetrics, ", "))
}
return err
}

func contains(slice []string, item string) bool {
for _, s := range slice {
if s == item {
return true
}
}
return false
}
36 changes: 23 additions & 13 deletions plugins/builtin/apm/nomad/plugin/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,40 @@ import (

func Test_validateMetric(t *testing.T) {
testCases := []struct {
inputMetric string
expectedOutput error
name string
inputMetric string
inputValidMetrics []string
expectedOutput error
name string
}{
{
inputMetric: "memory",
expectedOutput: nil,
name: "memory metric",
inputMetric: "memory",
inputValidMetrics: []string{queryMetricCPU, queryMetricCPUAllocated, queryMetricMem},
expectedOutput: nil,
name: "memory metric",
},
{
inputMetric: "cpu",
expectedOutput: nil,
name: "cpu metric",
inputMetric: "cpu",
inputValidMetrics: []string{queryMetricCPU, queryMetricCPUAllocated, queryMetricMem},
expectedOutput: nil,
name: "cpu metric",
},
{
inputMetric: "cost-of-server",
expectedOutput: errors.New("invalid metric \"cost-of-server\", allowed values are cpu or memory"),
name: "invalid metric",
inputMetric: "cpu-allocated",
inputValidMetrics: []string{queryMetricCPU, queryMetricCPUAllocated, queryMetricMem},
expectedOutput: nil,
name: "cpu-allocated metric",
},
{
inputMetric: "cost-of-server",
inputValidMetrics: []string{queryMetricCPU, queryMetricCPUAllocated, queryMetricMem},
expectedOutput: errors.New("invalid metric \"cost-of-server\", allowed values are: cpu, cpu-allocated, memory"),
name: "invalid metric",
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
actualOutput := validateMetric(tc.inputMetric)
actualOutput := validateMetric(tc.inputMetric, tc.inputValidMetrics)
assert.Equal(t, tc.expectedOutput, actualOutput, tc.name)
})
}
Expand Down
6 changes: 5 additions & 1 deletion plugins/builtin/apm/nomad/plugin/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ func parseNodePoolQuery(q string) (*nodePoolQuery, error) {
return nil, fmt.Errorf("expected node_<operation>_<metric>, received %s", mainParts[0])
}

if err := validateMetric(opMetricParts[2]); err != nil {
if err := validateMetricNodeQuery(opMetricParts[2]); err != nil {
return nil, err
}
query.metric = opMetricParts[2]
Expand All @@ -192,6 +192,10 @@ func parseNodePoolQuery(q string) (*nodePoolQuery, error) {
return &query, nil
}

func validateMetricNodeQuery(metric string) error {
return validateMetric(metric, []string{queryMetricCPU, queryMetricMem})
}

// calculateNodePoolResult returns the current usage percentage of the node
// pool.
func calculateNodePoolResult(allocated, allocatable float64) float64 {
Expand Down
8 changes: 7 additions & 1 deletion plugins/builtin/apm/nomad/plugin/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,15 @@ func Test_parseNodePoolQuery(t *testing.T) {
{
inputQuery: "node_percentage-allocated_invalid/class/high-compute",
expectedOutputQuery: nil,
expectError: errors.New("invalid metric \"invalid\", allowed values are cpu or memory"),
expectError: errors.New("invalid metric \"invalid\", allowed values are: cpu, memory"),
name: "invalid metric",
},
{
inputQuery: "node_percentage-allocated_cpu-allocated/class/high-compute",
expectedOutputQuery: nil,
expectError: errors.New("invalid metric \"cpu-allocated\", allowed values are: cpu, memory"),
name: "metric for task group queries only",
},
{
inputQuery: "node_invalid_cpu/class/high-compute",
expectedOutputQuery: nil,
Expand Down

0 comments on commit d601b21

Please sign in to comment.