Skip to content

Commit

Permalink
Merge pull request #13104 from hashicorp/b-blocked-eval-math
Browse files Browse the repository at this point in the history
core: fix blocked eval math
  • Loading branch information
shoenig committed May 24, 2022
2 parents 94abe33 + d796127 commit b844f30
Show file tree
Hide file tree
Showing 5 changed files with 337 additions and 49 deletions.
3 changes: 3 additions & 0 deletions .changelog/13104.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
core: Fixed a bug where blocked eval resources were incorrectly computed
```
8 changes: 4 additions & 4 deletions nomad/blocked_evals.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,8 @@ func (b *BlockedEvals) processBlockJobDuplicate(eval *structs.Evaluation) (newCa
if ok {
if latestEvalIndex(existingW.eval) <= latestEvalIndex(eval) {
delete(b.captured, existingID)
b.stats.Unblock(eval)
dup = existingW.eval
b.stats.Unblock(dup)
} else {
dup = eval
newCancelled = true
Expand Down Expand Up @@ -727,10 +727,10 @@ func (b *BlockedEvals) EmitStats(period time.Duration, stopCh <-chan struct{}) {
metrics.SetGaugeWithLabels([]string{"nomad", "blocked_evals", "job", "memory"}, float32(v.MemoryMB), labels)
}

for k, v := range stats.BlockedResources.ByNodeInfo {
for k, v := range stats.BlockedResources.ByNode {
labels := []metrics.Label{
{Name: "datacenter", Value: k.Datacenter},
{Name: "node_class", Value: k.NodeClass},
{Name: "datacenter", Value: k.dc},
{Name: "node_class", Value: k.class},
}
metrics.SetGaugeWithLabels([]string{"nomad", "blocked_evals", "cpu"}, float32(v.CPU), labels)
metrics.SetGaugeWithLabels([]string{"nomad", "blocked_evals", "memory"}, float32(v.MemoryMB), labels)
Expand Down
71 changes: 35 additions & 36 deletions nomad/blocked_evals_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@ type BlockedStats struct {

// BlockedResources stores the amount of resources requested by blocked
// evaluations.
BlockedResources BlockedResourcesStats
BlockedResources *BlockedResourcesStats
}

// node stores information related to nodes.
type node struct {
dc string
class string
}

// NewBlockedStats returns a new BlockedStats.
Expand Down Expand Up @@ -59,16 +65,16 @@ func (b *BlockedStats) prune(cutoff time.Time) {
}
}

for k, v := range b.BlockedResources.ByNodeInfo {
for k, v := range b.BlockedResources.ByNode {
if shouldPrune(v) {
delete(b.BlockedResources.ByNodeInfo, k)
delete(b.BlockedResources.ByNode, k)
}
}
}

// generateResourceStats returns a summary of the resources requested by the
// input evaluation.
func generateResourceStats(eval *structs.Evaluation) BlockedResourcesStats {
func generateResourceStats(eval *structs.Evaluation) *BlockedResourcesStats {
dcs := make(map[string]struct{})
classes := make(map[string]struct{})

Expand All @@ -80,102 +86,95 @@ func generateResourceStats(eval *structs.Evaluation) BlockedResourcesStats {
for dc := range allocMetrics.NodesAvailable {
dcs[dc] = struct{}{}
}

for class := range allocMetrics.ClassExhausted {
classes[class] = struct{}{}
}

for _, r := range allocMetrics.ResourcesExhausted {
resources.CPU += r.CPU
resources.MemoryMB += r.MemoryMB
}
}

byJob := make(map[structs.NamespacedID]BlockedResourcesSummary)
byJob[structs.NewNamespacedID(eval.JobID, eval.Namespace)] = resources
nsID := structs.NewNamespacedID(eval.JobID, eval.Namespace)
byJob[nsID] = resources

byNodeInfo := make(map[NodeInfo]BlockedResourcesSummary)
byNodeInfo := make(map[node]BlockedResourcesSummary)
for dc := range dcs {
for class := range classes {
k := NodeInfo{dc, class}
k := node{dc: dc, class: class}
byNodeInfo[k] = resources
}
}

return BlockedResourcesStats{
ByJob: byJob,
ByNodeInfo: byNodeInfo,
return &BlockedResourcesStats{
ByJob: byJob,
ByNode: byNodeInfo,
}
}

// BlockedResourcesStats stores resources requested by block evaluations
// split into different dimensions.
// BlockedResourcesStats stores resources requested by blocked evaluations,
// tracked both by job and by node.
type BlockedResourcesStats struct {
ByJob map[structs.NamespacedID]BlockedResourcesSummary
ByNodeInfo map[NodeInfo]BlockedResourcesSummary
ByJob map[structs.NamespacedID]BlockedResourcesSummary
ByNode map[node]BlockedResourcesSummary
}

// NewBlockedResourcesStats returns a new BlockedResourcesStats.
func NewBlockedResourcesStats() BlockedResourcesStats {
return BlockedResourcesStats{
ByJob: make(map[structs.NamespacedID]BlockedResourcesSummary),
ByNodeInfo: make(map[NodeInfo]BlockedResourcesSummary),
func NewBlockedResourcesStats() *BlockedResourcesStats {
return &BlockedResourcesStats{
ByJob: make(map[structs.NamespacedID]BlockedResourcesSummary),
ByNode: make(map[node]BlockedResourcesSummary),
}
}

// Copy returns a deep copy of the blocked resource stats.
func (b BlockedResourcesStats) Copy() BlockedResourcesStats {
func (b *BlockedResourcesStats) Copy() *BlockedResourcesStats {
result := NewBlockedResourcesStats()

for k, v := range b.ByJob {
result.ByJob[k] = v
result.ByJob[k] = v // value copy
}

for k, v := range b.ByNodeInfo {
result.ByNodeInfo[k] = v
for k, v := range b.ByNode {
result.ByNode[k] = v // value copy
}

return result
}

// Add returns a new BlockedResourcesStats with the values set to the current
// resource values plus the input.
func (b BlockedResourcesStats) Add(a BlockedResourcesStats) BlockedResourcesStats {
func (b *BlockedResourcesStats) Add(a *BlockedResourcesStats) *BlockedResourcesStats {
result := b.Copy()

for k, v := range a.ByJob {
result.ByJob[k] = b.ByJob[k].Add(v)
}

for k, v := range a.ByNodeInfo {
result.ByNodeInfo[k] = b.ByNodeInfo[k].Add(v)
for k, v := range a.ByNode {
result.ByNode[k] = b.ByNode[k].Add(v)
}

return result
}

// Subtract returns a new BlockedResourcesStats with the values set to the
// current resource values minus the input.
func (b BlockedResourcesStats) Subtract(a BlockedResourcesStats) BlockedResourcesStats {
func (b *BlockedResourcesStats) Subtract(a *BlockedResourcesStats) *BlockedResourcesStats {
result := b.Copy()

for k, v := range a.ByJob {
result.ByJob[k] = b.ByJob[k].Subtract(v)
}

for k, v := range a.ByNodeInfo {
result.ByNodeInfo[k] = b.ByNodeInfo[k].Subtract(v)
for k, v := range a.ByNode {
result.ByNode[k] = b.ByNode[k].Subtract(v)
}

return result
}

// NodeInfo stores information related to nodes.
type NodeInfo struct {
Datacenter string
NodeClass string
}

// BlockedResourcesSummary stores resource values for blocked evals.
type BlockedResourcesSummary struct {
Timestamp time.Time
Expand Down
Loading

0 comments on commit b844f30

Please sign in to comment.