diff --git a/CHANGELOG.md b/CHANGELOG.md index b2dbb56e5a24..3b20c87f9175 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ __BACKWARDS INCOMPATIBILITIES:__ * core: null characters are prohibited in region, datacenter, job name/ID, task group name, and task name [[GH-9020](https://github.com/hashicorp/nomad/issues/9020)] * csi: registering a CSI volume with a `block-device` attachment mode and `mount_options` now returns a validation error, instead of silently dropping the `mount_options`. [[GH-9044](https://github.com/hashicorp/nomad/issues/9044)] * driver/docker: Tasks are now issued SIGTERM instead of SIGINT when stopping [[GH-8932](https://github.com/hashicorp/nomad/issues/8932)] + * telemetry: removed backwards compatible/untagged metrics deprecated in 0.7 [[GH-9080](https://github.com/hashicorp/nomad/issues/9080)] BUG FIXES: diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go index d5d5d5f8f109..7d7fa5184a7b 100644 --- a/client/allocrunner/taskrunner/task_runner.go +++ b/client/allocrunner/taskrunner/task_runner.go @@ -674,7 +674,7 @@ func (tr *TaskRunner) emitExitResultEvent(result *drivers.ExitResult) { tr.EmitEvent(event) - if result.OOMKilled && !tr.clientConfig.DisableTaggedMetrics { + if result.OOMKilled { metrics.IncrCounterWithLabels([]string{"client", "allocs", "oom_killed"}, 1, tr.baseLabels) } } @@ -1115,12 +1115,7 @@ func (tr *TaskRunner) updateStateImpl(state string) error { // Capture the start time if it is just starting if oldState != structs.TaskStateRunning { taskState.StartedAt = time.Now().UTC() - if !tr.clientConfig.DisableTaggedMetrics { - metrics.IncrCounterWithLabels([]string{"client", "allocs", "running"}, 1, tr.baseLabels) - } - //if r.config.BackwardsCompatibleMetrics { - //metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "running"}, 1) - //} + metrics.IncrCounterWithLabels([]string{"client", "allocs", "running"}, 1, 
tr.baseLabels) } case structs.TaskStateDead: // Capture the finished time if not already set @@ -1130,19 +1125,9 @@ func (tr *TaskRunner) updateStateImpl(state string) error { // Emitting metrics to indicate task complete and failures if taskState.Failed { - if !tr.clientConfig.DisableTaggedMetrics { - metrics.IncrCounterWithLabels([]string{"client", "allocs", "failed"}, 1, tr.baseLabels) - } - //if r.config.BackwardsCompatibleMetrics { - //metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "failed"}, 1) - //} + metrics.IncrCounterWithLabels([]string{"client", "allocs", "failed"}, 1, tr.baseLabels) } else { - if !tr.clientConfig.DisableTaggedMetrics { - metrics.IncrCounterWithLabels([]string{"client", "allocs", "complete"}, 1, tr.baseLabels) - } - //if r.config.BackwardsCompatibleMetrics { - //metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "complete"}, 1) - //} + metrics.IncrCounterWithLabels([]string{"client", "allocs", "complete"}, 1, tr.baseLabels) } } @@ -1202,12 +1187,7 @@ func (tr *TaskRunner) appendEvent(event *structs.TaskEvent) error { // XXX This seems like a super awkward spot for this? Why not shouldRestart? 
// Update restart metrics if event.Type == structs.TaskRestarting { - if !tr.clientConfig.DisableTaggedMetrics { - metrics.IncrCounterWithLabels([]string{"client", "allocs", "restart"}, 1, tr.baseLabels) - } - //if r.config.BackwardsCompatibleMetrics { - //metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "restart"}, 1) - //} + metrics.IncrCounterWithLabels([]string{"client", "allocs", "restart"}, 1, tr.baseLabels) tr.state.Restarts++ tr.state.LastRestart = time.Unix(0, event.Time) } @@ -1322,38 +1302,23 @@ func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) { allocatedMem = float32(taskRes.Memory.MemoryMB) * 1024 * 1024 } - if !tr.clientConfig.DisableTaggedMetrics { - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "rss"}, - float32(ru.ResourceUsage.MemoryStats.RSS), tr.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "cache"}, - float32(ru.ResourceUsage.MemoryStats.Cache), tr.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "swap"}, - float32(ru.ResourceUsage.MemoryStats.Swap), tr.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "usage"}, - float32(ru.ResourceUsage.MemoryStats.Usage), tr.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "max_usage"}, - float32(ru.ResourceUsage.MemoryStats.MaxUsage), tr.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "kernel_usage"}, - float32(ru.ResourceUsage.MemoryStats.KernelUsage), tr.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "kernel_max_usage"}, - float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage), tr.baseLabels) - if allocatedMem > 0 { - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "allocated"}, - allocatedMem, tr.baseLabels) - } - } - - if tr.clientConfig.BackwardsCompatibleMetrics { - metrics.SetGauge([]string{"client", 
"allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "rss"}, float32(ru.ResourceUsage.MemoryStats.RSS)) - metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "cache"}, float32(ru.ResourceUsage.MemoryStats.Cache)) - metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "swap"}, float32(ru.ResourceUsage.MemoryStats.Swap)) - metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "usage"}, float32(ru.ResourceUsage.MemoryStats.Usage)) - metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "max_usage"}, float32(ru.ResourceUsage.MemoryStats.MaxUsage)) - metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "kernel_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelUsage)) - metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "kernel_max_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage)) - if allocatedMem > 0 { - metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "memory", "allocated"}, allocatedMem) - } + metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "rss"}, + float32(ru.ResourceUsage.MemoryStats.RSS), tr.baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "cache"}, + float32(ru.ResourceUsage.MemoryStats.Cache), tr.baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "swap"}, + float32(ru.ResourceUsage.MemoryStats.Swap), tr.baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "usage"}, + float32(ru.ResourceUsage.MemoryStats.Usage), tr.baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", 
"max_usage"}, + float32(ru.ResourceUsage.MemoryStats.MaxUsage), tr.baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "kernel_usage"}, + float32(ru.ResourceUsage.MemoryStats.KernelUsage), tr.baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "kernel_max_usage"}, + float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage), tr.baseLabels) + if allocatedMem > 0 { + metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "allocated"}, + allocatedMem, tr.baseLabels) } } @@ -1365,35 +1330,21 @@ func (tr *TaskRunner) setGaugeForCPU(ru *cstructs.TaskResourceUsage) { allocatedCPU = float32(taskRes.Cpu.CpuShares) } - if !tr.clientConfig.DisableTaggedMetrics { - metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "total_percent"}, - float32(ru.ResourceUsage.CpuStats.Percent), tr.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "system"}, - float32(ru.ResourceUsage.CpuStats.SystemMode), tr.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "user"}, - float32(ru.ResourceUsage.CpuStats.UserMode), tr.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "throttled_time"}, - float32(ru.ResourceUsage.CpuStats.ThrottledTime), tr.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "throttled_periods"}, - float32(ru.ResourceUsage.CpuStats.ThrottledPeriods), tr.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "total_ticks"}, - float32(ru.ResourceUsage.CpuStats.TotalTicks), tr.baseLabels) - if allocatedCPU > 0 { - metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "allocated"}, - allocatedCPU, tr.baseLabels) - } - } - - if tr.clientConfig.BackwardsCompatibleMetrics { - metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "cpu", "total_percent"}, float32(ru.ResourceUsage.CpuStats.Percent)) - 
metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "cpu", "system"}, float32(ru.ResourceUsage.CpuStats.SystemMode)) - metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "cpu", "user"}, float32(ru.ResourceUsage.CpuStats.UserMode)) - metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "cpu", "throttled_time"}, float32(ru.ResourceUsage.CpuStats.ThrottledTime)) - metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "cpu", "throttled_periods"}, float32(ru.ResourceUsage.CpuStats.ThrottledPeriods)) - metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "cpu", "total_ticks"}, float32(ru.ResourceUsage.CpuStats.TotalTicks)) - if allocatedCPU > 0 { - metrics.SetGauge([]string{"client", "allocs", alloc.Job.Name, alloc.TaskGroup, tr.allocID, tr.taskName, "cpu", "allocated"}, allocatedCPU) - } + metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "total_percent"}, + float32(ru.ResourceUsage.CpuStats.Percent), tr.baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "system"}, + float32(ru.ResourceUsage.CpuStats.SystemMode), tr.baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "user"}, + float32(ru.ResourceUsage.CpuStats.UserMode), tr.baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "throttled_time"}, + float32(ru.ResourceUsage.CpuStats.ThrottledTime), tr.baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "throttled_periods"}, + float32(ru.ResourceUsage.CpuStats.ThrottledPeriods), tr.baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "total_ticks"}, + float32(ru.ResourceUsage.CpuStats.TotalTicks), tr.baseLabels) + if allocatedCPU > 0 { + metrics.SetGaugeWithLabels([]string{"client", 
"allocs", "cpu", "allocated"}, + allocatedCPU, tr.baseLabels) } } diff --git a/client/client.go b/client/client.go index 54d11c098a20..c1bd1afcdb8b 100644 --- a/client/client.go +++ b/client/client.go @@ -18,6 +18,10 @@ import ( "github.com/hashicorp/consul/lib" hclog "github.com/hashicorp/go-hclog" multierror "github.com/hashicorp/go-multierror" + vaultapi "github.com/hashicorp/vault/api" + "github.com/pkg/errors" + "github.com/shirou/gopsutil/host" + "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/allocrunner" "github.com/hashicorp/nomad/client/allocrunner/interfaces" @@ -47,9 +51,6 @@ import ( "github.com/hashicorp/nomad/plugins/csi" "github.com/hashicorp/nomad/plugins/device" "github.com/hashicorp/nomad/plugins/drivers" - vaultapi "github.com/hashicorp/vault/api" - "github.com/pkg/errors" - "github.com/shirou/gopsutil/host" ) const ( @@ -2784,68 +2785,40 @@ func (c *Client) emitStats() { // setGaugeForMemoryStats proxies metrics for memory specific statistics func (c *Client) setGaugeForMemoryStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) { - if !c.config.DisableTaggedMetrics { - metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "total"}, float32(hStats.Memory.Total), baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "available"}, float32(hStats.Memory.Available), baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "used"}, float32(hStats.Memory.Used), baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "free"}, float32(hStats.Memory.Free), baseLabels) - } - - if c.config.BackwardsCompatibleMetrics { - metrics.SetGauge([]string{"client", "host", "memory", nodeID, "total"}, float32(hStats.Memory.Total)) - metrics.SetGauge([]string{"client", "host", "memory", nodeID, "available"}, float32(hStats.Memory.Available)) - metrics.SetGauge([]string{"client", "host", "memory", nodeID, "used"}, 
float32(hStats.Memory.Used)) - metrics.SetGauge([]string{"client", "host", "memory", nodeID, "free"}, float32(hStats.Memory.Free)) - } + metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "total"}, float32(hStats.Memory.Total), baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "available"}, float32(hStats.Memory.Available), baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "used"}, float32(hStats.Memory.Used), baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "free"}, float32(hStats.Memory.Free), baseLabels) } // setGaugeForCPUStats proxies metrics for CPU specific statistics func (c *Client) setGaugeForCPUStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) { for _, cpu := range hStats.CPU { - if !c.config.DisableTaggedMetrics { - labels := append(baseLabels, metrics.Label{ - Name: "cpu", - Value: cpu.CPU, - }) - - metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "total"}, float32(cpu.Total), labels) - metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "user"}, float32(cpu.User), labels) - metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "idle"}, float32(cpu.Idle), labels) - metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "system"}, float32(cpu.System), labels) - } + labels := append(baseLabels, metrics.Label{ + Name: "cpu", + Value: cpu.CPU, + }) - if c.config.BackwardsCompatibleMetrics { - metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "total"}, float32(cpu.Total)) - metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "user"}, float32(cpu.User)) - metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "idle"}, float32(cpu.Idle)) - metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "system"}, float32(cpu.System)) - } + metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "total"}, float32(cpu.Total), labels) + 
metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "user"}, float32(cpu.User), labels) + metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "idle"}, float32(cpu.Idle), labels) + metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "system"}, float32(cpu.System), labels) } } // setGaugeForDiskStats proxies metrics for disk specific statistics func (c *Client) setGaugeForDiskStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) { for _, disk := range hStats.DiskStats { - if !c.config.DisableTaggedMetrics { - labels := append(baseLabels, metrics.Label{ - Name: "disk", - Value: disk.Device, - }) - - metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "size"}, float32(disk.Size), labels) - metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "used"}, float32(disk.Used), labels) - metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "available"}, float32(disk.Available), labels) - metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "used_percent"}, float32(disk.UsedPercent), labels) - metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "inodes_percent"}, float32(disk.InodesUsedPercent), labels) - } + labels := append(baseLabels, metrics.Label{ + Name: "disk", + Value: disk.Device, + }) - if c.config.BackwardsCompatibleMetrics { - metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "size"}, float32(disk.Size)) - metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "used"}, float32(disk.Used)) - metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "available"}, float32(disk.Available)) - metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "used_percent"}, float32(disk.UsedPercent)) - metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "inodes_percent"}, float32(disk.InodesUsedPercent)) - } + metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "size"}, 
float32(disk.Size), labels) + metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "used"}, float32(disk.Used), labels) + metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "available"}, float32(disk.Available), labels) + metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "used_percent"}, float32(disk.UsedPercent), labels) + metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "inodes_percent"}, float32(disk.InodesUsedPercent), labels) } } @@ -2859,30 +2832,16 @@ func (c *Client) setGaugeForAllocationStats(nodeID string, baseLabels []metrics. allocated := c.getAllocatedResources(node) // Emit allocated - if !c.config.DisableTaggedMetrics { - metrics.SetGaugeWithLabels([]string{"client", "allocated", "memory"}, float32(allocated.Flattened.Memory.MemoryMB), baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocated", "disk"}, float32(allocated.Shared.DiskMB), baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocated", "cpu"}, float32(allocated.Flattened.Cpu.CpuShares), baseLabels) - } - - if c.config.BackwardsCompatibleMetrics { - metrics.SetGauge([]string{"client", "allocated", "memory", nodeID}, float32(allocated.Flattened.Memory.MemoryMB)) - metrics.SetGauge([]string{"client", "allocated", "disk", nodeID}, float32(allocated.Shared.DiskMB)) - metrics.SetGauge([]string{"client", "allocated", "cpu", nodeID}, float32(allocated.Flattened.Cpu.CpuShares)) - } + metrics.SetGaugeWithLabels([]string{"client", "allocated", "memory"}, float32(allocated.Flattened.Memory.MemoryMB), baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocated", "disk"}, float32(allocated.Shared.DiskMB), baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocated", "cpu"}, float32(allocated.Flattened.Cpu.CpuShares), baseLabels) for _, n := range allocated.Flattened.Networks { - if !c.config.DisableTaggedMetrics { - labels := append(baseLabels, metrics.Label{ - Name: "device", - Value: n.Device, - }) - 
metrics.SetGaugeWithLabels([]string{"client", "allocated", "network"}, float32(n.MBits), labels) - } - - if c.config.BackwardsCompatibleMetrics { - metrics.SetGauge([]string{"client", "allocated", "network", n.Device, nodeID}, float32(n.MBits)) - } + labels := append(baseLabels, metrics.Label{ + Name: "device", + Value: n.Device, + }) + metrics.SetGaugeWithLabels([]string{"client", "allocated", "network"}, float32(n.MBits), labels) } // Emit unallocated @@ -2890,17 +2849,9 @@ func (c *Client) setGaugeForAllocationStats(nodeID string, baseLabels []metrics. unallocatedDisk := total.Disk.DiskMB - res.Disk.DiskMB - allocated.Shared.DiskMB unallocatedCpu := total.Cpu.CpuShares - res.Cpu.CpuShares - allocated.Flattened.Cpu.CpuShares - if !c.config.DisableTaggedMetrics { - metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "unallocated", "cpu"}, float32(unallocatedCpu), baseLabels) - } - - if c.config.BackwardsCompatibleMetrics { - metrics.SetGauge([]string{"client", "unallocated", "memory", nodeID}, float32(unallocatedMem)) - metrics.SetGauge([]string{"client", "unallocated", "disk", nodeID}, float32(unallocatedDisk)) - metrics.SetGauge([]string{"client", "unallocated", "cpu", nodeID}, float32(unallocatedCpu)) - } + metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "unallocated", "cpu"}, float32(unallocatedCpu), baseLabels) totalComparable := total.Comparable() for _, n := range totalComparable.Flattened.Networks { @@ -2912,28 +2863,17 @@ func (c *Client) setGaugeForAllocationStats(nodeID string, baseLabels []metrics. 
} unallocatedMbits := n.MBits - usedMbits - if !c.config.DisableTaggedMetrics { - labels := append(baseLabels, metrics.Label{ - Name: "device", - Value: n.Device, - }) - metrics.SetGaugeWithLabels([]string{"client", "unallocated", "network"}, float32(unallocatedMbits), labels) - } - - if c.config.BackwardsCompatibleMetrics { - metrics.SetGauge([]string{"client", "unallocated", "network", n.Device, nodeID}, float32(unallocatedMbits)) - } + labels := append(baseLabels, metrics.Label{ + Name: "device", + Value: n.Device, + }) + metrics.SetGaugeWithLabels([]string{"client", "unallocated", "network"}, float32(unallocatedMbits), labels) } } // No labels are required so we emit with only a key/value syntax func (c *Client) setGaugeForUptime(hStats *stats.HostStats, baseLabels []metrics.Label) { - if !c.config.DisableTaggedMetrics { - metrics.SetGaugeWithLabels([]string{"client", "uptime"}, float32(hStats.Uptime), baseLabels) - } - if c.config.BackwardsCompatibleMetrics { - metrics.SetGauge([]string{"client", "uptime"}, float32(hStats.Uptime)) - } + metrics.SetGaugeWithLabels([]string{"client", "uptime"}, float32(hStats.Uptime), baseLabels) } // emitHostStats pushes host resource usage stats to remote metrics collection sinks @@ -2975,21 +2915,11 @@ func (c *Client) emitClientMetrics() { } } - if !c.config.DisableTaggedMetrics { - metrics.SetGaugeWithLabels([]string{"client", "allocations", "migrating"}, float32(migrating), labels) - metrics.SetGaugeWithLabels([]string{"client", "allocations", "blocked"}, float32(blocked), labels) - metrics.SetGaugeWithLabels([]string{"client", "allocations", "pending"}, float32(pending), labels) - metrics.SetGaugeWithLabels([]string{"client", "allocations", "running"}, float32(running), labels) - metrics.SetGaugeWithLabels([]string{"client", "allocations", "terminal"}, float32(terminal), labels) - } - - if c.config.BackwardsCompatibleMetrics { - metrics.SetGauge([]string{"client", "allocations", "migrating", nodeID}, float32(migrating)) - 
metrics.SetGauge([]string{"client", "allocations", "blocked", nodeID}, float32(blocked)) - metrics.SetGauge([]string{"client", "allocations", "pending", nodeID}, float32(pending)) - metrics.SetGauge([]string{"client", "allocations", "running", nodeID}, float32(running)) - metrics.SetGauge([]string{"client", "allocations", "terminal", nodeID}, float32(terminal)) - } + metrics.SetGaugeWithLabels([]string{"client", "allocations", "migrating"}, float32(migrating), labels) + metrics.SetGaugeWithLabels([]string{"client", "allocations", "blocked"}, float32(blocked), labels) + metrics.SetGaugeWithLabels([]string{"client", "allocations", "pending"}, float32(pending), labels) + metrics.SetGaugeWithLabels([]string{"client", "allocations", "running"}, float32(running), labels) + metrics.SetGaugeWithLabels([]string{"client", "allocations", "terminal"}, float32(terminal), labels) } // labels takes the base labels and appends the node state diff --git a/client/config/config.go b/client/config/config.go index b05719edbf3c..91869183e93b 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -205,20 +205,12 @@ type Config struct { // ACLPolicyTTL is how long we cache policy values for ACLPolicyTTL time.Duration - // DisableTaggedMetrics determines whether metrics will be displayed via a - // key/value/tag format, or simply a key/value format - DisableTaggedMetrics bool - // DisableRemoteExec disables remote exec targeting tasks on this client DisableRemoteExec bool // TemplateConfig includes configuration for template rendering TemplateConfig *ClientTemplateConfig - // BackwardsCompatibleMetrics determines whether to show methods of - // displaying metrics for older versions, or to only show the new format - BackwardsCompatibleMetrics bool - // RPCHoldTimeout is how long an RPC can be "held" before it is errored. // This is used to paper over a loss of leadership by instead holding RPCs, // so that the caller experiences a slow response rather than an error. 
@@ -321,18 +313,16 @@ func DefaultConfig() *Config { GCInodeUsageThreshold: 70, GCMaxAllocs: 50, NoHostUUID: true, - DisableTaggedMetrics: false, DisableRemoteExec: false, TemplateConfig: &ClientTemplateConfig{ FunctionDenylist: []string{"plugin"}, DisableSandbox: false, }, - BackwardsCompatibleMetrics: false, - RPCHoldTimeout: 5 * time.Second, - CNIPath: "/opt/cni/bin", - CNIConfigDir: "/opt/cni/config", - CNIInterfacePrefix: "eth", - HostNetworks: map[string]*structs.ClientHostNetworkConfig{}, + RPCHoldTimeout: 5 * time.Second, + CNIPath: "/opt/cni/bin", + CNIConfigDir: "/opt/cni/config", + CNIInterfacePrefix: "eth", + HostNetworks: map[string]*structs.ClientHostNetworkConfig{}, } } diff --git a/command/agent/agent.go b/command/agent/agent.go index 3791c1e9db68..016b77d16ebf 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -387,9 +387,7 @@ func convertServerConfig(agentConfig *Config) (*nomad.Config, error) { // Setup telemetry related config conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval - conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics conf.DisableDispatchedJobSummaryMetrics = agentConfig.Telemetry.DisableDispatchedJobSummaryMetrics - conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics // Parse Limits timeout from a string into durations if d, err := time.ParseDuration(agentConfig.Limits.RPCHandshakeTimeout); err != nil { @@ -616,8 +614,6 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) { conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics - conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics - conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics // Set the TLS related configs conf.TLSConfig = agentConfig.TLSConfig diff 
--git a/command/agent/agent_test.go b/command/agent/agent_test.go index ab91fcb59c8a..d9290bc59304 100644 --- a/command/agent/agent_test.go +++ b/command/agent/agent_test.go @@ -521,8 +521,6 @@ func TestAgent_Client_TelemetryConfiguration(t *testing.T) { conf := DefaultConfig() conf.DevMode = true - conf.Telemetry.DisableTaggedMetrics = true - conf.Telemetry.BackwardsCompatibleMetrics = true a := &Agent{config: conf} @@ -534,8 +532,6 @@ func TestAgent_Client_TelemetryConfiguration(t *testing.T) { assert.Equal(c.StatsCollectionInterval, telemetry.collectionInterval) assert.Equal(c.PublishNodeMetrics, telemetry.PublishNodeMetrics) assert.Equal(c.PublishAllocationMetrics, telemetry.PublishAllocationMetrics) - assert.Equal(c.DisableTaggedMetrics, telemetry.DisableTaggedMetrics) - assert.Equal(c.BackwardsCompatibleMetrics, telemetry.BackwardsCompatibleMetrics) } // TestAgent_HTTPCheck asserts Agent.agentHTTPCheck properly alters the HTTP diff --git a/command/agent/command.go b/command/agent/command.go index d94671e4e061..0ed3ff33814e 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -980,8 +980,7 @@ func (c *Command) setupTelemetry(config *Config) (*metrics.InmemSink, error) { metricsConf.EnableHostname = !telConfig.DisableHostname // Prefer the hostname as a label. 
- metricsConf.EnableHostnameLabel = !telConfig.DisableHostname && - !telConfig.DisableTaggedMetrics && !telConfig.BackwardsCompatibleMetrics + metricsConf.EnableHostnameLabel = !telConfig.DisableHostname if telConfig.UseNodeName { metricsConf.HostName = config.NodeName diff --git a/command/agent/config.go b/command/agent/config.go index 939dcaf4288b..5fd3d9487d7a 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -561,14 +561,6 @@ type Telemetry struct { PublishAllocationMetrics bool `hcl:"publish_allocation_metrics"` PublishNodeMetrics bool `hcl:"publish_node_metrics"` - // DisableTaggedMetrics disables a new version of generating metrics which - // uses tags - DisableTaggedMetrics bool `hcl:"disable_tagged_metrics"` - - // BackwardsCompatibleMetrics allows for generating metrics in a simple - // key/value structure as done in older versions of Nomad - BackwardsCompatibleMetrics bool `hcl:"backwards_compatible_metrics"` - // PrefixFilter allows for filtering out metrics from being collected PrefixFilter []string `hcl:"prefix_filter"` @@ -1642,14 +1634,6 @@ func (a *Telemetry) Merge(b *Telemetry) *Telemetry { result.CirconusBrokerSelectTag = b.CirconusBrokerSelectTag } - if b.DisableTaggedMetrics { - result.DisableTaggedMetrics = b.DisableTaggedMetrics - } - - if b.BackwardsCompatibleMetrics { - result.BackwardsCompatibleMetrics = b.BackwardsCompatibleMetrics - } - if b.PrefixFilter != nil { result.PrefixFilter = b.PrefixFilter } diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index f0d09b5f15d2..91108710c24e 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -180,8 +180,6 @@ var basicConfig = &Config{ collectionInterval: 3 * time.Second, PublishAllocationMetrics: true, PublishNodeMetrics: true, - DisableTaggedMetrics: true, - BackwardsCompatibleMetrics: true, }, LeaveOnInt: true, LeaveOnTerm: true, diff --git a/command/agent/config_test.go 
b/command/agent/config_test.go index a54af7f94ea6..32115d32dbc2 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -67,8 +67,6 @@ func TestConfig_Merge(t *testing.T) { DataDogTags: []string{"cat1:tag1", "cat2:tag2"}, PrometheusMetrics: true, DisableHostname: false, - DisableTaggedMetrics: true, - BackwardsCompatibleMetrics: true, CirconusAPIToken: "0", CirconusAPIApp: "nomadic", CirconusAPIURL: "http://api.circonus.com/v2", @@ -256,8 +254,6 @@ func TestConfig_Merge(t *testing.T) { DisableHostname: true, PublishNodeMetrics: true, PublishAllocationMetrics: true, - DisableTaggedMetrics: true, - BackwardsCompatibleMetrics: true, CirconusAPIToken: "1", CirconusAPIApp: "nomad", CirconusAPIURL: "https://api.circonus.com/v2", diff --git a/command/agent/metrics_endpoint_test.go b/command/agent/metrics_endpoint_test.go index d83a15003dc4..0cdc8cfb0484 100644 --- a/command/agent/metrics_endpoint_test.go +++ b/command/agent/metrics_endpoint_test.go @@ -73,8 +73,6 @@ func TestHTTP_FreshClientAllocMetrics(t *testing.T) { httpTest(t, func(c *Config) { c.Telemetry.PublishAllocationMetrics = true c.Telemetry.PublishNodeMetrics = true - c.Telemetry.BackwardsCompatibleMetrics = false - c.Telemetry.DisableTaggedMetrics = false }, func(s *TestAgent) { // Create the job, wait for it to finish job := mock.BatchJob() diff --git a/command/agent/testdata/basic.hcl b/command/agent/testdata/basic.hcl index 4e2036b4f021..43499861f305 100644 --- a/command/agent/testdata/basic.hcl +++ b/command/agent/testdata/basic.hcl @@ -177,15 +177,13 @@ audit { } telemetry { - statsite_address = "127.0.0.1:1234" - statsd_address = "127.0.0.1:2345" - prometheus_metrics = true - disable_hostname = true - collection_interval = "3s" - publish_allocation_metrics = true - publish_node_metrics = true - disable_tagged_metrics = true - backwards_compatible_metrics = true + statsite_address = "127.0.0.1:1234" + statsd_address = "127.0.0.1:2345" + prometheus_metrics = true + 
disable_hostname = true + collection_interval = "3s" + publish_allocation_metrics = true + publish_node_metrics = true } leave_on_interrupt = true diff --git a/command/agent/testdata/basic.json b/command/agent/testdata/basic.json index b202eb07935d..e97f4d3f01ab 100644 --- a/command/agent/testdata/basic.json +++ b/command/agent/testdata/basic.json @@ -313,10 +313,8 @@ "syslog_facility": "LOCAL1", "telemetry": [ { - "backwards_compatible_metrics": true, "collection_interval": "3s", "disable_hostname": true, - "disable_tagged_metrics": true, "prometheus_metrics": true, "publish_allocation_metrics": true, "publish_node_metrics": true, diff --git a/nomad/config.go b/nomad/config.go index 14e92adf33fb..764ebb1ed894 100644 --- a/nomad/config.go +++ b/nomad/config.go @@ -298,18 +298,10 @@ type Config struct { // publishes metrics which are periodic in nature like updating gauges StatsCollectionInterval time.Duration - // DisableTaggedMetrics determines whether metrics will be displayed via a - // key/value/tag format, or simply a key/value format - DisableTaggedMetrics bool - // DisableDispatchedJobSummaryMetrics allows for ignore dispatched jobs when // publishing Job summary metrics DisableDispatchedJobSummaryMetrics bool - // BackwardsCompatibleMetrics determines whether to show methods of - // displaying metrics for older versions, or to only show the new format - BackwardsCompatibleMetrics bool - // AutopilotConfig is used to apply the initial autopilot config when // bootstrapping. 
AutopilotConfig *structs.AutopilotConfig diff --git a/nomad/leader.go b/nomad/leader.go index 8ad929b181eb..8576219ddcac 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -769,65 +769,55 @@ func (s *Server) publishJobSummaryMetrics(stopCh chan struct{}) { func (s *Server) iterateJobSummaryMetrics(summary *structs.JobSummary) { for name, tgSummary := range summary.Summary { - if !s.config.DisableTaggedMetrics { - labels := []metrics.Label{ - { - Name: "job", - Value: summary.JobID, - }, - { - Name: "task_group", - Value: name, - }, - { - Name: "namespace", - Value: summary.Namespace, - }, - } - - if strings.Contains(summary.JobID, "/dispatch-") { - jobInfo := strings.Split(summary.JobID, "/dispatch-") - labels = append(labels, metrics.Label{ - Name: "parent_id", - Value: jobInfo[0], - }, metrics.Label{ - Name: "dispatch_id", - Value: jobInfo[1], - }) - } - - if strings.Contains(summary.JobID, "/periodic-") { - jobInfo := strings.Split(summary.JobID, "/periodic-") - labels = append(labels, metrics.Label{ - Name: "parent_id", - Value: jobInfo[0], - }, metrics.Label{ - Name: "periodic_id", - Value: jobInfo[1], - }) - } + labels := []metrics.Label{ + { + Name: "job", + Value: summary.JobID, + }, + { + Name: "task_group", + Value: name, + }, + { + Name: "namespace", + Value: summary.Namespace, + }, + } - metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "queued"}, - float32(tgSummary.Queued), labels) - metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "complete"}, - float32(tgSummary.Complete), labels) - metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "failed"}, - float32(tgSummary.Failed), labels) - metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "running"}, - float32(tgSummary.Running), labels) - metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "starting"}, - float32(tgSummary.Starting), labels) - metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "lost"}, - float32(tgSummary.Lost), labels) + if 
strings.Contains(summary.JobID, "/dispatch-") { + jobInfo := strings.Split(summary.JobID, "/dispatch-") + labels = append(labels, metrics.Label{ + Name: "parent_id", + Value: jobInfo[0], + }, metrics.Label{ + Name: "dispatch_id", + Value: jobInfo[1], + }) } - if s.config.BackwardsCompatibleMetrics { - metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "queued"}, float32(tgSummary.Queued)) - metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "complete"}, float32(tgSummary.Complete)) - metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "failed"}, float32(tgSummary.Failed)) - metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "running"}, float32(tgSummary.Running)) - metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "starting"}, float32(tgSummary.Starting)) - metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "lost"}, float32(tgSummary.Lost)) + + if strings.Contains(summary.JobID, "/periodic-") { + jobInfo := strings.Split(summary.JobID, "/periodic-") + labels = append(labels, metrics.Label{ + Name: "parent_id", + Value: jobInfo[0], + }, metrics.Label{ + Name: "periodic_id", + Value: jobInfo[1], + }) } + + metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "queued"}, + float32(tgSummary.Queued), labels) + metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "complete"}, + float32(tgSummary.Complete), labels) + metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "failed"}, + float32(tgSummary.Failed), labels) + metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "running"}, + float32(tgSummary.Running), labels) + metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "starting"}, + float32(tgSummary.Starting), labels) + metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "lost"}, + float32(tgSummary.Lost), labels) } } diff --git a/website/pages/docs/configuration/telemetry.mdx 
b/website/pages/docs/configuration/telemetry.mdx index 680553eca377..2ed3db7e16c9 100644 --- a/website/pages/docs/configuration/telemetry.mdx +++ b/website/pages/docs/configuration/telemetry.mdx @@ -50,17 +50,6 @@ The following options are available on all telemetry configurations. - `publish_node_metrics` `(bool: false)` - Specifies if Nomad should publish runtime metrics of nodes. -- `backwards_compatible_metrics` `(bool: false)` - Specifies if Nomad should - publish metrics that are backwards compatible with versions below 0.7, as - post version 0.7, Nomad emits tagged metrics. All new metrics will - only be added to tagged metrics. Note that this option is used to transition - monitoring to tagged metrics and will eventually be deprecated. - -- `disable_tagged_metrics` `(bool: false)` - Specifies if Nomad should not emit - tagged metrics and only emit metrics compatible with versions below Nomad - 0.7. Note that this option is used to transition monitoring to tagged - metrics and will eventually be deprecated. - - `filter_default` `(bool: true)` - This controls whether to allow metrics that have not been specified by the filter. Defaults to true, which will allow all metrics when no filters are provided. When set to false with no filters, no diff --git a/website/pages/docs/upgrade/upgrade-specific.mdx b/website/pages/docs/upgrade/upgrade-specific.mdx index 241b0981bea9..7bd98d8664d9 100644 --- a/website/pages/docs/upgrade/upgrade-specific.mdx +++ b/website/pages/docs/upgrade/upgrade-specific.mdx @@ -24,6 +24,12 @@ When stopping tasks running with the Docker task driver, Nomad documents that a versions of Nomad would issue `SIGINT` instead. Starting again with Nomad v0.13.0 `SIGTERM` will be sent by default when stopping Docker tasks. +### Deprecated metrics have been removed + +Nomad v0.7.0 added support for tagged metrics and deprecated untagged metrics. +There was support for configuring backwards-compatible metrics.
This support has +been removed with v0.13.0, and all metrics will be emitted with tags. + ### Null characters in region, datacenter, job name/ID, task group name, and task names Starting with Nomad v0.13.0, jobs will fail validation if any of the following