diff --git a/command/alloc_status.go b/command/alloc_status.go index 4e21d18adff2..d8a5e841b655 100644 --- a/command/alloc_status.go +++ b/command/alloc_status.go @@ -12,6 +12,7 @@ import ( "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/api/contexts" "github.com/hashicorp/nomad/client/allocrunner/taskrunner/restarts" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/structs" "github.com/posener/complete" ) @@ -586,7 +587,13 @@ func (c *AllocStatusCommand) outputTaskResources(alloc *api.Allocation, task str cpuUsage = fmt.Sprintf("%v/%v", math.Floor(cs.TotalTicks), cpuUsage) } if ms := ru.ResourceUsage.MemoryStats; ms != nil { - memUsage = fmt.Sprintf("%v/%v", humanize.IBytes(ms.RSS), memUsage) + // Nomad uses RSS as the top-level metric to report, for historical reasons, + // but it's not always measured (e.g. with cgroup-v2) + usage := ms.RSS + if usage == 0 && !helper.SliceStringContains(ms.Measured, "RSS") { + usage = ms.Usage + } + memUsage = fmt.Sprintf("%v/%v", humanize.IBytes(usage), memUsage) } deviceStats = ru.ResourceUsage.DeviceStats } diff --git a/drivers/docker/util/stats_posix.go b/drivers/docker/util/stats_posix.go index f27bd2d9a002..356096395763 100644 --- a/drivers/docker/util/stats_posix.go +++ b/drivers/docker/util/stats_posix.go @@ -12,17 +12,28 @@ import ( var ( DockerMeasuredCPUStats = []string{"Throttled Periods", "Throttled Time", "Percent"} - DockerMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage", "Max Usage"} + + // cgroup-v2 only exposes a subset of memory stats + DockerCgroupV1MeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage", "Max Usage"} + DockerCgroupV2MeasuredMemStats = []string{"Cache", "Swap", "Usage"} ) func DockerStatsToTaskResourceUsage(s *docker.Stats) *cstructs.TaskResourceUsage { + measuredMems := DockerCgroupV1MeasuredMemStats + + // use a simple heuristic to check if cgroup-v2 is used. + // go-dockerclient doesn't distinguish between 0 and not-present value + if s.MemoryStats.Stats.Rss == 0 && s.MemoryStats.MaxUsage == 0 && s.MemoryStats.Usage != 0 { + measuredMems = DockerCgroupV2MeasuredMemStats + } + ms := &cstructs.MemoryStats{ RSS: s.MemoryStats.Stats.Rss, Cache: s.MemoryStats.Stats.Cache, Swap: s.MemoryStats.Stats.Swap, Usage: s.MemoryStats.Usage, MaxUsage: s.MemoryStats.MaxUsage, - Measured: DockerMeasuredMemStats, + Measured: measuredMems, } cs := &cstructs.CpuStats{ diff --git a/drivers/shared/executor/executor_linux.go b/drivers/shared/executor/executor_linux.go index 0952d332e2f4..ef302caf3f74 100644 --- a/drivers/shared/executor/executor_linux.go +++ b/drivers/shared/executor/executor_linux.go @@ -39,8 +39,11 @@ const ( ) var ( - // ExecutorCgroupMeasuredMemStats is the list of memory stats captured by the executor - ExecutorCgroupMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage", "Max Usage", "Kernel Usage", "Kernel Max Usage"} + // ExecutorCgroupV1MeasuredMemStats is the list of memory stats captured by the executor with cgroup-v1 + ExecutorCgroupV1MeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage", "Max Usage", "Kernel Usage", "Kernel Max Usage"} + + // ExecutorCgroupV2MeasuredMemStats is the list of memory stats captured by the executor with cgroup-v2. cgroup-v2 exposes different memory stats and no longer reports rss or max usage. + ExecutorCgroupV2MeasuredMemStats = []string{"Cache", "Swap", "Usage"} // ExecutorCgroupMeasuredCpuStats is the list of CPU stats captures by the executor ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"} @@ -342,6 +345,12 @@ func (l *LibcontainerExecutor) Stats(ctx context.Context, interval time.Duration func (l *LibcontainerExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, ctx context.Context, interval time.Duration) { defer close(ch) timer := time.NewTimer(0) + + measuredMemStats := ExecutorCgroupV1MeasuredMemStats + if cgroups.IsCgroup2UnifiedMode() { + measuredMemStats = ExecutorCgroupV2MeasuredMemStats + } + for { select { case <-ctx.Done(): @@ -379,7 +388,7 @@ func (l *LibcontainerExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, MaxUsage: maxUsage, KernelUsage: stats.MemoryStats.KernelUsage.Usage, KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage, - Measured: ExecutorCgroupMeasuredMemStats, + Measured: measuredMemStats, } // CPU Related Stats