diff --git a/docs/changelog/77128.yaml b/docs/changelog/77128.yaml
new file mode 100644
index 0000000000000..63133b6c76acc
--- /dev/null
+++ b/docs/changelog/77128.yaml
@@ -0,0 +1,7 @@
+pr: 77128
+summary: Handle cgroups v2 in `OsProbe`
+area: Infra/Core
+type: enhancement
+issues:
+ - 77126
+ - 76812
diff --git a/qa/os/src/test/java/org/elasticsearch/packaging/test/DockerTests.java b/qa/os/src/test/java/org/elasticsearch/packaging/test/DockerTests.java
index d5fd91a427bac..45689f0fed691 100644
--- a/qa/os/src/test/java/org/elasticsearch/packaging/test/DockerTests.java
+++ b/qa/os/src/test/java/org/elasticsearch/packaging/test/DockerTests.java
@@ -849,7 +849,6 @@ public void test131InitProcessHasCorrectPID() {
/**
* Check that Elasticsearch reports per-node cgroup information.
*/
- @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/76812")
public void test140CgroupOsStatsAreAvailable() throws Exception {
waitForElasticsearch(installation, USERNAME, PASSWORD);
diff --git a/qa/os/src/test/java/org/elasticsearch/packaging/util/Packages.java b/qa/os/src/test/java/org/elasticsearch/packaging/util/Packages.java
index 20cbaeac464f9..db07be0d1eb86 100644
--- a/qa/os/src/test/java/org/elasticsearch/packaging/util/Packages.java
+++ b/qa/os/src/test/java/org/elasticsearch/packaging/util/Packages.java
@@ -247,12 +247,18 @@ private static void verifyDefaultInstallation(Installation es, Distribution dist
/**
* Starts Elasticsearch, without checking that startup is successful.
*/
- public static Shell.Result runElasticsearchStartCommand(Shell sh) throws IOException {
+ public static Shell.Result runElasticsearchStartCommand(Shell sh) {
if (isSystemd()) {
+ Packages.JournaldWrapper journald = new Packages.JournaldWrapper(sh);
sh.run("systemctl daemon-reload");
sh.run("systemctl enable elasticsearch.service");
sh.run("systemctl is-enabled elasticsearch.service");
- return sh.runIgnoreExitCode("systemctl start elasticsearch.service");
+ Result exitCode = sh.runIgnoreExitCode("systemctl start elasticsearch.service");
+ if (exitCode.isSuccess() == false) {
+ logger.warn(sh.runIgnoreExitCode("systemctl status elasticsearch.service").stdout);
+ logger.warn(journald.getLogs().stdout);
+ }
+ return exitCode;
}
return sh.runIgnoreExitCode("service elasticsearch start");
}
diff --git a/server/src/main/java/org/elasticsearch/monitor/os/OsProbe.java b/server/src/main/java/org/elasticsearch/monitor/os/OsProbe.java
index a20b7c7e27396..8ee5a945d490d 100644
--- a/server/src/main/java/org/elasticsearch/monitor/os/OsProbe.java
+++ b/server/src/main/java/org/elasticsearch/monitor/os/OsProbe.java
@@ -30,25 +30,31 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
+import java.util.stream.Stream;
/**
* The {@link OsProbe} class retrieves information about the physical and swap size of the machine
* memory, as well as the system load average and cpu load.
*
- * In some exceptional cases, it's possible the underlying native methods used by
+ * <p>In some exceptional cases, it's possible the underlying native methods used by
* {@link #getFreePhysicalMemorySize()}, {@link #getTotalPhysicalMemorySize()},
* {@link #getFreeSwapSpaceSize()}, and {@link #getTotalSwapSpaceSize()} can return a
* negative value. Because of this, we prevent those methods from returning negative values,
* returning 0 instead.
*
- * The OS can report a negative number in a number of cases:
- * - Non-supported OSes (HP-UX, or AIX)
- * - A failure of macOS to initialize host statistics
- * - An OS that does not support the {@code _SC_PHYS_PAGES} or {@code _SC_PAGE_SIZE} flags for the {@code sysconf()} linux kernel call
- * - An overflow of the product of {@code _SC_PHYS_PAGES} and {@code _SC_PAGE_SIZE}
- * - An error case retrieving these values from a linux kernel
- * - A non-standard libc implementation not implementing the required values
- * For a more exhaustive explanation, see https://github.com/elastic/elasticsearch/pull/42725
+ * <p>The OS can report a negative number in a number of cases:
+ *
+ * <ul>
+ * <li>Non-supported OSes (HP-UX, or AIX)
+ * <li>A failure of macOS to initialize host statistics
+ * <li>An OS that does not support the {@code _SC_PHYS_PAGES} or {@code _SC_PAGE_SIZE} flags for the {@code sysconf()} linux kernel call
+ * <li>An overflow of the product of {@code _SC_PHYS_PAGES} and {@code _SC_PAGE_SIZE}
+ * <li>An error case retrieving these values from a linux kernel
+ * <li>A non-standard libc implementation not implementing the required values
+ * </ul>
+ *
+ * For a more exhaustive explanation, see https://github.com/elastic/elasticsearch/pull/42725
*/
public class OsProbe {
@@ -178,7 +184,7 @@ final double[] getSystemLoadAverage() {
final String procLoadAvg = readProcLoadavg();
assert procLoadAvg.matches("(\\d+\\.\\d+\\s+){3}\\d+/\\d+\\s+\\d+");
final String[] fields = procLoadAvg.split("\\s+");
- return new double[]{Double.parseDouble(fields[0]), Double.parseDouble(fields[1]), Double.parseDouble(fields[2])};
+ return new double[] { Double.parseDouble(fields[0]), Double.parseDouble(fields[1]), Double.parseDouble(fields[2]) };
} catch (final IOException e) {
if (logger.isDebugEnabled()) {
logger.debug("error reading /proc/loadavg", e);
@@ -192,7 +198,7 @@ final double[] getSystemLoadAverage() {
}
try {
final double oneMinuteLoadAverage = (double) getSystemLoadAverage.invoke(osMxBean);
- return new double[]{oneMinuteLoadAverage >= 0 ? oneMinuteLoadAverage : -1, -1, -1};
+ return new double[] { oneMinuteLoadAverage >= 0 ? oneMinuteLoadAverage : -1, -1, -1 };
} catch (IllegalAccessException | InvocationTargetException e) {
if (logger.isDebugEnabled()) {
logger.debug("error reading one minute load average from operating system", e);
@@ -318,6 +324,23 @@ String readSysFsCgroupCpuAcctCpuAcctUsage(final String controlGroup) throws IOEx
return readSingleLine(PathUtils.get("/sys/fs/cgroup/cpuacct", controlGroup, "cpuacct.usage"));
}
+ /**
+ * Parses the cgroup v2 {@code cpu.max} entry of the given control group into a quota and a period.
+ * The entry is expected to consist of two whitespace-separated fields. A first field of {@code max}
+ * is reported as {@code -1}.
+ *
+ * @param controlGroup the control group to which the Elasticsearch process belongs
+ * @return a two-element array holding the quota (or {@code -1} for {@code max}) followed by the period
+ * @throws IOException if an I/O exception occurs reading {@code cpu.max} for the control group
+ */
+ private long[] getCgroupV2CpuLimit(String controlGroup) throws IOException {
+     final String[] fields = readCgroupV2CpuLimit(controlGroup).split("\\s+");
+     assert fields.length == 2 : "Expected 2 fields in [cpu.max]";
+
+     final long quota;
+     if ("max".equals(fields[0])) {
+         // "max" is mapped to the -1 sentinel instead of being parsed as a number
+         quota = -1L;
+     } else {
+         quota = Long.parseLong(fields[0]);
+     }
+     final long period = Long.parseLong(fields[1]);
+     return new long[] { quota, period };
+ }
+
+ /**
+ * Returns the line from {@code cpu.max} for the control group to which the Elasticsearch process belongs.
+ *
+ * @param controlGroup the control group to which the Elasticsearch process belongs
+ * @return the line from {@code cpu.max}
+ * @throws IOException if an I/O exception occurs reading {@code cpu.max} for the control group
+ */
+ @SuppressForbidden(reason = "access /sys/fs/cgroup/cpu.max")
+ String readCgroupV2CpuLimit(String controlGroup) throws IOException {
+     final var cpuMaxFile = PathUtils.get("/sys/fs/cgroup/", controlGroup, "cpu.max");
+     return readSingleLine(cpuMaxFile);
+ }
+
/**
* The total period of time in microseconds for how frequently the Elasticsearch control group's access to CPU resources will be
* reallocated.
@@ -454,6 +477,35 @@ String readSysFsCgroupMemoryLimitInBytes(final String controlGroup) throws IOExc
return readSingleLine(PathUtils.get("/sys/fs/cgroup/memory", controlGroup, "memory.limit_in_bytes"));
}
+ /**
+ * The maximum amount of user memory (including file cache), as read from {@code memory.max}.
+ * If there is no limit then some Linux versions return the maximum value that can be stored in an
+ * unsigned 64 bit number, and this will overflow a long, hence the result type is {@code String}.
+ * (The alternative would have been {@code BigInteger} but then it would not be possible to index
+ * the OS stats document into Elasticsearch without losing information, as {@code BigInteger} is
+ * not a supported Elasticsearch type.)
+ * NOTE(review): cgroup v2 presumably reports "no limit" as the literal {@code max} rather than a number;
+ * confirm that every consumer of this value handles a non-numeric string.
+ *
+ * @param controlGroup the control group for the Elasticsearch process for the {@code memory} subsystem
+ * @return the maximum amount of user memory (including file cache)
+ * @throws IOException if an I/O exception occurs reading {@code memory.max} for the control group
+ */
+ private String getCgroupV2MemoryLimitInBytes(final String controlGroup) throws IOException {
+ return readSysFsCgroupV2MemoryLimitInBytes(controlGroup);
+ }
+
+ /**
+ * Returns the line from {@code memory.max} for the control group to which the Elasticsearch process belongs for the
+ * {@code memory} subsystem. This line represents the maximum amount of user memory (including file cache).
+ * The file is looked up under {@code /sys/fs/cgroup/} followed by the control group path, and the line is
+ * returned as read, without being parsed here.
+ *
+ * @param controlGroup the control group to which the Elasticsearch process belongs for the {@code memory} subsystem
+ * @return the line from {@code memory.max}
+ * @throws IOException if an I/O exception occurs reading {@code memory.max} for the control group
+ */
+ @SuppressForbidden(reason = "access /sys/fs/cgroup/memory.max")
+ String readSysFsCgroupV2MemoryLimitInBytes(final String controlGroup) throws IOException {
+ return readSingleLine(PathUtils.get("/sys/fs/cgroup/", controlGroup, "memory.max"));
+ }
+
/**
* The total current memory usage by processes in the cgroup (in bytes).
* If there is no limit then some Linux versions return the maximum value that can be stored in an
@@ -483,6 +535,35 @@ String readSysFsCgroupMemoryUsageInBytes(final String controlGroup) throws IOExc
return readSingleLine(PathUtils.get("/sys/fs/cgroup/memory", controlGroup, "memory.usage_in_bytes"));
}
+ /**
+ * The total current memory usage by processes in the cgroup (in bytes), as read from {@code memory.current}.
+ * If there is no limit then some Linux versions return the maximum value that can be stored in an
+ * unsigned 64 bit number, and this will overflow a long, hence the result type is {@code String}.
+ * (The alternative would have been {@code BigInteger} but then it would not be possible to index
+ * the OS stats document into Elasticsearch without losing information, as {@code BigInteger} is
+ * not a supported Elasticsearch type.)
+ *
+ * @param controlGroup the control group for the Elasticsearch process for the {@code memory} subsystem
+ * @return the total current memory usage by processes in the cgroup (in bytes)
+ * @throws IOException if an I/O exception occurs reading {@code memory.current} for the control group
+ */
+ private String getCgroupV2MemoryUsageInBytes(final String controlGroup) throws IOException {
+ return readSysFsCgroupV2MemoryUsageInBytes(controlGroup);
+ }
+
+ /**
+ * Returns the line from {@code memory.current} for the control group to which the Elasticsearch process belongs for the
+ * {@code memory} subsystem. This line represents the total current memory usage by processes in the cgroup (in bytes).
+ * The file is looked up under {@code /sys/fs/cgroup/} followed by the control group path, and the line is
+ * returned as read, without being parsed here.
+ *
+ * @param controlGroup the control group to which the Elasticsearch process belongs for the {@code memory} subsystem
+ * @return the line from {@code memory.current}
+ * @throws IOException if an I/O exception occurs reading {@code memory.current} for the control group
+ */
+ @SuppressForbidden(reason = "access /sys/fs/cgroup/memory.current")
+ String readSysFsCgroupV2MemoryUsageInBytes(final String controlGroup) throws IOException {
+ return readSingleLine(PathUtils.get("/sys/fs/cgroup/", controlGroup, "memory.current"));
+ }
+
/**
* Checks if cgroup stats are available by checking for the existence of {@code /proc/self/cgroup}, {@code /sys/fs/cgroup/cpu},
* {@code /sys/fs/cgroup/cpuacct} and {@code /sys/fs/cgroup/memory}.
@@ -490,20 +571,60 @@ String readSysFsCgroupMemoryUsageInBytes(final String controlGroup) throws IOExc
* @return {@code true} if the stats are available, otherwise {@code false}
*/
@SuppressForbidden(reason = "access /proc/self/cgroup, /sys/fs/cgroup/cpu, /sys/fs/cgroup/cpuacct and /sys/fs/cgroup/memory")
- boolean areCgroupStatsAvailable() {
+ boolean areCgroupStatsAvailable() throws IOException {
if (Files.exists(PathUtils.get("/proc/self/cgroup")) == false) {
return false;
}
- if (Files.exists(PathUtils.get("/sys/fs/cgroup/cpu")) == false) {
- return false;
- }
- if (Files.exists(PathUtils.get("/sys/fs/cgroup/cpuacct")) == false) {
- return false;
+
+ List<String> lines = readProcSelfCgroup();
+
+ // cgroup v2: the unified hierarchy is reported as a single line starting with "0::", and availability
+ // is decided by the cpu.stat and memory.stat files rather than by per-controller directories
+ if (lines.size() == 1 && lines.get(0).startsWith("0::")) {
+ return Stream.of("/sys/fs/cgroup/cpu.stat", "/sys/fs/cgroup/memory.stat").allMatch(path -> Files.exists(PathUtils.get(path)));
}
- if (Files.exists(PathUtils.get("/sys/fs/cgroup/memory")) == false) {
- return false;
+
+ // cgroup v1: all three controller directories must exist
+ return Stream.of("/sys/fs/cgroup/cpu", "/sys/fs/cgroup/cpuacct", "/sys/fs/cgroup/memory")
+ .allMatch(path -> Files.exists(PathUtils.get(path)));
+ }
+
+ /**
+ * The CPU statistics for all tasks in the Elasticsearch control group, parsed from the cgroup v2
+ * {@code cpu.stat} file. Each line is expected to be a whitespace-separated {@code name value} pair.
+ * The keys {@code system_usec}, {@code usage_usec} and {@code user_usec} are asserted to be present;
+ * {@code nr_periods}, {@code nr_throttled} and {@code throttled_usec} default to {@code 0} when absent.
+ *
+ * @param controlGroup the control group to which the Elasticsearch process belongs
+ * @return the CPU statistics, keyed by the name used in {@code cpu.stat}
+ * @throws IOException if an I/O exception occurs reading {@code cpu.stat} for the control group
+ */
+ @SuppressForbidden(reason = "Uses PathUtils.get to generate meaningful assertion messages")
+ private Map<String, Long> getCgroupV2CpuStats(String controlGroup) throws IOException {
+     final List<String> lines = readCgroupV2CpuStats(controlGroup);
+     final Map<String, Long> stats = new HashMap<>();
+
+     for (String line : lines) {
+         String[] parts = line.split("\\s+");
+         assert parts.length == 2 : "Corrupt cpu.stat line: [" + line + "]";
+         stats.put(parts[0], Long.parseLong(parts[1]));
+     }
+
+     final List<String> expectedKeys = List.of("system_usec", "usage_usec", "user_usec");
+     expectedKeys.forEach(key -> {
+         assert stats.containsKey(key) : "[" + key + "] missing from " + PathUtils.get("/sys/fs/cgroup", controlGroup, "cpu.stat");
+         assert stats.get(key) != -1 : stats.get(key);
+     });
+
+     final List<String> optionalKeys = List.of("nr_periods", "nr_throttled", "throttled_usec");
+     optionalKeys.forEach(key -> {
+         // these keys may be missing from cpu.stat, so fall back to zero instead of leaving a gap
+         stats.putIfAbsent(key, 0L);
+         assert stats.get(key) != -1L : "[" + key + "] in " + PathUtils.get("/sys/fs/cgroup", controlGroup, "cpu.stat") + " is -1";
+     });
+
+     return stats;
+ }
+
+ /**
+ * Returns the lines from {@code cpu.stat} for the control group to which the Elasticsearch process belongs.
+ *
+ * @param controlGroup the control group to which the Elasticsearch process belongs
+ * @return the lines from {@code cpu.stat}
+ * @throws IOException if an I/O exception occurs reading {@code cpu.stat} for the control group
+ */
+ @SuppressForbidden(reason = "access /sys/fs/cgroup/cpu.stat")
+ List<String> readCgroupV2CpuStats(final String controlGroup) throws IOException {
+     return Files.readAllLines(PathUtils.get("/sys/fs/cgroup", controlGroup, "cpu.stat"));
+ }
/**
@@ -515,45 +636,79 @@ private OsStats.Cgroup getCgroup() {
try {
if (areCgroupStatsAvailable() == false) {
return null;
- } else {
- final Map controllerMap = getControlGroups();
- assert controllerMap.isEmpty() == false;
+ }
+
+ final Map controllerMap = getControlGroups();
+ assert controllerMap.isEmpty() == false;
- final String cpuAcctControlGroup = controllerMap.get("cpuacct");
+ final String cpuAcctControlGroup;
+ final long cgroupCpuAcctUsageNanos;
+ final long cgroupCpuAcctCpuCfsPeriodMicros;
+ final long cgroupCpuAcctCpuCfsQuotaMicros;
+ final String cpuControlGroup;
+ final OsStats.Cgroup.CpuStat cpuStat;
+ final String memoryControlGroup;
+ final String cgroupMemoryLimitInBytes;
+ final String cgroupMemoryUsageInBytes;
+
+ if (controllerMap.size() == 1 && controllerMap.containsKey("")) {
+ // There's a single hierarchy for all controllers
+ cpuControlGroup = cpuAcctControlGroup = memoryControlGroup = controllerMap.get("");
+
+ // `cpuacct` was merged with `cpu` in v2
+ final Map cpuStatsMap = getCgroupV2CpuStats(cpuControlGroup);
+
+ cgroupCpuAcctUsageNanos = cpuStatsMap.get("usage_usec");
+
+ long[] cpuLimits = getCgroupV2CpuLimit(cpuControlGroup);
+ cgroupCpuAcctCpuCfsQuotaMicros = cpuLimits[0];
+ cgroupCpuAcctCpuCfsPeriodMicros = cpuLimits[1];
+
+ cpuStat = new OsStats.Cgroup.CpuStat(
+ cpuStatsMap.get("nr_periods"),
+ cpuStatsMap.get("nr_throttled"),
+ cpuStatsMap.get("throttled_usec")
+ );
+
+ cgroupMemoryLimitInBytes = getCgroupV2MemoryLimitInBytes(memoryControlGroup);
+ cgroupMemoryUsageInBytes = getCgroupV2MemoryUsageInBytes(memoryControlGroup);
+ } else {
+ cpuAcctControlGroup = controllerMap.get("cpuacct");
if (cpuAcctControlGroup == null) {
logger.debug("no [cpuacct] data found in cgroup stats");
return null;
}
- final long cgroupCpuAcctUsageNanos = getCgroupCpuAcctUsageNanos(cpuAcctControlGroup);
+ cgroupCpuAcctUsageNanos = getCgroupCpuAcctUsageNanos(cpuAcctControlGroup);
- final String cpuControlGroup = controllerMap.get("cpu");
+ cpuControlGroup = controllerMap.get("cpu");
if (cpuControlGroup == null) {
logger.debug("no [cpu] data found in cgroup stats");
return null;
}
- final long cgroupCpuAcctCpuCfsPeriodMicros = getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup);
- final long cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup);
- final OsStats.Cgroup.CpuStat cpuStat = getCgroupCpuAcctCpuStat(cpuControlGroup);
+ cgroupCpuAcctCpuCfsPeriodMicros = getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup);
+ cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup);
+ cpuStat = getCgroupCpuAcctCpuStat(cpuControlGroup);
- final String memoryControlGroup = controllerMap.get("memory");
+ memoryControlGroup = controllerMap.get("memory");
if (memoryControlGroup == null) {
logger.debug("no [memory] data found in cgroup stats");
return null;
}
- final String cgroupMemoryLimitInBytes = getCgroupMemoryLimitInBytes(memoryControlGroup);
- final String cgroupMemoryUsageInBytes = getCgroupMemoryUsageInBytes(memoryControlGroup);
-
- return new OsStats.Cgroup(
- cpuAcctControlGroup,
- cgroupCpuAcctUsageNanos,
- cpuControlGroup,
- cgroupCpuAcctCpuCfsPeriodMicros,
- cgroupCpuAcctCpuCfsQuotaMicros,
- cpuStat,
- memoryControlGroup,
- cgroupMemoryLimitInBytes,
- cgroupMemoryUsageInBytes);
+ cgroupMemoryLimitInBytes = getCgroupMemoryLimitInBytes(memoryControlGroup);
+ cgroupMemoryUsageInBytes = getCgroupMemoryUsageInBytes(memoryControlGroup);
}
+
+ return new OsStats.Cgroup(
+ cpuAcctControlGroup,
+ cgroupCpuAcctUsageNanos,
+ cpuControlGroup,
+ cgroupCpuAcctCpuCfsPeriodMicros,
+ cgroupCpuAcctCpuCfsQuotaMicros,
+ cpuStat,
+ memoryControlGroup,
+ cgroupMemoryLimitInBytes,
+ cgroupMemoryUsageInBytes
+ );
} catch (final IOException e) {
logger.debug("error reading control group stats", e);
return null;
@@ -576,13 +731,14 @@ public static OsProbe getInstance() {
OsInfo osInfo(long refreshInterval, int allocatedProcessors) throws IOException {
return new OsInfo(
- refreshInterval,
- Runtime.getRuntime().availableProcessors(),
- allocatedProcessors,
- Constants.OS_NAME,
- getPrettyName(),
- Constants.OS_ARCH,
- Constants.OS_VERSION);
+ refreshInterval,
+ Runtime.getRuntime().availableProcessors(),
+ allocatedProcessors,
+ Constants.OS_NAME,
+ getPrettyName(),
+ Constants.OS_ARCH,
+ Constants.OS_VERSION
+ );
}
private String getPrettyName() throws IOException {
@@ -594,11 +750,13 @@ private String getPrettyName() throws IOException {
* wrapped in single- or double-quotes.
*/
final List etcOsReleaseLines = readOsRelease();
- final List prettyNameLines =
- etcOsReleaseLines.stream().filter(line -> line.startsWith("PRETTY_NAME")).collect(Collectors.toList());
+ final List prettyNameLines = etcOsReleaseLines.stream()
+ .filter(line -> line.startsWith("PRETTY_NAME"))
+ .collect(Collectors.toList());
assert prettyNameLines.size() <= 1 : prettyNameLines;
- final Optional maybePrettyNameLine =
- prettyNameLines.size() == 1 ? Optional.of(prettyNameLines.get(0)) : Optional.empty();
+ final Optional maybePrettyNameLine = prettyNameLines.size() == 1
+ ? Optional.of(prettyNameLines.get(0))
+ : Optional.empty();
if (maybePrettyNameLine.isPresent()) {
// we trim since some OS contain trailing space, for example, Oracle Linux Server 6.9 has a trailing space after the quote
final String trimmedPrettyNameLine = maybePrettyNameLine.get().trim();
@@ -695,11 +853,15 @@ boolean isDebian8() throws IOException {
return Constants.LINUX && getPrettyName().equals("Debian GNU/Linux 8 (jessie)");
}
+ /**
+ * Returns the cgroup statistics when running on Linux, otherwise {@code null}. The platform check is
+ * passed in as a parameter so that a subclass can override this method and supply its own value.
+ *
+ * @param isLinux whether the current platform should be treated as Linux
+ * @return the cgroup statistics, or {@code null} if {@code isLinux} is {@code false} or the statistics are unavailable
+ */
+ OsStats.Cgroup getCgroup(boolean isLinux) {
+     if (isLinux == false) {
+         return null;
+     }
+     return getCgroup();
+ }
+
public OsStats osStats() {
final OsStats.Cpu cpu = new OsStats.Cpu(getSystemCpuPercent(), getSystemLoadAverage());
final OsStats.Mem mem = new OsStats.Mem(getTotalPhysicalMemorySize(), getFreePhysicalMemorySize());
final OsStats.Swap swap = new OsStats.Swap(getTotalSwapSpaceSize(), getFreeSwapSpaceSize());
- final OsStats.Cgroup cgroup = Constants.LINUX ? getCgroup() : null;
+ final OsStats.Cgroup cgroup = getCgroup(Constants.LINUX);
return new OsStats(System.currentTimeMillis(), cpu, mem, swap, cgroup);
}
diff --git a/server/src/main/resources/org/elasticsearch/bootstrap/security.policy b/server/src/main/resources/org/elasticsearch/bootstrap/security.policy
index 8ffc0d0eea47d..5ae15e74ec2d4 100644
--- a/server/src/main/resources/org/elasticsearch/bootstrap/security.policy
+++ b/server/src/main/resources/org/elasticsearch/bootstrap/security.policy
@@ -144,14 +144,11 @@ grant {
permission java.io.FilePermission "/proc/self/mountinfo", "read";
permission java.io.FilePermission "/proc/diskstats", "read";
- // control group stats on Linux
+ // control group stats on Linux. cgroup v2 stats are in an unpredictable
+ // location under `/sys/fs/cgroup`, so unfortunately we have to allow
+ // read access to the entire directory hierarchy.
permission java.io.FilePermission "/proc/self/cgroup", "read";
- permission java.io.FilePermission "/sys/fs/cgroup/cpu", "read";
- permission java.io.FilePermission "/sys/fs/cgroup/cpu/-", "read";
- permission java.io.FilePermission "/sys/fs/cgroup/cpuacct", "read";
- permission java.io.FilePermission "/sys/fs/cgroup/cpuacct/-", "read";
- permission java.io.FilePermission "/sys/fs/cgroup/memory", "read";
- permission java.io.FilePermission "/sys/fs/cgroup/memory/-", "read";
+ permission java.io.FilePermission "/sys/fs/cgroup/-", "read";
// system memory on Linux systems affected by JDK bug (#66629)
permission java.io.FilePermission "/proc/meminfo", "read";
diff --git a/server/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java b/server/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java
index 8be3723d72cc3..ac802cf738500 100644
--- a/server/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java
+++ b/server/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java
@@ -43,7 +43,7 @@ public void testOsInfo() throws IOException {
final OsProbe osProbe = new OsProbe() {
@Override
- List readOsRelease() throws IOException {
+ List readOsRelease() {
assert Constants.LINUX : Constants.OS_NAME;
if (prettyName != null) {
final String quote = randomFrom("\"", "'", "");
@@ -78,8 +78,10 @@ public void testOsStats() {
OsStats stats = osProbe.osStats();
assertNotNull(stats);
assertThat(stats.getTimestamp(), greaterThan(0L));
- assertThat(stats.getCpu().getPercent(), anyOf(equalTo((short) -1),
- is(both(greaterThanOrEqualTo((short) 0)).and(lessThanOrEqualTo((short) 100)))));
+ assertThat(
+ stats.getCpu().getPercent(),
+ anyOf(equalTo((short) -1), is(both(greaterThanOrEqualTo((short) 0)).and(lessThanOrEqualTo((short) 100))))
+ );
double[] loadAverage = stats.getCpu().getLoadAverage();
if (loadAverage != null) {
assertThat(loadAverage.length, equalTo(3));
@@ -173,16 +175,14 @@ String readProcLoadavg() {
}
public void testCgroupProbe() {
- assumeTrue("test runs on Linux only", Constants.LINUX);
-
- final boolean areCgroupStatsAvailable = randomBoolean();
+ final int availableCgroupsVersion = randomFrom(0, 1, 2);
final String hierarchy = randomAlphaOfLength(16);
- final OsProbe probe = buildStubOsProbe(areCgroupStatsAvailable, hierarchy);
+ final OsProbe probe = buildStubOsProbe(availableCgroupsVersion, hierarchy);
final OsStats.Cgroup cgroup = probe.osStats().getCgroup();
- if (areCgroupStatsAvailable) {
+ if (availableCgroupsVersion > 0) {
assertNotNull(cgroup);
assertThat(cgroup.getCpuAcctControlGroup(), equalTo("/" + hierarchy));
assertThat(cgroup.getCpuAcctUsageNanos(), equalTo(364869866063112L));
@@ -200,17 +200,14 @@ public void testCgroupProbe() {
}
public void testCgroupProbeWithMissingCpuAcct() {
- assumeTrue("test runs on Linux only", Constants.LINUX);
-
final String hierarchy = randomAlphaOfLength(16);
// This cgroup data is missing a line about cpuacct
- List procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
- .stream()
+ List procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
.map(line -> line.replaceFirst(",cpuacct", ""))
.collect(Collectors.toList());
- final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
+ final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);
final OsStats.Cgroup cgroup = probe.osStats().getCgroup();
@@ -218,18 +215,14 @@ public void testCgroupProbeWithMissingCpuAcct() {
}
public void testCgroupProbeWithMissingCpu() {
- assumeTrue("test runs on Linux only", Constants.LINUX);
-
final String hierarchy = randomAlphaOfLength(16);
// This cgroup data is missing a line about cpu
- List procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
- .stream()
+ List procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
.map(line -> line.replaceFirst(":cpu,", ":"))
.collect(Collectors.toList());
-
- final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
+ final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);
final OsStats.Cgroup cgroup = probe.osStats().getCgroup();
@@ -237,17 +230,14 @@ public void testCgroupProbeWithMissingCpu() {
}
public void testCgroupProbeWithMissingMemory() {
- assumeTrue("test runs on Linux only", Constants.LINUX);
-
final String hierarchy = randomAlphaOfLength(16);
// This cgroup data is missing a line about memory
- List procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
- .stream()
+ List procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
.filter(line -> line.contains(":memory:") == false)
.collect(Collectors.toList());
- final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
+ final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);
final OsStats.Cgroup cgroup = probe.osStats().getCgroup();
@@ -255,6 +245,8 @@ public void testCgroupProbeWithMissingMemory() {
}
public void testGetTotalMemFromProcMeminfo() throws Exception {
+ int cgroupsVersion = randomFrom(1, 2);
+
// missing MemTotal line
var meminfoLines = Arrays.asList(
"MemFree: 8467692 kB",
@@ -265,7 +257,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
"Active: 43637908 kB",
"Inactive: 8130280 kB"
);
- OsProbe probe = buildStubOsProbe(true, "", List.of(), meminfoLines);
+ OsProbe probe = buildStubOsProbe(cgroupsVersion, "", List.of(), meminfoLines);
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));
// MemTotal line with invalid value
@@ -279,7 +271,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
"Active: 43637908 kB",
"Inactive: 8130280 kB"
);
- probe = buildStubOsProbe(true, "", List.of(), meminfoLines);
+ probe = buildStubOsProbe(cgroupsVersion, "", List.of(), meminfoLines);
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));
// MemTotal line with invalid unit
@@ -293,7 +285,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
"Active: 43637908 kB",
"Inactive: 8130280 kB"
);
- probe = buildStubOsProbe(true, "", List.of(), meminfoLines);
+ probe = buildStubOsProbe(cgroupsVersion, "", List.of(), meminfoLines);
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));
// MemTotal line with random valid value
@@ -308,7 +300,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
"Active: 43637908 kB",
"Inactive: 8130280 kB"
);
- probe = buildStubOsProbe(true, "", List.of(), meminfoLines);
+ probe = buildStubOsProbe(cgroupsVersion, "", List.of(), meminfoLines);
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(memTotalInKb * 1024L));
}
@@ -319,7 +311,13 @@ public void testGetTotalMemoryOnDebian8() throws Exception {
assertThat(osProbe.getTotalPhysicalMemorySize(), greaterThan(0L));
}
- private static List getProcSelfGroupLines(String hierarchy) {
+ private static List getProcSelfGroupLines(int cgroupsVersion, String hierarchy) {
+ // It doesn't really matter if cgroupsVersion == 0 here
+
+ if (cgroupsVersion == 2) {
+ return List.of("0::/" + hierarchy);
+ }
+
return Arrays.asList(
"10:freezer:/",
"9:net_cls,net_prio:/",
@@ -331,32 +329,40 @@ private static List getProcSelfGroupLines(String hierarchy) {
"3:perf_event:/",
"2:cpu,cpuacct,cpuset:/" + hierarchy,
"1:name=systemd:/user.slice/user-1000.slice/session-2359.scope",
- "0::/cgroup2");
+ "0::/cgroup2"
+ );
}
- private static OsProbe buildStubOsProbe(final boolean areCgroupStatsAvailable, final String hierarchy) {
- List procSelfCgroupLines = getProcSelfGroupLines(hierarchy);
+ private static OsProbe buildStubOsProbe(final int availableCgroupsVersion, final String hierarchy) {
+ List procSelfCgroupLines = getProcSelfGroupLines(availableCgroupsVersion, hierarchy);
- return buildStubOsProbe(areCgroupStatsAvailable, hierarchy, procSelfCgroupLines);
+ return buildStubOsProbe(availableCgroupsVersion, hierarchy, procSelfCgroupLines);
}
/**
* Builds a test instance of OsProbe. Methods that ordinarily read from the filesystem are overridden to return values based upon
* the arguments to this method.
*
- * @param areCgroupStatsAvailable whether or not cgroup data is available. Normally OsProbe establishes this for itself.
+ * @param availableCgroupsVersion what version of cgroups are available, 1 or 2, or 0 for no cgroups. Normally OsProbe establishes this
+ * for itself.
* @param hierarchy a mock value used to generate a cgroup hierarchy.
* @param procSelfCgroupLines the lines that will be used as the content of /proc/self/cgroup
* @param procMeminfoLines lines that will be used as the content of /proc/meminfo
* @return a test instance
*/
private static OsProbe buildStubOsProbe(
- final boolean areCgroupStatsAvailable,
+ final int availableCgroupsVersion,
final String hierarchy,
List procSelfCgroupLines,
List procMeminfoLines
) {
return new OsProbe() {
+ @Override
+ OsStats.Cgroup getCgroup(boolean isLinux) {
+ // Pretend we're always on Linux so that we can run the cgroup tests
+ return super.getCgroup(true);
+ }
+
@Override
List readProcSelfCgroup() {
return procSelfCgroupLines;
@@ -382,10 +388,7 @@ String readSysFsCgroupCpuAcctCpuAcctCfsQuota(String controlGroup) {
@Override
List readSysFsCgroupCpuAcctCpuStat(String controlGroup) {
- return Arrays.asList(
- "nr_periods 17992",
- "nr_throttled 1311",
- "throttled_time 139298645489");
+ return Arrays.asList("nr_periods 17992", "nr_throttled 1311", "throttled_time 139298645489");
}
@Override
@@ -403,22 +406,50 @@ String readSysFsCgroupMemoryUsageInBytes(String controlGroup) {
@Override
boolean areCgroupStatsAvailable() {
- return areCgroupStatsAvailable;
+ return availableCgroupsVersion > 0;
}
@Override
- List readProcMeminfo() throws IOException {
+ List readProcMeminfo() {
return procMeminfoLines;
}
+
+ @Override
+ String readSysFsCgroupV2MemoryLimitInBytes(String controlGroup) {
+ assertThat(controlGroup, equalTo("/" + hierarchy));
+ // This is the highest value that can be stored in an unsigned 64 bit number, hence too big for long
+ return "18446744073709551615";
+ }
+
+ @Override
+ String readSysFsCgroupV2MemoryUsageInBytes(String controlGroup) {
+ assertThat(controlGroup, equalTo("/" + hierarchy));
+ return "4796416";
+ }
+
+ @Override
+ List readCgroupV2CpuStats(String controlGroup) {
+ assertThat(controlGroup, equalTo("/" + hierarchy));
+ return List.of(
+ "usage_usec 364869866063112",
+ "user_usec 34636",
+ "system_usec 9896",
+ "nr_periods 17992",
+ "nr_throttled 1311",
+ "throttled_usec 139298645489"
+ );
+ }
+
+ @Override
+ String readCgroupV2CpuLimit(String controlGroup) {
+ assertThat(controlGroup, equalTo("/" + hierarchy));
+ return "50000 100000";
+ }
};
}
- private static OsProbe buildStubOsProbe(
- final boolean areCgroupStatsAvailable,
- final String hierarchy,
- List procSelfCgroupLines
- ) {
- return buildStubOsProbe(areCgroupStatsAvailable, hierarchy, procSelfCgroupLines, List.of());
+ private static OsProbe buildStubOsProbe(final int availableCgroupsVersion, final String hierarchy, List procSelfCgroupLines) {
+ return buildStubOsProbe(availableCgroupsVersion, hierarchy, procSelfCgroupLines, List.of());
}
}
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
index 864eefdb567cd..226aabc9bb02c 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
@@ -1260,7 +1260,7 @@ static long machineMemoryFromStats(OsStats stats) {
OsStats.Cgroup cgroup = stats.getCgroup();
if (cgroup != null) {
String containerLimitStr = cgroup.getMemoryLimitInBytes();
- if (containerLimitStr != null) {
+ if (containerLimitStr != null && containerLimitStr.equals("max") == false) {
BigInteger containerLimit = new BigInteger(containerLimitStr);
if ((containerLimit.compareTo(BigInteger.valueOf(mem)) < 0 && containerLimit.compareTo(BigInteger.ZERO) > 0)
// mem <= 0 means the value couldn't be obtained for some reason