Skip to content

Commit

Permalink
Handle cgroups v2 in OsProbe (#77128)
Browse files Browse the repository at this point in the history
Closes #76812. Closes #77126.

OsProbe was only capable of handling cgroup data in the v1 format.
However, Debian 11 uses cgroups v2 by default, and Elasticsearch isn't
capable of reporting any cgroup information. Therefore, add support for
the v2 layout.

Note that we have to open access to all of /sys/fs/cgroup because with
cgroups v2, the files we need are in an unpredictable location.
  • Loading branch information
pugnascotia authored Sep 3, 2021
1 parent 385b97f commit d238374
Show file tree
Hide file tree
Showing 7 changed files with 315 additions and 113 deletions.
7 changes: 7 additions & 0 deletions docs/changelog/77128.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
pr: 77128
summary: Handle cgroups v2 in `OsProbe`
area: Infra/Core
type: enhancement
issues:
- 77126
- 76812
Original file line number Diff line number Diff line change
Expand Up @@ -849,7 +849,6 @@ public void test131InitProcessHasCorrectPID() {
/**
* Check that Elasticsearch reports per-node cgroup information.
*/
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/76812")
public void test140CgroupOsStatsAreAvailable() throws Exception {
waitForElasticsearch(installation, USERNAME, PASSWORD);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,12 +247,18 @@ private static void verifyDefaultInstallation(Installation es, Distribution dist
/**
* Starts Elasticsearch, without checking that startup is successful.
*/
public static Shell.Result runElasticsearchStartCommand(Shell sh) throws IOException {
public static Shell.Result runElasticsearchStartCommand(Shell sh) {
if (isSystemd()) {
Packages.JournaldWrapper journald = new Packages.JournaldWrapper(sh);
sh.run("systemctl daemon-reload");
sh.run("systemctl enable elasticsearch.service");
sh.run("systemctl is-enabled elasticsearch.service");
return sh.runIgnoreExitCode("systemctl start elasticsearch.service");
Result exitCode = sh.runIgnoreExitCode("systemctl start elasticsearch.service");
if (exitCode.isSuccess() == false) {
logger.warn(sh.runIgnoreExitCode("systemctl status elasticsearch.service").stdout);
logger.warn(journald.getLogs().stdout);
}
return exitCode;
}
return sh.runIgnoreExitCode("service elasticsearch start");
}
Expand Down
272 changes: 217 additions & 55 deletions server/src/main/java/org/elasticsearch/monitor/os/OsProbe.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -144,14 +144,11 @@ grant {
permission java.io.FilePermission "/proc/self/mountinfo", "read";
permission java.io.FilePermission "/proc/diskstats", "read";

// control group stats on Linux
// control group stats on Linux. cgroup v2 stats are in an unpredictable
// location under `/sys/fs/cgroup`, so unfortunately we have to allow
// read access to the entire directory hierarchy.
permission java.io.FilePermission "/proc/self/cgroup", "read";
permission java.io.FilePermission "/sys/fs/cgroup/cpu", "read";
permission java.io.FilePermission "/sys/fs/cgroup/cpu/-", "read";
permission java.io.FilePermission "/sys/fs/cgroup/cpuacct", "read";
permission java.io.FilePermission "/sys/fs/cgroup/cpuacct/-", "read";
permission java.io.FilePermission "/sys/fs/cgroup/memory", "read";
permission java.io.FilePermission "/sys/fs/cgroup/memory/-", "read";
permission java.io.FilePermission "/sys/fs/cgroup/-", "read";

// system memory on Linux systems affected by JDK bug (#66629)
permission java.io.FilePermission "/proc/meminfo", "read";
Expand Down
125 changes: 78 additions & 47 deletions server/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public void testOsInfo() throws IOException {
final OsProbe osProbe = new OsProbe() {

@Override
List<String> readOsRelease() throws IOException {
List<String> readOsRelease() {
assert Constants.LINUX : Constants.OS_NAME;
if (prettyName != null) {
final String quote = randomFrom("\"", "'", "");
Expand Down Expand Up @@ -78,8 +78,10 @@ public void testOsStats() {
OsStats stats = osProbe.osStats();
assertNotNull(stats);
assertThat(stats.getTimestamp(), greaterThan(0L));
assertThat(stats.getCpu().getPercent(), anyOf(equalTo((short) -1),
is(both(greaterThanOrEqualTo((short) 0)).and(lessThanOrEqualTo((short) 100)))));
assertThat(
stats.getCpu().getPercent(),
anyOf(equalTo((short) -1), is(both(greaterThanOrEqualTo((short) 0)).and(lessThanOrEqualTo((short) 100))))
);
double[] loadAverage = stats.getCpu().getLoadAverage();
if (loadAverage != null) {
assertThat(loadAverage.length, equalTo(3));
Expand Down Expand Up @@ -173,16 +175,14 @@ String readProcLoadavg() {
}

public void testCgroupProbe() {
assumeTrue("test runs on Linux only", Constants.LINUX);

final boolean areCgroupStatsAvailable = randomBoolean();
final int availableCgroupsVersion = randomFrom(0, 1, 2);
final String hierarchy = randomAlphaOfLength(16);

final OsProbe probe = buildStubOsProbe(areCgroupStatsAvailable, hierarchy);
final OsProbe probe = buildStubOsProbe(availableCgroupsVersion, hierarchy);

final OsStats.Cgroup cgroup = probe.osStats().getCgroup();

if (areCgroupStatsAvailable) {
if (availableCgroupsVersion > 0) {
assertNotNull(cgroup);
assertThat(cgroup.getCpuAcctControlGroup(), equalTo("/" + hierarchy));
assertThat(cgroup.getCpuAcctUsageNanos(), equalTo(364869866063112L));
Expand All @@ -200,61 +200,53 @@ public void testCgroupProbe() {
}

public void testCgroupProbeWithMissingCpuAcct() {
assumeTrue("test runs on Linux only", Constants.LINUX);

final String hierarchy = randomAlphaOfLength(16);

// This cgroup data is missing a line about cpuacct
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
.stream()
List<String> procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
.map(line -> line.replaceFirst(",cpuacct", ""))
.collect(Collectors.toList());

final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);

final OsStats.Cgroup cgroup = probe.osStats().getCgroup();

assertNull(cgroup);
}

public void testCgroupProbeWithMissingCpu() {
assumeTrue("test runs on Linux only", Constants.LINUX);

final String hierarchy = randomAlphaOfLength(16);

// This cgroup data is missing a line about cpu
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
.stream()
List<String> procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
.map(line -> line.replaceFirst(":cpu,", ":"))
.collect(Collectors.toList());


final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);

final OsStats.Cgroup cgroup = probe.osStats().getCgroup();

assertNull(cgroup);
}

public void testCgroupProbeWithMissingMemory() {
assumeTrue("test runs on Linux only", Constants.LINUX);

final String hierarchy = randomAlphaOfLength(16);

// This cgroup data is missing a line about memory
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
.stream()
List<String> procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
.filter(line -> line.contains(":memory:") == false)
.collect(Collectors.toList());

final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);

final OsStats.Cgroup cgroup = probe.osStats().getCgroup();

assertNull(cgroup);
}

public void testGetTotalMemFromProcMeminfo() throws Exception {
int cgroupsVersion = randomFrom(1, 2);

// missing MemTotal line
var meminfoLines = Arrays.asList(
"MemFree: 8467692 kB",
Expand All @@ -265,7 +257,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
"Active: 43637908 kB",
"Inactive: 8130280 kB"
);
OsProbe probe = buildStubOsProbe(true, "", List.of(), meminfoLines);
OsProbe probe = buildStubOsProbe(cgroupsVersion, "", List.of(), meminfoLines);
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));

// MemTotal line with invalid value
Expand All @@ -279,7 +271,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
"Active: 43637908 kB",
"Inactive: 8130280 kB"
);
probe = buildStubOsProbe(true, "", List.of(), meminfoLines);
probe = buildStubOsProbe(cgroupsVersion, "", List.of(), meminfoLines);
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));

// MemTotal line with invalid unit
Expand All @@ -293,7 +285,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
"Active: 43637908 kB",
"Inactive: 8130280 kB"
);
probe = buildStubOsProbe(true, "", List.of(), meminfoLines);
probe = buildStubOsProbe(cgroupsVersion, "", List.of(), meminfoLines);
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));

// MemTotal line with random valid value
Expand All @@ -308,7 +300,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
"Active: 43637908 kB",
"Inactive: 8130280 kB"
);
probe = buildStubOsProbe(true, "", List.of(), meminfoLines);
probe = buildStubOsProbe(cgroupsVersion, "", List.of(), meminfoLines);
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(memTotalInKb * 1024L));
}

Expand All @@ -319,7 +311,13 @@ public void testGetTotalMemoryOnDebian8() throws Exception {
assertThat(osProbe.getTotalPhysicalMemorySize(), greaterThan(0L));
}

private static List<String> getProcSelfGroupLines(String hierarchy) {
private static List<String> getProcSelfGroupLines(int cgroupsVersion, String hierarchy) {
// It doesn't really matter if cgroupsVersion == 0 here

if (cgroupsVersion == 2) {
return List.of("0::/" + hierarchy);
}

return Arrays.asList(
"10:freezer:/",
"9:net_cls,net_prio:/",
Expand All @@ -331,32 +329,40 @@ private static List<String> getProcSelfGroupLines(String hierarchy) {
"3:perf_event:/",
"2:cpu,cpuacct,cpuset:/" + hierarchy,
"1:name=systemd:/user.slice/user-1000.slice/session-2359.scope",
"0::/cgroup2");
"0::/cgroup2"
);
}

private static OsProbe buildStubOsProbe(final boolean areCgroupStatsAvailable, final String hierarchy) {
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy);
private static OsProbe buildStubOsProbe(final int availableCgroupsVersion, final String hierarchy) {
List<String> procSelfCgroupLines = getProcSelfGroupLines(availableCgroupsVersion, hierarchy);

return buildStubOsProbe(areCgroupStatsAvailable, hierarchy, procSelfCgroupLines);
return buildStubOsProbe(availableCgroupsVersion, hierarchy, procSelfCgroupLines);
}

/**
* Builds a test instance of OsProbe. Methods that ordinarily read from the filesystem are overridden to return values based upon
* the arguments to this method.
*
* @param areCgroupStatsAvailable whether or not cgroup data is available. Normally OsProbe establishes this for itself.
* @param availableCgroupsVersion what version of cgroups are available, 1 or 2, or 0 for no cgroups. Normally OsProbe establishes this
* for itself.
* @param hierarchy a mock value used to generate a cgroup hierarchy.
* @param procSelfCgroupLines the lines that will be used as the content of <code>/proc/self/cgroup</code>
* @param procMeminfoLines lines that will be used as the content of <code>/proc/meminfo</code>
* @return a test instance
*/
private static OsProbe buildStubOsProbe(
final boolean areCgroupStatsAvailable,
final int availableCgroupsVersion,
final String hierarchy,
List<String> procSelfCgroupLines,
List<String> procMeminfoLines
) {
return new OsProbe() {
/**
 * Test override: the {@code isLinux} argument is ignored and the call is always
 * forwarded to the superclass with {@code true}.
 */
@Override
OsStats.Cgroup getCgroup(boolean isLinux) {
// Pretend we're always on Linux so that we can run the cgroup tests
return super.getCgroup(true);
}

@Override
List<String> readProcSelfCgroup() {
return procSelfCgroupLines;
Expand All @@ -382,10 +388,7 @@ String readSysFsCgroupCpuAcctCpuAcctCfsQuota(String controlGroup) {

@Override
List<String> readSysFsCgroupCpuAcctCpuStat(String controlGroup) {
return Arrays.asList(
"nr_periods 17992",
"nr_throttled 1311",
"throttled_time 139298645489");
return Arrays.asList("nr_periods 17992", "nr_throttled 1311", "throttled_time 139298645489");
}

@Override
Expand All @@ -403,22 +406,50 @@ String readSysFsCgroupMemoryUsageInBytes(String controlGroup) {

@Override
boolean areCgroupStatsAvailable() {
return areCgroupStatsAvailable;
return availableCgroupsVersion > 0;
}

@Override
List<String> readProcMeminfo() throws IOException {
List<String> readProcMeminfo() {
return procMeminfoLines;
}

/**
 * Stub for the cgroups v2 memory limit. Asserts that the probe asks about the
 * expected control group ({@code "/" + hierarchy}) and returns a fixed value,
 * 2^64 - 1, as a string.
 */
@Override
String readSysFsCgroupV2MemoryLimitInBytes(String controlGroup) {
assertThat(controlGroup, equalTo("/" + hierarchy));
// This is the highest value that can be stored in an unsigned 64 bit number, hence too big for long
return "18446744073709551615";
}

/**
 * Stub for the cgroups v2 memory usage. Asserts that the probe asks about the
 * expected control group ({@code "/" + hierarchy}) and returns a fixed value.
 */
@Override
String readSysFsCgroupV2MemoryUsageInBytes(String controlGroup) {
assertThat(controlGroup, equalTo("/" + hierarchy));
return "4796416";
}

/**
 * Stub for the cgroups v2 CPU statistics. Asserts that the probe asks about the
 * expected control group ({@code "/" + hierarchy}) and returns six fixed
 * {@code "<name> <value>"} lines.
 */
@Override
List<String> readCgroupV2CpuStats(String controlGroup) {
assertThat(controlGroup, equalTo("/" + hierarchy));
// NOTE(review): testCgroupProbe asserts getCpuAcctUsageNanos() == 364869866063112, the same
// number as usage_usec below - confirm whether a unit conversion is expected in OsProbe.
return List.of(
"usage_usec 364869866063112",
"user_usec 34636",
"system_usec 9896",
"nr_periods 17992",
"nr_throttled 1311",
"throttled_usec 139298645489"
);
}

/**
 * Stub for the cgroups v2 CPU limit. Asserts that the probe asks about the
 * expected control group ({@code "/" + hierarchy}) and returns a fixed
 * two-number string.
 */
@Override
String readCgroupV2CpuLimit(String controlGroup) {
assertThat(controlGroup, equalTo("/" + hierarchy));
// presumably "<quota> <period>" as in the cgroup v2 cpu.max file - TODO confirm against OsProbe
return "50000 100000";
}
};
}

private static OsProbe buildStubOsProbe(
final boolean areCgroupStatsAvailable,
final String hierarchy,
List<String> procSelfCgroupLines
) {
return buildStubOsProbe(areCgroupStatsAvailable, hierarchy, procSelfCgroupLines, List.of());
private static OsProbe buildStubOsProbe(final int availableCgroupsVersion, final String hierarchy, List<String> procSelfCgroupLines) {
return buildStubOsProbe(availableCgroupsVersion, hierarchy, procSelfCgroupLines, List.of());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -1260,7 +1260,7 @@ static long machineMemoryFromStats(OsStats stats) {
OsStats.Cgroup cgroup = stats.getCgroup();
if (cgroup != null) {
String containerLimitStr = cgroup.getMemoryLimitInBytes();
if (containerLimitStr != null) {
if (containerLimitStr != null && containerLimitStr.equals("max") == false) {
BigInteger containerLimit = new BigInteger(containerLimitStr);
if ((containerLimit.compareTo(BigInteger.valueOf(mem)) < 0 && containerLimit.compareTo(BigInteger.ZERO) > 0)
// mem <= 0 means the value couldn't be obtained for some reason
Expand Down

0 comments on commit d238374

Please sign in to comment.