From 9bc603e7d903a400d1dd81eb368ae8466e5647f3 Mon Sep 17 00:00:00 2001 From: Josh Dover Date: Wed, 9 Sep 2020 14:01:29 -0600 Subject: [PATCH] Move metrics to setup and add cgroup metrics (#76730) --- .../core/server/kibana-plugin-core-server.md | 2 +- ....metricsservicesetup.collectioninterval.md | 13 ++ ...rver.metricsservicesetup.getopsmetrics_.md | 24 +++ ...-plugin-core-server.metricsservicesetup.md | 10 + ...gin-core-server.opsmetrics.collected_at.md | 13 ++ .../kibana-plugin-core-server.opsmetrics.md | 1 + ...ana-plugin-core-server.opsosmetrics.cpu.md | 22 ++ ...plugin-core-server.opsosmetrics.cpuacct.md | 16 ++ .../kibana-plugin-core-server.opsosmetrics.md | 2 + docs/setup/settings.asciidoc | 16 +- .../core_app/status/lib/load_status.test.ts | 1 + .../config/deprecation/core_deprecations.ts | 4 +- src/core/server/legacy/legacy_service.ts | 1 + .../server/metrics/collectors/cgroup.test.ts | 115 +++++++++++ src/core/server/metrics/collectors/cgroup.ts | 194 ++++++++++++++++++ .../metrics/collectors/collector.mock.ts | 33 +++ src/core/server/metrics/collectors/index.ts | 2 +- .../metrics/collectors/os.test.mocks.ts | 25 +++ src/core/server/metrics/collectors/os.test.ts | 8 + src/core/server/metrics/collectors/os.ts | 32 ++- src/core/server/metrics/collectors/types.ts | 27 +++ .../server/metrics/metrics_service.mock.ts | 26 ++- src/core/server/metrics/metrics_service.ts | 28 ++- src/core/server/metrics/ops_config.ts | 4 + .../metrics/ops_metrics_collector.test.ts | 3 +- .../server/metrics/ops_metrics_collector.ts | 6 +- src/core/server/metrics/types.ts | 15 +- src/core/server/plugins/plugin_context.ts | 1 + src/core/server/server.api.md | 21 +- .../server/collectors/ops_stats/index.test.ts | 1 + .../ops_stats/ops_stats_collector.ts | 8 +- 31 files changed, 629 insertions(+), 45 deletions(-) create mode 100644 docs/development/core/server/kibana-plugin-core-server.metricsservicesetup.collectioninterval.md create mode 100644 docs/development/core/server/kibana-plugin-core-server.metricsservicesetup.getopsmetrics_.md create mode 100644 docs/development/core/server/kibana-plugin-core-server.opsmetrics.collected_at.md create mode 100644 docs/development/core/server/kibana-plugin-core-server.opsosmetrics.cpu.md create mode 100644 docs/development/core/server/kibana-plugin-core-server.opsosmetrics.cpuacct.md create mode 100644 src/core/server/metrics/collectors/cgroup.test.ts create mode 100644 src/core/server/metrics/collectors/cgroup.ts create mode 100644 src/core/server/metrics/collectors/collector.mock.ts create mode 100644 src/core/server/metrics/collectors/os.test.mocks.ts diff --git a/docs/development/core/server/kibana-plugin-core-server.md b/docs/development/core/server/kibana-plugin-core-server.md index 89330d2a86f76..dfffdffb08a08 100644 --- a/docs/development/core/server/kibana-plugin-core-server.md +++ b/docs/development/core/server/kibana-plugin-core-server.md @@ -123,7 +123,7 @@ The plugin integrates with the core system via lifecycle events: `setup` | [LoggerFactory](./kibana-plugin-core-server.loggerfactory.md) | The single purpose of LoggerFactory interface is to define a way to retrieve a context-based logger instance. | | [LoggingServiceSetup](./kibana-plugin-core-server.loggingservicesetup.md) | Provides APIs to plugins for customizing the plugin's logger. | | [LogMeta](./kibana-plugin-core-server.logmeta.md) | Contextual metadata | -| [MetricsServiceSetup](./kibana-plugin-core-server.metricsservicesetup.md) | | +| [MetricsServiceSetup](./kibana-plugin-core-server.metricsservicesetup.md) | APIs to retrieves metrics gathered and exposed by the core platform. | | [NodesVersionCompatibility](./kibana-plugin-core-server.nodesversioncompatibility.md) | | | [OnPostAuthToolkit](./kibana-plugin-core-server.onpostauthtoolkit.md) | A tool set defining an outcome of OnPostAuth interceptor for incoming request. | | [OnPreAuthToolkit](./kibana-plugin-core-server.onpreauthtoolkit.md) | A tool set defining an outcome of OnPreAuth interceptor for incoming request. | diff --git a/docs/development/core/server/kibana-plugin-core-server.metricsservicesetup.collectioninterval.md b/docs/development/core/server/kibana-plugin-core-server.metricsservicesetup.collectioninterval.md new file mode 100644 index 0000000000000..6f05526b66c83 --- /dev/null +++ b/docs/development/core/server/kibana-plugin-core-server.metricsservicesetup.collectioninterval.md @@ -0,0 +1,13 @@ + + +[Home](./index.md) > [kibana-plugin-core-server](./kibana-plugin-core-server.md) > [MetricsServiceSetup](./kibana-plugin-core-server.metricsservicesetup.md) > [collectionInterval](./kibana-plugin-core-server.metricsservicesetup.collectioninterval.md) + +## MetricsServiceSetup.collectionInterval property + +Interval metrics are collected in milliseconds + +Signature: + +```typescript +readonly collectionInterval: number; +``` diff --git a/docs/development/core/server/kibana-plugin-core-server.metricsservicesetup.getopsmetrics_.md b/docs/development/core/server/kibana-plugin-core-server.metricsservicesetup.getopsmetrics_.md new file mode 100644 index 0000000000000..61107fbf20ad9 --- /dev/null +++ b/docs/development/core/server/kibana-plugin-core-server.metricsservicesetup.getopsmetrics_.md @@ -0,0 +1,24 @@ + + +[Home](./index.md) > [kibana-plugin-core-server](./kibana-plugin-core-server.md) > [MetricsServiceSetup](./kibana-plugin-core-server.metricsservicesetup.md) > [getOpsMetrics$](./kibana-plugin-core-server.metricsservicesetup.getopsmetrics_.md) + +## MetricsServiceSetup.getOpsMetrics$ property + +Retrieve an observable emitting the [OpsMetrics](./kibana-plugin-core-server.opsmetrics.md) gathered. The observable will emit an initial value during core's `start` phase, and a new value every fixed interval of time, based on the `opts.interval` configuration property. + +Signature: + +```typescript +getOpsMetrics$: () => Observable; +``` + +## Example + + +```ts +core.metrics.getOpsMetrics$().subscribe(metrics => { + // do something with the metrics +}) + +``` + diff --git a/docs/development/core/server/kibana-plugin-core-server.metricsservicesetup.md b/docs/development/core/server/kibana-plugin-core-server.metricsservicesetup.md index 0bec919797b6f..5fcb1417dea0e 100644 --- a/docs/development/core/server/kibana-plugin-core-server.metricsservicesetup.md +++ b/docs/development/core/server/kibana-plugin-core-server.metricsservicesetup.md @@ -4,8 +4,18 @@ ## MetricsServiceSetup interface +APIs to retrieves metrics gathered and exposed by the core platform. + Signature: ```typescript export interface MetricsServiceSetup ``` + +## Properties + +| Property | Type | Description | +| --- | --- | --- | +| [collectionInterval](./kibana-plugin-core-server.metricsservicesetup.collectioninterval.md) | number | Interval metrics are collected in milliseconds | +| [getOpsMetrics$](./kibana-plugin-core-server.metricsservicesetup.getopsmetrics_.md) | () => Observable<OpsMetrics> | Retrieve an observable emitting the [OpsMetrics](./kibana-plugin-core-server.opsmetrics.md) gathered. The observable will emit an initial value during core's start phase, and a new value every fixed interval of time, based on the opts.interval configuration property. | + diff --git a/docs/development/core/server/kibana-plugin-core-server.opsmetrics.collected_at.md b/docs/development/core/server/kibana-plugin-core-server.opsmetrics.collected_at.md new file mode 100644 index 0000000000000..25125569b7b38 --- /dev/null +++ b/docs/development/core/server/kibana-plugin-core-server.opsmetrics.collected_at.md @@ -0,0 +1,13 @@ + + +[Home](./index.md) > [kibana-plugin-core-server](./kibana-plugin-core-server.md) > [OpsMetrics](./kibana-plugin-core-server.opsmetrics.md) > [collected\_at](./kibana-plugin-core-server.opsmetrics.collected_at.md) + +## OpsMetrics.collected\_at property + +Time metrics were recorded at. + +Signature: + +```typescript +collected_at: Date; +``` diff --git a/docs/development/core/server/kibana-plugin-core-server.opsmetrics.md b/docs/development/core/server/kibana-plugin-core-server.opsmetrics.md index d2d4782385c06..9803c0fbd53cc 100644 --- a/docs/development/core/server/kibana-plugin-core-server.opsmetrics.md +++ b/docs/development/core/server/kibana-plugin-core-server.opsmetrics.md @@ -16,6 +16,7 @@ export interface OpsMetrics | Property | Type | Description | | --- | --- | --- | +| [collected\_at](./kibana-plugin-core-server.opsmetrics.collected_at.md) | Date | Time metrics were recorded at. | | [concurrent\_connections](./kibana-plugin-core-server.opsmetrics.concurrent_connections.md) | OpsServerMetrics['concurrent_connections'] | number of current concurrent connections to the server | | [os](./kibana-plugin-core-server.opsmetrics.os.md) | OpsOsMetrics | OS related metrics | | [process](./kibana-plugin-core-server.opsmetrics.process.md) | OpsProcessMetrics | Process related metrics | diff --git a/docs/development/core/server/kibana-plugin-core-server.opsosmetrics.cpu.md b/docs/development/core/server/kibana-plugin-core-server.opsosmetrics.cpu.md new file mode 100644 index 0000000000000..095c45266a251 --- /dev/null +++ b/docs/development/core/server/kibana-plugin-core-server.opsosmetrics.cpu.md @@ -0,0 +1,22 @@ + + +[Home](./index.md) > [kibana-plugin-core-server](./kibana-plugin-core-server.md) > [OpsOsMetrics](./kibana-plugin-core-server.opsosmetrics.md) > [cpu](./kibana-plugin-core-server.opsosmetrics.cpu.md) + +## OpsOsMetrics.cpu property + +cpu cgroup metrics, undefined when not running in a cgroup + +Signature: + +```typescript +cpu?: { + control_group: string; + cfs_period_micros: number; + cfs_quota_micros: number; + stat: { + number_of_elapsed_periods: number; + number_of_times_throttled: number; + time_throttled_nanos: number; + }; + }; +``` diff --git a/docs/development/core/server/kibana-plugin-core-server.opsosmetrics.cpuacct.md b/docs/development/core/server/kibana-plugin-core-server.opsosmetrics.cpuacct.md new file mode 100644 index 0000000000000..140646a0d1a35 --- /dev/null +++ b/docs/development/core/server/kibana-plugin-core-server.opsosmetrics.cpuacct.md @@ -0,0 +1,16 @@ + + +[Home](./index.md) > [kibana-plugin-core-server](./kibana-plugin-core-server.md) > [OpsOsMetrics](./kibana-plugin-core-server.opsosmetrics.md) > [cpuacct](./kibana-plugin-core-server.opsosmetrics.cpuacct.md) + +## OpsOsMetrics.cpuacct property + +cpu accounting metrics, undefined when not running in a cgroup + +Signature: + +```typescript +cpuacct?: { + control_group: string; + usage_nanos: number; + }; +``` diff --git a/docs/development/core/server/kibana-plugin-core-server.opsosmetrics.md b/docs/development/core/server/kibana-plugin-core-server.opsosmetrics.md index 5fedb76a9c8d7..8938608531139 100644 --- a/docs/development/core/server/kibana-plugin-core-server.opsosmetrics.md +++ b/docs/development/core/server/kibana-plugin-core-server.opsosmetrics.md @@ -16,6 +16,8 @@ export interface OpsOsMetrics | Property | Type | Description | | --- | --- | --- | +| [cpu](./kibana-plugin-core-server.opsosmetrics.cpu.md) | {
control_group: string;
cfs_period_micros: number;
cfs_quota_micros: number;
stat: {
number_of_elapsed_periods: number;
number_of_times_throttled: number;
time_throttled_nanos: number;
};
} | cpu cgroup metrics, undefined when not running in a cgroup | +| [cpuacct](./kibana-plugin-core-server.opsosmetrics.cpuacct.md) | {
control_group: string;
usage_nanos: number;
} | cpu accounting metrics, undefined when not running in a cgroup | | [distro](./kibana-plugin-core-server.opsosmetrics.distro.md) | string | The os distrib. Only present for linux platforms | | [distroRelease](./kibana-plugin-core-server.opsosmetrics.distrorelease.md) | string | The os distrib release, prefixed by the os distrib. Only present for linux platforms | | [load](./kibana-plugin-core-server.opsosmetrics.load.md) | {
'1m': number;
'5m': number;
'15m': number;
} | cpu load metrics | diff --git a/docs/setup/settings.asciidoc b/docs/setup/settings.asciidoc index 4a931aabd3646..f03022e9e9f00 100644 --- a/docs/setup/settings.asciidoc +++ b/docs/setup/settings.asciidoc @@ -20,12 +20,12 @@ which may cause a delay before pages start being served. Set to `false` to disable Console. *Default: `true`* | `cpu.cgroup.path.override:` - | Override for cgroup cpu path when mounted in a -manner that is inconsistent with `/proc/self/cgroup`. + | *deprecated* This setting has been renamed to `ops.cGroupOverrides.cpuPath` +and the old name will no longer be supported as of 8.0. | `cpuacct.cgroup.path.override:` - | Override for cgroup cpuacct path when mounted -in a manner that is inconsistent with `/proc/self/cgroup`. + | *deprecated* This setting has been renamed to `ops.cGroupOverrides.cpuAcctPath` +and the old name will no longer be supported as of 8.0. | `csp.rules:` | A https://w3c.github.io/webappsec-csp/[content-security-policy] template @@ -438,6 +438,14 @@ not saved in {es}. *Default: `data`* | Set the interval in milliseconds to sample system and process performance metrics. The minimum value is 100. *Default: `5000`* +| `ops.cGroupOverrides.cpuPath:` + | Override for cgroup cpu path when mounted in a +manner that is inconsistent with `/proc/self/cgroup`. + +| `ops.cGroupOverrides.cpuAcctPath:` + | Override for cgroup cpuacct path when mounted +in a manner that is inconsistent with `/proc/self/cgroup`. + | `server.basePath:` | Enables you to specify a path to mount {kib} at if you are running behind a proxy. Use the `server.rewriteBasePath` setting to tell {kib} diff --git a/src/core/public/core_app/status/lib/load_status.test.ts b/src/core/public/core_app/status/lib/load_status.test.ts index 3a444a4448467..5a9f982e106a7 100644 --- a/src/core/public/core_app/status/lib/load_status.test.ts +++ b/src/core/public/core_app/status/lib/load_status.test.ts @@ -57,6 +57,7 @@ const mockedResponse: StatusResponse = { ], }, metrics: { + collected_at: new Date('2020-01-01 01:00:00'), collection_interval_in_millis: 1000, os: { platform: 'darwin' as const, diff --git a/src/core/server/config/deprecation/core_deprecations.ts b/src/core/server/config/deprecation/core_deprecations.ts index e4e881ab24372..2b8b8e383da24 100644 --- a/src/core/server/config/deprecation/core_deprecations.ts +++ b/src/core/server/config/deprecation/core_deprecations.ts @@ -113,7 +113,7 @@ const mapManifestServiceUrlDeprecation: ConfigDeprecation = (settings, fromPath, return settings; }; -export const coreDeprecationProvider: ConfigDeprecationProvider = ({ unusedFromRoot }) => [ +export const coreDeprecationProvider: ConfigDeprecationProvider = ({ rename, unusedFromRoot }) => [ unusedFromRoot('savedObjects.indexCheckTimeout'), unusedFromRoot('server.xsrf.token'), unusedFromRoot('maps.manifestServiceUrl'), @@ -136,6 +136,8 @@ export const coreDeprecationProvider: ConfigDeprecationProvider = ({ unusedFromR unusedFromRoot('optimize.workers'), unusedFromRoot('optimize.profile'), unusedFromRoot('optimize.validateSyntaxOfNodeModules'), + rename('cpu.cgroup.path.override', 'ops.cGroupOverrides.cpuPath'), + rename('cpuacct.cgroup.path.override', 'ops.cGroupOverrides.cpuAcctPath'), configPathDeprecation, dataPathDeprecation, rewriteBasePathDeprecation, diff --git a/src/core/server/legacy/legacy_service.ts b/src/core/server/legacy/legacy_service.ts index b95644590b4e9..ba3eb28f90c5c 100644 --- a/src/core/server/legacy/legacy_service.ts +++ b/src/core/server/legacy/legacy_service.ts @@ -264,6 +264,7 @@ export class LegacyService implements CoreService { getTypeRegistry: startDeps.core.savedObjects.getTypeRegistry, }, metrics: { + collectionInterval: startDeps.core.metrics.collectionInterval, getOpsMetrics$: startDeps.core.metrics.getOpsMetrics$, }, uiSettings: { asScopedToClient: startDeps.core.uiSettings.asScopedToClient }, diff --git a/src/core/server/metrics/collectors/cgroup.test.ts b/src/core/server/metrics/collectors/cgroup.test.ts new file mode 100644 index 0000000000000..39f917b9f0ba1 --- /dev/null +++ b/src/core/server/metrics/collectors/cgroup.test.ts @@ -0,0 +1,115 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import mockFs from 'mock-fs'; +import { OsCgroupMetricsCollector } from './cgroup'; + +describe('OsCgroupMetricsCollector', () => { + afterEach(() => mockFs.restore()); + + it('returns empty object when no cgroup file present', async () => { + mockFs({ + '/proc/self': { + /** empty directory */ + }, + }); + + const collector = new OsCgroupMetricsCollector({}); + expect(await collector.collect()).toEqual({}); + }); + + it('collects default cgroup data', async () => { + mockFs({ + '/proc/self/cgroup': ` +123:memory:/groupname +123:cpu:/groupname +123:cpuacct:/groupname + `, + '/sys/fs/cgroup/cpuacct/groupname/cpuacct.usage': '111', + '/sys/fs/cgroup/cpu/groupname/cpu.cfs_period_us': '222', + '/sys/fs/cgroup/cpu/groupname/cpu.cfs_quota_us': '333', + '/sys/fs/cgroup/cpu/groupname/cpu.stat': ` +nr_periods 444 +nr_throttled 555 +throttled_time 666 + `, + }); + + const collector = new OsCgroupMetricsCollector({}); + expect(await collector.collect()).toMatchInlineSnapshot(` + Object { + "cpu": Object { + "cfs_period_micros": 222, + "cfs_quota_micros": 333, + "control_group": "/groupname", + "stat": Object { + "number_of_elapsed_periods": 444, + "number_of_times_throttled": 555, + "time_throttled_nanos": 666, + }, + }, + "cpuacct": Object { + "control_group": "/groupname", + "usage_nanos": 111, + }, + } + `); + }); + + it('collects override cgroup data', async () => { + mockFs({ + '/proc/self/cgroup': ` +123:memory:/groupname +123:cpu:/groupname +123:cpuacct:/groupname + `, + '/sys/fs/cgroup/cpuacct/xxcustomcpuacctxx/cpuacct.usage': '111', + '/sys/fs/cgroup/cpu/xxcustomcpuxx/cpu.cfs_period_us': '222', + '/sys/fs/cgroup/cpu/xxcustomcpuxx/cpu.cfs_quota_us': '333', + '/sys/fs/cgroup/cpu/xxcustomcpuxx/cpu.stat': ` +nr_periods 444 +nr_throttled 555 +throttled_time 666 + `, + }); + + const collector = new OsCgroupMetricsCollector({ + cpuAcctPath: 'xxcustomcpuacctxx', + cpuPath: 'xxcustomcpuxx', + }); + expect(await collector.collect()).toMatchInlineSnapshot(` + Object { + "cpu": Object { + "cfs_period_micros": 222, + "cfs_quota_micros": 333, + "control_group": "xxcustomcpuxx", + "stat": Object { + "number_of_elapsed_periods": 444, + "number_of_times_throttled": 555, + "time_throttled_nanos": 666, + }, + }, + "cpuacct": Object { + "control_group": "xxcustomcpuacctxx", + "usage_nanos": 111, + }, + } + `); + }); +}); diff --git a/src/core/server/metrics/collectors/cgroup.ts b/src/core/server/metrics/collectors/cgroup.ts new file mode 100644 index 0000000000000..867ea44dff1ae --- /dev/null +++ b/src/core/server/metrics/collectors/cgroup.ts @@ -0,0 +1,194 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import fs from 'fs'; +import { join as joinPath } from 'path'; +import { MetricsCollector, OpsOsMetrics } from './types'; + +type OsCgroupMetrics = Pick; + +interface OsCgroupMetricsCollectorOptions { + cpuPath?: string; + cpuAcctPath?: string; +} + +export class OsCgroupMetricsCollector implements MetricsCollector { + /** Used to prevent unnecessary file reads on systems not using cgroups. */ + private noCgroupPresent = false; + private cpuPath?: string; + private cpuAcctPath?: string; + + constructor(private readonly options: OsCgroupMetricsCollectorOptions) {} + + public async collect(): Promise { + try { + await this.initializePaths(); + if (this.noCgroupPresent || !this.cpuAcctPath || !this.cpuPath) { + return {}; + } + + const [cpuAcctUsage, cpuFsPeriod, cpuFsQuota, cpuStat] = await Promise.all([ + readCPUAcctUsage(this.cpuAcctPath), + readCPUFsPeriod(this.cpuPath), + readCPUFsQuota(this.cpuPath), + readCPUStat(this.cpuPath), + ]); + + return { + cpuacct: { + control_group: this.cpuAcctPath, + usage_nanos: cpuAcctUsage, + }, + + cpu: { + control_group: this.cpuPath, + cfs_period_micros: cpuFsPeriod, + cfs_quota_micros: cpuFsQuota, + stat: cpuStat, + }, + }; + } catch (err) { + if (err.code === 'ENOENT') { + this.noCgroupPresent = true; + return {}; + } else { + throw err; + } + } + } + + public reset() {} + + private async initializePaths() { + // Perform this setup lazily on the first collect call and then memoize the results. + // Makes the assumption this data doesn't change while the process is running. + if (this.cpuPath && this.cpuAcctPath) { + return; + } + + // Only read the file if both options are undefined. + if (!this.options.cpuPath || !this.options.cpuAcctPath) { + const cgroups = await readControlGroups(); + this.cpuPath = this.options.cpuPath || cgroups[GROUP_CPU]; + this.cpuAcctPath = this.options.cpuAcctPath || cgroups[GROUP_CPUACCT]; + } else { + this.cpuPath = this.options.cpuPath; + this.cpuAcctPath = this.options.cpuAcctPath; + } + + // prevents undefined cgroup paths + if (!this.cpuPath || !this.cpuAcctPath) { + this.noCgroupPresent = true; + } + } +} + +const CONTROL_GROUP_RE = new RegExp('\\d+:([^:]+):(/.*)'); +const CONTROLLER_SEPARATOR_RE = ','; + +const PROC_SELF_CGROUP_FILE = '/proc/self/cgroup'; +const PROC_CGROUP_CPU_DIR = '/sys/fs/cgroup/cpu'; +const PROC_CGROUP_CPUACCT_DIR = '/sys/fs/cgroup/cpuacct'; + +const GROUP_CPUACCT = 'cpuacct'; +const CPUACCT_USAGE_FILE = 'cpuacct.usage'; + +const GROUP_CPU = 'cpu'; +const CPU_FS_PERIOD_US_FILE = 'cpu.cfs_period_us'; +const CPU_FS_QUOTA_US_FILE = 'cpu.cfs_quota_us'; +const CPU_STATS_FILE = 'cpu.stat'; + +async function readControlGroups() { + const data = await fs.promises.readFile(PROC_SELF_CGROUP_FILE); + + return data + .toString() + .split(/\n/) + .reduce((acc, line) => { + const matches = line.match(CONTROL_GROUP_RE); + + if (matches !== null) { + const controllers = matches[1].split(CONTROLLER_SEPARATOR_RE); + controllers.forEach((controller) => { + acc[controller] = matches[2]; + }); + } + + return acc; + }, {} as Record); +} + +async function fileContentsToInteger(path: string) { + const data = await fs.promises.readFile(path); + return parseInt(data.toString(), 10); +} + +function readCPUAcctUsage(controlGroup: string) { + return fileContentsToInteger(joinPath(PROC_CGROUP_CPUACCT_DIR, controlGroup, CPUACCT_USAGE_FILE)); +} + +function readCPUFsPeriod(controlGroup: string) { + return fileContentsToInteger(joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_FS_PERIOD_US_FILE)); +} + +function readCPUFsQuota(controlGroup: string) { + return fileContentsToInteger(joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_FS_QUOTA_US_FILE)); +} + +async function readCPUStat(controlGroup: string) { + const stat = { + number_of_elapsed_periods: -1, + number_of_times_throttled: -1, + time_throttled_nanos: -1, + }; + + try { + const data = await fs.promises.readFile( + joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_STATS_FILE) + ); + return data + .toString() + .split(/\n/) + .reduce((acc, line) => { + const fields = line.split(/\s+/); + + switch (fields[0]) { + case 'nr_periods': + acc.number_of_elapsed_periods = parseInt(fields[1], 10); + break; + + case 'nr_throttled': + acc.number_of_times_throttled = parseInt(fields[1], 10); + break; + + case 'throttled_time': + acc.time_throttled_nanos = parseInt(fields[1], 10); + break; + } + + return acc; + }, stat); + } catch (err) { + if (err.code === 'ENOENT') { + return stat; + } + + throw err; + } +} diff --git a/src/core/server/metrics/collectors/collector.mock.ts b/src/core/server/metrics/collectors/collector.mock.ts new file mode 100644 index 0000000000000..2a942e1fafe63 --- /dev/null +++ b/src/core/server/metrics/collectors/collector.mock.ts @@ -0,0 +1,33 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { MetricsCollector } from './types'; + +const createCollector = (collectReturnValue: any = {}): jest.Mocked> => { + const collector: jest.Mocked> = { + collect: jest.fn().mockResolvedValue(collectReturnValue), + reset: jest.fn(), + }; + + return collector; +}; + +export const metricsCollectorMock = { + create: createCollector, +}; diff --git a/src/core/server/metrics/collectors/index.ts b/src/core/server/metrics/collectors/index.ts index f58ab02e63881..4540cb79be74b 100644 --- a/src/core/server/metrics/collectors/index.ts +++ b/src/core/server/metrics/collectors/index.ts @@ -18,6 +18,6 @@ */ export { OpsProcessMetrics, OpsOsMetrics, OpsServerMetrics, MetricsCollector } from './types'; -export { OsMetricsCollector } from './os'; +export { OsMetricsCollector, OpsMetricsCollectorOptions } from './os'; export { ProcessMetricsCollector } from './process'; export { ServerMetricsCollector } from './server'; diff --git a/src/core/server/metrics/collectors/os.test.mocks.ts b/src/core/server/metrics/collectors/os.test.mocks.ts new file mode 100644 index 0000000000000..ee02b8c802151 --- /dev/null +++ b/src/core/server/metrics/collectors/os.test.mocks.ts @@ -0,0 +1,25 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { metricsCollectorMock } from './collector.mock'; + +export const cgroupCollectorMock = metricsCollectorMock.create(); +jest.doMock('./cgroup', () => ({ + OsCgroupMetricsCollector: jest.fn(() => cgroupCollectorMock), +})); diff --git a/src/core/server/metrics/collectors/os.test.ts b/src/core/server/metrics/collectors/os.test.ts index 7d5a6da90b7d6..5e52cecb76be3 100644 --- a/src/core/server/metrics/collectors/os.test.ts +++ b/src/core/server/metrics/collectors/os.test.ts @@ -20,6 +20,7 @@ jest.mock('getos', () => (cb: Function) => cb(null, { dist: 'distrib', release: 'release' })); import os from 'os'; +import { cgroupCollectorMock } from './os.test.mocks'; import { OsMetricsCollector } from './os'; describe('OsMetricsCollector', () => { @@ -27,6 +28,8 @@ describe('OsMetricsCollector', () => { beforeEach(() => { collector = new OsMetricsCollector(); + cgroupCollectorMock.collect.mockReset(); + cgroupCollectorMock.reset.mockReset(); }); afterEach(() => { @@ -96,4 +99,9 @@ describe('OsMetricsCollector', () => { '15m': fifteenMinLoad, }); }); + + it('calls the cgroup sub-collector', async () => { + await collector.collect(); + expect(cgroupCollectorMock.collect).toHaveBeenCalled(); + }); }); diff --git a/src/core/server/metrics/collectors/os.ts b/src/core/server/metrics/collectors/os.ts index 59bef9d8ddd2b..eae49278405a9 100644 --- a/src/core/server/metrics/collectors/os.ts +++ b/src/core/server/metrics/collectors/os.ts @@ -21,10 +21,22 @@ import os from 'os'; import getosAsync, { LinuxOs } from 'getos'; import { promisify } from 'util'; import { OpsOsMetrics, MetricsCollector } from './types'; +import { OsCgroupMetricsCollector } from './cgroup'; const getos = promisify(getosAsync); +export interface OpsMetricsCollectorOptions { + cpuPath?: string; + cpuAcctPath?: string; +} + export class OsMetricsCollector implements MetricsCollector { + private readonly cgroupCollector: OsCgroupMetricsCollector; + + constructor(options: OpsMetricsCollectorOptions = {}) { + this.cgroupCollector = new OsCgroupMetricsCollector(options); + } + public async collect(): Promise { const platform = os.platform(); const load = os.loadavg(); @@ -43,20 +55,30 @@ export class OsMetricsCollector implements MetricsCollector { used_in_bytes: os.totalmem() - os.freemem(), }, uptime_in_millis: os.uptime() * 1000, + ...(await this.getDistroStats(platform)), + ...(await this.cgroupCollector.collect()), }; + return metrics; + } + + public reset() {} + + private async getDistroStats( + platform: string + ): Promise> { if (platform === 'linux') { try { const distro = (await getos()) as LinuxOs; - metrics.distro = distro.dist; - metrics.distroRelease = `${distro.dist}-${distro.release}`; + return { + distro: distro.dist, + distroRelease: `${distro.dist}-${distro.release}`, + }; } catch (e) { // ignore errors } } - return metrics; + return {}; } - - public reset() {} } diff --git a/src/core/server/metrics/collectors/types.ts b/src/core/server/metrics/collectors/types.ts index 73e8975a6b362..77ea13a1f0787 100644 --- a/src/core/server/metrics/collectors/types.ts +++ b/src/core/server/metrics/collectors/types.ts @@ -85,6 +85,33 @@ export interface OpsOsMetrics { }; /** the OS uptime */ uptime_in_millis: number; + + /** cpu accounting metrics, undefined when not running in a cgroup */ + cpuacct?: { + /** name of this process's cgroup */ + control_group: string; + /** cpu time used by this process's cgroup */ + usage_nanos: number; + }; + + /** cpu cgroup metrics, undefined when not running in a cgroup */ + cpu?: { + /** name of this process's cgroup */ + control_group: string; + /** the length of the cfs period */ + cfs_period_micros: number; + /** total available run-time within a cfs period */ + cfs_quota_micros: number; + /** current stats on the cfs periods */ + stat: { + /** number of cfs periods that elapsed */ + number_of_elapsed_periods: number; + /** number of times the cgroup has been throttled */ + number_of_times_throttled: number; + /** total amount of time the cgroup has been throttled for */ + time_throttled_nanos: number; + }; + }; } /** diff --git a/src/core/server/metrics/metrics_service.mock.ts b/src/core/server/metrics/metrics_service.mock.ts index 769f6ee2a549a..2af653004a479 100644 --- a/src/core/server/metrics/metrics_service.mock.ts +++ b/src/core/server/metrics/metrics_service.mock.ts @@ -21,20 +21,18 @@ import { MetricsService } from './metrics_service'; import { InternalMetricsServiceSetup, InternalMetricsServiceStart, + MetricsServiceSetup, MetricsServiceStart, } from './types'; const createInternalSetupContractMock = () => { - const setupContract: jest.Mocked = {}; - return setupContract; -}; - -const createStartContractMock = () => { - const startContract: jest.Mocked = { + const setupContract: jest.Mocked = { + collectionInterval: 30000, getOpsMetrics$: jest.fn(), }; - startContract.getOpsMetrics$.mockReturnValue( + setupContract.getOpsMetrics$.mockReturnValue( new BehaviorSubject({ + collected_at: new Date('2020-01-01 01:00:00'), process: { memory: { heap: { total_in_bytes: 1, used_in_bytes: 1, size_limit: 1 }, @@ -56,11 +54,21 @@ const createStartContractMock = () => { concurrent_connections: 1, }) ); + return setupContract; +}; + +const createSetupContractMock = () => { + const startContract: jest.Mocked = createInternalSetupContractMock(); return startContract; }; const createInternalStartContractMock = () => { - const startContract: jest.Mocked = createStartContractMock(); + const startContract: jest.Mocked = createInternalSetupContractMock(); + return startContract; +}; + +const createStartContractMock = () => { + const startContract: jest.Mocked = createInternalSetupContractMock(); return startContract; }; @@ -77,7 +85,7 @@ const createMock = () => { export const metricsServiceMock = { create: createMock, - createSetupContract: createStartContractMock, + createSetupContract: createSetupContractMock, createStartContract: createStartContractMock, createInternalSetupContract: createInternalSetupContractMock, createInternalStartContract: createInternalStartContractMock, diff --git a/src/core/server/metrics/metrics_service.ts b/src/core/server/metrics/metrics_service.ts index f28fb21aaac0d..d4696b3aa9aaf 100644 --- a/src/core/server/metrics/metrics_service.ts +++ b/src/core/server/metrics/metrics_service.ts @@ -17,7 +17,7 @@ * under the License. */ -import { Subject } from 'rxjs'; +import { ReplaySubject } from 'rxjs'; import { first } from 'rxjs/operators'; import { CoreService } from '../../types'; import { CoreContext } from '../core_context'; @@ -37,26 +37,21 @@ export class MetricsService private readonly logger: Logger; private metricsCollector?: OpsMetricsCollector; private collectInterval?: NodeJS.Timeout; - private metrics$ = new Subject(); + private metrics$ = new ReplaySubject(); + private service?: InternalMetricsServiceSetup; constructor(private readonly coreContext: CoreContext) { this.logger = coreContext.logger.get('metrics'); } public async setup({ http }: MetricsServiceSetupDeps): Promise { - this.metricsCollector = new OpsMetricsCollector(http.server); - return {}; - } - - public async start(): Promise { - if (!this.metricsCollector) { - throw new Error('#setup() needs to be run first'); - } const config = await this.coreContext.configService .atPath(opsConfig.path) .pipe(first()) .toPromise(); + this.metricsCollector = new OpsMetricsCollector(http.server, config.cGroupOverrides); + await this.refreshMetrics(); this.collectInterval = setInterval(() => { @@ -65,9 +60,20 @@ export class MetricsService const metricsObservable = this.metrics$.asObservable(); - return { + this.service = { + collectionInterval: config.interval.asMilliseconds(), getOpsMetrics$: () => metricsObservable, }; + + return this.service; + } + + public async start(): Promise { + if (!this.service) { + throw new Error('#setup() needs to be run first'); + } + + return this.service; } private async refreshMetrics() { diff --git a/src/core/server/metrics/ops_config.ts b/src/core/server/metrics/ops_config.ts index bd6ae5cc5474d..5f3f67e931c38 100644 --- a/src/core/server/metrics/ops_config.ts +++ b/src/core/server/metrics/ops_config.ts @@ -23,6 +23,10 @@ export const opsConfig = { path: 'ops', schema: schema.object({ interval: schema.duration({ defaultValue: '5s' }), + cGroupOverrides: schema.object({ + cpuPath: schema.maybe(schema.string()), + cpuAcctPath: schema.maybe(schema.string()), + }), }), }; diff --git a/src/core/server/metrics/ops_metrics_collector.test.ts b/src/core/server/metrics/ops_metrics_collector.test.ts index 9e76895b14578..7aa3f7cd3baf0 100644 --- a/src/core/server/metrics/ops_metrics_collector.test.ts +++ b/src/core/server/metrics/ops_metrics_collector.test.ts @@ -30,7 +30,7 @@ describe('OpsMetricsCollector', () => { beforeEach(() => { const hapiServer = httpServiceMock.createInternalSetupContract().server; - collector = new OpsMetricsCollector(hapiServer); + collector = new OpsMetricsCollector(hapiServer, {}); mockOsCollector.collect.mockResolvedValue('osMetrics'); }); @@ -51,6 +51,7 @@ describe('OpsMetricsCollector', () => { expect(mockServerCollector.collect).toHaveBeenCalledTimes(1); expect(metrics).toEqual({ + collected_at: expect.any(Date), process: 'processMetrics', os: 'osMetrics', requests: 'serverRequestsMetrics', diff --git a/src/core/server/metrics/ops_metrics_collector.ts b/src/core/server/metrics/ops_metrics_collector.ts index 525515dba1457..af74caa6cb386 100644 --- a/src/core/server/metrics/ops_metrics_collector.ts +++ b/src/core/server/metrics/ops_metrics_collector.ts @@ -21,6 +21,7 @@ import { Server as HapiServer } from 'hapi'; import { ProcessMetricsCollector, OsMetricsCollector, + OpsMetricsCollectorOptions, ServerMetricsCollector, MetricsCollector, } from './collectors'; @@ -31,9 +32,9 @@ export class OpsMetricsCollector implements MetricsCollector { private readonly osCollector: OsMetricsCollector; private readonly serverCollector: ServerMetricsCollector; - constructor(server: HapiServer) { + constructor(server: HapiServer, opsOptions: OpsMetricsCollectorOptions) { this.processCollector = new ProcessMetricsCollector(); - this.osCollector = new OsMetricsCollector(); + this.osCollector = new OsMetricsCollector(opsOptions); this.serverCollector = new ServerMetricsCollector(server); } @@ -44,6 +45,7 @@ export class OpsMetricsCollector implements MetricsCollector { this.serverCollector.collect(), ]); return { + collected_at: new Date(), process, os, ...server, diff --git a/src/core/server/metrics/types.ts b/src/core/server/metrics/types.ts index cbf0acacd6bab..c177b3ed25115 100644 --- a/src/core/server/metrics/types.ts +++ b/src/core/server/metrics/types.ts @@ -20,14 +20,15 @@ import { Observable } from 'rxjs'; import { OpsProcessMetrics, OpsOsMetrics, OpsServerMetrics } from './collectors'; -// eslint-disable-next-line @typescript-eslint/no-empty-interface -export interface MetricsServiceSetup {} /** * APIs to retrieves metrics gathered and exposed by the core platform. * * @public */ -export interface MetricsServiceStart { +export interface MetricsServiceSetup { + /** Interval metrics are collected in milliseconds */ + readonly collectionInterval: number; + /** * Retrieve an observable emitting the {@link OpsMetrics} gathered. * The observable will emit an initial value during core's `start` phase, and a new value every fixed interval of time, @@ -42,6 +43,12 @@ export interface MetricsServiceStart { */ getOpsMetrics$: () => Observable; } +/** + * {@inheritdoc MetricsServiceSetup} + * + * @public + */ +export type MetricsServiceStart = MetricsServiceSetup; export type InternalMetricsServiceSetup = MetricsServiceSetup; export type InternalMetricsServiceStart = MetricsServiceStart; @@ -53,6 +60,8 @@ export type InternalMetricsServiceStart = MetricsServiceStart; * @public */ export interface OpsMetrics { + /** Time metrics were recorded at. */ + collected_at: Date; /** Process related metrics */ process: OpsProcessMetrics; /** OS related metrics */ diff --git a/src/core/server/plugins/plugin_context.ts b/src/core/server/plugins/plugin_context.ts index fa2659ca130a0..5c389855d9ea2 100644 --- a/src/core/server/plugins/plugin_context.ts +++ b/src/core/server/plugins/plugin_context.ts @@ -233,6 +233,7 @@ export function createPluginStartContext( getTypeRegistry: deps.savedObjects.getTypeRegistry, }, metrics: { + collectionInterval: deps.metrics.collectionInterval, getOpsMetrics$: deps.metrics.getOpsMetrics$, }, uiSettings: { diff --git a/src/core/server/server.api.md b/src/core/server/server.api.md index 37023a0a8ef67..b86cc14636b8c 100644 --- a/src/core/server/server.api.md +++ b/src/core/server/server.api.md @@ -1531,10 +1531,10 @@ export interface LogRecord { timestamp: Date; } -// Warning: (ae-missing-release-tag) "MetricsServiceSetup" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal) -// -// @public (undocumented) +// @public export interface MetricsServiceSetup { + readonly collectionInterval: number; + getOpsMetrics$: () => Observable; } // @public @deprecated (undocumented) @@ -1621,6 +1621,7 @@ export interface OnPreRoutingToolkit { // @public export interface OpsMetrics { + collected_at: Date; concurrent_connections: OpsServerMetrics['concurrent_connections']; os: OpsOsMetrics; process: OpsProcessMetrics; @@ -1630,6 +1631,20 @@ export interface OpsMetrics { // @public export interface OpsOsMetrics { + cpu?: { + control_group: string; + cfs_period_micros: number; + cfs_quota_micros: number; + stat: { + number_of_elapsed_periods: number; + number_of_times_throttled: number; + time_throttled_nanos: number; + }; + }; + cpuacct?: { + control_group: string; + usage_nanos: number; + }; distro?: string; distroRelease?: string; load: { diff --git a/src/plugins/kibana_usage_collection/server/collectors/ops_stats/index.test.ts b/src/plugins/kibana_usage_collection/server/collectors/ops_stats/index.test.ts index 359d3a396665d..a527d4d03c6fc 100644 --- a/src/plugins/kibana_usage_collection/server/collectors/ops_stats/index.test.ts +++ b/src/plugins/kibana_usage_collection/server/collectors/ops_stats/index.test.ts @@ -39,6 +39,7 @@ describe('telemetry_ops_stats', () => { const callCluster = jest.fn(); const metric: OpsMetrics = { + collected_at: new Date('2020-01-01 01:00:00'), process: { memory: { heap: { diff --git a/src/plugins/kibana_usage_collection/server/collectors/ops_stats/ops_stats_collector.ts b/src/plugins/kibana_usage_collection/server/collectors/ops_stats/ops_stats_collector.ts index 6e8b71d675f7b..d3be601540582 100644 --- a/src/plugins/kibana_usage_collection/server/collectors/ops_stats/ops_stats_collector.ts +++ b/src/plugins/kibana_usage_collection/server/collectors/ops_stats/ops_stats_collector.ts @@ -18,13 +18,13 @@ */ import { Observable } from 'rxjs'; -import { cloneDeep } from 'lodash'; +import { cloneDeep, omit } from 'lodash'; import moment from 'moment'; import { OpsMetrics } from 'kibana/server'; import { UsageCollectionSetup } from 'src/plugins/usage_collection/server'; import { KIBANA_STATS_TYPE } from '../../../common/constants'; -interface OpsStatsMetrics extends Omit { +interface OpsStatsMetrics extends Omit { timestamp: string; response_times: { average: number; @@ -52,9 +52,9 @@ export function getOpsStatsCollector( // @ts-expect-error delete metrics.requests.statusCodes; lastMetrics = { - ...metrics, + ...omit(metrics, ['collected_at']), response_times: responseTimes, - timestamp: moment.utc().toISOString(), + timestamp: moment.utc(metrics.collected_at).toISOString(), }; });