This repository has been archived by the owner on Aug 10, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add in tutorial on adding in additional node metrics for GKE nodes us…
…ing the cloud monitoring agent
- Loading branch information
echiugoog
committed
Jun 19, 2020
1 parent
88cd2a2
commit 5a9b0fb
Showing
8 changed files
with
204 additions
and
0 deletions.
There are no files selected for viewing
25 changes: 25 additions & 0 deletions
25
tutorials/gke-node-agent-metrics-cloud-monitoring/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Container image for the Cloud Monitoring (Stackdriver) agent.
#
# BASE_IMAGE_TAG selects the tag of the Debian 9 base image. The default stays
# "latest" for backward compatibility; pass a pinned tag for reproducible
# builds, e.g.:
#   docker build --build-arg BASE_IMAGE_TAG=<tag> .
ARG BASE_IMAGE_TAG=latest
FROM marketplace.gcr.io/google/debian9:${BASE_IMAGE_TAG}

# The package installation below needs root.
USER root

# Install the prerequisites, download and run the agent installer, and clean
# up -- all in ONE layer. Files deleted in a later layer still take up space
# in the image, so the cleanup has to happen in the RUN that created them.
# The installer is fetched with curl -f so an HTTP error fails the build
# instead of piping an error page into bash.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gnupg2 \
    && curl -fsSL -o /tmp/install-monitoring-agent.sh \
        https://dl.google.com/cloudagents/install-monitoring-agent.sh \
    && bash /tmp/install-monitoring-agent.sh \
    && rm -f /tmp/install-monitoring-agent.sh \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Agent configuration and container entrypoint script.
COPY collectd.conf /etc/collectd/collectd.conf
COPY run.sh /run.sh

RUN ["chmod", "+x", "/run.sh"]

# Exec form: /run.sh is started directly instead of through "/bin/sh -c",
# so it receives the stop signal sent to the container.
CMD ["/run.sh"]
28 changes: 28 additions & 0 deletions
28
tutorials/gke-node-agent-metrics-cloud-monitoring/agent.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# DaemonSet that schedules the monitoring agent container on the cluster's
# nodes. Replace [PROJECT_ID] and [IMAGE_NAME] before applying.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: [IMAGE_NAME]
  labels:
    k8s-app: [IMAGE_NAME]
spec:
  selector:
    matchLabels:
      # Must match the pod template labels below.
      name: [IMAGE_NAME]
  template:
    metadata:
      labels:
        name: [IMAGE_NAME]
    spec:
      containers:
      - name: [IMAGE_NAME]
        image: gcr.io/[PROJECT_ID]/[IMAGE_NAME]
        securityContext:
          # NOTE(review): privileged mode grants broad access to the node;
          # confirm the agent really needs it.
          privileged: true
        volumeMounts:
        # The node's root filesystem, exposed read-only inside the container.
        - name: host
          mountPath: /mnt/host
          readOnly: true
      volumes:
      - name: host
        hostPath:
          path: /
6 changes: 6 additions & 0 deletions
6
tutorials/gke-node-agent-metrics-cloud-monitoring/cloudbuild.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Cloud Build configuration: build the agent image from the Dockerfile in the
# current directory, then push it to Container Registry.
# Replace [PROJECT_ID] and [IMAGE_NAME] before submitting.
steps:
- name: 'gcr.io/cloud-builders/docker'
  args: ['build', '-t', 'gcr.io/[PROJECT_ID]/[IMAGE_NAME]', '.']
- name: 'gcr.io/cloud-builders/docker'
  args: ['push', 'gcr.io/[PROJECT_ID]/[IMAGE_NAME]']
# Images recorded as outputs of this build.
images: ['gcr.io/[PROJECT_ID]/[IMAGE_NAME]']
78 changes: 78 additions & 0 deletions
78
tutorials/gke-node-agent-metrics-cloud-monitoring/collectd.conf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
# collectd configuration for the containerized monitoring agent.
# The Dockerfile copies this file to /etc/collectd/collectd.conf.

Interval 60

# NOTE(review): run.sh substitutes a %MONITORED_RESOURCE% placeholder in this
# file, but no such placeholder is present here -- confirm that the empty
# hostname is intended.
Hostname ""

# The Stackdriver agent does not use fully qualified domain names.
FQDNLookup false

# if you have other config, especially for plugins, you can drop them
# into this directory
Include "/etc/collectd/collectd.d/"

# --- Host metric plugins ----------------------------------------------------

LoadPlugin df
<Plugin "df">
  # With IgnoreSelected true, the selected filesystem type is excluded
  # rather than included.
  FSType "devfs"
  IgnoreSelected true
  ReportByDevice true
  ValuesPercentage true
</Plugin>

LoadPlugin cpu
<Plugin "cpu">
  ValuesPercentage true
  ReportByCpu false
</Plugin>
LoadPlugin swap
<Plugin "swap">
  ValuesPercentage true
</Plugin>
LoadPlugin interface
LoadPlugin disk
LoadPlugin load
LoadPlugin memory
<Plugin "memory">
  ValuesPercentage true
</Plugin>
LoadPlugin processes
LoadPlugin tcpconns

<Plugin "processes">
  # A single process group named "all" whose pattern matches every process.
  ProcessMatch "all" ".*"
  Detail "ps_cputime"
  Detail "ps_disk_octets"
  Detail "ps_rss"
  Detail "ps_vm"
</Plugin>

<Plugin "disk">
  # No config - collectd fails parsing configuration if tag is empty.
</Plugin>

<Plugin "tcpconns">
  AllPortsSummary true
</Plugin>

# --- Filtering and output plugins -------------------------------------------

LoadPlugin match_regex
LoadPlugin target_set
LoadPlugin stackdriver_agent
LoadPlugin write_gcm
LoadPlugin write_log
LoadPlugin aggregation
LoadPlugin match_throttle_metadata_keys

<Plugin "write_log">
  Format JSON
</Plugin>

# Every value that passes the throttle match is written by both write_gcm
# and write_log.
PostCacheChain "PostCache"
<Chain "PostCache">
  <Rule "otherwise">
    <Match "throttle_metadata_keys">
      OKToThrottle true
    </Match>
    <Target "write">
      Plugin "write_gcm"
      Plugin "write_log"
    </Target>
  </Rule>
</Chain>
Binary file added
BIN
+84.7 KB
tutorials/gke-node-agent-metrics-cloud-monitoring/images/sd-agent-metrics.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+41.1 KB
tutorials/gke-node-agent-metrics-cloud-monitoring/images/sd-explorer.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
59 changes: 59 additions & 0 deletions
59
tutorials/gke-node-agent-metrics-cloud-monitoring/index.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
--- | ||
title: Collecting additional GKE Node metrics using collectd to Cloud Monitoring | ||
description: Learn how to deploy the Cloud Monitoring agent on GKE nodes to expose additional VM metrics on GKE nodes | ||
author: aaronsutton, echiugoog | ||
tags: Google Container Engine, GKE, Cloud Monitoring, collectd, host metrics, VM metrics | ||
--- | ||
Today, only a few metrics are available by default on GKE nodes. | ||
|
||
You can deploy a Cloud Monitoring agent to expose additional metrics for added visibility into the health of your GKE nodes. | ||
|
||
## Objectives | ||
Expose additional host metrics using the Cloud Monitoring agent on GKE Nodes. e.g. [Cloud Monitoring Agent Metrics](https://cloud.google.com/monitoring/api/metrics_agent) | ||
|
||
Host metrics available today: | ||
* CPU Usage | ||
* Disk I/O | ||
* Network traffic | ||
|
||
Additional metrics added with Cloud Monitoring agent: | ||
* CPU Load | ||
* CPU Steal | ||
* Memory Usage | ||
* Swap Usage | ||
* Disk Usage | ||
* Open TCP Connections | ||
* Processes | ||
|
||
Even more metrics can be added by customizing `collectd.conf` to meet your needs. | ||
|
||
## Before you begin | ||
* You have an existing project and GKE cluster created - [quickstart tutorial](https://cloud.google.com/kubernetes-engine/docs/quickstart) | ||
* Install the [Google Cloud SDK](https://cloud.google.com/sdk/) | ||
|
||
## Getting started | ||
* Clone this repository and change into the `tutorials/gke-node-agent-metrics-cloud-monitoring` directory. | ||
|
||
## Build the container image | ||
* Update `cloudbuild.yaml` | ||
* Where: | ||
* `[PROJECT_ID]` is your Google Cloud project ID | ||
* `[IMAGE_NAME]` is the desired name of the container image | ||
* Submit the build to Cloud Build; on completion, the image is published to | ||
Container Registry (GCR): `gcloud builds submit --config cloudbuild.yaml .` | ||
|
||
## Deploy the daemonset | ||
* Update `agent.yaml` | ||
* Where: | ||
* `[PROJECT_ID]` is your Google Cloud project ID | ||
* `[IMAGE_NAME]` is the name of the container image used above when building | ||
the container image | ||
* Deploy `kubectl apply -f agent.yaml` | ||
|
||
|
||
## (optional) Customize the Cloud Monitoring agent | ||
* Edit `collectd.conf` to add in additional desired metrics | ||
* Rebuild container image and redeploy daemonset | ||
* NB: Add in any new dependencies that may be required for metric collection | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#!/bin/bash
#
# Container entrypoint for the monitoring agent.
#
# 1. Looks up this VM's instance ID from the metadata server.
# 2. Substitutes it for the %MONITORED_RESOURCE% placeholder in the collectd
#    configuration (a no-op if the configuration has no such placeholder).
# 3. Replaces this shell with collectd running in the foreground.

configuration_file="/etc/collectd/collectd.conf"
metadata_url="http://169.254.169.254/computeMetadata/v1/instance/id"

# Best effort: if the lookup fails, the placeholder is replaced with an empty
# string, exactly as before -- but the failure is now reported instead of
# being silently ignored.
if ! monitored_resource=$(curl --silent -f -H 'Metadata-Flavor: Google' "$metadata_url" 2>/dev/null); then
  echo "warning: could not read instance id from the metadata server; using an empty monitored resource" >&2
  monitored_resource=""
fi

sed -i "s/%MONITORED_RESOURCE%/$monitored_resource/" "$configuration_file"

# exec so that collectd takes over this process and receives the container's
# stop signal directly instead of sitting behind a bash parent.
exec /opt/stackdriver/collectd/sbin/stackdriver-collectd -f -C "$configuration_file"