This repository has been archived by the owner on Aug 10, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Tutorial for adding additional node metrics for GKE nodes using monit…
…oring agent in a container (#1308) * Add in tutorial on adding in additional node metrics for GKE nodes using the cloud monitoring agent * fix frontmatter for multiple authors * fix frontmatter * first pass for structure and copy-editing * fix a couple of spelling mistakes * adding more explicit information about cloning repository * added details about viewing metrics * updated image links to public bucket * editing pass on new material * Update index.md Co-authored-by: Edwin Chiu <edwinchiu@google.com> Co-authored-by: Todd Kopriva <43478937+ToddKopriva@users.noreply.github.com> Co-authored-by: Aaron Sutton <aaron@asutton.net>
- Loading branch information
1 parent
31f0791
commit ae657cc
Showing
8 changed files
with
257 additions
and
0 deletions.
There are no files selected for viewing
25 changes: 25 additions & 0 deletions
25
tutorials/gke-node-agent-metrics-cloud-monitoring/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Image for running the Cloud Monitoring (Stackdriver) agent in a container.
#
# BASE_IMAGE_TAG selects the tag of the Debian 9 base image. The default stays
# `latest` for backward compatibility; pass
# `--build-arg BASE_IMAGE_TAG=<tag>` to pin a specific tag when a reproducible
# build is required.
ARG BASE_IMAGE_TAG=latest
FROM marketplace.gcr.io/google/debian9:${BASE_IMAGE_TAG}

# Package installation below requires root.
# NOTE(review): no later USER instruction drops privileges, so the container
# also runs as root -- confirm whether the agent can run as a non-root user.
USER root

# Install the prerequisites, download and run the agent installer, and clean
# up -- all in ONE layer. Files deleted in a later layer would still be stored
# in the image, so the cleanup has to happen in the same RUN that created them.
# The cleanup runs after the installer so that anything the installer leaves in
# the apt cache or package lists is removed as well.
#
# curl is used instead of `ADD <url>` so that a failed download (-f) aborts the
# build and the installer script itself does not persist as an image layer.
# NOTE(review): the installer is not checksum-verified because it is fetched
# from a moving URL; pin and verify it if supply-chain integrity matters.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gnupg2 \
    && curl -fsSL -o /install-monitoring-agent.sh \
        https://dl.google.com/cloudagents/install-monitoring-agent.sh \
    && bash /install-monitoring-agent.sh \
    && rm -f /install-monitoring-agent.sh \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Agent configuration and the entrypoint script that starts the agent.
COPY collectd.conf /etc/collectd/collectd.conf
COPY run.sh /run.sh

RUN ["chmod", "+x", "/run.sh"]

# Exec form: /run.sh is started directly rather than through `/bin/sh -c`, so
# it receives signals such as the SIGTERM sent when the container is stopped.
CMD ["/run.sh"]
28 changes: 28 additions & 0 deletions
28
tutorials/gke-node-agent-metrics-cloud-monitoring/agent.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# DaemonSet that runs the monitoring agent container on the cluster's nodes.
#
# Before applying, replace the placeholders:
#   [PROJECT_ID]  - your Google Cloud project ID
#   [IMAGE_NAME]  - the name of the container image built with cloudbuild.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: [IMAGE_NAME]
  labels:
    k8s-app: [IMAGE_NAME]
spec:
  selector:
    matchLabels:
      # Must match the pod template label below.
      name: [IMAGE_NAME]
  template:
    metadata:
      labels:
        name: [IMAGE_NAME]
    spec:
      containers:
      - name: [IMAGE_NAME]
        image: gcr.io/[PROJECT_ID]/[IMAGE_NAME]
        securityContext:
          # The agent container runs privileged.
          privileged: true
        volumeMounts:
        # The node's root filesystem, mounted read-only inside the container.
        - name: host
          mountPath: /mnt/host
          readOnly: true
      volumes:
      - name: host
        hostPath:
          path: /
6 changes: 6 additions & 0 deletions
6
tutorials/gke-node-agent-metrics-cloud-monitoring/cloudbuild.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Cloud Build configuration: builds the agent image from the Dockerfile in the
# current directory and pushes it to Container Registry.
#
# Before submitting, replace the placeholders:
#   [PROJECT_ID]  - your Google Cloud project ID
#   [IMAGE_NAME]  - the name of the container image
steps:
# Build the image and tag it with its registry path.
- name: 'gcr.io/cloud-builders/docker'
  args: ['build', '-t', 'gcr.io/[PROJECT_ID]/[IMAGE_NAME]', '.']
# Push the freshly built image.
- name: 'gcr.io/cloud-builders/docker'
  args: ['push', 'gcr.io/[PROJECT_ID]/[IMAGE_NAME]']
# Images produced by this build.
images: ['gcr.io/[PROJECT_ID]/[IMAGE_NAME]']
78 changes: 78 additions & 0 deletions
78
tutorials/gke-node-agent-metrics-cloud-monitoring/collectd.conf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
# collectd configuration for the containerized monitoring agent.
# The Dockerfile copies this file to /etc/collectd/collectd.conf and run.sh
# passes that path to stackdriver-collectd with -C.

Interval 60

Hostname ""

# The Stackdriver agent does not use fully qualified domain names.
FQDNLookup false

# If you have other configuration, especially for plugins, you can drop it
# into this directory.
Include "/etc/collectd/collectd.d/"

# --- Metric collection plugins ---

LoadPlugin df
<Plugin "df">
  FSType "devfs"
  IgnoreSelected true
  ReportByDevice true
  ValuesPercentage true
</Plugin>

LoadPlugin cpu
<Plugin "cpu">
  ValuesPercentage true
  ReportByCpu false
</Plugin>
LoadPlugin swap
<Plugin "swap">
  ValuesPercentage true
</Plugin>
LoadPlugin interface
LoadPlugin disk
LoadPlugin load
LoadPlugin memory
<Plugin "memory">
  ValuesPercentage true
</Plugin>
LoadPlugin processes
LoadPlugin tcpconns

<Plugin "processes">
  ProcessMatch "all" ".*"
  Detail "ps_cputime"
  Detail "ps_disk_octets"
  Detail "ps_rss"
  Detail "ps_vm"
</Plugin>

<Plugin "disk">
  # No config - collectd fails parsing configuration if tag is empty.
</Plugin>

<Plugin "tcpconns">
  AllPortsSummary true
</Plugin>

# --- Filtering and output plugins ---

LoadPlugin match_regex
LoadPlugin target_set
LoadPlugin stackdriver_agent
LoadPlugin write_gcm
LoadPlugin write_log
LoadPlugin aggregation
LoadPlugin match_throttle_metadata_keys

<Plugin "write_log">
  Format JSON
</Plugin>

# Every value passes through the "PostCache" chain, whose single rule hands
# matching values to both the write_gcm and write_log plugins.
PostCacheChain "PostCache"
<Chain "PostCache">
  <Rule "otherwise">
    <Match "throttle_metadata_keys">
      OKToThrottle true
    </Match>
    <Target "write">
      Plugin "write_gcm"
      Plugin "write_log"
    </Target>
  </Rule>
</Chain>
Binary file added
BIN
+84.7 KB
tutorials/gke-node-agent-metrics-cloud-monitoring/images/sd-agent-metrics.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+41.1 KB
tutorials/gke-node-agent-metrics-cloud-monitoring/images/sd-explorer.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
112 changes: 112 additions & 0 deletions
112
tutorials/gke-node-agent-metrics-cloud-monitoring/index.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
--- | ||
title: Collect additional GKE node metrics using collectd with Cloud Monitoring | ||
description: Learn how to deploy the Cloud Monitoring agent on GKE nodes to expose additional VM metrics on GKE nodes. | ||
author: aaronsutton,echiugoog | ||
tags: host metrics | ||
date_published: 2020-08-07 | ||
--- | ||
|
||
Only a few metrics are available by default on GKE nodes. You can deploy a Cloud Monitoring agent to expose additional metrics for added visibility into the | ||
health of your GKE nodes. | ||
|
||
## Objectives | ||
|
||
Expose additional host metrics using the Cloud Monitoring agent on GKE nodes. | ||
|
||
Host metrics available by default: | ||
|
||
* CPU usage | ||
* Disk I/O | ||
* Network traffic | ||
|
||
Metrics added with the Cloud Monitoring agent: | ||
|
||
* CPU load | ||
* CPU steal | ||
* Memory usage | ||
* Swap usage | ||
* Disk usage | ||
* Open TCP connections | ||
* Processes | ||
|
||
For details about the metrics exposed by the Cloud Monitoring agent, see [Agent metrics](https://cloud.google.com/monitoring/api/metrics_agent). | ||
|
||
Even more metrics can be added by customizing | ||
[`collectd.conf`](https://github.com/GoogleCloudPlatform/community/blob/master/tutorials/gke-node-agent-metrics-cloud-monitoring/collectd.conf) to meet your | ||
needs. | ||
|
||
## Before you begin | ||
|
||
1. Create a Google Cloud project and GKE cluster, as shown in [this quickstart tutorial](https://cloud.google.com/kubernetes-engine/docs/quickstart). | ||
1. Install the [Google Cloud SDK](https://cloud.google.com/sdk/). | ||
1. Clone this repository: | ||
|
||
git clone https://github.com/GoogleCloudPlatform/community.git | ||
|
||
The files for this tutorial are in the | ||
[`/tutorials/gke-node-agent-metrics-cloud-monitoring`](https://github.com/GoogleCloudPlatform/community/blob/master/tutorials/gke-node-agent-metrics-cloud-monitoring) directory. | ||
|
||
## Build the container image
|
||
1. Update `cloudbuild.yaml` by replacing the following values: | ||
|
||
* `[PROJECT_ID]` is your Google Cloud project ID. | ||
* `[IMAGE_NAME]` is the name of the container image. | ||
|
||
1. Build the container image with Cloud Build: | ||
|
||
gcloud builds submit --config cloudbuild.yaml . | ||
|
||
When the build finishes, the image will be published to Container Registry. | ||
|
||
## Deploy the daemonset | ||
|
||
1. Update `agent.yaml` by replacing the following values: | ||
|
||
* `[PROJECT_ID]` is your Google Cloud project ID.
* `[IMAGE_NAME]` is the name of the container image that you used when building the container image. | ||
|
||
1. Deploy: | ||
|
||
kubectl apply -f agent.yaml | ||
|
||
1. Check that the daemonset deployed and is ready: | ||
|
||
kubectl get ds | ||
|
||
The output should be similar to the following, where `[IMAGE_NAME]` is the name of your container image:
|
||
NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE | ||
[IMAGE_NAME] 1 1 1 1 1 <none> 29s | ||
|
||
## (Optional) Customize the Cloud Monitoring agent
|
||
1. Edit `collectd.conf` to expose additional metrics. | ||
1. Add any new dependencies required for metric collection. | ||
1. Rebuild the container image and redeploy the daemonset. | ||
|
||
## Viewing the metrics | ||
|
||
After deploying the daemonset, the additional metrics should begin to flow to Cloud Monitoring automatically. To view the metrics, go to the | ||
[**Monitoring**](https://console.cloud.google.com/monitoring) page in the Cloud Console. | ||
|
||
One way of examining metrics is using the [Metrics Explorer](https://console.cloud.google.com/monitoring/metrics-explorer). Because the new metrics being | ||
collected are GKE node metrics, they are visible for the Compute Engine VM instance resource type with the metric names beginning with `agent.googleapis.com`: | ||
|
||
![Metrics explorer](https://storage.googleapis.com/gcp-community/tutorials/gke-node-agent-metrics-cloud-monitoring/sd-explorer.png) | ||
|
||
If you take a detailed look at the node itself within Cloud Monitoring, you can see the additional metrics graphed within the VM instance dashboard agent tab. | ||
Go to the [**Dashboards**](https://console.cloud.google.com/monitoring/dashboards) page, and then click **VM Instances** and the instance you're interested in | ||
viewing metrics for. | ||
|
||
![Monitoring agent metrics](https://storage.googleapis.com/gcp-community/tutorials/gke-node-agent-metrics-cloud-monitoring/sd-agent-metrics.png) | ||
|
||
## Cleanup | ||
|
||
1. Delete the daemonset: | ||
|
||
kubectl delete ds [IMAGE_NAME] | ||
|
||
1. Delete the cluster you created in the **Before you begin** section: | ||
|
||
gcloud container clusters delete [CLUSTER_NAME] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#!/bin/bash
#
# Container entrypoint for the monitoring agent.
#
# 1. Reads this VM's instance ID from the Compute Engine metadata server.
# 2. Substitutes it for the %MONITORED_RESOURCE% placeholder in the collectd
#    configuration file.
# 3. Replaces this shell with stackdriver-collectd running in the foreground.

configuration_file="/etc/collectd/collectd.conf"
metadata_url="http://169.254.169.254/computeMetadata/v1/instance/id"

# Best effort: a failed lookup does not stop the agent from starting (the
# placeholder is then replaced with an empty string), but the failure is
# reported on stderr instead of being silently discarded.
if ! monitored_resource=$(curl --silent -f -H 'Metadata-Flavor: Google' "$metadata_url" 2>/dev/null); then
  echo "run.sh: warning: could not read the instance ID from the metadata server; continuing with an empty value" >&2
  monitored_resource=""
fi

# NOTE(review): the collectd.conf added in the same commit contains no
# %MONITORED_RESOURCE% token, so this substitution is a no-op unless a
# customized configuration introduces the placeholder.
sed -i "s/%MONITORED_RESOURCE%/$monitored_resource/" "$configuration_file"

# exec replaces the shell with collectd, so signals delivered to this process
# (for example SIGTERM when the pod is stopped) reach the agent directly
# instead of an intermediate bash process. -f keeps collectd in the foreground.
exec /opt/stackdriver/collectd/sbin/stackdriver-collectd -f -C "$configuration_file"