Skip to content

Commit

Permalink
New metric exclusions (#8139)
Browse files Browse the repository at this point in the history
* Added prestop hook to fullnode template. Increased baklava memory

* Added new exclusions to the Prometheus remote-write and Stackdriver configurations for the most frequent unused metrics

* Refined exclusion

* Update prometheus config

* Added encrypted Grafana Cloud credentials
  • Loading branch information
jcortejoso committed Jul 15, 2021
1 parent b099f50 commit b411b44
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 15 deletions.
Binary file modified .env.mnemonic.alfajores.enc
Binary file not shown.
Binary file modified .env.mnemonic.baklava.enc
Binary file not shown.
76 changes: 68 additions & 8 deletions packages/celotool/src/lib/prometheus.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import {
fetchEnv,
fetchEnvOrFallback,
getDynamicEnvVarValue,
isProduction,
} from './env-utils'
import {
installGenericHelmChart,
Expand All @@ -33,7 +32,7 @@ const kubeServiceAccountName = releaseName
// Container registry with latest tags: https://console.cloud.google.com/gcr/images/stackdriver-prometheus/GLOBAL/stackdriver-prometheus-sidecar?gcrImageListsize=30
const sidecarImageTag = '0.8.2'
// Prometheus container registry with latest tags: https://hub.docker.com/r/prom/prometheus/tags
const prometheusImageTag = 'v2.25.0'
const prometheusImageTag = 'v2.27.1'

const grafanaHelmChartPath = '../helm-charts/grafana'
const grafanaReleaseName = 'grafana'
Expand Down Expand Up @@ -111,6 +110,30 @@ async function helmParameters(context?: string, clusterConfig?: BaseClusterConfi
'__name__!~"workqueue_.+"',
'__name__!~"nginx_.+"',
'__name__!~"etcd_.+"',
'__name__!~"erlang_.+"',
'__name__!~"container_tasks_state"',
'__name__!~"storage_.+"',
'__name__!~"container_memory_[^w].*"',
'__name__!~"rest_client_.+"',
'__name__!~"container_fs_.+"',
'__name__!~"container_file_.+"',
'__name__!~"container_spec_.+"',
'__name__!~"container_start_.+"',
'__name__!~"container_last_.+"',
'__name__!~"kube_pod_[^cs].+"',
'__name__!~"kube_pod_container_[^r].+"',
'__name__!~"kube_pod_container_status_waiting_reason"',
'__name__!~"kube_pod_container_status_terminated_reason"',
'__name__!~"kube_pod_container_status_last_terminated_reason"',
'__name__!~"container_network_.+"',
'__name__!~"container_cpu_user_seconds_total"',
'__name__!~"container_cpu_load_average_10s"',
'__name__!~"container_cpu_system_seconds_total"',
'__name__!~"container_sockets"',
'__name__!~"container_processes"',
'__name__!~"container_threads"',
'__name__!~"container_threads_max"',
'__name__!~"kube_node_status_condition"',
]

const usingGCP = !clusterConfig || clusterConfig.cloudProvider === CloudProvider.GCP
Expand Down Expand Up @@ -153,15 +176,52 @@ async function helmParameters(context?: string, clusterConfig?: BaseClusterConfi
]

if (fetchEnvOrFallback(envVar.PROMETHEUS_REMOTE_WRITE_URL, '') !== '') {
const droppedRemoteWriteSeries = [
'apiserver_.+',
'etcd_.+',
'nginx_.+',
'erlang_.+',
'kubelet_[^v].+',
'container_tasks_state',
'storage_.+',
'container_memory_[^w].*',
'rest_client_.+',
'container_fs_.+',
'container_file_.+',
'container_spec_.+',
'container_start_.+',
'container_last_.+',
'kube_pod_container_status_waiting_reason',
'kube_pod_container_status_terminated_reason',
'kube_pod_status_phase',
'container_network_.+',
'container_cpu_user_seconds_total',
'container_cpu_load_average_10s',
'container_cpu_system_seconds_total',
'container_sockets',
'container_processes',
'container_threads',
'container_threads_max',
'kube_node_status_condition',
'kube_pod_container_status_last_terminated_reason',
'kube_pod_container_[^r].+',
'kube_pod_[^cs].+',
'workqueue_.+',
'kube_secret_.+',
]
params.push(
`--set remote_write[0].url=${fetchEnv(envVar.PROMETHEUS_REMOTE_WRITE_URL)}`,
`--set remote_write[0].basic_auth.username=${fetchEnv(
`--set remote_write[0].url='${fetchEnv(envVar.PROMETHEUS_REMOTE_WRITE_URL)}'`,
`--set remote_write[0].basic_auth.username='${fetchEnv(
envVar.PROMETHEUS_REMOTE_WRITE_USERNAME
)}`,
`--set remote_write[0].basic_auth.password=${fetchEnv(
)}'`,
`--set remote_write[0].basic_auth.password='${fetchEnv(
envVar.PROMETHEUS_REMOTE_WRITE_PASSWORD
)}`,
`--set enable_alerts="${isProduction()}"`
)}'`,
`--set remote_write[0].write_relabel_configs[0].source_labels='[__name__]'`,
`--set remote_write[0].write_relabel_configs[0].regex='(${droppedRemoteWriteSeries.join(
'|'
)})'`,
`--set remote_write[0].write_relabel_configs[0].action='drop'`
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,8 @@ data:
# Label the metrics with a custom label when running multiple Prometheus instances for the same environment
external_labels:
cluster_name: {{ .Values.cluster }}
enable_alerts: {{ .Values.enable_alerts }}
{{- with .Values.remote_write }}
remote_write:
{{ toYaml . | indent 6 }}
{{- end }}
Expand Down
10 changes: 5 additions & 5 deletions packages/helm-charts/prometheus-stackdriver/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ serviceAccount:
# If not set and create is true, a name is generated using the fullname template
name: ""

remote_write:
- url: https://prometheus-us-central1.grafana.net/api/prom/push
basic_auth:
username: 51505
password: eyIxJ...
remote_write: []
# - url: https://prometheus-us-central1.grafana.net/api/prom/push
# basic_auth:
# username: 51505
# password: ey...

0 comments on commit b411b44

Please sign in to comment.