From 2c857e77416ff319594406c914d2a3a2a33739f1 Mon Sep 17 00:00:00 2001 From: knrt10 Date: Tue, 24 Aug 2021 12:31:26 +0530 Subject: [PATCH 1/3] prometheus-operator config update Change configuration for alertmanager and operator. Users can now configure each of them as a block. Signed-off-by: knrt10 --- .../components/prometheus-operator.md | 119 +++++++++--------- .../prometheus-operator/component.go | 33 +++-- .../prometheus-operator/template.go | 18 +-- 3 files changed, 92 insertions(+), 78 deletions(-) diff --git a/docs/configuration-reference/components/prometheus-operator.md b/docs/configuration-reference/components/prometheus-operator.md index e406e13f3..a52a9d1fa 100644 --- a/docs/configuration-reference/components/prometheus-operator.md +++ b/docs/configuration-reference/components/prometheus-operator.md @@ -11,15 +11,15 @@ instances. ## Prerequisites -* A Lokomotive cluster with a +- A Lokomotive cluster with a [PersistentVolume](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) plugin, e.g. [OpenEBS](openebs-operator.md) or one of the [built-in](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#types-of-persistent-volumes) plugins. -* An ingress controller such as [Contour](contour.md) for HTTP ingress. +- An ingress controller such as [Contour](contour.md) for HTTP ingress. -* The [cert-manager component](cert-manager.md) is required for enabling HTTPS.
## Configuration @@ -59,11 +59,13 @@ component "prometheus-operator" { } } - alertmanager_retention = "360h" - alertmanager_external_url = "https://api.example.com/alertmanager" - alertmanager_config = file("alertmanager-config.yaml") - alertmanager_node_selector = { - "kubernetes.io/hostname" = "worker3" + alertmanager { + retention = "360h" + external_url = "https://api.example.com/alertmanager" + config = file("alertmanager-config.yaml") + node_selector = { + "kubernetes.io/hostname" = "worker3" + } } } ``` @@ -75,22 +77,22 @@ information. **Note**: Make sure the whole file is indented two spaces. That is, there are two spaces before the top level block. ```yaml - config: - global: - resolve_timeout: 5m - route: - group_by: +config: + global: + resolve_timeout: 5m + route: + group_by: - job - group_wait: 30s - group_interval: 5m - repeat_interval: 12h - receiver: 'null' - routes: + group_wait: 30s + group_interval: 5m + repeat_interval: 12h + receiver: "null" + routes: - match: alertname: Watchdog - receiver: 'null' - receivers: - - name: 'null' + receiver: "null" + receivers: + - name: "null" ``` **NOTE**: Ensure the file `alertmanager_config.yaml` is added to `.gitignore` to avoid any accidental exposure @@ -122,41 +124,40 @@ EOF Table of all the arguments accepted by the component. -| Argument | Description | Default | Type | Required | -|----------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------:|:--------:| -| `namespace` | Namespace to deploy the Prometheus Operator. 
| `monitoring` | string | false | -| `grafana.admin_password` | Password for `admin` user in Grafana. If not provided it is auto generated and stored in secret `prometheus-operator-grafana`. | - | string | false | -| `grafana.secret_env` | Sensitive environment variables passed to Grafana pod and stored as secret. Read more on manipulating `grafana.ini` using env var [here](https://grafana.com/docs/grafana/latest/installation/configuration/#configure-with-environment-variables). | - | map(string) | false | -| `grafana.ingress.host` | Ingress URL host to expose Grafana over the internet. **NOTE:** When running on Equinix Metal, a DNS entry pointing at the ingress controller needs to be created. | - | string | true | -| `grafana.ingress.class` | Ingress class to use for Grafana ingress. | `contour` | string | false | -| `grafana.ingress.certmanager_cluster_issuer` | `ClusterIssuer` to be used by cert-manager while issuing TLS certificates. Supported values: `letsencrypt-production`, `letsencrypt-staging`. | `letsencrypt-production` | string | false | -| `prometheus_operator_node_selector` | Node selector to specify nodes where the Prometheus Operator pods should be deployed. | {} | map(string) | false | -| `prometheus.metrics_retention` | Time duration Prometheus shall retain data for. Must match the regular expression `[0-9]+(ms\|s\|m\|h\|d\|w\|y)` (milliseconds, seconds, minutes, hours, days, weeks and years). | `10d` | string | false | -| `prometheus.node_selector` | Node selector to specify nodes where the Prometheus pods should be deployed. | {} | map(string) | false | -| `prometheus.storage_size` | Storage capacity for the Prometheus in bytes. You can express storage as a fixed-point integer using one of these suffixes: E, P, T, G, M, K. You can also use the power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. 
| "50Gi" | string | false | -| `prometheus.watch_labeled_service_monitors` | By default prometheus operator watches only the ServiceMonitor objects in the cluster that are labeled `release: prometheus-operator`. If set to `false` then all the ServiceMonitors will be watched. | `true` | bool | false | -| `prometheus.watch_labeled_prometheus_rules` | By default prometheus operator watches only the PrometheusRule objects in the cluster that are labeled `release: prometheus-operator` and `app: kube-prometheus-stack`. If set to `false` then all the PrometheusRule will be watched. | `true` | bool | false | -| `prometheus.external_labels` | This is the Prometheus parameter with the same name. The labels to add to any time series or alerts when communicating with external systems (federation, remote storage, Alertmanager). | - | map(string) | false | -| `prometheus.ingress.host` | Ingress URL host to expose Prometheus over the internet. **NOTE:** When running on Equinix Metal, a DNS entry pointing at the ingress controller needs to be created. | - | string | true | -| `prometheus.ingress.class` | Ingress class to use for Prometheus ingress. | `contour` | string | false | -| `prometheus.ingress.certmanager_cluster_issuer` | `ClusterIssuer` to be used by cert-manager while issuing TLS certificates. Supported values: `letsencrypt-production`, `letsencrypt-staging`. | `letsencrypt-production` | string | false | -| `prometheus.external_url` | The URL on which Prometheus will be accessible. If not provided, the URL is taken from `prometheus.ingress.host` with `https` as a scheme. | - | string | false | -| `alertmanager_retention` | Time duration Alertmanager shall retain data for. Must match the regular expression `[0-9]+(ms\|s\|m\|h)` (milliseconds, seconds, minutes and hours). | `120h` | string | false | -| `alertmanager_external_url` | The external URL the Alertmanager instances will be available under. This is necessary to generate correct URLs. 
This is necessary if Alertmanager is not served from root of a DNS name. | "" | string | false | -| `alertmanager_config` | Provide YAML file path to configure Alertmanager. See [https://prometheus.io/docs/alerting/configuration/#configuration-file](https://prometheus.io/docs/alerting/configuration/#configuration-file). | `{"global":{"resolve_timeout":"5m"},"route":{"group_by":["job"],"group_wait":"30s","group_interval":"5m","repeat_interval":"12h","receiver":"null","routes":[{"match":{"alertname":"Watchdog"},"receiver":"null"}]},"receivers":[{"name":"null"}]}` | string | false | -| `alertmanager_node_selector` | Node selector to specify nodes where the AlertManager pods should be deployed. | {} | map(string) | false | -| `alertmanager_storage_size` | Storage capacity for the Alertmanager in bytes. You can express storage as a fixed-point integer using one of these suffixes: E, P, T, G, M, K. You can also use the power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. | "50Gi" | string | false | -| `disable_webhooks` | Disables validation and mutation webhooks. This might be required on older versions of Kubernetes to install successfully. | false | bool | false | -| `monitor` | Block, which allows to disable scraping of individual Kubernetes components. | - | object | false | -| `monitor.etcd` | Controls if the default Prometheus instance should scrape etcd metrics. | true | bool | false | -| `monitor.kube_controller_manager` | Controls if the default Prometheus instance should scrape kube-controller-manager metrics. | true | bool | false | -| `monitor.kube_scheduler` | Controls if the default Prometheus instance should scrape kube-scheduler metrics. | true | bool | false | -| `monitor.kube_proxy` | Controls if the default Prometheus instance should scrape kube-proxy metrics. | true | bool | false | -| `monitor.kubelet` | Controls if the default Prometheus instance should scrape kubelet metrics. 
| true | bool | false | -| `coredns` | Block, which allows to customize, how CoreDNS is scraped. | - | object | false | -| `coredns.selector` | Defines, how CoreDNS pods should be selected for scraping. | {"k8s-app":"coredns","tier":"control-plane"} | map(string) | false | -| `storage_class` | Storage Class to use for the storage allowed for Prometheus and Alertmanager. | - | string | false | - +| Argument | Description | Default | Type | Required | +|-------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------:|:--------:| +| `namespace` | Namespace to deploy the Prometheus Operator. | `monitoring` | string | false | +| `grafana.admin_password` | Password for `admin` user in Grafana. If not provided it is auto generated and stored in secret `prometheus-operator-grafana`. | - | string | false | +| `grafana.secret_env` | Sensitive environment variables passed to Grafana pod and stored as secret. Read more on manipulating `grafana.ini` using env var [here](https://grafana.com/docs/grafana/latest/installation/configuration/#configure-with-environment-variables). | - | map(string) | false | +| `grafana.ingress.host` | Ingress URL host to expose Grafana over the internet. **NOTE:** When running on Equinix Metal, a DNS entry pointing at the ingress controller needs to be created. | - | string | true | +| `grafana.ingress.class` | Ingress class to use for Grafana ingress. 
| `contour` | string | false | +| `grafana.ingress.certmanager_cluster_issuer` | `ClusterIssuer` to be used by cert-manager while issuing TLS certificates. Supported values: `letsencrypt-production`, `letsencrypt-staging`. | `letsencrypt-production` | string | false | +| `operator.node_selector` | Node selector to specify nodes where the Prometheus Operator pods should be deployed. | {} | map(string) | false | +| `prometheus.metrics_retention` | Time duration Prometheus shall retain data for. Must match the regular expression `[0-9]+(ms\|s\|m\|h\|d\|w\|y)` (milliseconds, seconds, minutes, hours, days, weeks and years). | `10d` | string | false | +| `prometheus.node_selector` | Node selector to specify nodes where the Prometheus pods should be deployed. | {} | map(string) | false | +| `prometheus.storage_size` | Storage capacity for the Prometheus in bytes. You can express storage as a fixed-point integer using one of these suffixes: E, P, T, G, M, K. You can also use the power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. | "50Gi" | string | false | +| `prometheus.watch_labeled_service_monitors` | By default prometheus operator watches only the ServiceMonitor objects in the cluster that are labeled `release: prometheus-operator`. If set to `false` then all the ServiceMonitors will be watched. | `true` | bool | false | +| `prometheus.watch_labeled_prometheus_rules` | By default prometheus operator watches only the PrometheusRule objects in the cluster that are labeled `release: prometheus-operator` and `app: kube-prometheus-stack`. If set to `false` then all the PrometheusRule will be watched. | `true` | bool | false | +| `prometheus.external_labels` | This is the Prometheus parameter with the same name. The labels to add to any time series or alerts when communicating with external systems (federation, remote storage, Alertmanager). | - | map(string) | false | +| `prometheus.ingress.host` | Ingress URL host to expose Prometheus over the internet. 
**NOTE:** When running on Equinix Metal, a DNS entry pointing at the ingress controller needs to be created. | - | string | true | +| `prometheus.ingress.class` | Ingress class to use for Prometheus ingress. | `contour` | string | false | +| `prometheus.ingress.certmanager_cluster_issuer` | `ClusterIssuer` to be used by cert-manager while issuing TLS certificates. Supported values: `letsencrypt-production`, `letsencrypt-staging`. | `letsencrypt-production` | string | false | +| `prometheus.external_url` | The URL on which Prometheus will be accessible. If not provided, the URL is taken from `prometheus.ingress.host` with `https` as a scheme. | - | string | false | +| `alertmanager.retention` | Time duration Alertmanager shall retain data for. Must match the regular expression `[0-9]+(ms\|s\|m\|h)` (milliseconds, seconds, minutes and hours). | `120h` | string | false | +| `alertmanager.external_url` | The external URL the Alertmanager instances will be available under. This is necessary to generate correct URLs. This is necessary if Alertmanager is not served from root of a DNS name. | "" | string | false | +| `alertmanager.config` | Provide YAML file path to configure Alertmanager. See [https://prometheus.io/docs/alerting/configuration/#configuration-file](https://prometheus.io/docs/alerting/configuration/#configuration-file). | `{"global":{"resolve_timeout":"5m"},"route":{"group_by":["job"],"group_wait":"30s","group_interval":"5m","repeat_interval":"12h","receiver":"null","routes":[{"match":{"alertname":"Watchdog"},"receiver":"null"}]},"receivers":[{"name":"null"}]}` | string | false | +| `alertmanager.node_selector` | Node selector to specify nodes where the AlertManager pods should be deployed. | {} | map(string) | false | +| `alertmanager.storage_size` | Storage capacity for the Alertmanager in bytes. You can express storage as a fixed-point integer using one of these suffixes: E, P, T, G, M, K. 
You can also use the power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. | "50Gi" | string | false | +| `disable_webhooks` | Disables validation and mutation webhooks. This might be required on older versions of Kubernetes to install successfully. | false | bool | false | +| `monitor` | Block, which allows to disable scraping of individual Kubernetes components. | - | object | false | +| `monitor.etcd` | Controls if the default Prometheus instance should scrape etcd metrics. | true | bool | false | +| `monitor.kube_controller_manager` | Controls if the default Prometheus instance should scrape kube-controller-manager metrics. | true | bool | false | +| `monitor.kube_scheduler` | Controls if the default Prometheus instance should scrape kube-scheduler metrics. | true | bool | false | +| `monitor.kube_proxy` | Controls if the default Prometheus instance should scrape kube-proxy metrics. | true | bool | false | +| `monitor.kubelet` | Controls if the default Prometheus instance should scrape kubelet metrics. | true | bool | false | +| `coredns` | Block, which allows to customize, how CoreDNS is scraped. | - | object | false | +| `coredns.selector` | Defines, how CoreDNS pods should be selected for scraping. | {"k8s-app":"coredns","tier":"control-plane"} | map(string) | false | +| `storage_class` | Storage Class to use for the storage allowed for Prometheus and Alertmanager. 
| - | string | false | ## Applying @@ -181,11 +182,11 @@ metadata: namespace: openebs spec: endpoints: - - path: /metrics - port: exporter + - path: /metrics + port: exporter namespaceSelector: matchNames: - - openebs + - openebs selector: matchLabels: openebs.io/cas-type: cstor diff --git a/pkg/components/prometheus-operator/component.go b/pkg/components/prometheus-operator/component.go index 6f486bd92..54c3155d5 100644 --- a/pkg/components/prometheus-operator/component.go +++ b/pkg/components/prometheus-operator/component.go @@ -55,6 +55,11 @@ type Grafana struct { Ingress *types.Ingress `hcl:"ingress,block"` } +// Operator object collects sub component Prometheus operator related information. +type Operator struct { + NodeSelector map[string]string `hcl:"node_selector,optional"` +} + // Prometheus object collects sub component Prometheus related information. type Prometheus struct { MetricsRetention string `hcl:"metrics_retention,optional"` @@ -67,20 +72,25 @@ type Prometheus struct { ExternalURL string `hcl:"external_url,optional"` } +// AlertManager object collects sub component AlertManager related information. 
+type AlertManager struct { + Config string `hcl:"config,optional"` + ExternalURL string `hcl:"external_url,optional"` + NodeSelector map[string]string `hcl:"node_selector,optional"` + Retention string `hcl:"retention,optional"` + StorageSize string `hcl:"storage_size,optional"` +} + type component struct { Grafana *Grafana `hcl:"grafana,block"` Namespace string `hcl:"namespace,optional"` - PrometheusOperatorNodeSelector map[string]string `hcl:"prometheus_operator_node_selector,optional"` + Operator *Operator `hcl:"operator,block"` Prometheus *Prometheus `hcl:"prometheus,block"` - AlertManagerRetention string `hcl:"alertmanager_retention,optional"` - AlertManagerExternalURL string `hcl:"alertmanager_external_url,optional"` - AlertManagerConfig string `hcl:"alertmanager_config,optional"` - AlertManagerNodeSelector map[string]string `hcl:"alertmanager_node_selector,optional"` - AlertManagerStorageSize string `hcl:"alertmanager_storage_size,optional"` + AlertManager *AlertManager `hcl:"alertmanager,block"` StorageClass string `hcl:"storage_class,optional"` @@ -120,10 +130,13 @@ func NewConfig() *component { WatchLabeledServiceMonitors: true, WatchLabeledPrometheusRules: true, }, - AlertManagerRetention: "120h", - AlertManagerConfig: defaultAlertManagerConfig, - AlertManagerStorageSize: "50Gi", - Namespace: "monitoring", + AlertManager: &AlertManager{ + Retention: "120h", + Config: defaultAlertManagerConfig, + StorageSize: "50Gi", + }, + Namespace: "monitoring", + Operator: &Operator{}, Monitor: &Monitor{ Etcd: true, KubeControllerManager: true, diff --git a/pkg/components/prometheus-operator/template.go b/pkg/components/prometheus-operator/template.go index fb236692d..8aabb10e7 100644 --- a/pkg/components/prometheus-operator/template.go +++ b/pkg/components/prometheus-operator/template.go @@ -22,13 +22,13 @@ global: seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default' alertmanager: -{{.AlertManagerConfig}} +{{.AlertManager.Config}} 
alertmanagerSpec: - retention: {{.AlertManagerRetention}} - externalUrl: {{.AlertManagerExternalURL}} - {{ if .AlertManagerNodeSelector }} + retention: {{.AlertManager.Retention}} + externalUrl: {{.AlertManager.ExternalURL}} + {{ if .AlertManager.NodeSelector }} nodeSelector: - {{ range $key, $value := .AlertManagerNodeSelector }} + {{ range $key, $value := .AlertManager.NodeSelector }} {{ $key }}: {{ $value }} {{ end }} {{ end }} @@ -45,7 +45,7 @@ alertmanager: accessModes: ["ReadWriteOnce"] resources: requests: - storage: "{{.AlertManagerStorageSize}}" + storage: "{{.AlertManager.StorageSize}}" grafana: plugins: "grafana-piechart-panel" @@ -88,7 +88,7 @@ kubeEtcd: enabled: {{.Monitor.Etcd}} prometheus-node-exporter: service: {} -{{ if (or .PrometheusOperatorNodeSelector .DisableWebhooks) }} +{{ if (or .Operator.NodeSelector .DisableWebhooks) }} prometheusOperator: {{- if .DisableWebhooks }} tlsProxy: @@ -96,9 +96,9 @@ prometheusOperator: admissionWebhooks: enabled: false {{- end }} - {{- if .PrometheusOperatorNodeSelector }} + {{- if .Operator.NodeSelector }} nodeSelector: - {{ range $key, $value := .PrometheusOperatorNodeSelector }} + {{ range $key, $value := .Operator.NodeSelector }} {{ $key }}: {{ $value }} {{ end }} {{- end }} From 4443cad8c9cc1aaac8ed8a6acd5213328b8c648a Mon Sep 17 00:00:00 2001 From: knrt10 Date: Tue, 24 Aug 2021 12:31:57 +0530 Subject: [PATCH 2/3] prometheus-operator: add tolerations Add user-configurable tolerations for prometheus-operator and its components.
Co-authored-by: Mateusz Gozdek Signed-off-by: knrt10 --- .../components/prometheus-operator.md | 93 ++++++++++++------- .../prometheus-operator/component.go | 43 +++++++-- .../prometheus-operator/template.go | 27 ++++-- 3 files changed, 113 insertions(+), 50 deletions(-) diff --git a/docs/configuration-reference/components/prometheus-operator.md b/docs/configuration-reference/components/prometheus-operator.md index a52a9d1fa..a6334be63 100644 --- a/docs/configuration-reference/components/prometheus-operator.md +++ b/docs/configuration-reference/components/prometheus-operator.md @@ -41,6 +41,21 @@ component "prometheus-operator" { } } + operator { + tolerations { + key = "lokomotive.io/operator" + operator = "Equal" + value = "test" + effect = "NoSchedule" + } + admission_webhook_tolerations { + key = "lokomotive.io/operator-admission-webhook" + operator = "Equal" + value = "test" + effect = "NoSchedule" + } + } + prometheus { metrics_retention = "14d" storage_size = "50Gi" @@ -66,6 +81,12 @@ component "prometheus-operator" { node_selector = { "kubernetes.io/hostname" = "worker3" } + tolerations { + key = "lokomotive.io/alertmanager" + operator = "Equal" + value = "test" + effect = "NoSchedule" + } } } ``` @@ -124,40 +145,44 @@ EOF Table of all the arguments accepted by the component. -| Argument | Description | Default | Type | Required | -|-------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------:|:--------:| -| `namespace` | Namespace to deploy the Prometheus Operator. 
| `monitoring` | string | false | -| `grafana.admin_password` | Password for `admin` user in Grafana. If not provided it is auto generated and stored in secret `prometheus-operator-grafana`. | - | string | false | -| `grafana.secret_env` | Sensitive environment variables passed to Grafana pod and stored as secret. Read more on manipulating `grafana.ini` using env var [here](https://grafana.com/docs/grafana/latest/installation/configuration/#configure-with-environment-variables). | - | map(string) | false | -| `grafana.ingress.host` | Ingress URL host to expose Grafana over the internet. **NOTE:** When running on Equinix Metal, a DNS entry pointing at the ingress controller needs to be created. | - | string | true | -| `grafana.ingress.class` | Ingress class to use for Grafana ingress. | `contour` | string | false | -| `grafana.ingress.certmanager_cluster_issuer` | `ClusterIssuer` to be used by cert-manager while issuing TLS certificates. Supported values: `letsencrypt-production`, `letsencrypt-staging`. | `letsencrypt-production` | string | false | -| `operator.node_selector` | Node selector to specify nodes where the Prometheus Operator pods should be deployed. | {} | map(string) | false | -| `prometheus.metrics_retention` | Time duration Prometheus shall retain data for. Must match the regular expression `[0-9]+(ms\|s\|m\|h\|d\|w\|y)` (milliseconds, seconds, minutes, hours, days, weeks and years). | `10d` | string | false | -| `prometheus.node_selector` | Node selector to specify nodes where the Prometheus pods should be deployed. | {} | map(string) | false | -| `prometheus.storage_size` | Storage capacity for the Prometheus in bytes. You can express storage as a fixed-point integer using one of these suffixes: E, P, T, G, M, K. You can also use the power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. 
| "50Gi" | string | false | -| `prometheus.watch_labeled_service_monitors` | By default prometheus operator watches only the ServiceMonitor objects in the cluster that are labeled `release: prometheus-operator`. If set to `false` then all the ServiceMonitors will be watched. | `true` | bool | false | -| `prometheus.watch_labeled_prometheus_rules` | By default prometheus operator watches only the PrometheusRule objects in the cluster that are labeled `release: prometheus-operator` and `app: kube-prometheus-stack`. If set to `false` then all the PrometheusRule will be watched. | `true` | bool | false | -| `prometheus.external_labels` | This is the Prometheus parameter with the same name. The labels to add to any time series or alerts when communicating with external systems (federation, remote storage, Alertmanager). | - | map(string) | false | -| `prometheus.ingress.host` | Ingress URL host to expose Prometheus over the internet. **NOTE:** When running on Equinix Metal, a DNS entry pointing at the ingress controller needs to be created. | - | string | true | -| `prometheus.ingress.class` | Ingress class to use for Prometheus ingress. | `contour` | string | false | -| `prometheus.ingress.certmanager_cluster_issuer` | `ClusterIssuer` to be used by cert-manager while issuing TLS certificates. Supported values: `letsencrypt-production`, `letsencrypt-staging`. | `letsencrypt-production` | string | false | -| `prometheus.external_url` | The URL on which Prometheus will be accessible. If not provided, the URL is taken from `prometheus.ingress.host` with `https` as a scheme. | - | string | false | -| `alertmanager.retention` | Time duration Alertmanager shall retain data for. Must match the regular expression `[0-9]+(ms\|s\|m\|h)` (milliseconds, seconds, minutes and hours). | `120h` | string | false | -| `alertmanager.external_url` | The external URL the Alertmanager instances will be available under. This is necessary to generate correct URLs. 
This is necessary if Alertmanager is not served from root of a DNS name. | "" | string | false | -| `alertmanager.config` | Provide YAML file path to configure Alertmanager. See [https://prometheus.io/docs/alerting/configuration/#configuration-file](https://prometheus.io/docs/alerting/configuration/#configuration-file). | `{"global":{"resolve_timeout":"5m"},"route":{"group_by":["job"],"group_wait":"30s","group_interval":"5m","repeat_interval":"12h","receiver":"null","routes":[{"match":{"alertname":"Watchdog"},"receiver":"null"}]},"receivers":[{"name":"null"}]}` | string | false | -| `alertmanager.node_selector` | Node selector to specify nodes where the AlertManager pods should be deployed. | {} | map(string) | false | -| `alertmanager.storage_size` | Storage capacity for the Alertmanager in bytes. You can express storage as a fixed-point integer using one of these suffixes: E, P, T, G, M, K. You can also use the power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. | "50Gi" | string | false | -| `disable_webhooks` | Disables validation and mutation webhooks. This might be required on older versions of Kubernetes to install successfully. | false | bool | false | -| `monitor` | Block, which allows to disable scraping of individual Kubernetes components. | - | object | false | -| `monitor.etcd` | Controls if the default Prometheus instance should scrape etcd metrics. | true | bool | false | -| `monitor.kube_controller_manager` | Controls if the default Prometheus instance should scrape kube-controller-manager metrics. | true | bool | false | -| `monitor.kube_scheduler` | Controls if the default Prometheus instance should scrape kube-scheduler metrics. | true | bool | false | -| `monitor.kube_proxy` | Controls if the default Prometheus instance should scrape kube-proxy metrics. | true | bool | false | -| `monitor.kubelet` | Controls if the default Prometheus instance should scrape kubelet metrics. 
| true | bool | false | -| `coredns` | Block, which allows to customize, how CoreDNS is scraped. | - | object | false | -| `coredns.selector` | Defines, how CoreDNS pods should be selected for scraping. | {"k8s-app":"coredns","tier":"control-plane"} | map(string) | false | -| `storage_class` | Storage Class to use for the storage allowed for Prometheus and Alertmanager. | - | string | false | +| Argument | Description | Default | Type | Required | +|-------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------:|:--------:| +| `namespace` | Namespace to deploy the Prometheus Operator. | `monitoring` | string | false | +| `grafana.admin_password` | Password for `admin` user in Grafana. If not provided it is auto generated and stored in secret `prometheus-operator-grafana`. | - | string | false | +| `grafana.secret_env` | Sensitive environment variables passed to Grafana pod and stored as secret. Read more on manipulating `grafana.ini` using env var [here](https://grafana.com/docs/grafana/latest/installation/configuration/#configure-with-environment-variables). | - | map(string) | false | +| `grafana.ingress.host` | Ingress URL host to expose Grafana over the internet. **NOTE:** When running on Equinix Metal, a DNS entry pointing at the ingress controller needs to be created. | - | string | true | +| `grafana.ingress.class` | Ingress class to use for Grafana ingress. 
| `contour` | string | false | +| `grafana.ingress.certmanager_cluster_issuer` | `ClusterIssuer` to be used by cert-manager while issuing TLS certificates. Supported values: `letsencrypt-production`, `letsencrypt-staging`. | `letsencrypt-production` | string | false | +| `operator.node_selector` | Node selector to specify nodes where the Prometheus Operator pods should be deployed. | {} | map(string) | false | +| `operator.tolerations` | Toleration that prometheus operator will tolerate. | - | list(object({key = string, effect = string, operator = string, value = string, toleration_seconds = string })) | false | +| `operator.admission_webhook_tolerations` | Toleration that prometheus operator admission webhook patch job will tolerate. | - | list(object({key = string, effect = string, operator = string, value = string, toleration_seconds = string })) | false | +| `prometheus.metrics_retention` | Time duration Prometheus shall retain data for. Must match the regular expression `[0-9]+(ms\|s\|m\|h\|d\|w\|y)` (milliseconds, seconds, minutes, hours, days, weeks and years). | `10d` | string | false | +| `prometheus.node_selector` | Node selector to specify nodes where the Prometheus pods should be deployed. | {} | map(string) | false | +| `prometheus.tolerations` | Toleration that prometheus pods will tolerate. | - | list(object({key = string, effect = string, operator = string, value = string, toleration_seconds = string })) | false | +| `prometheus.storage_size` | Storage capacity for the Prometheus in bytes. You can express storage as a fixed-point integer using one of these suffixes: E, P, T, G, M, K. You can also use the power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. | "50Gi" | string | false | +| `prometheus.watch_labeled_service_monitors` | By default prometheus operator watches only the ServiceMonitor objects in the cluster that are labeled `release: prometheus-operator`. If set to `false` then all the ServiceMonitors will be watched. 
| `true` | bool | false | +| `prometheus.watch_labeled_prometheus_rules` | By default prometheus operator watches only the PrometheusRule objects in the cluster that are labeled `release: prometheus-operator` and `app: kube-prometheus-stack`. If set to `false` then all the PrometheusRule will be watched. | `true` | bool | false | +| `prometheus.external_labels` | This is the Prometheus parameter with the same name. The labels to add to any time series or alerts when communicating with external systems (federation, remote storage, Alertmanager). | - | map(string) | false | +| `prometheus.ingress.host` | Ingress URL host to expose Prometheus over the internet. **NOTE:** When running on Equinix Metal, a DNS entry pointing at the ingress controller needs to be created. | - | string | true | +| `prometheus.ingress.class` | Ingress class to use for Prometheus ingress. | `contour` | string | false | +| `prometheus.ingress.certmanager_cluster_issuer` | `ClusterIssuer` to be used by cert-manager while issuing TLS certificates. Supported values: `letsencrypt-production`, `letsencrypt-staging`. | `letsencrypt-production` | string | false | +| `prometheus.external_url` | The URL on which Prometheus will be accessible. If not provided, the URL is taken from `prometheus.ingress.host` with `https` as a scheme. | - | string | false | +| `alertmanager.retention` | Time duration Alertmanager shall retain data for. Must match the regular expression `[0-9]+(ms\|s\|m\|h)` (milliseconds, seconds, minutes and hours). | `120h` | string | false | +| `alertmanager.external_url` | The external URL the Alertmanager instances will be available under. This is necessary to generate correct URLs. This is necessary if Alertmanager is not served from root of a DNS name. | "" | string | false | +| `alertmanager.config` | Provide YAML file path to configure Alertmanager. 
See [https://prometheus.io/docs/alerting/configuration/#configuration-file](https://prometheus.io/docs/alerting/configuration/#configuration-file). | `{"global":{"resolve_timeout":"5m"},"route":{"group_by":["job"],"group_wait":"30s","group_interval":"5m","repeat_interval":"12h","receiver":"null","routes":[{"match":{"alertname":"Watchdog"},"receiver":"null"}]},"receivers":[{"name":"null"}]}` | string | false | +| `alertmanager.node_selector` | Node selector to specify nodes where the Alertmanager pods should be deployed. | {} | map(string) | false | +| `alertmanager.tolerations` | Tolerations to apply to the Alertmanager pods. | - | list(object({key = string, effect = string, operator = string, value = string, toleration_seconds = string })) | false | +| `alertmanager.storage_size` | Storage capacity for the Alertmanager in bytes. You can express storage as a fixed-point integer using one of these suffixes: E, P, T, G, M, K. You can also use the power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. | "50Gi" | string | false | +| `disable_webhooks` | Disables validation and mutation webhooks. This might be required on older versions of Kubernetes to install successfully. | false | bool | false | +| `monitor` | Block that allows disabling scraping of individual Kubernetes components. | - | object | false | +| `monitor.etcd` | Controls if the default Prometheus instance should scrape etcd metrics. | true | bool | false | +| `monitor.kube_controller_manager` | Controls if the default Prometheus instance should scrape kube-controller-manager metrics. | true | bool | false | +| `monitor.kube_scheduler` | Controls if the default Prometheus instance should scrape kube-scheduler metrics. | true | bool | false | +| `monitor.kube_proxy` | Controls if the default Prometheus instance should scrape kube-proxy metrics. | true | bool | false | +| `monitor.kubelet` | Controls if the default Prometheus instance should scrape kubelet metrics. 
| true | bool | false | +| `coredns` | Block that allows customizing how CoreDNS is scraped. | - | object | false | +| `coredns.selector` | Defines how CoreDNS pods should be selected for scraping. | {"k8s-app":"coredns","tier":"control-plane"} | map(string) | false | +| `storage_class` | Storage Class to use for the storage allowed for Prometheus and Alertmanager. | - | string | false | ## Applying diff --git a/pkg/components/prometheus-operator/component.go b/pkg/components/prometheus-operator/component.go index 54c3155d5..d648eb247 100644 --- a/pkg/components/prometheus-operator/component.go +++ b/pkg/components/prometheus-operator/component.go @@ -57,7 +57,11 @@ type Grafana struct { // Operator object collects sub component Prometheus operator related information. type Operator struct { - NodeSelector map[string]string `hcl:"node_selector,optional"` + AdmissionWebhookTolerations []util.Toleration `hcl:"admission_webhook_tolerations,block"` + AdmissionWebhookTolerationsRaw string + NodeSelector map[string]string `hcl:"node_selector,optional"` + Tolerations []util.Toleration `hcl:"tolerations,block"` + TolerationsRaw string } // Prometheus object collects sub component Prometheus related information. @@ -70,15 +74,19 @@ type Prometheus struct { Ingress *types.Ingress `hcl:"ingress,block"` ExternalLabels map[string]string `hcl:"external_labels,optional"` ExternalURL string `hcl:"external_url,optional"` + Tolerations []util.Toleration `hcl:"tolerations,block"` + TolerationsRaw string } // AlertManager object collects sub component AlertManager related information. 
type AlertManager struct { - Config string `hcl:"config,optional"` - ExternalURL string `hcl:"external_url,optional"` - NodeSelector map[string]string `hcl:"node_selector,optional"` - Retention string `hcl:"retention,optional"` - StorageSize string `hcl:"storage_size,optional"` + Config string `hcl:"config,optional"` + ExternalURL string `hcl:"external_url,optional"` + NodeSelector map[string]string `hcl:"node_selector,optional"` + Retention string `hcl:"retention,optional"` + StorageSize string `hcl:"storage_size,optional"` + Tolerations []util.Toleration `hcl:"tolerations,block"` + TolerationsRaw string } type component struct { @@ -136,7 +144,6 @@ func NewConfig() *component { StorageSize: "50Gi", }, Namespace: "monitoring", - Operator: &Operator{}, Monitor: &Monitor{ Etcd: true, KubeControllerManager: true, @@ -213,6 +220,28 @@ func (c *component) RenderManifests() (map[string]string, error) { return nil, fmt.Errorf("retrieving chart from assets: %w", err) } + c.Prometheus.TolerationsRaw, err = util.RenderTolerations(c.Prometheus.Tolerations) + if err != nil { + return nil, fmt.Errorf("rendering prometheus tolerations: %w", err) + } + + if c.Operator != nil { + c.Operator.TolerationsRaw, err = util.RenderTolerations(c.Operator.Tolerations) + if err != nil { + return nil, fmt.Errorf("rendering operator tolerations: %w", err) + } + + c.Operator.AdmissionWebhookTolerationsRaw, err = util.RenderTolerations(c.Operator.AdmissionWebhookTolerations) //nolint:lll + if err != nil { + return nil, fmt.Errorf("rendering operator admission webhook tolerations: %w", err) + } + } + + c.AlertManager.TolerationsRaw, err = util.RenderTolerations(c.AlertManager.Tolerations) + if err != nil { + return nil, fmt.Errorf("rendering alertmanager tolerations: %w", err) + } + values, err := template.Render(chartValuesTmpl, c) if err != nil { return nil, fmt.Errorf("rendering chart values template: %w", err) diff --git a/pkg/components/prometheus-operator/template.go 
b/pkg/components/prometheus-operator/template.go index 8aabb10e7..687000e3d 100644 --- a/pkg/components/prometheus-operator/template.go +++ b/pkg/components/prometheus-operator/template.go @@ -32,6 +32,9 @@ alertmanager: {{ $key }}: {{ $value }} {{ end }} {{ end }} + {{ if .AlertManager.Tolerations }} + tolerations: {{ .AlertManager.TolerationsRaw }} + {{ end }} storage: volumeClaimTemplate: # This is done to reduce the name length of PVC that is autogenerated if metadata.Name is @@ -88,21 +91,24 @@ kubeEtcd: enabled: {{.Monitor.Etcd}} prometheus-node-exporter: service: {} -{{ if (or .Operator.NodeSelector .DisableWebhooks) }} prometheusOperator: - {{- if .DisableWebhooks }} - tlsProxy: - enabled: false admissionWebhooks: - enabled: false - {{- end }} - {{- if .Operator.NodeSelector }} + enabled: {{ not .DisableWebhooks }} + {{- with .Operator }} + {{- if .AdmissionWebhookTolerations }} + patch: + tolerations: {{ .AdmissionWebhookTolerationsRaw }} + {{- end }} + {{- if .NodeSelector }} nodeSelector: - {{ range $key, $value := .Operator.NodeSelector }} + {{ range $key, $value := .NodeSelector }} {{ $key }}: {{ $value }} {{ end }} {{- end }} -{{ end }} + {{- if .Tolerations }} + tolerations: {{ .TolerationsRaw }} + {{- end }} + {{- end }} prometheus: {{ if .Prometheus.Ingress }} ingress: @@ -131,6 +137,9 @@ prometheus: {{ $key }}: {{ $value }} {{ end }} {{ end }} + {{ if .Prometheus.Tolerations }} + tolerations: {{ .Prometheus.TolerationsRaw }} + {{ end }} {{ if .Prometheus.ExternalLabels }} externalLabels: {{ range $key, $value := .Prometheus.ExternalLabels}} From 4307660563377bccd058cdd03d841b8edb0aae43 Mon Sep 17 00:00:00 2001 From: knrt10 Date: Wed, 28 Jul 2021 10:58:30 +0530 Subject: [PATCH 3/3] prometheus-operator: Add pkg test for tolerations This adds tests for the new toleration configuration introduced as a block. 
Signed-off-by: knrt10 --- .../prometheus-operator/component_test.go | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/pkg/components/prometheus-operator/component_test.go b/pkg/components/prometheus-operator/component_test.go index c53e29cf4..38d64da61 100644 --- a/pkg/components/prometheus-operator/component_test.go +++ b/pkg/components/prometheus-operator/component_test.go @@ -24,6 +24,8 @@ import ( //nolint:funlen func TestRenderManifest(t *testing.T) { + t.Parallel() + tests := []struct { desc string hcl string @@ -89,6 +91,52 @@ component "prometheus-operator" { } } } +`, + wantErr: false, + }, + { + desc: "prometheus operator and admission webhook toleration are set correctly", + hcl: ` +component "prometheus-operator" { + operator { + admission_webhook_tolerations { + key = "lokomotive.io/operator-admission-webhook" + operator = "Equal" + value = "test" + effect = "NoSchedule" + } + tolerations { + key = "lokomotive.io/operator" + operator = "Equal" + value = "test" + effect = "NoSchedule" + } + } +} +`, + wantErr: false, + }, + { + desc: "prometheus and alertmanager toleration are set correctly", + hcl: ` +component "prometheus-operator" { + alertmanager { + tolerations { + key = "lokomotive.io/alertmanager" + operator = "Equal" + value = "test" + effect = "NoSchedule" + } + } + prometheus{ + tolerations { + key = "lokomotive.io/prometheus" + operator = "Equal" + value = "test" + effect = "NoSchedule" + } + } +} `, wantErr: false, }, @@ -97,6 +145,7 @@ component "prometheus-operator" { for _, tc := range tests { tc := tc t.Run(tc.desc, func(t *testing.T) { + t.Parallel() b, d := util.GetComponentBody(tc.hcl, Name) if d != nil { t.Fatalf("error getting component body: %v", d) @@ -131,6 +180,8 @@ component "prometheus-operator" { //nolint:funlen func TestConversion(t *testing.T) { + t.Parallel() + testCases := []struct { name string inputConfig string @@ -210,6 +261,46 @@ providers: path: /tmp/dashboards`, jsonPath: 
"{.data.provider\\.yaml}", }, + { + name: "prometheus operator tolerations", + inputConfig: ` + component "prometheus-operator" { + operator { + tolerations { + key = "lokomotive.io/operator" + operator = "Equal" + value = "test" + effect = "NoSchedule" + } + } + } + `, + expectedManifestName: k8sutil.ObjectMetadata{ + Version: "apps/v1", Kind: "Deployment", Name: "prometheus-operator-kube-p-operator", + }, + expected: "lokomotive.io/operator", + jsonPath: "{.spec.template.spec.tolerations[0].key}", + }, + { + name: "prometheus operator alertmanager tolerations", + inputConfig: ` + component "prometheus-operator" { + alertmanager { + tolerations { + key = "lokomotive.io/alertmanager" + operator = "Equal" + value = "test" + effect = "NoSchedule" + } + } + } + `, + expectedManifestName: k8sutil.ObjectMetadata{ + Version: "monitoring.coreos.com/v1", Kind: "Alertmanager", Name: "prometheus-operator-kube-p-alertmanager", + }, + expected: "lokomotive.io/alertmanager", + jsonPath: "{.spec.tolerations[0].key}", + }, } for _, tc := range testCases {