Skip to content

Commit

Permalink
Update magnum k8s monitoring infra
Browse files Browse the repository at this point in the history
* Prometheus-server now runs only on master nodes.
* Update prometheus-operator helm chart and tag.
* Update prometheus-adapter version.
* Deprecation notice for prometheus_monitoring component.

Since v1.20 (with changes rolled out gradually over subsequent releases),
k8s components expose their metrics on 127.0.0.1 by default
and use different metrics ports.

* Fix metrics addresses and ports for k8s 1.20 and later
* Extend memory limits to 64M
* Change metrics expose address to 127.0.0.1
* Change metrics expose ports

task: 41569
story: 2006765

Signed-off-by: Diogo Guerra <diogo.filipe.tomas.guerra@cern.ch>
Co-authored-by: Grzegorz Bialas <grzegorz@stackhpc.com>
Change-Id: I05e8c2be4e4c8e66a166b485ec7851875dca8b1c
(cherry picked from commit c92f605)
(cherry picked from commit 32b22e5)
  • Loading branch information
Diogo Guerra authored and markgoddard committed Aug 15, 2022
1 parent fa2f9d9 commit 5b56182
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ CERT_DIR=/etc/kubernetes/certs

# kube-proxy config
PROXY_KUBECONFIG=/etc/kubernetes/proxy-kubeconfig.yaml
KUBE_PROXY_ARGS="--kubeconfig=${PROXY_KUBECONFIG} --cluster-cidr=${PODS_NETWORK_CIDR} --hostname-override=${INSTANCE_NAME}"
KUBE_PROXY_ARGS="--kubeconfig=${PROXY_KUBECONFIG} --cluster-cidr=${PODS_NETWORK_CIDR} --hostname-override=${INSTANCE_NAME} --metrics-bind-address=0.0.0.0"
cat > /etc/kubernetes/proxy << EOF
KUBE_PROXY_ARGS="${KUBE_PROXY_ARGS} ${KUBEPROXY_OPTIONS}"
EOF
Expand Down Expand Up @@ -404,6 +404,8 @@ KUBE_CONTROLLER_MANAGER_ARGS="--leader-elect=true --kubeconfig=/etc/kubernetes/a
KUBE_CONTROLLER_MANAGER_ARGS="$KUBE_CONTROLLER_MANAGER_ARGS --cluster-name=${CLUSTER_UUID}"
KUBE_CONTROLLER_MANAGER_ARGS="${KUBE_CONTROLLER_MANAGER_ARGS} --allocate-node-cidrs=true"
KUBE_CONTROLLER_MANAGER_ARGS="${KUBE_CONTROLLER_MANAGER_ARGS} --cluster-cidr=${PODS_NETWORK_CIDR}"
KUBE_CONTROLLER_MANAGER_ARGS="${KUBE_CONTROLLER_MANAGER_ARGS} --secure-port=10257"
KUBE_CONTROLLER_MANAGER_ARGS="${KUBE_CONTROLLER_MANAGER_ARGS} --authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics"
KUBE_CONTROLLER_MANAGER_ARGS="$KUBE_CONTROLLER_MANAGER_ARGS $KUBECONTROLLER_OPTIONS"
if [ -n "${ADMISSION_CONTROL_LIST}" ] && [ "${TLS_DISABLED}" == "False" ]; then
KUBE_CONTROLLER_MANAGER_ARGS="$KUBE_CONTROLLER_MANAGER_ARGS --service-account-private-key-file=$CERT_DIR/service_account_private.key --root-ca-file=$CERT_DIR/ca.crt"
Expand All @@ -426,7 +428,7 @@ sed -i '
/^KUBE_CONTROLLER_MANAGER_ARGS=/ s#\(KUBE_CONTROLLER_MANAGER_ARGS\).*#\1="'"${KUBE_CONTROLLER_MANAGER_ARGS}"'"#
' /etc/kubernetes/controller-manager

sed -i '/^KUBE_SCHEDULER_ARGS=/ s#=.*#="--leader-elect=true --kubeconfig=/etc/kubernetes/admin.conf"#' /etc/kubernetes/scheduler
sed -i '/^KUBE_SCHEDULER_ARGS=/ s#=.*#="--leader-elect=true --kubeconfig=/etc/kubernetes/admin.conf --authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics "#' /etc/kubernetes/scheduler

$ssh_cmd mkdir -p /etc/kubernetes/manifests
KUBELET_ARGS="--register-node=true --pod-manifest-path=/etc/kubernetes/manifests --hostname-override=${INSTANCE_NAME}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,18 +180,10 @@ ${APP_INGRESS_ANNOTATIONS}
endpoints: ${KUBE_MASTERS_PRIVATE}
## If using kubeControllerManager.endpoints only the port and targetPort are used
service:
port: 10252
targetPort: 10252
port: 10257
targetPort: 10257
# selector:
# component: kube-controller-manager
serviceMonitor:
## Enable scraping kube-controller-manager over https.
## Requires proper certs (not self-signed) and delegated authentication/authorization checks
https: "True"
# Skip TLS certificate validation when scraping
insecureSkipVerify: "True"
# Name of the server to use when validating TLS certificate
serverName: null
coreDns:
enabled: true
Expand Down Expand Up @@ -224,28 +216,17 @@ ${APP_INGRESS_ANNOTATIONS}
endpoints: ${KUBE_MASTERS_PRIVATE}
## If using kubeScheduler.endpoints only the port and targetPort are used
service:
port: 10251
targetPort: 10251
port: 10259
targetPort: 10259
# selector:
# component: kube-scheduler
serviceMonitor:
## Enable scraping kube-scheduler over https.
## Requires proper certs (not self-signed) and delegated authentication/authorization checks
https: "True"
## Skip TLS certificate validation when scraping
insecureSkipVerify: "True"
## Name of the server to use when validating TLS certificate
serverName: null
scheme: https
insecureSkipVerify: true
kubeProxy:
## If your kube proxy is not deployed as a pod, specify IPs it can be found on
endpoints: ${KUBE_MASTERS_PRIVATE} # masters + minions
serviceMonitor:
## Enable scraping kube-proxy over https.
## Requires proper certs (not self-signed) and delegated authentication/authorization checks
https: "True"
## Skip TLS certificate validation when scraping
insecureSkipVerify: "True"
kube-state-metrics:
priorityClassName: "system-cluster-critical"
Expand All @@ -272,7 +253,7 @@ ${APP_INGRESS_ANNOTATIONS}
requests:
cpu: 2m
limits:
memory: 30M
memory: 64M
# clusterDomain: ${CLUSTER_ROOT_DOMAIN_NAME}
priorityClassName: "system-cluster-critical"
logFormat: json
Expand All @@ -281,7 +262,7 @@ ${APP_INGRESS_ANNOTATIONS}
requests:
cpu: 2m
limits:
memory: 32M
memory: 64M
image:
repository: ${CONTAINER_INFRA_PREFIX:-quay.io/prometheus-operator/}prometheus-operator
prometheusDefaultBaseImage: ${CONTAINER_INFRA_PREFIX:-quay.io/prometheus/}prometheus
Expand Down
12 changes: 12 additions & 0 deletions magnum/drivers/k8s_fedora_coreos_v1/templates/kubecluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1134,6 +1134,18 @@ resources:
- protocol: tcp
port_range_min: 9090
port_range_max: 9090
- protocol: tcp
port_range_min: 10259
port_range_max: 10259
- protocol: tcp
port_range_min: 10257
port_range_max: 10257
- protocol: tcp
port_range_min: 10249
port_range_max: 10249
- protocol: tcp
port_range_min: 9153
port_range_max: 9153

secgroup_kube_minion:
condition: create_cluster_resources
Expand Down

0 comments on commit 5b56182

Please sign in to comment.