From 77964307704a6aadf3f9c7cf1be6df5077b0d361 Mon Sep 17 00:00:00 2001 From: Diogo Guerra Date: Thu, 7 Jan 2021 14:20:42 +0000 Subject: [PATCH] Update magnum k8s monitoring infra * Prometheus-server now runs only on master nodes. * Update prometheus-operator helm chart and tag. * Update prometheus-adapter version. * Deprecation notice for prometheus_monitoring component. task: 41569 story: 2006765 Signed-off-by: Diogo Guerra Change-Id: I05e8c2be4e4c8e66a166b485ec7851875dca8b1c (cherry picked from commit 0934160d22b6fb33e2971ccb9c55426d2a3c205a) (cherry picked from commit 9a476968e49e4030115eafd4fbc3e777de39f004) (cherry picked from commit 30419089b01963a946bad3a20f137f0fcabd398a) (cherry picked from commit 5f786f0d7528a820310a23438f2cc4f996b32f48) (cherry picked from commit 4abaf43c5ce8f3c6d04893fc8c03ec8795b6eed2) --- doc/source/user/index.rst | 15 +- doc/source/user/monitoring.rst | 21 +- .../kubernetes/helm/prometheus-adapter.sh | 5 +- .../kubernetes/helm/prometheus-operator.sh | 132 +- .../templates/kubecluster.yaml | 1552 +++++++++++++++++ .../templates/kubecluster.yaml | 12 +- ...te-monitoring-charts-1067dc4a0f0060b6.yaml | 10 + 7 files changed, 1660 insertions(+), 87 deletions(-) create mode 100644 magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml create mode 100644 releasenotes/notes/update-monitoring-charts-1067dc4a0f0060b6.yaml diff --git a/doc/source/user/index.rst b/doc/source/user/index.rst index 853b5ca933..a29f9782c9 100644 --- a/doc/source/user/index.rst +++ b/doc/source/user/index.rst @@ -1181,13 +1181,14 @@ _`container_infra_prefix` Images that might be needed if 'monitoring_enabled' is 'true': - * quay.io/prometheus/alertmanager:v0.20.0 - * docker.io/squareup/ghostunnel:v1.5.2 - * docker.io/jettech/kube-webhook-certgen:v1.0.0 - * quay.io/coreos/prometheus-operator:v0.37.0 - * quay.io/coreos/configmap-reload:v0.0.1 - * quay.io/coreos/prometheus-config-reloader:v0.37.0 - * quay.io/prometheus/prometheus:v2.15.2 + * quay.io/prometheus/alertmanager:v0.21.0 + * docker.io/jettech/kube-webhook-certgen:v1.5.0 + * quay.io/prometheus-operator/prometheus-operator:v0.44.0 + * docker.io/jimmidyson/configmap-reload:v0.4.0 + * quay.io/prometheus-operator/prometheus-config-reloader:v0.44.0 + * quay.io/prometheus/prometheus:v2.22.1 + * quay.io/prometheus/node-exporter:v1.0.1 + * docker.io/directxman12/k8s-prometheus-adapter:v0.8.2 Images that might be needed if 'cinder_csi_enabled' is 'true': diff --git a/doc/source/user/monitoring.rst b/doc/source/user/monitoring.rst index b7f40f71fe..f913e16ba5 100644 --- a/doc/source/user/monitoring.rst +++ b/doc/source/user/monitoring.rst @@ -33,13 +33,15 @@ _`metrics_server_enabled` _`monitoring_enabled` Enable installation of cluster monitoring solution provided by the - stable/prometheus-operator helm chart. + prometheus-community/kube-prometheus-stack helm chart. 
+ To use this service tiller_enabled must be true when using + helm_client_tag> ${HELM_CHART_DIR}/values.yaml prometheus-adapter: image: - repository: ${CONTAINER_INFRA_PREFIX:-docker.io/directxman12/}k8s-prometheus-adapter-${ARCH} + repository: ${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/prometheus-adapter/}prometheus-adapter priorityClassName: "system-cluster-critical" prometheus: - url: http://web.tcp.prometheus-prometheus.kube-system.svc.cluster.local + url: http://web.tcp.magnum-kube-prometheus-sta-prometheus.kube-system.svc.cluster.local + path: /prometheus resources: requests: cpu: 150m diff --git a/magnum/drivers/common/templates/kubernetes/helm/prometheus-operator.sh b/magnum/drivers/common/templates/kubernetes/helm/prometheus-operator.sh index 724975b8e2..4e3f1d709c 100644 --- a/magnum/drivers/common/templates/kubernetes/helm/prometheus-operator.sh +++ b/magnum/drivers/common/templates/kubernetes/helm/prometheus-operator.sh @@ -2,7 +2,7 @@ set +x . /etc/sysconfig/heat-params set -ex -CHART_NAME="prometheus-operator" +CHART_NAME="kube-prometheus-stack" if [ "$(echo ${MONITORING_ENABLED} | tr '[:upper:]' '[:lower:]')" = "true" ]; then echo "Writing ${CHART_NAME} config" @@ -80,22 +80,18 @@ EOF PROTOCOL="http" INSECURE_SKIP_VERIFY="True" fi - # FIXME: Force protocol to http as we don't want to use the cluster certs - USE_HTTPS="False" if [ "$(echo ${VERIFY_CA} | tr '[:upper:]' '[:lower:]')" == "false" ]; then INSECURE_SKIP_VERIFY="True" fi cat << EOF >> ${HELM_CHART_DIR}/values.yaml -prometheus-operator: - - defaultRules: - rules: - #TODO: To enable this we need firstly take care of exposing certs - etcd: false +kube-prometheus-stack: alertmanager: + podDisruptionBudget: + enabled: true + #config: ingress: enabled: ${MONITORING_INGRESS_ENABLED} annotations: @@ -108,6 +104,7 @@ ${APP_INGRESS_BASIC_AUTH_ANNOTATIONS} - ${CLUSTER_ROOT_DOMAIN_NAME} paths: - /alertmanager${APP_INGRESS_PATH_APPEND} + pathType: ImplementationSpecific ## TLS configuration for Alertmanager Ingress ## Secret must be manually created in the namespace tls: [] @@ -118,8 +115,8 @@ ${APP_INGRESS_BASIC_AUTH_ANNOTATIONS} image: repository: ${CONTAINER_INFRA_PREFIX:-quay.io/prometheus/}alertmanager logFormat: json + routePrefix: /alertmanager externalUrl: https://${CLUSTER_ROOT_DOMAIN_NAME}/alertmanager - # routePrefix: /alertmanager # resources: # requests: # cpu: 100m @@ -127,15 +124,7 @@ ${APP_INGRESS_BASIC_AUTH_ANNOTATIONS} priorityClassName: "system-cluster-critical" grafana: - image: - repository: ${CONTAINER_INFRA_PREFIX:-grafana/}grafana #enabled: ${ENABLE_GRAFANA} - sidecar: - image: ${CONTAINER_INFRA_PREFIX:-kiwigrid/}k8s-sidecar:0.1.99 - resources: - requests: - cpu: 100m - memory: 128Mi adminPassword: ${GRAFANA_ADMIN_PASSWD} ingress: enabled: ${MONITORING_INGRESS_ENABLED} @@ -146,13 +135,24 @@ ${APP_INGRESS_ANNOTATIONS} ## Must be provided if Ingress is enable. 
hosts: - ${CLUSTER_ROOT_DOMAIN_NAME} - path: /grafana${APP_INGRESS_PATH_APPEND} + paths: + - /grafana${APP_INGRESS_PATH_APPEND} + pathType: ImplementationSpecific ## TLS configuration for grafana Ingress ## Secret must be manually created in the namespace tls: [] # - secretName: grafana-general-tls # hosts: # - grafana.example.com + sidecar: + image: + repository: ${CONTAINER_INFRA_PREFIX:-quay.io/kiwigrid/}k8s-sidecar + image: + repository: ${CONTAINER_INFRA_PREFIX:-grafana/}grafana + resources: + requests: + cpu: 100m + memory: 128Mi persistence: enabled: ${APP_GRAFANA_PERSISTENT_STORAGE} storageClassName: ${MONITORING_STORAGE_CLASS_NAME} @@ -162,21 +162,10 @@ ${APP_INGRESS_ANNOTATIONS} domain: ${CLUSTER_ROOT_DOMAIN_NAME} root_url: https://${CLUSTER_ROOT_DOMAIN_NAME}/grafana serve_from_sub_path: true - paths: - data: /var/lib/grafana/data - logs: /var/log/grafana - plugins: /var/lib/grafana/plugins - provisioning: /etc/grafana/provisioning - analytics: - check_for_updates: true log: mode: console log.console: format: json - grafana_net: - url: https://grafana.net - plugins: - - grafana-piechart-panel kubeApiServer: tlsConfig: @@ -198,9 +187,9 @@ ${APP_INGRESS_ANNOTATIONS} serviceMonitor: ## Enable scraping kube-controller-manager over https. ## Requires proper certs (not self-signed) and delegated authentication/authorization checks - https: ${USE_HTTPS} + https: "True" # Skip TLS certificate validation when scraping - insecureSkipVerify: null + insecureSkipVerify: "True" # Name of the server to use when validating TLS certificate serverName: null @@ -242,19 +231,21 @@ ${APP_INGRESS_ANNOTATIONS} serviceMonitor: ## Enable scraping kube-scheduler over https. ## Requires proper certs (not self-signed) and delegated authentication/authorization checks - https: ${USE_HTTPS} + https: "True" ## Skip TLS certificate validation when scraping - insecureSkipVerify: null + insecureSkipVerify: "True" ## Name of the server to use when validating TLS certificate serverName: null - # kubeProxy: - # ## If your kube proxy is not deployed as a pod, specify IPs it can be found on - # endpoints: [] # masters + minions - # serviceMonitor: - # ## Enable scraping kube-proxy over https. - # ## Requires proper certs (not self-signed) and delegated authentication/authorization checks - # https: ${USE_HTTPS} + kubeProxy: + ## If your kube proxy is not deployed as a pod, specify IPs it can be found on + endpoints: ${KUBE_MASTERS_PRIVATE} # masters + minions + serviceMonitor: + ## Enable scraping kube-proxy over https. 
+ ## Requires proper certs (not self-signed) and delegated authentication/authorization checks + https: "True" + ## Skip TLS certificate validation when scraping + insecureSkipVerify: "True" kube-state-metrics: priorityClassName: "system-cluster-critical" @@ -271,37 +262,34 @@ ${APP_INGRESS_ANNOTATIONS} limits: cpu: 20m memory: 20M - extraArgs: - - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/) - - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$ - sidecars: [] - ## - name: nvidia-dcgm-exporter - ## image: nvidia/dcgm-exporter:1.4.3 prometheusOperator: - priorityClassName: "system-cluster-critical" - tlsProxy: - image: - repository: ${CONTAINER_INFRA_PREFIX:-squareup/}ghostunnel admissionWebhooks: patch: image: repository: ${CONTAINER_INFRA_PREFIX:-jettech/}kube-webhook-certgen - priorityClassName: "system-cluster-critical" - - resources: {} - # requests: - # cpu: 5m - # memory: 10Mi + resources: + requests: + cpu: 2m + limits: + memory: 30M + # clusterDomain: ${CLUSTER_ROOT_DOMAIN_NAME} + priorityClassName: "system-cluster-critical" logFormat: json + logLevel: info + resources: + requests: + cpu: 2m + limits: + memory: 32M image: - repository: ${CONTAINER_INFRA_PREFIX:-quay.io/coreos/}prometheus-operator - configmapReloadImage: - repository: ${CONTAINER_INFRA_PREFIX:-quay.io/coreos/}configmap-reload + repository: ${CONTAINER_INFRA_PREFIX:-quay.io/prometheus-operator/}prometheus-operator + prometheusDefaultBaseImage: ${CONTAINER_INFRA_PREFIX:-quay.io/prometheus/}prometheus + alertmanagerDefaultBaseImage: ${CONTAINER_INFRA_PREFIX:-quay.io/prometheus/}alertmanager prometheusConfigReloaderImage: - repository: ${CONTAINER_INFRA_PREFIX:-quay.io/coreos/}prometheus-config-reloader - hyperkubeImage: - repository: ${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/}hyperkube + repository: ${CONTAINER_INFRA_PREFIX:-quay.io/prometheus-operator/}prometheus-config-reloader + thanosImage: + repository: ${CONTAINER_INFRA_PREFIX:-quay.io/thanos/}thanos prometheus: ingress: @@ -317,6 +305,7 @@ ${APP_INGRESS_BASIC_AUTH_ANNOTATIONS} - ${CLUSTER_ROOT_DOMAIN_NAME} paths: - /prometheus${APP_INGRESS_PATH_APPEND} + pathType: ImplementationSpecific ## TLS configuration for Prometheus Ingress ## Secret must be manually created in the namespace tls: [] @@ -332,11 +321,13 @@ ${APP_INGRESS_BASIC_AUTH_ANNOTATIONS} bearerTokenFile: prometheusSpec: scrapeInterval: ${MONITORING_INTERVAL_SECONDS}s - scrapeInterval: 30s evaluationInterval: 30s image: repository: ${CONTAINER_INFRA_PREFIX:-quay.io/prometheus/}prometheus - retention: 14d + tolerations: + - key: "node-role.kubernetes.io/master" + operator: "Exists" + effect: "NoSchedule" externalLabels: cluster_uuid: ${CLUSTER_UUID} externalUrl: https://${CLUSTER_ROOT_DOMAIN_NAME}/prometheus @@ -352,7 +343,16 @@ ${APP_INGRESS_BASIC_AUTH_ANNOTATIONS} retention: ${MONITORING_RETENTION_DAYS}d retentionSize: ${MONITORING_RETENTION_SIZE_GB}GB logFormat: json - #routePrefix: /prometheus + routePrefix: /prometheus + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: magnum.openstack.org/role + operator: In + values: + - master resources: requests: cpu: ${PROMETHEUS_SERVER_CPU}m diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml new file mode 100644 index 
0000000000..03845c0b8e --- /dev/null +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml @@ -0,0 +1,1552 @@ +heat_template_version: queens + +description: > + This template will boot a Kubernetes cluster with one or more + minions (as specified by the number_of_minions parameter, which + defaults to 1). + +conditions: + create_cluster_resources: + equals: + - get_param: is_cluster_stack + - true + + is_master: + and: + - equals: + - get_param: master_role + - "master" + - equals: + - get_param: worker_role + - "" + + is_worker: + not: + equals: + - get_param: worker_role + - "" + + master_only: + or: + - create_cluster_resources + - is_master + + worker_only: + or: + - create_cluster_resources + - is_worker + + calico_v3_3: + yaql: + expression: $.data.calico_tag.startsWith("v3.3.") + data: + calico_tag: {get_param: calico_tag} + +parameters: + + # needs to become a list if we want to join master nodes? + existing_master_private_ip: + type: string + default: "" + + is_cluster_stack: + type: boolean + default: false + + master_role: + type: string + default: "" + + worker_role: + type: string + default: "" + + existing_security_group: + type: string + default: "" + + ssh_key_name: + type: string + description: name of ssh key to be provisioned on our server + default: "" + + ssh_public_key: + type: string + description: The public ssh key to add in all nodes + default: "" + + external_network: + type: string + description: uuid of a network to use for floating ip addresses + + fixed_network: + type: string + description: uuid/name of an existing network to use to provision machines + default: "" + + fixed_network_name: + type: string + description: name of a private network to use to provision machines + + fixed_subnet: + type: string + description: uuid/name of an existing subnet to use to provision machines + default: "" + + master_image: + type: string + description: glance image used to boot the server + # When creating a new minion nodegroup this will not + # be provided by magnum. So make it default to "" + default: "" + + minion_image: + type: string + description: glance image used to boot the server + # When creating a new master nodegroup this will not + # be provided by magnum. 
So make it default to "" + default: "" + + master_flavor: + type: string + default: m1.small + description: flavor to use when booting the server for master nodes + + master_nodegroup_name: + type: string + default: "" + description: the name of the nodegroup where the node belongs + + worker_nodegroup_name: + type: string + default: "" + description: the name of the nodegroup where the node belongs + + heapster_enabled: + type: boolean + description: enable/disable the use of heapster + default: false + + metrics_server_enabled: + type: boolean + description: enable/disable the use of metrics-server + default: true + + metrics_server_chart_tag: + type: string + description: tag of the stable/metrics-server chart to install + default: v3.7.0 + + minion_flavor: + type: string + default: m1.small + description: flavor to use when booting the server for minions + + prometheus_monitoring: + type: boolean + default: false + description: > + whether or not to have the grafana-prometheus-cadvisor monitoring setup + + grafana_admin_passwd: + type: string + default: admin + hidden: true + description: > + admin user password for the Grafana monitoring interface + + dns_nameserver: + type: comma_delimited_list + description: address of a DNS nameserver reachable in your environment + default: 8.8.8.8 + + number_of_masters: + type: number + description: how many kubernetes masters to spawn + default: 1 + + number_of_minions: + type: number + description: how many kubernetes minions to spawn + default: 1 + + fixed_subnet_cidr: + type: string + description: network range for fixed ip network + default: 10.0.0.0/24 + + portal_network_cidr: + type: string + description: > + address range used by kubernetes for service portals + default: 10.254.0.0/16 + + network_driver: + type: string + description: network driver to use for instantiating container networks + default: flannel + + flannel_network_cidr: + type: string + description: network range for flannel overlay network + default: 10.100.0.0/16 + + flannel_network_subnetlen: + type: number + description: size of subnet assigned to each minion + default: 24 + + flannel_backend: + type: string + description: > + specify the backend for flannel, default vxlan backend + default: "vxlan" + constraints: + - allowed_values: ["udp", "vxlan", "host-gw"] + + system_pods_initial_delay: + type: number + description: > + health check, time to wait for system pods (podmaster, scheduler) to boot + (in seconds) + default: 30 + + system_pods_timeout: + type: number + description: > + health check, timeout for system pods (podmaster, scheduler) to answer. + (in seconds) + default: 5 + + admission_control_list: + type: string + description: > + List of admission control plugins to activate + default: "NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ResourceQuota" + + kube_allow_priv: + type: string + description: > + whether or not kubernetes should permit privileged containers. 
+ default: "true" + constraints: + - allowed_values: ["true", "false"] + + boot_volume_size: + type: number + description: > + size of the cinder boot volume for nodes root volume + + boot_volume_type: + type: string + description: > + type of the cinder boot volume for nodes root volume + + etcd_volume_size: + type: number + description: > + size of the cinder volume for etcd storage + default: 0 + + etcd_volume_type: + type: string + description: > + type of a cinder volume for etcd storage + + docker_volume_size: + type: number + description: > + size of a cinder volume to allocate to docker for container/image + storage + default: 0 + + docker_volume_type: + type: string + description: > + type of a cinder volume to allocate to docker for container/image + storage + + docker_storage_driver: + type: string + description: docker storage driver name + default: "devicemapper" + + cgroup_driver: + type: string + description: > + cgroup driver name that kubelet should use, ideally the same as + the docker cgroup driver. + default: "cgroupfs" + + traefik_ingress_controller_tag: + type: string + description: tag of the traefik containers to be used. + default: v1.7.28 + + wait_condition_timeout: + type: number + description: > + timeout for the Wait Conditions + default: 6000 + + minions_to_remove: + type: comma_delimited_list + description: > + List of minions to be removed when doing an update. Individual minion may + be referenced several ways: (1) The resource name (e.g. ['1', '3']), + (2) The private IP address ['10.0.0.4', '10.0.0.6']. Note: the list should + be empty when doing an create. + default: [] + + discovery_url: + type: string + description: > + Discovery URL used for bootstrapping the etcd cluster. + + registry_enabled: + type: boolean + description: > + Indicates whether the docker registry is enabled. + default: false + + registry_port: + type: number + description: port of registry service + default: 5000 + + swift_region: + type: string + description: region of swift service + default: "" + + registry_container: + type: string + description: > + name of swift container which docker registry stores images in + default: "container" + + registry_insecure: + type: boolean + description: > + indicates whether to skip TLS verification between registry and backend storage + default: true + + registry_chunksize: + type: number + description: > + size fo the data segments for the swift dynamic large objects + default: 5242880 + + volume_driver: + type: string + description: volume driver to use for container storage + default: "" + + region_name: + type: string + description: A logically separate section of the cluster + + username: + type: string + description: > + user account + + password: + type: string + description: > + user password, not set in current implementation, only used to + fill in for Kubernetes config file + default: + ChangeMe + hidden: true + + loadbalancing_protocol: + type: string + description: > + The protocol which is used for load balancing. If you want to change + tls_disabled option to 'True', please change this to "HTTP". 
+ default: TCP + constraints: + - allowed_values: ["TCP", "HTTP"] + + tls_disabled: + type: boolean + description: whether or not to disable TLS + default: False + + kube_dashboard_enabled: + type: boolean + description: whether or not to enable kubernetes dashboard + default: True + + influx_grafana_dashboard_enabled: + type: boolean + description: Enable influxdb with grafana dashboard for data from heapster + default: False + + verify_ca: + type: boolean + description: whether or not to validate certificate authority + + kubernetes_port: + type: number + description: > + The port which are used by kube-apiserver to provide Kubernetes + service. + default: 6443 + + cluster_uuid: + type: string + description: identifier for the cluster this template is generating + + magnum_url: + type: string + description: endpoint to retrieve TLS certs from + + http_proxy: + type: string + description: http proxy address for docker + default: "" + + https_proxy: + type: string + description: https proxy address for docker + default: "" + + no_proxy: + type: string + description: no proxies for docker + default: "" + + trustee_domain_id: + type: string + description: domain id of the trustee + + trustee_user_id: + type: string + description: user id of the trustee + + trustee_username: + type: string + description: username of the trustee + + trustee_password: + type: string + description: password of the trustee + hidden: true + + trust_id: + type: string + description: id of the trust which is used by the trustee + hidden: true + + auth_url: + type: string + description: url for keystone + + hyperkube_prefix: + type: string + description: prefix to use for hyperkube images + default: k8s.gcr.io/ + + kube_tag: + type: string + description: tag of the k8s containers used to provision the kubernetes cluster + default: v1.15.7 + + master_kube_tag: + type: string + description: tag of the k8s containers used to provision the kubernetes cluster + default: v1.15.7 + + minion_kube_tag: + type: string + description: tag of the k8s containers used to provision the kubernetes cluster + default: v1.15.7 + + # FIXME update cloud_provider_tag when a fix for PVC is released + # https://github.com/kubernetes/cloud-provider-openstack/pull/405 + cloud_provider_tag: + type: string + description: + tag of the kubernetes/cloud-provider-openstack + https://hub.docker.com/r/k8scloudprovider/openstack-cloud-controller-manager/tags/ + default: v1.15.0 + + cloud_provider_enabled: + type: boolean + description: Enable or disable the openstack kubernetes cloud provider + + etcd_tag: + type: string + description: tag of the etcd system container + default: v3.2.7 + + coredns_tag: + type: string + description: tag for coredns + default: 1.3.1 + + flannel_tag: + type: string + description: tag of the flannel container + default: v0.11.0-amd64 + + flannel_cni_tag: + type: string + description: tag of the flannel cni container + default: v0.3.0 + + kube_version: + type: string + description: version of kubernetes used for kubernetes cluster + default: v1.15.7 + + kube_dashboard_version: + type: string + description: version of kubernetes dashboard used for kubernetes cluster + default: v2.0.0 + + metrics_scraper_tag: + type: string + description: > + Tag of metrics-scraper for kubernetes dashboard. 
+ default: v1.0.4 + + insecure_registry_url: + type: string + description: insecure registry url + default: "" + + container_infra_prefix: + type: string + description: > + prefix of container images used in the cluster, kubernetes components, + kubernetes-dashboard, coredns etc + constraints: + - allowed_pattern: "^$|.*/" + default: "" + + dns_service_ip: + type: string + description: > + address used by Kubernetes DNS service + default: 10.254.0.10 + + dns_cluster_domain: + type: string + description: > + domain name for cluster DNS + default: "cluster.local" + + openstack_ca: + type: string + hidden: true + description: The OpenStack CA certificate to install on the node. + + nodes_affinity_policy: + type: string + description: > + affinity policy for nodes server group + constraints: + - allowed_values: ["affinity", "anti-affinity", "soft-affinity", + "soft-anti-affinity"] + + availability_zone: + type: string + description: > + availability zone for master and nodes + default: "" + + cert_manager_api: + type: boolean + description: true if the kubernetes cert api manager should be enabled + default: false + + ca_key: + type: string + description: key of internal ca for the kube certificate api manager + default: "" + hidden: true + + calico_tag: + type: string + description: tag of the calico containers used to provision the calico node + default: v3.3.6 + + calico_kube_controllers_tag: + type: string + description: tag of the kube_controllers used to provision the calico node + default: v1.0.3 + + calico_ipv4pool: + type: string + description: Configure the IP pool from which Pod IPs will be chosen + default: "10.100.0.0/16" + + calico_ipv4pool_ipip: + type: string + description: IPIP Mode to use for the IPv4 POOL created at start up + default: "Off" + constraints: + - allowed_values: ["Always", "CrossSubnet", "Never", "Off"] + + pods_network_cidr: + type: string + description: Configure the IP pool/range from which pod IPs will be chosen + + ingress_controller: + type: string + description: > + ingress controller backend to use + default: "" + + ingress_controller_role: + type: string + description: > + node role where the ingress controller backend should run + default: "ingress" + + octavia_ingress_controller_tag: + type: string + description: Octavia ingress controller docker image tag. + default: "v1.15.0" + + kubelet_options: + type: string + description: > + additional options to be passed to the kubelet + default: "" + + kubeapi_options: + type: string + description: > + additional options to be passed to the api + default: "" + + kubecontroller_options: + type: string + description: > + additional options to be passed to the controller manager + default: "" + + kubeproxy_options: + type: string + description: > + additional options to be passed to the kube proxy + default: "" + + kubescheduler_options: + type: string + description: > + additional options to be passed to the scheduler + default: "" + + octavia_enabled: + type: boolean + description: > + whether or not to use Octavia for LoadBalancer type service. + default: False + + kube_service_account_key: + type: string + hidden: true + description: > + The signed cert will be used to verify the k8s service account tokens + during authentication. + + kube_service_account_private_key: + type: string + hidden: true + description: > + The private key will be used to sign generated k8s service account + tokens. 
+ + prometheus_tag: + type: string + description: tag of the prometheus container + default: v1.8.2 + + grafana_tag: + type: string + description: tag of grafana container + default: 5.1.5 + + heat_container_agent_tag: + type: string + description: tag of the heat_container_agent system container + default: wallaby-stable-1 + + keystone_auth_enabled: + type: boolean + description: > + true if the keystone authN and authZ should be enabled + default: + true + + keystone_auth_default_policy: + type: string + description: Json read from /etc/magnum/keystone_auth_default_policy.json + default: "" + + k8s_keystone_auth_tag: + type: string + description: tag of the k8s_keystone_auth container + default: v1.15.0 + + monitoring_enabled: + type: boolean + description: Enable or disable prometheus-operator monitoring solution. + default: false + + monitoring_retention_days: + type: number + description: The length of time (in days) that prometheus metrics should be kept. + default: 14 + + monitoring_retention_size: + type: number + description: > + The maximum amount of storage (in Gi) allowed to be used by the prometheus server to store metrics. + default: 14 + + monitoring_interval_seconds: + type: number + description: > + The time interval (in seconds) between consecutive metric scrapings. + default: 30 + + monitoring_storage_class_name: + type: string + description: The kubernetes storage class name to use for the prometheus pvc. + default: "" + + monitoring_ingress_enabled: + type: boolean + description: Enable/disable configuration of ingresses for the monitoring services. + default: false + + cluster_basic_auth_secret: + type: string + description: > + The kubernetes secret name to use for the proxy basic auth username and password. + default: "" + + cluster_root_domain_name: + type: string + description: > + The root domain name to use for applications automatically set up by the cluster. + default: "localhost" + + prometheus_operator_chart_tag: + type: string + description: The prometheus-community/kube-prometheus-stack chart version to use. + default: 17.2.0 + + prometheus_adapter_enabled: + type: boolean + description: Enable or disable prometheus-adapter custom metrics. + default: true + + prometheus_adapter_chart_tag: + type: string + description: The prometheus-community/prometheus-adapter chart version to use. + default: 2.5.1 + + prometheus_adapter_configmap: + type: string + description: The prometheus adapter rules ConfigMap name to use as an override. + default: "" + + project_id: + type: string + description: > + project id of current project + + tiller_enabled: + type: boolean + description: Choose whether to install tiller or not. + default: true + + tiller_tag: + type: string + description: tag of tiller container + default: "v2.16.7" + + tiller_namespace: + type: string + description: namespace where tiller will be installed. + default: "magnum-tiller" + + helm_client_url: + type: string + description: url of helm client tarball + default: "" + + helm_client_sha256: + type: string + description: sha256 of helm client tarball + default: "018f9908cb950701a5d59e757653a790c66d8eda288625dbb185354ca6f41f6b" + + helm_client_tag: + type: string + description: > + release tag of helm client + https://github.com/helm/helm/releases + default: "v3.2.1" + + auto_healing_enabled: + type: boolean + description: > + true if the auto healing feature should be enabled + default: + false + + auto_healing_controller: + type: string + description: > + The service to be deployed for auto-healing. 
+ default: "draino" + + magnum_auto_healer_tag: + type: string + description: tag of the magnum-auto-healer service. + default: "v1.15.0" + + auto_scaling_enabled: + type: boolean + description: > + true if the auto scaling feature should be enabled + default: + false + + cinder_csi_enabled: + type: boolean + description: > + true if the cinder csi feature should be enabled + default: + true + + cinder_csi_plugin_tag: + type: string + description: tag of cinder csi plugin + default: v1.16.0 + + csi_attacher_tag: + type: string + description: tag of csi attacher + default: v2.0.0 + + csi_provisioner_tag: + type: string + description: tag of csi provisioner + default: v1.4.0 + + csi_snapshotter_tag: + type: string + description: tag of csi snapshotter + default: v1.2.2 + + csi_resizer_tag: + type: string + description: tag of csi resizer + default: v0.3.0 + + csi_node_driver_registrar_tag: + type: string + description: tag of csi node driver registrar + default: v1.1.0 + + node_problem_detector_tag: + type: string + description: tag of the node problem detector container + default: v0.6.2 + + nginx_ingress_controller_tag: + type: string + description: nginx ingress controller docker image tag + default: 0.32.0 + + nginx_ingress_controller_chart_tag: + type: string + description: nginx ingress controller helm chart tag + default: v1.36.3 + + draino_tag: + type: string + description: tag of the draino container + default: abf028a + + autoscaler_tag: + type: string + description: tag of the autoscaler container + default: v1.0 + + min_node_count: + type: number + description: > + minimum node count of cluster workers when doing scale down + default: 0 + + max_node_count: + type: number + description: > + maximum node count of cluster workers when doing scale up + + update_max_batch_size: + type: number + description: > + max batch size when doing rolling upgrade + default: 1 + + npd_enabled: + type: boolean + description: > + true if the npd service should be launched + default: + true + + ostree_remote: + type: string + description: The ostree remote branch to upgrade + default: '' + + ostree_commit: + type: string + description: The ostree commit to deploy + default: '' + + use_podman: + type: boolean + description: > + if true, run system containers for kubernetes, etcd and heat-agent + default: + false + + selinux_mode: + type: string + description: > + Choose SELinux mode + default: "permissive" + constraints: + - allowed_values: ["enforcing", "permissive", "disabled"] + + kube_image_digest: + type: string + description: > + The digest of the image which should match the given kube_tag + default: '' + + container_runtime: + type: string + description: The container runtime to install + default: 'host-docker' + + containerd_version: + type: string + description: The containerd version to download from https://github.com/containerd/containerd/releases + default: '1.4.4' + + containerd_tarball_url: + type: string + description: Url location of the containerd tarball. + default: '' + + containerd_tarball_sha256: + type: string + description: sha256 of the target containerd tarball. 
+ default: '96641849cb78a0a119223a427dfdc1ade88412ef791a14193212c8c8e29d447b' + + post_install_manifest_url: + type: string + description: > + Post install manifest URL used to setup some cloud provider/vendor + specific configs + default: "" + + master_lb_allowed_cidrs: + type: comma_delimited_list + description: The allowed CIDR list for master load balancer + default: [] + +resources: + + ###################################################################### + # + # network resources. allocate a network and router for our server. + # Important: the Load Balancer feature in Kubernetes requires that + # the name for the fixed_network must be "private" for the + # address lookup in Kubernetes to work properly + # + + network: + condition: create_cluster_resources + type: ../../common/templates/network.yaml + properties: + existing_network: {get_param: fixed_network} + existing_subnet: {get_param: fixed_subnet} + private_network_cidr: {get_param: fixed_subnet_cidr} + dns_nameserver: {get_param: dns_nameserver} + external_network: {get_param: external_network} + private_network_name: {get_param: fixed_network_name} + + api_lb: + condition: create_cluster_resources + type: ../../common/templates/lb_api.yaml + properties: + fixed_subnet: {get_attr: [network, fixed_subnet]} + external_network: {get_param: external_network} + protocol: {get_param: loadbalancing_protocol} + port: {get_param: kubernetes_port} + allowed_cidrs: {get_param: master_lb_allowed_cidrs} + + etcd_lb: + condition: create_cluster_resources + type: ../../common/templates/lb_etcd.yaml + properties: + fixed_subnet: {get_attr: [network, fixed_subnet]} + protocol: {get_param: loadbalancing_protocol} + port: 2379 + allowed_cidrs: {get_param: master_lb_allowed_cidrs} + + ###################################################################### + # + # security groups. we need to permit network traffic of various + # sorts. + # + + secgroup_kube_master: + condition: create_cluster_resources + type: OS::Neutron::SecurityGroup + properties: + rules: + - protocol: icmp + - protocol: tcp + port_range_min: 22 + port_range_max: 22 + - protocol: tcp + port_range_min: 7080 + port_range_max: 7080 + - protocol: tcp + port_range_min: 8080 + port_range_max: 8080 + - protocol: tcp + port_range_min: 2379 + port_range_max: 2379 + - protocol: tcp + port_range_min: 2380 + port_range_max: 2380 + - protocol: tcp + port_range_min: 6443 + port_range_max: 6443 + - protocol: tcp + port_range_min: 9100 + port_range_max: 9100 + - protocol: tcp + port_range_min: 10250 + port_range_max: 10250 + - protocol: tcp + port_range_min: 30000 + port_range_max: 32767 + - protocol: udp + port_range_min: 8472 + port_range_max: 8472 + # Prometheus Server + - protocol: tcp + port_range_min: 9090 + port_range_max: 9090 + + secgroup_kube_minion: + condition: create_cluster_resources + type: OS::Neutron::SecurityGroup + properties: + rules: + - protocol: icmp + # Default port range for external service ports. + # In future, if the option `manage-security-groups` for ccm works + # well, we could remove this rule here. 
+ # The PR in ccm is + # https://github.com/kubernetes/cloud-provider-openstack/pull/491 + - protocol: tcp + port_range_min: 22 + port_range_max: 22 + - protocol: tcp + port_range_min: 30000 + port_range_max: 32767 + # allow any traffic from master nodes + - protocol: tcp + port_range_min: 1 + port_range_max: 65535 + remote_mode: 'remote_group_id' + remote_group_id: {get_resource: secgroup_kube_master} + - protocol: udp + port_range_min: 1 + port_range_max: 65535 + remote_mode: 'remote_group_id' + remote_group_id: {get_resource: secgroup_kube_master} + + # allow any traffic between worker nodes + secgroup_rule_tcp_kube_minion: + condition: create_cluster_resources + type: OS::Neutron::SecurityGroupRule + properties: + protocol: tcp + port_range_min: 1 + port_range_max: 65535 + security_group: {get_resource: secgroup_kube_minion} + remote_group: {get_resource: secgroup_kube_minion} + secgroup_rule_udp_kube_minion: + condition: create_cluster_resources + type: OS::Neutron::SecurityGroupRule + properties: + protocol: udp + port_range_min: 1 + port_range_max: 65535 + security_group: {get_resource: secgroup_kube_minion} + remote_group: {get_resource: secgroup_kube_minion} + + ###################################################################### + # + # resources that expose the IPs of either the kube master or a given + # LBaaS pool depending on whether LBaaS is enabled for the cluster. + # + + api_address_lb_switch: + condition: create_cluster_resources + type: Magnum::ApiGatewaySwitcher + properties: + pool_public_ip: {get_attr: [api_lb, floating_address]} + pool_private_ip: {get_attr: [api_lb, address]} + master_public_ip: {get_attr: [kube_masters, resource.0.kube_master_external_ip]} + master_private_ip: {get_attr: [kube_masters, resource.0.kube_master_ip]} + + etcd_address_lb_switch: + condition: create_cluster_resources + type: Magnum::ApiGatewaySwitcher + properties: + pool_private_ip: {get_attr: [etcd_lb, address]} + master_private_ip: {get_attr: [kube_masters, resource.0.kube_master_ip]} + + ###################################################################### + # + # resources that expose the IPs of either floating ip or a given + # fixed ip depending on whether FloatingIP is enabled for the cluster. + # + + api_address_floating_switch: + condition: create_cluster_resources + type: Magnum::FloatingIPAddressSwitcher + properties: + public_ip: {get_attr: [api_address_lb_switch, public_ip]} + private_ip: {get_attr: [api_address_lb_switch, private_ip]} + + ###################################################################### + # + # resources that expose one server group for each master and worker nodes + # separately. + # + + master_nodes_server_group: + condition: master_only + type: OS::Nova::ServerGroup + properties: + policies: [{get_param: nodes_affinity_policy}] + + worker_nodes_server_group: + condition: worker_only + type: OS::Nova::ServerGroup + properties: + policies: [{get_param: nodes_affinity_policy}] + + ###################################################################### + # + # kubernetes masters. This is a resource group that will create + # masters. 
+ # + + kube_masters: + condition: master_only + type: OS::Heat::ResourceGroup + depends_on: + - network + update_policy: + rolling_update: {max_batch_size: {get_param: update_max_batch_size}, pause_time: 30} + properties: + count: {get_param: number_of_masters} + resource_def: + type: kubemaster.yaml + properties: + name: + list_join: + - '-' + - [{ get_param: 'OS::stack_name' }, 'master', '%index%'] + nodegroup_role: {get_param: master_role} + nodegroup_name: {get_param: master_nodegroup_name} + heapster_enabled: {get_param: heapster_enabled} + metrics_server_enabled: {get_param: metrics_server_enabled} + metrics_server_chart_tag: {get_param: metrics_server_chart_tag} + prometheus_monitoring: {get_param: prometheus_monitoring} + api_public_address: {get_attr: [api_lb, floating_address]} + api_private_address: {get_attr: [api_lb, address]} + ssh_key_name: {get_param: ssh_key_name} + server_image: {get_param: master_image} + master_flavor: {get_param: master_flavor} + external_network: {get_param: external_network} + kube_allow_priv: {get_param: kube_allow_priv} + boot_volume_size: {get_param: boot_volume_size} + boot_volume_type: {get_param: boot_volume_type} + etcd_volume_size: {get_param: etcd_volume_size} + etcd_volume_type: {get_param: etcd_volume_type} + docker_volume_size: {get_param: docker_volume_size} + docker_volume_type: {get_param: docker_volume_type} + docker_storage_driver: {get_param: docker_storage_driver} + cgroup_driver: {get_param: cgroup_driver} + network_driver: {get_param: network_driver} + flannel_network_cidr: {get_param: flannel_network_cidr} + flannel_network_subnetlen: {get_param: flannel_network_subnetlen} + flannel_backend: {get_param: flannel_backend} + system_pods_initial_delay: {get_param: system_pods_initial_delay} + system_pods_timeout: {get_param: system_pods_timeout} + portal_network_cidr: {get_param: portal_network_cidr} + admission_control_list: {get_param: admission_control_list} + discovery_url: {get_param: discovery_url} + cluster_uuid: {get_param: cluster_uuid} + magnum_url: {get_param: magnum_url} + traefik_ingress_controller_tag: {get_param: traefik_ingress_controller_tag} + volume_driver: {get_param: volume_driver} + region_name: {get_param: region_name} + fixed_network: {get_attr: [network, fixed_network]} + fixed_network_name: {get_param: fixed_network_name} + fixed_subnet: {get_attr: [network, fixed_subnet]} + fixed_subnet_cidr: {get_param: fixed_subnet_cidr} + api_pool_id: {get_attr: [api_lb, pool_id]} + etcd_pool_id: {get_attr: [etcd_lb, pool_id]} + username: {get_param: username} + password: {get_param: password} + kubernetes_port: {get_param: kubernetes_port} + tls_disabled: {get_param: tls_disabled} + kube_dashboard_enabled: {get_param: kube_dashboard_enabled} + influx_grafana_dashboard_enabled: {get_param: influx_grafana_dashboard_enabled} + verify_ca: {get_param: verify_ca} + secgroup_kube_master_id: {get_resource: secgroup_kube_master} + http_proxy: {get_param: http_proxy} + https_proxy: {get_param: https_proxy} + no_proxy: {get_param: no_proxy} + hyperkube_prefix: {get_param: hyperkube_prefix} + kube_tag: {get_param: master_kube_tag} + cloud_provider_tag: {get_param: cloud_provider_tag} + cloud_provider_enabled: {get_param: cloud_provider_enabled} + kube_version: {get_param: kube_version} + etcd_tag: {get_param: etcd_tag} + coredns_tag: {get_param: coredns_tag} + flannel_tag: {get_param: flannel_tag} + flannel_cni_tag: {get_param: flannel_cni_tag} + kube_dashboard_version: {get_param: kube_dashboard_version} + trustee_user_id: 
{get_param: trustee_user_id} + trustee_password: {get_param: trustee_password} + trust_id: {get_param: trust_id} + auth_url: {get_param: auth_url} + insecure_registry_url: {get_param: insecure_registry_url} + container_infra_prefix: {get_param: container_infra_prefix} + etcd_lb_vip: {get_attr: [etcd_lb, address]} + dns_service_ip: {get_param: dns_service_ip} + dns_cluster_domain: {get_param: dns_cluster_domain} + openstack_ca: {get_param: openstack_ca} + nodes_server_group_id: {get_resource: master_nodes_server_group} + availability_zone: {get_param: availability_zone} + ca_key: {get_param: ca_key} + cert_manager_api: {get_param: cert_manager_api} + calico_tag: {get_param: calico_tag} + calico_kube_controllers_tag: {get_param: calico_kube_controllers_tag} + calico_ipv4pool: {get_param: calico_ipv4pool} + calico_ipv4pool_ipip: {get_param: calico_ipv4pool_ipip} + pods_network_cidr: {get_param: pods_network_cidr} + ingress_controller: {get_param: ingress_controller} + ingress_controller_role: {get_param: ingress_controller_role} + octavia_ingress_controller_tag: {get_param: octavia_ingress_controller_tag} + kubelet_options: {get_param: kubelet_options} + kubeapi_options: {get_param: kubeapi_options} + kubeproxy_options: {get_param: kubeproxy_options} + kubecontroller_options: {get_param: kubecontroller_options} + kubescheduler_options: {get_param: kubescheduler_options} + octavia_enabled: {get_param: octavia_enabled} + kube_service_account_key: {get_param: kube_service_account_key} + kube_service_account_private_key: {get_param: kube_service_account_private_key} + prometheus_tag: {get_param: prometheus_tag} + grafana_tag: {get_param: grafana_tag} + heat_container_agent_tag: {get_param: heat_container_agent_tag} + keystone_auth_enabled: {get_param: keystone_auth_enabled} + k8s_keystone_auth_tag: {get_param: k8s_keystone_auth_tag} + monitoring_enabled: {get_param: monitoring_enabled} + monitoring_retention_days: {get_param: monitoring_retention_days} + monitoring_retention_size: {get_param: monitoring_retention_size} + monitoring_interval_seconds: {get_param: monitoring_interval_seconds} + monitoring_storage_class_name: {get_param: monitoring_storage_class_name} + monitoring_ingress_enabled: {get_param: monitoring_ingress_enabled} + cluster_basic_auth_secret: {get_param: cluster_basic_auth_secret} + cluster_root_domain_name: {get_param: cluster_root_domain_name} + prometheus_operator_chart_tag: {get_param: prometheus_operator_chart_tag} + prometheus_adapter_enabled: {get_param: prometheus_adapter_enabled} + prometheus_adapter_chart_tag: {get_param: prometheus_adapter_chart_tag} + prometheus_adapter_configmap: {get_param: prometheus_adapter_configmap} + project_id: {get_param: project_id} + tiller_enabled: {get_param: tiller_enabled} + tiller_tag: {get_param: tiller_tag} + tiller_namespace: {get_param: tiller_namespace} + helm_client_url: {get_param: helm_client_url} + helm_client_sha256: {get_param: helm_client_sha256} + helm_client_tag: {get_param: helm_client_tag} + node_problem_detector_tag: {get_param: node_problem_detector_tag} + nginx_ingress_controller_tag: {get_param: nginx_ingress_controller_tag} + nginx_ingress_controller_chart_tag: {get_param: nginx_ingress_controller_chart_tag} + auto_healing_enabled: {get_param: auto_healing_enabled} + auto_healing_controller: {get_param: auto_healing_controller} + magnum_auto_healer_tag: {get_param: magnum_auto_healer_tag} + auto_scaling_enabled: {get_param: auto_scaling_enabled} + cinder_csi_enabled: {get_param: cinder_csi_enabled} + 
cinder_csi_plugin_tag: {get_param: cinder_csi_plugin_tag} + csi_attacher_tag: {get_param: csi_attacher_tag} + csi_provisioner_tag: {get_param: csi_provisioner_tag} + csi_snapshotter_tag: {get_param: csi_snapshotter_tag} + csi_resizer_tag: {get_param: csi_resizer_tag} + csi_node_driver_registrar_tag: {get_param: csi_node_driver_registrar_tag} + draino_tag: {get_param: draino_tag} + autoscaler_tag: {get_param: autoscaler_tag} + min_node_count: {get_param: min_node_count} + max_node_count: {get_param: max_node_count} + npd_enabled: {get_param: npd_enabled} + ostree_remote: {get_param: ostree_remote} + ostree_commit: {get_param: ostree_commit} + use_podman: {get_param: use_podman} + selinux_mode: {get_param: selinux_mode} + container_runtime: {get_param: container_runtime} + containerd_version: {get_param: containerd_version} + containerd_tarball_url: {get_param: containerd_tarball_url} + containerd_tarball_sha256: {get_param: containerd_tarball_sha256} + post_install_manifest_url: {get_param: post_install_manifest_url} + metrics_scraper_tag: {get_param: metrics_scraper_tag} + + kube_cluster_config: + condition: create_cluster_resources + type: OS::Heat::SoftwareConfig + properties: + group: script + config: + list_join: + - "\n" + - + - "#!/bin/bash" + - get_file: ../../common/templates/kubernetes/fragments/kube-apiserver-to-kubelet-role.sh + - get_file: ../../common/templates/kubernetes/fragments/core-dns-service.sh + - if: + - calico_v3_3 + - get_file: ../../common/templates/kubernetes/fragments/calico-service-v3-3-x.sh + - get_file: ../../common/templates/kubernetes/fragments/calico-service.sh + - get_file: ../../common/templates/kubernetes/fragments/flannel-service.sh + - get_file: ../../common/templates/kubernetes/fragments/enable-helm-tiller.sh + - str_replace: + template: {get_file: ../../common/templates/kubernetes/fragments/enable-prometheus-monitoring.sh} + params: + "${GRAFANA_ADMIN_PASSWD}": {get_param: grafana_admin_passwd} + - str_replace: + params: + $enable-ingress-traefik: {get_file: ../../common/templates/kubernetes/fragments/enable-ingress-traefik.sh} + $enable-ingress-octavia: {get_file: ../../common/templates/kubernetes/fragments/enable-ingress-octavia.sh} + template: {get_file: ../../common/templates/kubernetes/fragments/enable-ingress-controller.sh} + - get_file: ../../common/templates/kubernetes/fragments/kube-dashboard-service.sh + - str_replace: + template: {get_file: ../../common/templates/kubernetes/fragments/enable-keystone-auth.sh} + params: + "$KEYSTONE_AUTH_DEFAULT_POLICY": {get_param: keystone_auth_default_policy} + - get_file: ../../common/templates/kubernetes/fragments/enable-auto-healing.sh + - get_file: ../../common/templates/kubernetes/fragments/enable-auto-scaling.sh + - get_file: ../../common/templates/kubernetes/fragments/enable-cinder-csi.sh + # Helm Based Installation Configuration Scripts + - get_file: ../../common/templates/kubernetes/helm/metrics-server.sh + - str_replace: + template: {get_file: ../../common/templates/kubernetes/helm/prometheus-operator.sh} + params: + "${GRAFANA_ADMIN_PASSWD}": {get_param: grafana_admin_passwd} + "${KUBE_MASTERS_PRIVATE}": {get_attr: [kube_masters, kube_master_external_ip]} + - get_file: ../../common/templates/kubernetes/helm/prometheus-adapter.sh + - get_file: ../../common/templates/kubernetes/helm/ingress-nginx.sh + - get_file: ../../common/templates/kubernetes/fragments/install-helm-modules.sh + + kube_cluster_deploy: + condition: create_cluster_resources + type: OS::Heat::SoftwareDeployment + properties: + 
actions: ['CREATE'] + signal_transport: HEAT_SIGNAL + config: + get_resource: kube_cluster_config + server: + get_attr: [kube_masters, resource.0] + + + ###################################################################### + # + # kubernetes minions. This is an resource group that will initially + # create minions, and needs to be manually scaled. + # + + kube_minions: + condition: worker_only + type: OS::Heat::ResourceGroup + depends_on: + - network + update_policy: + rolling_update: {max_batch_size: {get_param: update_max_batch_size}, pause_time: 30} + properties: + count: {get_param: number_of_minions} + removal_policies: [{resource_list: {get_param: minions_to_remove}}] + resource_def: + type: kubeminion.yaml + properties: + name: + list_join: + - '-' + - [{ get_param: 'OS::stack_name' }, 'node', '%index%'] + prometheus_monitoring: {get_param: prometheus_monitoring} + nodegroup_role: {get_param: worker_role} + nodegroup_name: {get_param: worker_nodegroup_name} + ssh_key_name: {get_param: ssh_key_name} + server_image: {get_param: minion_image} + minion_flavor: {get_param: minion_flavor} + fixed_network: + if: + - create_cluster_resources + - get_attr: [network, fixed_network] + - get_param: fixed_network + fixed_subnet: + if: + - create_cluster_resources + - get_attr: [network, fixed_subnet] + - get_param: fixed_subnet + network_driver: {get_param: network_driver} + flannel_network_cidr: {get_param: flannel_network_cidr} + kube_master_ip: + if: + - create_cluster_resources + - get_attr: [api_address_lb_switch, private_ip] + - get_param: existing_master_private_ip + etcd_server_ip: + if: + - create_cluster_resources + - get_attr: [etcd_address_lb_switch, private_ip] + - get_param: existing_master_private_ip + external_network: {get_param: external_network} + kube_allow_priv: {get_param: kube_allow_priv} + boot_volume_size: {get_param: boot_volume_size} + boot_volume_type: {get_param: boot_volume_type} + docker_volume_size: {get_param: docker_volume_size} + docker_volume_type: {get_param: docker_volume_type} + docker_storage_driver: {get_param: docker_storage_driver} + cgroup_driver: {get_param: cgroup_driver} + wait_condition_timeout: {get_param: wait_condition_timeout} + registry_enabled: {get_param: registry_enabled} + registry_port: {get_param: registry_port} + swift_region: {get_param: swift_region} + registry_container: {get_param: registry_container} + registry_insecure: {get_param: registry_insecure} + registry_chunksize: {get_param: registry_chunksize} + cluster_uuid: {get_param: cluster_uuid} + magnum_url: {get_param: magnum_url} + volume_driver: {get_param: volume_driver} + region_name: {get_param: region_name} + auth_url: {get_param: auth_url} + username: {get_param: username} + password: {get_param: password} + kubernetes_port: {get_param: kubernetes_port} + tls_disabled: {get_param: tls_disabled} + verify_ca: {get_param: verify_ca} + secgroup_kube_minion_id: + if: + - create_cluster_resources + - get_resource: secgroup_kube_minion + - get_param: existing_security_group + http_proxy: {get_param: http_proxy} + https_proxy: {get_param: https_proxy} + no_proxy: {get_param: no_proxy} + hyperkube_prefix: {get_param: hyperkube_prefix} + kube_tag: {get_param: minion_kube_tag} + kube_version: {get_param: kube_version} + trustee_user_id: {get_param: trustee_user_id} + trustee_username: {get_param: trustee_username} + trustee_password: {get_param: trustee_password} + trustee_domain_id: {get_param: trustee_domain_id} + trust_id: {get_param: trust_id} + cloud_provider_enabled: 
{get_param: cloud_provider_enabled} + insecure_registry_url: {get_param: insecure_registry_url} + container_infra_prefix: {get_param: container_infra_prefix} + dns_service_ip: {get_param: dns_service_ip} + dns_cluster_domain: {get_param: dns_cluster_domain} + openstack_ca: {get_param: openstack_ca} + nodes_server_group_id: {get_resource: worker_nodes_server_group} + availability_zone: {get_param: availability_zone} + pods_network_cidr: {get_param: pods_network_cidr} + kubelet_options: {get_param: kubelet_options} + kubeproxy_options: {get_param: kubeproxy_options} + octavia_enabled: {get_param: octavia_enabled} + heat_container_agent_tag: {get_param: heat_container_agent_tag} + auto_healing_enabled: {get_param: auto_healing_enabled} + npd_enabled: {get_param: npd_enabled} + auto_healing_controller: {get_param: auto_healing_controller} + ostree_remote: {get_param: ostree_remote} + ostree_commit: {get_param: ostree_commit} + use_podman: {get_param: use_podman} + selinux_mode: {get_param: selinux_mode} + container_runtime: {get_param: container_runtime} + containerd_version: {get_param: containerd_version} + containerd_tarball_url: {get_param: containerd_tarball_url} + containerd_tarball_sha256: {get_param: containerd_tarball_sha256} + +outputs: + + api_address: + condition: create_cluster_resources + value: + str_replace: + template: api_ip_address + params: + api_ip_address: {get_attr: [api_address_floating_switch, ip_address]} + description: > + This is the API endpoint of the Kubernetes cluster. Use this to access + the Kubernetes API. + + registry_address: + condition: create_cluster_resources + value: + str_replace: + template: localhost:port + params: + port: {get_param: registry_port} + description: + This is the url of docker registry server where you can store docker + images. + + kube_masters_private: + condition: master_only + value: {get_attr: [kube_masters, kube_master_ip]} + description: > + This is a list of the "private" IP addresses of all the Kubernetes masters. + + kube_masters: + condition: master_only + value: {get_attr: [kube_masters, kube_master_external_ip]} + description: > + This is a list of the "public" IP addresses of all the Kubernetes masters. + Use these IP addresses to log in to the Kubernetes masters via ssh. + + kube_minions_private: + condition: worker_only + value: {get_attr: [kube_minions, kube_minion_ip]} + description: > + This is a list of the "private" IP addresses of all the Kubernetes minions. + + kube_minions: + condition: worker_only + value: {get_attr: [kube_minions, kube_minion_external_ip]} + description: > + This is a list of the "public" IP addresses of all the Kubernetes minions. + Use these IP addresses to log in to the Kubernetes minions via ssh. diff --git a/magnum/drivers/k8s_fedora_coreos_v1/templates/kubecluster.yaml b/magnum/drivers/k8s_fedora_coreos_v1/templates/kubecluster.yaml index 01e382527f..2de5e27acd 100644 --- a/magnum/drivers/k8s_fedora_coreos_v1/templates/kubecluster.yaml +++ b/magnum/drivers/k8s_fedora_coreos_v1/templates/kubecluster.yaml @@ -773,8 +773,8 @@ parameters: prometheus_operator_chart_tag: type: string - description: The stable/prometheus-operator chart version to use. - default: v8.12.13 + description: The prometheus-community/kube-prometheus-stack chart version to use. + default: 33.0.0 prometheus_adapter_enabled: type: boolean @@ -783,8 +783,8 @@ parameters: prometheus_adapter_chart_tag: type: string - description: The stable/prometheus-adapter chart version to use. 
- default: 1.4.0 + description: The prometheus-community/prometheus-adapter chart version to use. + default: 3.0.2 prometheus_adapter_configmap: type: string @@ -1117,6 +1117,10 @@ resources: - protocol: udp port_range_min: 8472 port_range_max: 8472 + # Prometheus Server + - protocol: tcp + port_range_min: 9090 + port_range_max: 9090 secgroup_kube_minion: condition: create_cluster_resources diff --git a/releasenotes/notes/update-monitoring-charts-1067dc4a0f0060b6.yaml b/releasenotes/notes/update-monitoring-charts-1067dc4a0f0060b6.yaml new file mode 100644 index 0000000000..174f6b8a3b --- /dev/null +++ b/releasenotes/notes/update-monitoring-charts-1067dc4a0f0060b6.yaml @@ -0,0 +1,10 @@ +--- +upgrade: + - Prometheus-Adapter helm chart updated to 2.12.1 from 1.4.0. + - Prometheus-Operator helm chart updated to kube-prometheus-stack:17.2.0 + from prometheus-operator:v8.12.13. + - Prometheus-server now runs only on master nodes + +deprecations: + - Enabling monitoring using the prometheus_monitoring label is deprecated + and will be removed in the X cycle.
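
Not part of the patch itself: for reviewers who want to exercise the updated monitoring stack, a minimal sketch of how the relevant labels could be set when creating a cluster template. The label names map to the heat parameters added above; the template, image, flavor and network names here are placeholders, and the chart tags are simply the fedora_coreos defaults introduced by this change, so adjust everything for your own cloud.

    # Hypothetical template/image/flavor/network names; adjust for your deployment.
    openstack coe cluster template create k8s-monitoring-test \
        --coe kubernetes \
        --image fedora-coreos-35 \
        --external-network public \
        --master-flavor m1.medium \
        --flavor m1.medium \
        --network-driver calico \
        --labels monitoring_enabled=true,monitoring_retention_days=14,monitoring_retention_size=14,monitoring_interval_seconds=30,prometheus_operator_chart_tag=33.0.0,prometheus_adapter_enabled=true,prometheus_adapter_chart_tag=3.0.2

    # Boot a cluster from the template, then verify that the prometheus server
    # pods (installed into kube-system by the helm script) land on master nodes only.
    openstack coe cluster create monitoring-test \
        --cluster-template k8s-monitoring-test \
        --node-count 2
    kubectl -n kube-system get pods -o wide | grep prometheus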