Skip to content

Commit

Permalink
Take feedback and improve monitoring handling for autopilot clusters
Browse files Browse the repository at this point in the history
  • Loading branch information
IIBenII authored and bberriot committed Jan 3, 2023
1 parent 0ac5cf7 commit 2c649a8
Show file tree
Hide file tree
Showing 27 changed files with 46 additions and 135 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ Then perform the following commands on the root folder:
| cluster\_dns\_scope | The scope of access to cluster DNS records. DNS\_SCOPE\_UNSPECIFIED (default) or CLUSTER\_SCOPE or VPC\_SCOPE. | `string` | `"DNS_SCOPE_UNSPECIFIED"` | no |
| cluster\_ipv4\_cidr | The IP address range of the kubernetes pods in this cluster. Default is an automatically assigned CIDR. | `string` | `null` | no |
| cluster\_resource\_labels | The GCE resource labels (a map of key/value pairs) to be applied to the cluster | `map(string)` | `{}` | no |
| cluster\_telemetry\_type | Available options include ENABLED, DISABLED, and SYSTEM\_ONLY | `string` | `null` | no |
| configure\_ip\_masq | Enables the installation of ip masquerading, which is usually no longer required when using aliased IP addresses. IP masquerading uses a kubectl call, so when you have a private cluster, you will need access to the API server. | `bool` | `false` | no |
| create\_service\_account | Defines if service account specified to run nodes should be created. | `bool` | `true` | no |
| database\_encryption | Application-layer Secrets Encryption settings. The object format is {state = string, key\_name = string}. Valid values of state are: "ENCRYPTED"; "DECRYPTED". key\_name is the name of a CloudKMS key. | `list(object({ state = string, key_name = string }))` | <pre>[<br> {<br> "key_name": "",<br> "state": "DECRYPTED"<br> }<br>]</pre> | no |
Expand Down
10 changes: 9 additions & 1 deletion autogen/main/cluster.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -84,16 +84,25 @@ resource "google_container_cluster" "primary" {
}
}
{% endif %}
{% if autopilot_cluster != true %}
# only one of logging/monitoring_service or logging/monitoring_config can be specified
{% if beta_cluster %}
logging_service = local.cluster_telemetry_type_is_set || local.logmon_config_is_set ? null : var.logging_service
{% else %}
logging_service = local.logmon_config_is_set ? null : var.logging_service
{% endif %}
dynamic "logging_config" {
for_each = length(var.logging_enabled_components) > 0 ? [1] : []

content {
enable_components = var.logging_enabled_components
}
}
{% if beta_cluster %}
monitoring_service = local.cluster_telemetry_type_is_set || local.logmon_config_is_set ? null : var.monitoring_service
{% else %}
monitoring_service = local.logmon_config_is_set ? null : var.monitoring_service
{% endif %}
dynamic "monitoring_config" {
for_each = length(var.monitoring_enabled_components) > 0 || var.monitoring_enable_managed_prometheus ? [1] : []

Expand All @@ -109,7 +118,6 @@ resource "google_container_cluster" "primary" {
}
}
}
{% if autopilot_cluster != true %}
cluster_autoscaling {
enabled = var.cluster_autoscaling.enabled
dynamic "auto_provisioning_defaults" {
Expand Down
7 changes: 5 additions & 2 deletions autogen/main/main.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,10 @@ locals {
cluster_cloudrun_enabled = var.cloudrun
gke_backup_agent_config = var.gke_backup_agent_config ? [{ enabled = true }] : [{ enabled = false }]
{% endif %}
logmon_config_is_set = length(var.logging_enabled_components) > 0 || length(var.monitoring_enabled_components) > 0 || var.monitoring_enable_managed_prometheus

{% if autopilot_cluster != true %}
logmon_config_is_set = length(var.logging_enabled_components) > 0 || length(var.monitoring_enabled_components) > 0 || var.monitoring_enable_managed_prometheus
{% endif %}

cluster_authenticator_security_group = var.authenticator_security_group == null ? [] : [{
security_group = var.authenticator_security_group
Expand Down Expand Up @@ -223,10 +226,10 @@ locals {
cluster_pod_security_policy_enabled = local.cluster_output_pod_security_policy_enabled
cluster_intranode_visibility_enabled = local.cluster_output_intranode_visbility_enabled
confidential_node_config = var.enable_confidential_nodes == true ? [{ enabled = true }] : []
cluster_telemetry_type_is_set = var.cluster_telemetry_type != null

# /BETA features
{% endif %}
cluster_telemetry_type_is_set = var.cluster_telemetry_type != null

cluster_maintenance_window_is_recurring = var.maintenance_recurrence != "" && var.maintenance_end_time != "" ? [1] : []
cluster_maintenance_window_is_daily = length(local.cluster_maintenance_window_is_recurring) > 0 ? [] : [1]
Expand Down
4 changes: 4 additions & 0 deletions autogen/main/variables.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -326,11 +326,13 @@ variable "configure_ip_masq" {
default = false
}

{% if beta_cluster %}
# Optional telemetry selector, declared only when the template is rendered
# with beta_cluster set. It defaults to null (unset). When a non-null value is
# supplied, local.cluster_telemetry_type_is_set evaluates to true and the
# cluster resource passes null for logging_service / monitoring_service
# instead of var.logging_service / var.monitoring_service.
variable "cluster_telemetry_type" {
type = string
description = "Available options include ENABLED, DISABLED, and SYSTEM_ONLY"
default = null
}
{% endif %}

variable "logging_service" {
type = string
Expand Down Expand Up @@ -644,6 +646,7 @@ variable "timeouts" {
}
}

{% if autopilot_cluster != true %}
variable "monitoring_enable_managed_prometheus" {
type = bool
description = "Configuration for Managed Service for Prometheus. Whether or not the managed collection is enabled."
Expand All @@ -661,6 +664,7 @@ variable "logging_enabled_components" {
description = "List of services to monitor: SYSTEM_COMPONENTS, WORKLOADS. Empty list is default GKE configuration."
default = []
}
{% endif %}

{% if beta_cluster %}
{% if autopilot_cluster != true %}
Expand Down
4 changes: 2 additions & 2 deletions cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,15 @@ resource "google_container_cluster" "primary" {
min_master_version = var.release_channel == null || var.release_channel == "UNSPECIFIED" ? local.master_version : null

# only one of logging/monitoring_service or logging/monitoring_config can be specified
logging_service = local.cluster_telemetry_type_is_set || local.logmon_config_is_set ? null : var.logging_service
logging_service = local.logmon_config_is_set ? null : var.logging_service
dynamic "logging_config" {
for_each = length(var.logging_enabled_components) > 0 ? [1] : []

content {
enable_components = var.logging_enabled_components
}
}
monitoring_service = local.cluster_telemetry_type_is_set || local.logmon_config_is_set ? null : var.monitoring_service
monitoring_service = local.logmon_config_is_set ? null : var.monitoring_service
dynamic "monitoring_config" {
for_each = length(var.monitoring_enabled_components) > 0 || var.monitoring_enable_managed_prometheus ? [1] : []

Expand Down
4 changes: 2 additions & 2 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ locals {
provider = null
}]
cluster_gce_pd_csi_config = var.gce_pd_csi_driver ? [{ enabled = true }] : [{ enabled = false }]
logmon_config_is_set = length(var.logging_enabled_components) > 0 || length(var.monitoring_enabled_components) > 0 || var.monitoring_enable_managed_prometheus

logmon_config_is_set = length(var.logging_enabled_components) > 0 || length(var.monitoring_enabled_components) > 0 || var.monitoring_enable_managed_prometheus

cluster_authenticator_security_group = var.authenticator_security_group == null ? [] : [{
security_group = var.authenticator_security_group
Expand Down Expand Up @@ -156,7 +157,6 @@ locals {
cluster_workload_identity_config = !local.workload_identity_enabled ? [] : var.identity_namespace == "enabled" ? [{
workload_pool = "${var.project_id}.svc.id.goog" }] : [{ workload_pool = var.identity_namespace
}]
cluster_telemetry_type_is_set = var.cluster_telemetry_type != null

cluster_maintenance_window_is_recurring = var.maintenance_recurrence != "" && var.maintenance_end_time != "" ? [1] : []
cluster_maintenance_window_is_daily = length(local.cluster_maintenance_window_is_recurring) > 0 ? [] : [1]
Expand Down
3 changes: 0 additions & 3 deletions modules/beta-autopilot-private-cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ Then perform the following commands on the root folder:
| ip\_range\_services | The _name_ of the secondary subnet range to use for services | `string` | n/a | yes |
| issue\_client\_certificate | Issues a client certificate to authenticate to the cluster endpoint. To maximize the security of your cluster, leave this option disabled. Client certificates don't automatically rotate and aren't easily revocable. WARNING: changing this after cluster creation is destructive! | `bool` | `false` | no |
| kubernetes\_version | The Kubernetes version of the masters. If set to 'latest' it will pull latest available version in the selected region. | `string` | `"latest"` | no |
| logging\_enabled\_components | List of services to monitor: SYSTEM\_COMPONENTS, WORKLOADS. Empty list is default GKE configuration. | `list(string)` | `[]` | no |
| logging\_service | The logging service that the cluster should write logs to. Available options include logging.googleapis.com, logging.googleapis.com/kubernetes (beta), and none | `string` | `"logging.googleapis.com/kubernetes"` | no |
| maintenance\_end\_time | Time window specified for recurring maintenance operations in RFC3339 format | `string` | `""` | no |
| maintenance\_exclusions | List of maintenance exclusions. A cluster can have up to three | `list(object({ name = string, start_time = string, end_time = string, exclusion_scope = string }))` | `[]` | no |
Expand All @@ -111,8 +110,6 @@ Then perform the following commands on the root folder:
| master\_authorized\_networks | List of master authorized networks. If none are provided, disallow external access (except the cluster node IPs, which GKE automatically whitelists). | `list(object({ cidr_block = string, display_name = string }))` | `[]` | no |
| master\_global\_access\_enabled | Whether the cluster master is accessible globally (from any region) or only within the same region as the private endpoint. | `bool` | `true` | no |
| master\_ipv4\_cidr\_block | (Beta) The IP range in CIDR notation to use for the hosted master network | `string` | `"10.0.0.0/28"` | no |
| monitoring\_enable\_managed\_prometheus | Configuration for Managed Service for Prometheus. Whether or not the managed collection is enabled. | `bool` | `false` | no |
| monitoring\_enabled\_components | List of services to monitor: SYSTEM\_COMPONENTS, WORKLOADS (provider version >= 3.89.0). Empty list is default GKE configuration. | `list(string)` | `[]` | no |
| monitoring\_service | The monitoring service that the cluster should write metrics to. Automatically send metrics from pods in the cluster to the Google Cloud Monitoring API. VM metrics will be collected by Google Compute Engine regardless of this setting. Available options include monitoring.googleapis.com, monitoring.googleapis.com/kubernetes (beta) and none | `string` | `"monitoring.googleapis.com/kubernetes"` | no |
| name | The name of the cluster (required) | `string` | n/a | yes |
| network | The VPC network to host the cluster in (required) | `string` | n/a | yes |
Expand Down
25 changes: 0 additions & 25 deletions modules/beta-autopilot-private-cluster/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -60,31 +60,6 @@ resource "google_container_cluster" "primary" {

min_master_version = var.release_channel == null || var.release_channel == "UNSPECIFIED" ? local.master_version : null

# only one of logging/monitoring_service or logging/monitoring_config can be specified
logging_service = local.cluster_telemetry_type_is_set || local.logmon_config_is_set ? null : var.logging_service
dynamic "logging_config" {
for_each = length(var.logging_enabled_components) > 0 ? [1] : []

content {
enable_components = var.logging_enabled_components
}
}
monitoring_service = local.cluster_telemetry_type_is_set || local.logmon_config_is_set ? null : var.monitoring_service
dynamic "monitoring_config" {
for_each = length(var.monitoring_enabled_components) > 0 || var.monitoring_enable_managed_prometheus ? [1] : []

content {
enable_components = length(var.monitoring_enabled_components) > 0 ? var.monitoring_enabled_components : []

dynamic "managed_prometheus" {
for_each = var.monitoring_enable_managed_prometheus ? [1] : []

content {
enabled = var.monitoring_enable_managed_prometheus
}
}
}
}
cluster_autoscaling {
dynamic "auto_provisioning_defaults" {
for_each = var.create_service_account ? [1] : []
Expand Down
4 changes: 2 additions & 2 deletions modules/beta-autopilot-private-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ locals {
cluster_alias_ranges_cidr = var.add_cluster_firewall_rules ? { for range in toset(data.google_compute_subnetwork.gke_subnetwork[0].secondary_ip_range) : range.range_name => range.ip_cidr_range } : {}
pod_all_ip_ranges = var.add_cluster_firewall_rules ? [local.cluster_alias_ranges_cidr[var.ip_range_pods]] : []

logmon_config_is_set = length(var.logging_enabled_components) > 0 || length(var.monitoring_enabled_components) > 0 || var.monitoring_enable_managed_prometheus


cluster_authenticator_security_group = var.authenticator_security_group == null ? [] : [{
security_group = var.authenticator_security_group
Expand Down Expand Up @@ -130,9 +130,9 @@ locals {
cluster_pod_security_policy_enabled = local.cluster_output_pod_security_policy_enabled
cluster_intranode_visibility_enabled = local.cluster_output_intranode_visbility_enabled
confidential_node_config = var.enable_confidential_nodes == true ? [{ enabled = true }] : []
cluster_telemetry_type_is_set = var.cluster_telemetry_type != null

# /BETA features
cluster_telemetry_type_is_set = var.cluster_telemetry_type != null

cluster_maintenance_window_is_recurring = var.maintenance_recurrence != "" && var.maintenance_end_time != "" ? [1] : []
cluster_maintenance_window_is_daily = length(local.cluster_maintenance_window_is_recurring) > 0 ? [] : [1]
Expand Down
17 changes: 0 additions & 17 deletions modules/beta-autopilot-private-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -417,21 +417,4 @@ variable "timeouts" {
}
}

variable "monitoring_enable_managed_prometheus" {
type = bool
description = "Configuration for Managed Service for Prometheus. Whether or not the managed collection is enabled."
default = false
}

variable "monitoring_enabled_components" {
type = list(string)
description = "List of services to monitor: SYSTEM_COMPONENTS, WORKLOADS (provider version >= 3.89.0). Empty list is default GKE configuration."
default = []
}

variable "logging_enabled_components" {
type = list(string)
description = "List of services to monitor: SYSTEM_COMPONENTS, WORKLOADS. Empty list is default GKE configuration."
default = []
}

3 changes: 0 additions & 3 deletions modules/beta-autopilot-public-cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,12 @@ Then perform the following commands on the root folder:
| ip\_range\_services | The _name_ of the secondary subnet range to use for services | `string` | n/a | yes |
| issue\_client\_certificate | Issues a client certificate to authenticate to the cluster endpoint. To maximize the security of your cluster, leave this option disabled. Client certificates don't automatically rotate and aren't easily revocable. WARNING: changing this after cluster creation is destructive! | `bool` | `false` | no |
| kubernetes\_version | The Kubernetes version of the masters. If set to 'latest' it will pull latest available version in the selected region. | `string` | `"latest"` | no |
| logging\_enabled\_components | List of services to monitor: SYSTEM\_COMPONENTS, WORKLOADS. Empty list is default GKE configuration. | `list(string)` | `[]` | no |
| logging\_service | The logging service that the cluster should write logs to. Available options include logging.googleapis.com, logging.googleapis.com/kubernetes (beta), and none | `string` | `"logging.googleapis.com/kubernetes"` | no |
| maintenance\_end\_time | Time window specified for recurring maintenance operations in RFC3339 format | `string` | `""` | no |
| maintenance\_exclusions | List of maintenance exclusions. A cluster can have up to three | `list(object({ name = string, start_time = string, end_time = string, exclusion_scope = string }))` | `[]` | no |
| maintenance\_recurrence | Frequency of the recurring maintenance window in RFC5545 format. | `string` | `""` | no |
| maintenance\_start\_time | Time window specified for daily or recurring maintenance operations in RFC3339 format | `string` | `"05:00"` | no |
| master\_authorized\_networks | List of master authorized networks. If none are provided, disallow external access (except the cluster node IPs, which GKE automatically whitelists). | `list(object({ cidr_block = string, display_name = string }))` | `[]` | no |
| monitoring\_enable\_managed\_prometheus | Configuration for Managed Service for Prometheus. Whether or not the managed collection is enabled. | `bool` | `false` | no |
| monitoring\_enabled\_components | List of services to monitor: SYSTEM\_COMPONENTS, WORKLOADS (provider version >= 3.89.0). Empty list is default GKE configuration. | `list(string)` | `[]` | no |
| monitoring\_service | The monitoring service that the cluster should write metrics to. Automatically send metrics from pods in the cluster to the Google Cloud Monitoring API. VM metrics will be collected by Google Compute Engine regardless of this setting. Available options include monitoring.googleapis.com, monitoring.googleapis.com/kubernetes (beta) and none | `string` | `"monitoring.googleapis.com/kubernetes"` | no |
| name | The name of the cluster (required) | `string` | n/a | yes |
| network | The VPC network to host the cluster in (required) | `string` | n/a | yes |
Expand Down
25 changes: 0 additions & 25 deletions modules/beta-autopilot-public-cluster/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -60,31 +60,6 @@ resource "google_container_cluster" "primary" {

min_master_version = var.release_channel == null || var.release_channel == "UNSPECIFIED" ? local.master_version : null

# only one of logging/monitoring_service or logging/monitoring_config can be specified
logging_service = local.cluster_telemetry_type_is_set || local.logmon_config_is_set ? null : var.logging_service
dynamic "logging_config" {
for_each = length(var.logging_enabled_components) > 0 ? [1] : []

content {
enable_components = var.logging_enabled_components
}
}
monitoring_service = local.cluster_telemetry_type_is_set || local.logmon_config_is_set ? null : var.monitoring_service
dynamic "monitoring_config" {
for_each = length(var.monitoring_enabled_components) > 0 || var.monitoring_enable_managed_prometheus ? [1] : []

content {
enable_components = length(var.monitoring_enabled_components) > 0 ? var.monitoring_enabled_components : []

dynamic "managed_prometheus" {
for_each = var.monitoring_enable_managed_prometheus ? [1] : []

content {
enabled = var.monitoring_enable_managed_prometheus
}
}
}
}
cluster_autoscaling {
dynamic "auto_provisioning_defaults" {
for_each = var.create_service_account ? [1] : []
Expand Down
4 changes: 2 additions & 2 deletions modules/beta-autopilot-public-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ locals {
cluster_alias_ranges_cidr = var.add_cluster_firewall_rules ? { for range in toset(data.google_compute_subnetwork.gke_subnetwork[0].secondary_ip_range) : range.range_name => range.ip_cidr_range } : {}
pod_all_ip_ranges = var.add_cluster_firewall_rules ? [local.cluster_alias_ranges_cidr[var.ip_range_pods]] : []

logmon_config_is_set = length(var.logging_enabled_components) > 0 || length(var.monitoring_enabled_components) > 0 || var.monitoring_enable_managed_prometheus


cluster_authenticator_security_group = var.authenticator_security_group == null ? [] : [{
security_group = var.authenticator_security_group
Expand Down Expand Up @@ -129,9 +129,9 @@ locals {
cluster_pod_security_policy_enabled = local.cluster_output_pod_security_policy_enabled
cluster_intranode_visibility_enabled = local.cluster_output_intranode_visbility_enabled
confidential_node_config = var.enable_confidential_nodes == true ? [{ enabled = true }] : []
cluster_telemetry_type_is_set = var.cluster_telemetry_type != null

# /BETA features
cluster_telemetry_type_is_set = var.cluster_telemetry_type != null

cluster_maintenance_window_is_recurring = var.maintenance_recurrence != "" && var.maintenance_end_time != "" ? [1] : []
cluster_maintenance_window_is_daily = length(local.cluster_maintenance_window_is_recurring) > 0 ? [] : [1]
Expand Down
17 changes: 0 additions & 17 deletions modules/beta-autopilot-public-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -387,21 +387,4 @@ variable "timeouts" {
}
}

variable "monitoring_enable_managed_prometheus" {
type = bool
description = "Configuration for Managed Service for Prometheus. Whether or not the managed collection is enabled."
default = false
}

variable "monitoring_enabled_components" {
type = list(string)
description = "List of services to monitor: SYSTEM_COMPONENTS, WORKLOADS (provider version >= 3.89.0). Empty list is default GKE configuration."
default = []
}

variable "logging_enabled_components" {
type = list(string)
description = "List of services to monitor: SYSTEM_COMPONENTS, WORKLOADS. Empty list is default GKE configuration."
default = []
}

Loading

0 comments on commit 2c649a8

Please sign in to comment.