From 746f6e2b50cc6ae54f3b3b2ec793534739446856 Mon Sep 17 00:00:00 2001 From: Greg Weber Date: Thu, 13 Jun 2019 21:16:12 -0700 Subject: [PATCH 1/9] GKE terraform * use gcloud CLI for service account provisioning * provide option to use small instances or prod instances * local-exec scripts: add set -e and make idempotent --- deploy/gcp/.gitignore | 1 + deploy/gcp/README.md | 78 ++++++++++++------- deploy/gcp/data.tf | 6 +- deploy/gcp/main.tf | 65 ++++++++++------ deploy/gcp/prod.tfvars | 5 ++ deploy/gcp/small.tfvars | 3 + .../templates/tidb-cluster-values.yaml.tpl | 2 +- deploy/gcp/variables.tf | 20 +++-- 8 files changed, 111 insertions(+), 69 deletions(-) create mode 100644 deploy/gcp/prod.tfvars create mode 100644 deploy/gcp/small.tfvars diff --git a/deploy/gcp/.gitignore b/deploy/gcp/.gitignore index 955562c1e4..4c7f1937b0 100644 --- a/deploy/gcp/.gitignore +++ b/deploy/gcp/.gitignore @@ -2,3 +2,4 @@ *.tfstate* credentials rendered +terraform-key.json diff --git a/deploy/gcp/README.md b/deploy/gcp/README.md index 2ff61c067c..7745d036d7 100644 --- a/deploy/gcp/README.md +++ b/deploy/gcp/README.md @@ -36,28 +36,52 @@ gcloud services enable container.googleapis.com The terraform script expects three environment variables. You can let Terraform prompt you for them, or `export` them in the `~/.bash_profile` file ahead of time. The required environment variables are: -* `TF_VAR_GCP_CREDENTIALS_PATH`: Path to a valid GCP credentials file. - - It is recommended to create a new service account to be used by Terraform. See [this page](https://cloud.google.com/iam/docs/creating-managing-service-accounts) to create a service account and grant `Project Editor` role to it. - - See [this page](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) to create service account keys, and choose `JSON` key type during creation. The downloaded `JSON` file that contains the private key is the credentials file you need. 
* `TF_VAR_GCP_REGION`: The region to create the resources in, for example: `us-west1`. * `TF_VAR_GCP_PROJECT`: The name of the GCP project. - -> *Note*: The service account must have sufficient permissions to create resources in the project. The `Project Editor` primitive will accomplish this. - -To set the three environment variables, for example, you can enter in your terminal: +* `TF_VAR_GCP_CREDENTIALS_PATH`: Path to a valid GCP credentials file. + - It is recommended to create a new service account to be used by Terraform as shown in the below example. ```bash -# Replace the values with the path to the JSON file you have downloaded, the GCP region and your GCP project name. -export TF_VAR_GCP_CREDENTIALS_PATH="/Path/to/my-project.json" -export TF_VAR_GCP_REGION="us-west1" -export TF_VAR_GCP_PROJECT="my-project" +# Replace the region with your GCP region and your GCP project name. +export TF_VAR_GCP_REGION=us-west1 +# First make sure you are connected to the correct project. gcloud config set project $PROJECT +export TF_VAR_GCP_PROJECT=$(gcloud config get-value project) +gcloud iam service-accounts create --display-name terraform terraform +email="terraform@${TF_VAR_GCP_PROJECT}.iam.gserviceaccount.com" +gcloud projects add-iam-policy-binding "$TF_VAR_GCP_PROJECT" --member "$email" --role roles/container.clusterAdmin +gcloud projects add-iam-policy-binding "$TF_VAR_GCP_PROJECT" --member "$email" --role roles/compute.networkAdmin +gcloud projects add-iam-policy-binding "$TF_VAR_GCP_PROJECT" --member "$email" --role roles/compute.viewer +gcloud projects add-iam-policy-binding "$TF_VAR_GCP_PROJECT" --member "$email" --role roles/compute.securityAdmin +gcloud projects add-iam-policy-binding "$TF_VAR_GCP_PROJECT" --member "$email" --role roles/iam.serviceAccountUser +gcloud projects add-iam-policy-binding "$TF_VAR_GCP_PROJECT" --member "$email" --role roles/compute.instanceAdmin.v1 + +gcloud iam service-accounts keys create terraform-key.json --iam-account "$email" 
+export TF_VAR_GCP_CREDENTIALS_PATH="$(pwd)/terraform-key.json" ``` You can also append them in your `~/.bash_profile` so they will be exported automatically next time. + ## Deploy -The default setup creates a new VPC, two subnetworks, and an f1-micro instance as a bastion machine. The GKE cluster is created with the following instance types as worker nodes: + +Now that you have configured gcloud access, make sure you have a copy of the repo: + +```bash +git clone --depth=1 https://github.com/pingcap/tidb-operator +cd tidb-operator/deploy/gcp +``` + +You need to decide on instance types. If you just want to just get a feel for a TiDB deployment and lower your cost, you can use the small settings. + + cp small.tfvars terraform.tfvars + +If you want to benchmark a production deployment, run: + + cp prod.tfvars terraform.tfvars + +The terraform creates a new VPC, two subnetworks, and an f1-micro instance as a bastion machine. +The production setup used the following instance types: * 3 n1-standard-4 instances for PD * 3 n1-highmem-8 instances for TiKV @@ -66,13 +90,11 @@ The default setup creates a new VPC, two subnetworks, and an f1-micro instance a > *Note*: The number of nodes created depends on how many availability zones there are in the chosen region. Most have 3 zones, but us-central1 has 4. See [Regions and Zones](https://cloud.google.com/compute/docs/regions-zones/) for more information and see the [Customize](#customize) section on how to customize node pools in a regional cluster. -The default setup, as listed above, requires at least 91 CPUs which exceed the default CPU quota of a GCP project. To increase your project's quota, follow the instructions [here](https://cloud.google.com/compute/quotas). You need more CPUs if you need to scale out. +The production setup, as listed above, requires at least 91 CPUs which exceed the default CPU quota of a GCP project. 
To increase your project's quota, follow the instructions [here](https://cloud.google.com/compute/quotas). You need more CPUs if you need to scale out. -Now that you have configured everything needed, you can launch the script to deploy the TiDB cluster: +Once you choose your instances, you can install your TiDB cluster with: ```bash -git clone --depth=1 https://github.com/pingcap/tidb-operator -cd tidb-operator/deploy/gcp terraform init terraform apply ``` @@ -86,11 +108,11 @@ Apply complete! Resources: 17 added, 0 changed, 0 destroyed. Outputs: -cluster_id = my-cluster -cluster_name = my-cluster +cluster_id = tidb +cluster_name = tidb how_to_connect_to_mysql_from_bastion = mysql -h 172.31.252.20 -P 4000 -u root how_to_ssh_to_bastion = gcloud compute ssh bastion --zone us-west1-b -kubeconfig_file = ./credentials/kubeconfig_my-cluster +kubeconfig_file = ./credentials/kubeconfig_tidb monitor_ilb_ip = 35.227.134.146 monitor_port = 3000 region = us-west1 @@ -113,7 +135,7 @@ mysql -h -P 4000 -u root ## Interact with the cluster -You can interact with the cluster using `kubectl` and `helm` with the kubeconfig file `credentials/kubeconfig_` as follows. The default `cluster_name` is `my-cluster`, which can be changed in `variables.tf`. +You can interact with the cluster using `kubectl` and `helm` with the kubeconfig file `credentials/kubeconfig_` as follows. The default `cluster_name` is `tidb`, which can be changed in `variables.tf`. ```bash # By specifying --kubeconfig argument. @@ -178,7 +200,7 @@ You can change default values in `variables.tf` (such as the cluster name and th ### Customize GCP resources -GCP allows attaching a local SSD to any instance type that is `n1-standard-1` or greater. This allows for good customizability. +GCP allows attaching a local SSD to any instance type that is `n1-standard-1` or greater. 
### Customize TiDB parameters @@ -199,9 +221,9 @@ gcloud compute instance-groups managed list | grep monitor And the result will be something like this: ```bash -gke-my-cluster-monitor-pool-08578e18-grp us-west1-b zone gke-my-cluster-monitor-pool-08578e18 0 0 gke-my-cluster-monitor-pool-08578e18 no -gke-my-cluster-monitor-pool-7e31100f-grp us-west1-c zone gke-my-cluster-monitor-pool-7e31100f 1 1 gke-my-cluster-monitor-pool-7e31100f no -gke-my-cluster-monitor-pool-78a961e5-grp us-west1-a zone gke-my-cluster-monitor-pool-78a961e5 1 1 gke-my-cluster-monitor-pool-78a961e5 no +gke-tidb-monitor-pool-08578e18-grp us-west1-b zone gke-tidb-monitor-pool-08578e18 0 0 gke-tidb-monitor-pool-08578e18 no +gke-tidb-monitor-pool-7e31100f-grp us-west1-c zone gke-tidb-monitor-pool-7e31100f 1 1 gke-tidb-monitor-pool-7e31100f no +gke-tidb-monitor-pool-78a961e5-grp us-west1-a zone gke-tidb-monitor-pool-78a961e5 1 1 gke-tidb-monitor-pool-78a961e5 no ``` The first column is the name of the managed instance group, and the second column is the zone in which it was created. You also need the name of the instance in that group, and you can get it by running: @@ -213,16 +235,16 @@ gcloud compute instance-groups managed list-instances *Note*: When `terraform destroy` is running, an error with the following message might occur: `Error reading Container Cluster "my-cluster": Cluster "my-cluster" has status "RECONCILING" with message""`. This happens when GCP is upgrading the kubernetes master node, which it does automatically at times. While this is happening, it is not possible to delete the cluster. When it is done, run `terraform destroy` again. +> *Note*: When `terraform destroy` is running, an error with the following message might occur: `Error reading Container Cluster "tidb": Cluster "tidb" has status "RECONCILING" with message""`. This happens when GCP is upgrading the kubernetes master node, which it does automatically at times. 
While this is happening, it is not possible to delete the cluster. When it is done, run `terraform destroy` again. ## More information diff --git a/deploy/gcp/data.tf b/deploy/gcp/data.tf index 49b04c47f4..3eda2d6872 100644 --- a/deploy/gcp/data.tf +++ b/deploy/gcp/data.tf @@ -7,13 +7,11 @@ data "template_file" "tidb_cluster_values" { tikv_replicas = var.tikv_replica_count tidb_replicas = var.tidb_replica_count operator_version = var.tidb_operator_version + tidb_operator_registry = var.tidb_operator_registry } } -data external "available_zones_in_region" { - depends_on = [null_resource.prepare-dir] - program = ["bash", "-c", "gcloud compute regions describe ${var.GCP_REGION} --format=json | jq '{zone: .zones|.[0]|match(\"[^/]*$\"; \"g\")|.string}'"] -} +data "google_compute_zones" "available" { } data "external" "tidb_ilb_ip" { depends_on = [null_resource.deploy-tidb-cluster] diff --git a/deploy/gcp/main.tf b/deploy/gcp/main.tf index 6c7677e08c..95fb11592f 100644 --- a/deploy/gcp/main.tf +++ b/deploy/gcp/main.tf @@ -254,7 +254,7 @@ resource "google_compute_firewall" "allow_ssh_from_bastion" { resource "google_compute_instance" "bastion" { project = var.GCP_PROJECT - zone = data.external.available_zones_in_region.result["zone"] + zone = data.google_compute_zones.available.names[0] machine_type = var.bastion_instance_type name = "bastion" @@ -308,62 +308,77 @@ resource "null_resource" "setup-env" { depends_on = [ google_container_cluster.cluster, null_resource.get-credentials, + var.tidb_operator_registry, + var.tidb_operator_version, ] provisioner "local-exec" { working_dir = path.module + interpreter = ["bash", "-c"] command = </dev/null; then + kubectl create clusterrolebinding cluster-admin-binding --clusterrole cluster-admin --user $(gcloud config get-value account) +fi + +if ! 
kubectl get serviceaccount -n kube-system tiller 2>/dev/null ; then + kubectl create serviceaccount --namespace kube-system tiller +fi + kubectl apply -f manifests/crd.yaml kubectl apply -k manifests/local-ssd kubectl apply -f manifests/gke/persistent-disk.yaml kubectl apply -f manifests/tiller-rbac.yaml + helm init --service-account tiller --upgrade --wait until helm ls; do echo "Wait until tiller is ready" done -helm install --namespace tidb-admin --name tidb-operator ${path.module}/charts/tidb-operator +helm upgrade --install tidb-operator --namespace tidb-admin ${path.module}/charts/tidb-operator --set operatorImage=${var.tidb_operator_registry}/tidb-operator:${var.tidb_operator_version} EOS -environment = { -KUBECONFIG = local.kubeconfig -} -} + environment = { + KUBECONFIG = local.kubeconfig + } + } } resource "null_resource" "deploy-tidb-cluster" { -depends_on = [ -null_resource.setup-env, -local_file.tidb-cluster-values, -google_container_node_pool.pd_pool, -google_container_node_pool.tikv_pool, -google_container_node_pool.tidb_pool, -] - -triggers = { -values = data.template_file.tidb_cluster_values.rendered -} + depends_on = [ + null_resource.setup-env, + local_file.tidb-cluster-values, + google_container_node_pool.pd_pool, + google_container_node_pool.tikv_pool, + google_container_node_pool.tidb_pool, + ] -provisioner "local-exec" { + triggers = { + values = data.template_file.tidb_cluster_values.rendered + } + + provisioner "local-exec" { + interpreter = ["bash", "-c"] command = < Date: Sun, 16 Jun 2019 09:45:05 -0700 Subject: [PATCH 2/9] use a separate shell script for creating a service account --- deploy/gcp/.gitignore | 1 + deploy/gcp/README.md | 28 +++++++++------------------- deploy/gcp/create-service-account.sh | 27 +++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 19 deletions(-) create mode 100755 deploy/gcp/create-service-account.sh diff --git a/deploy/gcp/.gitignore b/deploy/gcp/.gitignore index 4c7f1937b0..2db6c700ed 100644 --- 
a/deploy/gcp/.gitignore +++ b/deploy/gcp/.gitignore @@ -3,3 +3,4 @@ credentials rendered terraform-key.json +credentials.auto.tfvars diff --git a/deploy/gcp/README.md b/deploy/gcp/README.md index 7745d036d7..44ff4f7643 100644 --- a/deploy/gcp/README.md +++ b/deploy/gcp/README.md @@ -34,34 +34,24 @@ gcloud services enable container.googleapis.com ### Configure Terraform -The terraform script expects three environment variables. You can let Terraform prompt you for them, or `export` them in the `~/.bash_profile` file ahead of time. The required environment variables are: +The terraform script expects three variables to be set. * `TF_VAR_GCP_REGION`: The region to create the resources in, for example: `us-west1`. * `TF_VAR_GCP_PROJECT`: The name of the GCP project. * `TF_VAR_GCP_CREDENTIALS_PATH`: Path to a valid GCP credentials file. - It is recommended to create a new service account to be used by Terraform as shown in the below example. +Below we will set these environment variables + ```bash # Replace the region with your GCP region and your GCP project name. -export TF_VAR_GCP_REGION=us-west1 +echo GCP_REGION=us-west1 >> terraform.tfvars # First make sure you are connected to the correct project. 
gcloud config set project $PROJECT -export TF_VAR_GCP_PROJECT=$(gcloud config get-value project) -gcloud iam service-accounts create --display-name terraform terraform -email="terraform@${TF_VAR_GCP_PROJECT}.iam.gserviceaccount.com" -gcloud projects add-iam-policy-binding "$TF_VAR_GCP_PROJECT" --member "$email" --role roles/container.clusterAdmin -gcloud projects add-iam-policy-binding "$TF_VAR_GCP_PROJECT" --member "$email" --role roles/compute.networkAdmin -gcloud projects add-iam-policy-binding "$TF_VAR_GCP_PROJECT" --member "$email" --role roles/compute.viewer -gcloud projects add-iam-policy-binding "$TF_VAR_GCP_PROJECT" --member "$email" --role roles/compute.securityAdmin -gcloud projects add-iam-policy-binding "$TF_VAR_GCP_PROJECT" --member "$email" --role roles/iam.serviceAccountUser -gcloud projects add-iam-policy-binding "$TF_VAR_GCP_PROJECT" --member "$email" --role roles/compute.instanceAdmin.v1 - -gcloud iam service-accounts keys create terraform-key.json --iam-account "$email" -export TF_VAR_GCP_CREDENTIALS_PATH="$(pwd)/terraform-key.json" +echo "GCP_PROJECT=$(gcloud config get-value project)" >> terraform.tfvars +# Create a service account for terraform with restricted permissions and set the credentials path +./create-service-account.sh ``` -You can also append them in your `~/.bash_profile` so they will be exported automatically next time. - - ## Deploy @@ -74,11 +64,11 @@ cd tidb-operator/deploy/gcp You need to decide on instance types. If you just want to just get a feel for a TiDB deployment and lower your cost, you can use the small settings. - cp small.tfvars terraform.tfvars + cat small.tfvars >> terraform.tfvars If you want to benchmark a production deployment, run: - cp prod.tfvars terraform.tfvars + cat prod.tfvars >> terraform.tfvars The terraform creates a new VPC, two subnetworks, and an f1-micro instance as a bastion machine. 
The production setup used the following instance types: diff --git a/deploy/gcp/create-service-account.sh b/deploy/gcp/create-service-account.sh new file mode 100755 index 0000000000..d0ca3223ee --- /dev/null +++ b/deploy/gcp/create-service-account.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -euo pipefail +cd "$(dirname "$0")" +PROJECT="${TF_VAR_GCP_PROJECT:-$(cat terraform.tfvars | awk -F '=' '/GCP_PROJECT/ {print $2}' | cut -d '"' -f 2)}" +echo "$PROJECT" + +cred_file=credentials.auto.tfvars +if test -f "$cred_file" ; then + if cat "$cred_file" | awk -F'=' '/GCP_CREDENTIALS/ {print $2}' >/dev/null ; then + echo "GCP_CREDENTIALS_PATH already set in $cred_file" + exit 1 + fi +fi + +gcloud iam service-accounts create --display-name terraform terraform +email="terraform@${PROJECT}.iam.gserviceaccount.com" +gcloud projects add-iam-policy-binding "$PROJECT" --member "$email" --role roles/container.clusterAdmin +gcloud projects add-iam-policy-binding "$PROJECT" --member "$email" --role roles/compute.networkAdmin +gcloud projects add-iam-policy-binding "$PROJECT" --member "$email" --role roles/compute.viewer +gcloud projects add-iam-policy-binding "$PROJECT" --member "$email" --role roles/compute.securityAdmin +gcloud projects add-iam-policy-binding "$PROJECT" --member "$email" --role roles/iam.serviceAccountUser +gcloud projects add-iam-policy-binding "$PROJECT" --member "$email" --role roles/compute.instanceAdmin.v1 + +mkdir -p credentials +gcloud iam service-accounts keys create credentials/terraform-key.json --iam-account "$email" +echo GCP_CREDENTIALS_PATH="$(pwd)/credentials/terraform-key.json" > "$cred_file" From 18cd912586c50e24cee5d04e9a7f96a2e008fafc Mon Sep 17 00:00:00 2001 From: Greg Weber Date: Mon, 17 Jun 2019 09:23:34 -0700 Subject: [PATCH 3/9] Apply suggestions from code review Co-Authored-By: Tennix --- deploy/gcp/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/gcp/README.md b/deploy/gcp/README.md index 
44ff4f7643..200c777205 100644 --- a/deploy/gcp/README.md +++ b/deploy/gcp/README.md @@ -62,7 +62,7 @@ git clone --depth=1 https://github.com/pingcap/tidb-operator cd tidb-operator/deploy/gcp ``` -You need to decide on instance types. If you just want to just get a feel for a TiDB deployment and lower your cost, you can use the small settings. +You need to decide on instance types. If you just want to get a feel for a TiDB deployment and lower your cost, you can use the small settings. cat small.tfvars >> terraform.tfvars From 48a9df56ae07b0d1c86c191d11cbc1aa5db38002 Mon Sep 17 00:00:00 2001 From: Greg Weber Date: Mon, 17 Jun 2019 16:05:24 -0700 Subject: [PATCH 4/9] fix accidental commit of test data --- deploy/gcp/prod.tfvars | 2 -- 1 file changed, 2 deletions(-) diff --git a/deploy/gcp/prod.tfvars b/deploy/gcp/prod.tfvars index 6e26756bab..2345772918 100644 --- a/deploy/gcp/prod.tfvars +++ b/deploy/gcp/prod.tfvars @@ -1,5 +1,3 @@ -tidb_operator_version = "v1.0.0-beta.3.start-immediate" -tidb_operator_registry = "gcr.io/pingcap-tidb-alpha" pd_instance_type = "n1-standard-4" tikv_instance_type = "n1-highmem-8" tidb_instance_type = "n1-standard-16" \ No newline at end of file From 23d307118a0b71c8f73e9b642a9f678cbe25f782 Mon Sep 17 00:00:00 2001 From: Greg Weber Date: Mon, 17 Jun 2019 18:49:42 -0700 Subject: [PATCH 5/9] local volume provisioner install script improvements add DEBIAN_FRONTEND=noninteractive this is still needed even when using -y add dpkg configure -a which can be needed --- manifests/gke/local-ssd-provision/local-ssd-provision.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/manifests/gke/local-ssd-provision/local-ssd-provision.yaml b/manifests/gke/local-ssd-provision/local-ssd-provision.yaml index 1712131d7a..9d197a4c21 100644 --- a/manifests/gke/local-ssd-provision/local-ssd-provision.yaml +++ b/manifests/gke/local-ssd-provision/local-ssd-provision.yaml @@ -57,7 +57,9 @@ spec: set -x # Install the linux guest environment tools + export 
DEBIAN_FRONTEND=noninteractive cat /etc/apt/sources.list + dpkg --configure -a apt-get update apt-get install -y software-properties-common || echo "could not install software-properties-common" apt-add-repository universe From 220631211212a67a58a39a5899d2bfd1d074eeb8 Mon Sep 17 00:00:00 2001 From: Greg Weber Date: Mon, 17 Jun 2019 21:23:56 -0700 Subject: [PATCH 6/9] don't auto-upgrade/repair --- deploy/gcp/main.tf | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/deploy/gcp/main.tf b/deploy/gcp/main.tf index 95fb11592f..074009f0f8 100644 --- a/deploy/gcp/main.tf +++ b/deploy/gcp/main.tf @@ -118,6 +118,11 @@ resource "google_container_node_pool" "pd_pool" { name = "pd-pool" initial_node_count = var.pd_count + management { + auto_repair = false + auto_upgrade = false + } + node_config { machine_type = var.pd_instance_type local_ssd_count = 0 @@ -146,6 +151,11 @@ resource "google_container_node_pool" "tikv_pool" { name = "tikv-pool" initial_node_count = var.tikv_count + management { + auto_repair = false + auto_upgrade = false + } + node_config { machine_type = var.tikv_instance_type image_type = "UBUNTU" @@ -177,6 +187,11 @@ resource "google_container_node_pool" "tidb_pool" { name = "tidb-pool" initial_node_count = var.tidb_count + management { + auto_repair = false + auto_upgrade = false + } + node_config { machine_type = var.tidb_instance_type @@ -203,6 +218,11 @@ resource "google_container_node_pool" "monitor_pool" { name = "monitor-pool" initial_node_count = var.monitor_count + management { + auto_repair = false + auto_upgrade = false + } + node_config { machine_type = var.monitor_instance_type tags = ["monitor"] From a5e8f3f889d5335796871220aa0bffebcf7857eb Mon Sep 17 00:00:00 2001 From: Greg Weber Date: Tue, 18 Jun 2019 10:24:24 -0700 Subject: [PATCH 7/9] create the TiKV pool first The local volume provisioner startup can take some time --- deploy/gcp/main.tf | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git 
a/deploy/gcp/main.tf b/deploy/gcp/main.tf index 074009f0f8..4ccffbec31 100644 --- a/deploy/gcp/main.tf +++ b/deploy/gcp/main.tf @@ -110,7 +110,8 @@ resource "google_container_cluster" "cluster" { } resource "google_container_node_pool" "pd_pool" { - depends_on = [google_container_cluster.cluster] + // The monitor pool is where tiller must first be deployed to. + depends_on = [google_container_node_pool.monitor_pool] provider = google-beta project = var.GCP_PROJECT cluster = google_container_cluster.cluster.name @@ -143,7 +144,6 @@ resource "google_container_node_pool" "pd_pool" { } resource "google_container_node_pool" "tikv_pool" { - depends_on = [google_container_node_pool.pd_pool] provider = google-beta project = var.GCP_PROJECT cluster = google_container_cluster.cluster.name @@ -179,7 +179,8 @@ resource "google_container_node_pool" "tikv_pool" { } resource "google_container_node_pool" "tidb_pool" { - depends_on = [google_container_node_pool.tikv_pool] + // The pool order is monitor -> tikv -> pd -> tidb + depends_on = [google_container_node_pool.pd_pool] provider = google-beta project = var.GCP_PROJECT cluster = google_container_cluster.cluster.name @@ -211,7 +212,9 @@ resource "google_container_node_pool" "tidb_pool" { } resource "google_container_node_pool" "monitor_pool" { - depends_on = [google_container_node_pool.tidb_pool] + // Setup local SSD on TiKV nodes first (this can take some time) + // Create the monitor pool next because that is where tiller will be deployed to + depends_on = [google_container_node_pool.tikv_pool] project = var.GCP_PROJECT cluster = google_container_cluster.cluster.name location = google_container_cluster.cluster.location @@ -352,9 +355,10 @@ kubectl apply -k manifests/local-ssd kubectl apply -f manifests/gke/persistent-disk.yaml kubectl apply -f manifests/tiller-rbac.yaml -helm init --service-account tiller --upgrade --wait +helm init --service-account tiller --upgrade until helm ls; do echo "Wait until tiller is ready" + sleep 
1 done helm upgrade --install tidb-operator --namespace tidb-admin ${path.module}/charts/tidb-operator --set operatorImage=${var.tidb_operator_registry}/tidb-operator:${var.tidb_operator_version} EOS @@ -371,8 +375,6 @@ resource "null_resource" "deploy-tidb-cluster" { null_resource.setup-env, local_file.tidb-cluster-values, google_container_node_pool.pd_pool, - google_container_node_pool.tikv_pool, - google_container_node_pool.tidb_pool, ] triggers = { From c82753e872b428e9f01b18897e9dc94794e5b560 Mon Sep 17 00:00:00 2001 From: Greg Weber Date: Tue, 18 Jun 2019 10:51:37 -0700 Subject: [PATCH 8/9] add a maintenance window --- deploy/gcp/main.tf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/deploy/gcp/main.tf b/deploy/gcp/main.tf index 4ccffbec31..9b006d6e90 100644 --- a/deploy/gcp/main.tf +++ b/deploy/gcp/main.tf @@ -107,6 +107,12 @@ resource "google_container_cluster" "cluster" { lifecycle { ignore_changes = [master_auth] // see above linked issue } + + maintenance_policy { + daily_maintenance_window { + start_time = "01:00" + } + } } resource "google_container_node_pool" "pd_pool" { From 8d8f6c99eb6043f6599b4e2f730017f0be253024 Mon Sep 17 00:00:00 2001 From: Greg Weber Date: Tue, 18 Jun 2019 11:09:18 -0700 Subject: [PATCH 9/9] fix comment --- deploy/gcp/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/gcp/main.tf b/deploy/gcp/main.tf index 9b006d6e90..bded06a24f 100644 --- a/deploy/gcp/main.tf +++ b/deploy/gcp/main.tf @@ -185,7 +185,7 @@ resource "google_container_node_pool" "tikv_pool" { } resource "google_container_node_pool" "tidb_pool" { - // The pool order is monitor -> tikv -> pd -> tidb + // The pool order is tikv -> monitor -> pd -> tidb depends_on = [google_container_node_pool.pd_pool] provider = google-beta project = var.GCP_PROJECT