fix scaling out and helm failure
tennix committed Apr 29, 2019
1 parent 34d64e1 commit acbf223
Showing 4 changed files with 97 additions and 29 deletions.
15 changes: 9 additions & 6 deletions deploy/aws/README.md
@@ -4,6 +4,7 @@
* [awscli](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html) >= 1.16.73
* [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/#install-kubectl) >= 1.11
* [helm](https://github.com/helm/helm/blob/master/docs/install.md#installing-the-helm-client) >= 2.9.0
* [jq](https://stedolan.github.io/jq/download/)
* [aws-iam-authenticator](https://github.com/kubernetes-sigs/aws-iam-authenticator#4-set-up-kubectl-to-use-authentication-tokens-provided-by-aws-iam-authenticator-for-kubernetes)

## Configure awscli
@@ -57,10 +58,14 @@ helm ls

To upgrade the TiDB cluster, set the `tidb_version` variable to a higher version in variables.tf and run `terraform apply`; a sketch of the change follows the note below.

> *Note*: The upgrade does not finish immediately. You can watch its progress with `watch kubectl --kubeconfig credentials/kubeconfig_<cluster_name> get po -n tidb`
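
A rough sketch of the version bump, assuming `tidb_version` is declared in variables.tf roughly as below (the values shown are illustrative, not the actual defaults):

```hcl
variable "tidb_version" {
  description = "TiDB cluster version"
  default     = "v2.1.8" # raise this to the target version, then run `terraform apply`
}
```
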
## Scale TiDB cluster

To scale the TiDB cluster, set `tikv_count` or `tidb_count` to the desired count and run `terraform apply` (a sketch follows the note below).

> *Note*: Currently, scaling in is not supported since we cannot determine which node to scale. Scaling out needs a few minutes to complete, you can watch the scaling out by `watch kubectl --kubeconfig credentials/kubeconfig_<cluster_name> get po -n tidb`
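
As a sketch, the counts can also be overridden at apply time rather than by editing variables.tf; this is plain Terraform usage, with example values:

```shell
terraform apply -var 'tikv_count=5' -var 'tidb_count=3'
```
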
## Customize

By default, the Terraform script creates a new VPC. To use an existing VPC instead, set `create_vpc` to `false` and pass your existing VPC ID and subnet IDs via the `vpc_id` and `subnets` variables, as in the sketch below.
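
For example, a terraform.tfvars along these lines would wire in an existing VPC (the IDs are hypothetical):

```hcl
create_vpc = false
vpc_id     = "vpc-0abc123def456789a"
subnets    = ["subnet-0aaa111bbb222ccc3", "subnet-0ddd444eee555fff6"]
```
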
@@ -73,9 +78,7 @@ Currently, the instance type of TiDB cluster component is not configurable becau

## TODO

- [ ] auto-scaling group policy
- [ ] Allow creating a minimal TiDB cluster

## Known issues

The helm release installation may fail the first time; running `terraform apply` again will install the tidb-operator and tidb-cluster releases successfully.
- [ ] Use [cluster autoscaler](https://github.com/kubernetes/autoscaler)
- [ ] Allow creating a minimal TiDB cluster for testing
- [ ] Make resource creation synchronous to follow Terraform convention
- [ ] Make more parameters customizable
49 changes: 37 additions & 12 deletions deploy/aws/data.tf
@@ -1,5 +1,16 @@
data "aws_availability_zones" "available" {}

data "aws_ami" "amazon-linux-2" {
most_recent = true

owners = ["amazon"]

filter {
name = "name"
values = ["amzn2-ami-hvm-*-x86_64-gp2"]
}
}
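
# (Illustrative, not part of the change) The same lookup can be sanity-checked with
# awscli; this picks the newest matching image, mirroring most_recent = true above:
#   aws ec2 describe-images --owners amazon \
#     --filters 'Name=name,Values=amzn2-ami-hvm-*-x86_64-gp2' \
#     --query 'sort_by(Images, &CreationDate)[-1].ImageId' --output text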

data "template_file" "tidb_cluster_values" {
template = "${file("${path.module}/templates/tidb-cluster-values.yaml.tpl")}"
vars {
@@ -10,18 +21,32 @@ data "template_file" "tidb_cluster_values" {
  }
}

data "kubernetes_service" "tidb" {
depends_on = ["helm_release.tidb-cluster"]
metadata {
name = "tidb-cluster-tidb"
namespace = "tidb"
}
# kubernetes provider can't use computed config_path right now, see issue:
# https://github.com/terraform-providers/terraform-provider-kubernetes/issues/142
# so we don't use the kubernetes provider to retrieve tidb and monitor connection info;
# instead we use external data sources.
# data "kubernetes_service" "tidb" {
#   depends_on = ["helm_release.tidb-cluster"]
#   metadata {
#     name      = "tidb-cluster-tidb"
#     namespace = "tidb"
#   }
# }

# data "kubernetes_service" "monitor" {
#   depends_on = ["helm_release.tidb-cluster"]
#   metadata {
#     name      = "tidb-cluster-grafana"
#     namespace = "tidb"
#   }
# }

data "external" "tidb_service" {
depends_on = ["null_resource.wait-tidb-ready"]
program = ["bash", "-c", "kubectl --kubeconfig credentials/kubeconfig_${var.cluster_name} get svc -n tidb tidb-cluster-tidb -ojson | jq '.status.loadBalancer.ingress[0]'"]
}
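
# (Illustrative) The external data source protocol requires the program to print a
# single JSON object on stdout; the jq filter above emits something like
#   {"hostname": "a1b2c3-0123456789.us-west-2.elb.amazonaws.com"}
# which then becomes available as data.external.tidb_service.result["hostname"].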

data "kubernetes_service" "monitor" {
depends_on = ["helm_release.tidb-cluster"]
metadata {
name = "tidb-cluster-grafana"
namespace = "tidb"
}
data "external" "monitor_service" {
depends_on = ["null_resource.wait-tidb-ready"]
program = ["bash", "-c", "kubectl --kubeconfig credentials/kubeconfig_${var.cluster_name} get svc -n tidb tidb-cluster-grafana -ojson | jq '.status.loadBalancer.ingress[0]'"]
}
48 changes: 39 additions & 9 deletions deploy/aws/main.tf
@@ -62,7 +62,7 @@ module "ec2" {
version = "1.21.0"
name = "${var.cluster_name}-bastion"
instance_count = "${var.create_bastion ? 1:0}"
ami = "${var.bastion_ami}"
ami = "${data.aws_ami.amazon-linux-2.id}"
instance_type = "${var.bastion_instance_type}"
key_name = "${module.key-pair.key_name}"
associate_public_ip_address = true
@@ -77,13 +77,15 @@
}

module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "2.3.1"
# source = "terraform-aws-modules/eks/aws"
# version = "2.3.1"
# We can not use cluster autoscaler for pod with local PV due to the limitations listed here:
# https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#i-have-a-couple-of-pending-pods-but-there-was-no-scale-up
# so we scale out by updating auto-scaling-group desired_capacity directly via the patched version of aws eks module
source = "github.com/tennix/terraform-aws-eks?ref=v2.3.1-patch"
  cluster_name    = "${var.cluster_name}"
  cluster_version = "${var.k8s_version}"
  # The output config can not be used by the kubernetes and helm providers directly,
  # so we use a local_file resource to force the kubernetes and helm providers to rely on it.
  # config_output_path = "credentials/"
  config_output_path = "credentials/"
  subnets            = "${split(",", var.create_vpc ? join(",", module.vpc.private_subnets) : join(",", var.subnets))}"
  vpc_id             = "${var.create_vpc ? module.vpc.vpc_id : var.vpc_id}"

@@ -159,9 +161,13 @@ resource "local_file" "kubeconfig" {
  filename = "${path.module}/credentials/kubeconfig_${var.cluster_name}"
}

provider "kubernetes" {
config_path = "${local_file.kubeconfig.filename}"
}
# kubernetes provider can't use computed config_path right now, see issue:
# https://github.com/terraform-providers/terraform-provider-kubernetes/issues/142
# so we don't use kubernetes provider to retrieve tidb and monitor connection info,
# instead we use external data source.
# provider "kubernetes" {
# config_path = "${local_file.kubeconfig.filename}"
# }

provider "helm" {
  insecure = true
@@ -210,3 +216,27 @@ resource "helm_release" "tidb-cluster" {
"${data.template_file.tidb_cluster_values.rendered}"
]
}

resource "null_resource" "wait-tidb-ready" {
depends_on = ["helm_release.tidb-cluster"]

provisioner "local-exec" {
command = <<EOS
until kubectl get po -n tidb -lapp.kubernetes.io/component=tidb | grep Running; do
echo "Wait TiDB pod running"
sleep 5
done
until kubectl get svc -n tidb tidb-cluster-tidb | grep elb; do
echo "Wait TiDB service ready"
sleep 5
done
until kubectl get svc -n tidb tidb-cluster-grafana | grep elb; do
echo "Wait monitor service ready"
sleep 5
done
EOS
environment = {
KUBECONFIG = "${local_file.kubeconfig.filename}"
}
}
}
14 changes: 12 additions & 2 deletions deploy/aws/outputs.tf
@@ -18,19 +18,29 @@ output "eks_endpoint" {
value = "${module.eks.cluster_endpoint}"
}

#output "tidb_dns" {
# description = "tidb service dns name"
# value = "${data.kubernetes_service.tidb.load_balancer_ingress.0.hostname}"
#}

output "tidb_dns" {
description = "tidb service dns name"
value = "${data.kubernetes_service.tidb.load_balancer_ingress.0.hostname}"
value = "${data.external.tidb_service.result["hostname"]}"
}

output "tidb_port" {
description = "tidb service port"
value = "4000"
}

#output "monitor_endpoint" {
# description = "monitor service endpoint"
# value = "http://${data.kubernetes_service.monitor.load_balancer_ingress.0.hostname}:3000"
#}

output "monitor_endpoint" {
description = "monitor service endpoint"
value = "http://${data.kubernetes_service.monitor.load_balancer_ingress.0.hostname}:3000"
value = "http://${data.external.monitor_service.result["hostname"]}:3000"
}
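
# (Illustrative) After `terraform apply` finishes, the reworked outputs can be read
# back with plain Terraform commands, e.g.:
#   terraform output tidb_dns          # e.g. a1b2c3-0123456789.us-west-2.elb.amazonaws.com
#   terraform output monitor_endpoint  # e.g. http://d4e5f6.us-west-2.elb.amazonaws.com:3000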

output "bastion_ip" {
