From f40fb16c736137cefd378ff1c72dba48f140074b Mon Sep 17 00:00:00 2001
From: Joe Julian
Date: Mon, 13 Feb 2017 09:46:23 -0800
Subject: [PATCH 1/6] Remove superfluous ignored errors

---
 ansible/roles/kraken.services/tasks/kill-services.yaml | 6 +++---
 ansible/roles/kraken.services/tasks/main.yml           | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/ansible/roles/kraken.services/tasks/kill-services.yaml b/ansible/roles/kraken.services/tasks/kill-services.yaml
index fa00d2116..f03c835f3 100644
--- a/ansible/roles/kraken.services/tasks/kill-services.yaml
+++ b/ansible/roles/kraken.services/tasks/kill-services.yaml
@@ -3,7 +3,7 @@
   command: >
     kubectl --kubeconfig={{ kubeconfig }} get services --all-namespaces -o json
   register: added_services
-  when: kraken_action == 'down'
+  when: kraken_action == 'down' and kubeconfig|is_file
   ignore_errors: yes
 
 - name: Register services fact
@@ -37,12 +37,12 @@
       HELM_HOME: "{{ helm_home }}"
   with_items: "{{cluster_services}}"
   ignore_errors: yes
-  when: tiller_present|success
+  when: tiller_present|succeeded and not tiller_present|skipped
 
 - name: Clean up tiller if present
   command: >
     kubectl --kubeconfig={{ kubeconfig }} delete deployment {{ tiller }} --namespace=kube-system
-  when: tiller_present|success
+  when: tiller_present|succeeded and not tiller_present|skipped
 
 - name: Delete all service namespaces
   command: >
diff --git a/ansible/roles/kraken.services/tasks/main.yml b/ansible/roles/kraken.services/tasks/main.yml
index 265f7c8b3..f7f25e08f 100644
--- a/ansible/roles/kraken.services/tasks/main.yml
+++ b/ansible/roles/kraken.services/tasks/main.yml
@@ -21,6 +21,7 @@
   shell: >
     kubectl --kubeconfig={{ kubeconfig }} get deployment {{ tiller }} --namespace=kube-system
   register: tiller_present
+  when: kubeconfig|is_file
   ignore_errors: yes
 
 - include: kill-services.yaml

From 441c191e733bbc7fb15ac47a71e09b55406742dd Mon Sep 17 00:00:00 2001
From: Joe Julian
Date: Wed, 1 Feb 2017 10:05:52 -0800
Subject: [PATCH 2/6] Determine when tiller is ready

Instead of a 60 second sleep, deterministically wait for tiller-deploy
to be ready.
---
 ansible/roles/kraken.services/tasks/run-services.yaml | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/kraken.services/tasks/run-services.yaml b/ansible/roles/kraken.services/tasks/run-services.yaml
index 419936c16..8bcc82c94 100644
--- a/ansible/roles/kraken.services/tasks/run-services.yaml
+++ b/ansible/roles/kraken.services/tasks/run-services.yaml
@@ -45,8 +45,13 @@
   retries: 60
   delay: 1
 
-- name: Give tiller rc a chance to fully init
-  pause: seconds=60
+- name: Wait for tiller to be ready
+  command: "kubectl --kubeconfig={{ kubeconfig | expanduser }} --namespace=kube-system get deploy tiller-deploy -o json"
+  register: output
+  until: ((output.stdout|from_json).status.availableReplicas|default(0)) > 0
+  retries: 600
+  delay: 1
+  changed_when: false
 
 - name: Remove helm repositories
   command: >

From 04521d8d4912ac321fa71989d6b0e85bf5bdfcb9 Mon Sep 17 00:00:00 2001
From: Joe Julian
Date: Thu, 2 Feb 2017 15:48:50 -0800
Subject: [PATCH 3/6] Wait for ELBs to delete after services are deleted

Rather than waiting a solid 5 minutes regardless of need, wait up to 5
minutes for the ELBs to stop before moving on.
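
The heart of the change is a poll: list the account's ELBs with
ec2_elb_facts, narrow them to the cluster's VPC with a JMESPath query,
and retry once a second until none are left. A minimal standalone
sketch of that pattern (hedged: the region, VPC id, and retry budget
below are illustrative placeholders; the role itself pulls the real
values from kraken_config, as the diff shows):

- name: Wait until no ELBs remain in the target VPC
  ec2_elb_facts:
    # placeholder region; the role reads this from kraken_config
    region: us-east-1
  register: elb_facts
  # JMESPath filter over the module's `elbs` list; vpc-0abc1234 is a placeholder id
  until: elb_facts|json_query("elbs[?vpc_id=='vpc-0abc1234']")|length == 0
  # one-second polls, so roughly a five minute ceiling
  retries: 300
  delay: 1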
---
 .../kraken.services/tasks/kill-services.yaml | 45 ++++++++++++++-----
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/ansible/roles/kraken.services/tasks/kill-services.yaml b/ansible/roles/kraken.services/tasks/kill-services.yaml
index f03c835f3..015e6d55f 100644
--- a/ansible/roles/kraken.services/tasks/kill-services.yaml
+++ b/ansible/roles/kraken.services/tasks/kill-services.yaml
@@ -18,17 +18,6 @@
   when: kraken_action == 'down'
   ignore_errors: yes
 
-- name: Clean up services
-  command: >
-    kubectl --kubeconfig={{ kubeconfig }} delete --namespace {{ item.metadata.namespace }} svc {{ item.metadata.name }}
-  with_items: "{{ the_services }}"
-  when: item.status.loadBalancer.ingress[0].hostname is defined and kraken_action == 'down'
-  ignore_errors: yes
-
-- name: Pauase to let services come down
-  pause: minutes=5
-  when: kraken_action == 'down'
-
 - name: Clean up releases
   command: >
     helm delete --purge {{ item.name }}
@@ -44,9 +33,43 @@
     kubectl --kubeconfig={{ kubeconfig }} delete deployment {{ tiller }} --namespace=kube-system
   when: tiller_present|succeeded and not tiller_present|skipped
 
+- name: Clean up services
+  command: >
+    kubectl --kubeconfig={{ kubeconfig }} delete --namespace {{ item.metadata.namespace }} svc {{ item.metadata.name }}
+  with_items: "{{ the_services }}"
+  when: item.status.loadBalancer.ingress[0].hostname is defined and kraken_action == 'down'
+  ignore_errors: yes
+
 - name: Delete all service namespaces
   command: >
     kubectl --kubeconfig={{ kubeconfig }} delete namespace {{ item }}
   with_items: "{{ cluster_namespaces }}"
   when: cluster_namespaces is defined
   ignore_errors: yes
+
+- name: Get vpc id
+  shell: "terraform state show -state={{ config_base | expanduser }}/{{kraken_config.cluster}}/terraform.tfstate module.vpc.aws_vpc.vpc | awk '/^id/{print $3}'"
+  register: vpcid
+  when: kraken_action == 'down'
+  changed_when: false
+
+- name: Set vpcid lookup string
+  set_fact:
+    vpc_lookup: "elbs[?vpc_id=='{{ vpcid.stdout }}']"
+  when: kraken_action == 'down'
+  changed_when: false
+
+- name: Wait for ELBs to be deleted
+  action:
+    module: ec2_elb_facts
+    region: "{{ kraken_config.providerConfig.region }}"
+    aws_access_key: "{{ kraken_config.providerConfig.authentication.accessKey or omit}}"
+    aws_secret_key: "{{ kraken_config.providerConfig.authentication.accessSecret or omit }}"
+    profile: "{{ kraken_config.providerConfig.authentication.credentialsProfile or omit }}"
+  register: elb_facts
+  vars:
+    vpc_lookup: "elbs[?vpc_id=='{{ vpcid.stdout }}']"
+  when: kraken_action == 'down'
+  until: "{{ elb_facts|json_query(vpc_lookup)|length }} <= 1"
+  retries: 600
+  delay: 1

From 83b2091324b3695180bdcda1fcecc3d5593923f4 Mon Sep 17 00:00:00 2001
From: Joe Julian
Date: Thu, 9 Feb 2017 15:55:19 -0800
Subject: [PATCH 4/6] Wait for the api server again

For some reason, the Kubernetes API server is not listening when we
try to `helm init`. Wait for it to be listening.
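
Stripped of role plumbing, the added wait is two tasks: parse every
cluster endpoint out of the kubeconfig, then block on each one's TCP
port. A minimal sketch of the pattern (the expressions mirror the diff
below; the 300 second timeout is an arbitrary stand-in for the role's
readiness_wait):

- name: Derive api server addresses from the kubeconfig
  set_fact:
    api_servers: "{{ lookup('file', kubeconfig)|from_yaml|json_query('clusters[*].cluster.server') }}"

- name: Block until each api server accepts connections
  wait_for:
    # wait_for needs a bare host, so strip the URL scheme
    host: "{{ item|regex_replace('https://','') }}"
    port: 443
    timeout: 300
  with_items: "{{ api_servers }}"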
---
 .../kraken.provider.aws/tasks/aws-action.yaml      |  4 ++--
 ansible/roles/kraken.readiness/tasks/do-wait.yaml  | 12 ++++++++--
 .../roles/kraken.services/tasks/kill-services.yaml | 13 ++++++-------
 .../roles/kraken.services/tasks/run-services.yaml  | 11 +++++++++++
 4 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/ansible/roles/kraken.provider/kraken.provider.aws/tasks/aws-action.yaml b/ansible/roles/kraken.provider/kraken.provider.aws/tasks/aws-action.yaml
index ef6d4af84..0854e20f6 100644
--- a/ansible/roles/kraken.provider/kraken.provider.aws/tasks/aws-action.yaml
+++ b/ansible/roles/kraken.provider/kraken.provider.aws/tasks/aws-action.yaml
@@ -26,7 +26,7 @@
 - name: Set the kraken end point fact
   set_fact:
     kraken_endpoint: "{{ endpoint_result.stdout }}"
-  when: kraken_action == 'up'
+  when: endpoint_result|succeeded and not endpoint_result|skipped
 
 - name: Get kraken aws_route53_zone.private_zone.zone_id
   shell: >
@@ -74,4 +74,4 @@
   - route53_zone
   - aws_prefix
   - terraform.tfstate
-  - terraform.tfstate.backup
\ No newline at end of file
+  - terraform.tfstate.backup
diff --git a/ansible/roles/kraken.readiness/tasks/do-wait.yaml b/ansible/roles/kraken.readiness/tasks/do-wait.yaml
index e041ce81a..4df64dc08 100644
--- a/ansible/roles/kraken.readiness/tasks/do-wait.yaml
+++ b/ansible/roles/kraken.readiness/tasks/do-wait.yaml
@@ -7,8 +7,16 @@
   set_fact:
     wait_api_start_timestamp: "{{ lookup('pipe','date +%Y%m%d%H%M%S') }}"
 
-- name: Wait for api server to become available
-  wait_for: host={{ kraken_endpoint }} port=443 timeout={{ readiness_wait }}
+- name: Fetch k8s api server address
+  set_fact:
+    api_servers: "{{ lookup('file', kubeconfig)|from_yaml|json_query('clusters[*].cluster.server') }}"
+
+- name: Wait for api server to become available in case it's not
+  wait_for:
+    host: "{{ item|regex_replace('https://','') }}"
+    port: 443
+    timeout: "{{ readiness_wait }}"
+  with_items: "{{ api_servers }}"
 
 - name: Get timestamp after api server wait
   set_fact:
diff --git a/ansible/roles/kraken.services/tasks/kill-services.yaml b/ansible/roles/kraken.services/tasks/kill-services.yaml
index 015e6d55f..973590693 100644
--- a/ansible/roles/kraken.services/tasks/kill-services.yaml
+++ b/ansible/roles/kraken.services/tasks/kill-services.yaml
@@ -49,15 +49,14 @@
 
 - name: Get vpc id
   shell: "terraform state show -state={{ config_base | expanduser }}/{{kraken_config.cluster}}/terraform.tfstate module.vpc.aws_vpc.vpc | awk '/^id/{print $3}'"
-  register: vpcid
+  register: terraform_state_show
   when: kraken_action == 'down'
   changed_when: false
 
-- name: Set vpcid lookup string
+- name: Set vpc_id fact
   set_fact:
-    vpc_lookup: "elbs[?vpc_id=='{{ vpcid.stdout }}']"
+    vpcid: "{{ terraform_state_show.stdout }}"
   when: kraken_action == 'down'
-  changed_when: false
 
 - name: Wait for ELBs to be deleted
   action:
@@ -68,8 +67,8 @@
     profile: "{{ kraken_config.providerConfig.authentication.credentialsProfile or omit }}"
   register: elb_facts
   vars:
-    vpc_lookup: "elbs[?vpc_id=='{{ vpcid.stdout }}']"
+    vpc_lookup: "elbs[?vpc_id=='{{ vpcid }}']"
-  when: kraken_action == 'down'
-  until: "{{ elb_facts|json_query(vpc_lookup)|length }} <= 1"
+  when: kraken_action == 'down' and kraken_config.provider == 'aws'
+  until: (elb_facts is none) or (elb_facts|json_query(vpc_lookup) is none) or (elb_facts|json_query(vpc_lookup)|length <= 1)
   retries: 600
   delay: 1
diff --git a/ansible/roles/kraken.services/tasks/run-services.yaml b/ansible/roles/kraken.services/tasks/run-services.yaml
index 8bcc82c94..09e18eee0 100644
--- a/ansible/roles/kraken.services/tasks/run-services.yaml
+++ b/ansible/roles/kraken.services/tasks/run-services.yaml
@@ -25,6 +25,17 @@
   helm_init_command: "{{helm_command}} init --tiller-image {{ tiller_image }}"
   when: (tiller_image is defined) and (tiller_image is not none) and (tiller_image|trim != '')
 
+- name: Fetch k8s api server address
+  set_fact:
+    api_servers: "{{ lookup('file', kubeconfig)|from_yaml|json_query('clusters[*].cluster.server') }}"
+
+- name: Wait for api server to become available in case it's not
+  wait_for:
+    host: "{{ item|regex_replace('https://','') }}"
+    port: 443
+    timeout: "{{ readiness_wait }}"
+  with_items: "{{ api_servers }}"
+
 - name: Init helm dry-run
   command: >
     {{helm_init_command}} --dry-run

From 850996c8c1a780b9753afd3798ae971827f3a2e4 Mon Sep 17 00:00:00 2001
From: Joe Julian
Date: Tue, 14 Feb 2017 16:10:43 -0800
Subject: [PATCH 5/6] set maximum wait time to 10 minutes

---
 ansible/roles/kraken.services/tasks/run-services.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/kraken.services/tasks/run-services.yaml b/ansible/roles/kraken.services/tasks/run-services.yaml
index 09e18eee0..1c97a2a32 100644
--- a/ansible/roles/kraken.services/tasks/run-services.yaml
+++ b/ansible/roles/kraken.services/tasks/run-services.yaml
@@ -33,7 +33,7 @@
   wait_for:
     host: "{{ item|regex_replace('https://','') }}"
     port: 443
-    timeout: "{{ readiness_wait }}"
+    timeout: 600
   with_items: "{{ api_servers }}"
 
 - name: Init helm dry-run

From e6aed87259f13261eb19e97b89774595d0a02325 Mon Sep 17 00:00:00 2001
From: Joe Julian
Date: Tue, 14 Feb 2017 16:40:53 -0800
Subject: [PATCH 6/6] make sure the api is listening when we need it
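
What makes a shared role default work here is lazy templating: a
default is not rendered when the role loads, only when a task first
references it, by which point admin.kubeconfig should already exist on
disk. A condensed sketch of the pattern the diffs below install in
both roles (the jinja2 expression is taken verbatim from the patch;
the file headings are abbreviations, not real paths):

# defaults/main.yml -- rendered only on first reference
api_servers: "{{ lookup('file', kubeconfig)|from_yaml|json_query('clusters[*].cluster.server') }}"

# tasks/main.yml -- referencing api_servers triggers the file lookup
- name: Wait for api server to become available in case it's not
  wait_for:
    host: "{{ item|regex_replace('https://','') }}"
    port: 443
    timeout: 600
  with_items: "{{ api_servers }}"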
---
 .../kraken.fabric/kraken.fabric.flannel/defaults/main.yml | 1 +
 .../kraken.fabric/kraken.fabric.flannel/tasks/main.yml    | 7 +++++++
 ansible/roles/kraken.services/defaults/main.yml           | 3 ++-
 ansible/roles/kraken.services/tasks/run-services.yaml     | 4 ----
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/ansible/roles/kraken.fabric/kraken.fabric.flannel/defaults/main.yml b/ansible/roles/kraken.fabric/kraken.fabric.flannel/defaults/main.yml
index ed97d539c..b2f34e015 100644
--- a/ansible/roles/kraken.fabric/kraken.fabric.flannel/defaults/main.yml
+++ b/ansible/roles/kraken.fabric/kraken.fabric.flannel/defaults/main.yml
@@ -1 +1,2 @@
 ---
+api_servers: "{{ lookup('file', kubeconfig)|from_yaml|json_query('clusters[*].cluster.server') }}"
diff --git a/ansible/roles/kraken.fabric/kraken.fabric.flannel/tasks/main.yml b/ansible/roles/kraken.fabric/kraken.fabric.flannel/tasks/main.yml
index 89280bb30..ca20c957f 100644
--- a/ansible/roles/kraken.fabric/kraken.fabric.flannel/tasks/main.yml
+++ b/ansible/roles/kraken.fabric/kraken.fabric.flannel/tasks/main.yml
@@ -12,6 +12,13 @@
   template: src=config.yaml.part.jinja2 dest="{{ config_base | expanduser }}/{{kraken_config.cluster}}/fabric/config.yaml"
 
+- name: Wait for api server to become available in case it's not
+  wait_for:
+    host: "{{ item|regex_replace('https://','') }}"
+    port: 443
+    timeout: 600
+  with_items: "{{ api_servers }}"
+
 - name: Ensure the kube-networking namespace exists
   command: >
     kubectl --kubeconfig={{ kubeconfig | expanduser }} create namespace kube-networking
diff --git a/ansible/roles/kraken.services/defaults/main.yml b/ansible/roles/kraken.services/defaults/main.yml
index 45381ac08..bf2a92c7a 100644
--- a/ansible/roles/kraken.services/defaults/main.yml
+++ b/ansible/roles/kraken.services/defaults/main.yml
@@ -2,4 +2,5 @@
 kubeconfig: "{{ config_base | expanduser }}/{{kraken_config.cluster}}/admin.kubeconfig"
 helm_home: "{{ config_base | expanduser }}/{{kraken_config.cluster}}/.helm"
 tiller: tiller-deploy
-tiller_image:
\ No newline at end of file
+tiller_image:
+api_servers: "{{ lookup('file', kubeconfig)|from_yaml|json_query('clusters[*].cluster.server') }}"
diff --git a/ansible/roles/kraken.services/tasks/run-services.yaml b/ansible/roles/kraken.services/tasks/run-services.yaml
index 1c97a2a32..768227cc5 100644
--- a/ansible/roles/kraken.services/tasks/run-services.yaml
+++ b/ansible/roles/kraken.services/tasks/run-services.yaml
@@ -25,10 +25,6 @@
   helm_init_command: "{{helm_command}} init --tiller-image {{ tiller_image }}"
   when: (tiller_image is defined) and (tiller_image is not none) and (tiller_image|trim != '')
 
-- name: Fetch k8s api server address
-  set_fact:
-    api_servers: "{{ lookup('file', kubeconfig)|from_yaml|json_query('clusters[*].cluster.server') }}"
-
 - name: Wait for api server to become available in case it's not
   wait_for:
     host: "{{ item|regex_replace('https://','') }}"
     port: 443