Add rollback window and history settings (#792) #504
#
# This is an e2e test to deploy PostHog on Google Cloud Platform using Helm.
#
# TODO:
# - run k8s spec test
#
name: e2e - Google Cloud Platform (install)
on:
  workflow_dispatch:
  # Please do not add 'pull_request' here: without the proper GitHub settings,
  # it could let 3rd party users run commands against the cloud account
  # we use for testing.
  push:
    branches:
      - main
env:
  # This is needed until k8s v1.25
  # https://cloud.google.com/blog/products/containers-kubernetes/kubectl-auth-changes-in-gke
  USE_GKE_GCLOUD_AUTH_PLUGIN: True
jobs:
  gcp-install:
    runs-on: ubuntu-20.04
    if: github.repository == 'PostHog/charts-clickhouse'
    #
    # These permissions are needed to interact with GitHub's OIDC Token endpoint.
    # We use OpenID Connect (OIDC) to allow this GitHub Action to access and manage
    # GCP resources without needing to store the GCP credentials as long-lived GitHub secrets.
    #
    # see: https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/configuring-openid-connect-in-google-cloud-platform
    #
    permissions:
      id-token: write
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v3
      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v0
        with:
          workload_identity_provider: 'projects/494532703488/locations/global/workloadIdentityPools/github/providers/github'
          service_account: 'github@posthog-helm-chart-testing.iam.gserviceaccount.com'
          access_token_lifetime: '7200s'
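      # Note (rough sketch of the mechanism): the 'auth' step above exchanges the GitHub OIDC
      # token for short-lived GCP credentials and exposes them to the job environment, so the
      # 'gcloud' and 'kubectl' invocations in the following steps can authenticate without any
      # long-lived secrets being stored in this repository.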
      - name: Set up Google Cloud SDK
        uses: google-github-actions/setup-gcloud@v0
        with:
          install_components: 'gke-gcloud-auth-plugin'
      - name: Install doctl to manage 'posthog.cc' DNS
        uses: digitalocean/action-doctl@v2
        with:
          token: ${{ secrets.DIGITALOCEAN_ACCESS_TOKEN }}
      - name: Declare variables that we can share across steps
        id: vars
        run: |
          TEST_NAME="helm-test-e2e-gcp-$(date '+%F')-$(git rev-parse --short HEAD)"
          echo "k8s_cluster_name=${TEST_NAME}" >> $GITHUB_OUTPUT
          echo "dns_record=${TEST_NAME}" >> $GITHUB_OUTPUT
          echo "fqdn_record=${TEST_NAME}.posthog.cc" >> $GITHUB_OUTPUT
      - name: Deploy a new k8s cluster
        id: k8s_cluster_creation
        run: |
          gcloud container clusters create \
            ${{ steps.vars.outputs.k8s_cluster_name }} \
            --project ${{ secrets.GCP_PROJECT_ID }} \
            --region us-central1 \
            --cluster-version 1.24 \
            --labels="provisioned_by=github_action" \
            --machine-type e2-standard-2 \
            --num-nodes 2
      # note: the number given by --num-nodes is created in each zone, such that if you
      # specify --num-nodes=4 and choose two locations, 8 nodes will be created.
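      # For this workflow that means: assuming the regional cluster defaults to the three
      # zones of us-central1, '--num-nodes 2' provisions roughly 2 x 3 = 6 e2-standard-2 nodes.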
      - name: Create new GCP global static IP address
        id: static_ip_creation
        # note: we need to create the IP address first in order
        # to get the load balancer successfully provisioned
        run: |
          gcloud compute addresses create \
            --project ${{ secrets.GCP_PROJECT_ID }} \
            --global \
            ${{ steps.vars.outputs.dns_record }}
      - name: Create the DNS record
        id: dns_creation
        run: |
          # Get the global static IP address
          global_static_ip=$(gcloud compute addresses list --project ${{ secrets.GCP_PROJECT_ID }} --global --filter=name:${{ steps.vars.outputs.dns_record }} --format="value(ADDRESS)")
          # Create the DNS record
          doctl compute domain records create \
            posthog.cc \
            --record-type A \
            --record-ttl 60 \
            --record-name "${{ steps.vars.outputs.dns_record }}" \
            --record-data "$global_static_ip"
      - name: Install PostHog using the Helm chart
        id: helm_install
        run: |
          helm upgrade --install \
            -f ci/values/google_cloud_platform.yaml \
            --set "ingress.hostname=${{ steps.vars.outputs.fqdn_record }}" \
            --set "ingress.gcp.ip_name=${{ steps.vars.outputs.dns_record }}" \
            --timeout 30m \
            --create-namespace \
            --namespace posthog \
            posthog ./charts/posthog \
            --wait-for-jobs \
            --wait
      #
      # Wait for all k8s resources to be ready.
      #
      # Despite the --wait flag used in the command above
      # there is no guarantee that all the resources will be deployed
      # when the command returns.
      #
      #
      # Why can't we directly use the 'action-k8s-await-workloads' step below?
      # Because it's not working for this use case
      #
      # ref: https://github.com/jupyterhub/action-k8s-await-workloads/issues/38
      #
      - name: Workaround - wait for all the PostHog resources in k8s to be ready
        timeout-minutes: 15
        run: ./ci/wait_for_all_the_posthog_resources_to_be_ready.sh
      - name: Workaround - wait for the GCP load balancer to be ready
        timeout-minutes: 15
        run: |
          echo "Waiting for the GCP Load Balancer to be ready..."
          load_balancer_external_ip=""
          while [ -z "$load_balancer_external_ip" ];
          do
            echo " sleeping 10 seconds" && sleep 10
            load_balancer_external_ip=$(kubectl get ingress -n posthog posthog -o jsonpath="{.status.loadBalancer.ingress[0].ip}")
          done
          echo "The GCP Load Balancer is now ready!"
      - name: Wait until all the resources are fully deployed in k8s
        uses: jupyterhub/action-k8s-await-workloads@main
        with:
          namespace: "posthog"
          timeout: 300
          max-restarts: 10
      - name: Wait for the Google-managed TLS certificate to be issued and deployed
        id: tls_certificate_creation
        timeout-minutes: 60
        run: |
          echo "Wait for the Google-managed TLS certificate to be issued and deployed..."
          certificate_status=""
          while [ "$certificate_status" != "Active" ];
          do
            echo " sleeping 10 seconds" && sleep 10
            certificate_status=$(kubectl get managedcertificate -n posthog posthog-gke-cert -o jsonpath="{.status.certificateStatus}")
          done
          echo "The certificate has been issued and it has been deployed!"
      - name: Setup PostHog for the ingestion test
        run: ./ci/setup_ingestion_test.sh
      - name: Set PostHog endpoints to use for the ingestion test
        run: |
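          # Note: 'tee -a "$GITHUB_ENV"' both prints each value in the job log and appends it
          # to the GitHub env file, so the variables are available to the later steps as well.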
echo "POSTHOG_API_ENDPOINT=https://${{ steps.vars.outputs.fqdn_record }}" | tee -a "$GITHUB_ENV" | |
echo "POSTHOG_EVENT_ENDPOINT=https://${{ steps.vars.outputs.fqdn_record }}" | tee -a "$GITHUB_ENV" | |
# | |
# TODO: the GCE Ingress is not picking up health check from readiness probe definition and it's using '/' | |
# instead. We need to fix this issue before being able to enable the k6 ingestion test | |
# See WIP at https://github.com/PostHog/charts-clickhouse/pull/209 | |
# | |
# - name: Run ingestion test using k6 | |
# uses: k6io/action@v0.2.0 | |
# with: | |
# filename: ci/k6/ingestion-test.js | |
- name: Emit namespace report | |
uses: jupyterhub/action-k8s-namespace-report@v1 | |
if: always() | |
with: | |
namespace: posthog | |
      #
      # Due to what I consider a bad design choice from Google, when a GKE cluster is deleted
      # its standalone NEGs are not, so we need to do the cleanup ourselves.
      #
      # An additional problem is that we can't delete NEGs while they are in use, and there's
      # also no easy way to get the NEGs associated with a specific GKE cluster using the
      # 'gcloud' CLI. In order to do so, we fetch the NEG info via 'kubectl' in this step,
      # delete the cluster in a later step and then, using the output from this step, finally
      # delete the NEGs.
      #
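      # For reference, the 'cloud.google.com/neg-status' annotation parsed below is roughly
      # of this shape (illustrative values):
      #   {"network_endpoint_groups":{"8000":"k8s1-abc123-posthog-posthog-web-8000-def456"},
      #    "zones":["us-central1-a","us-central1-b","us-central1-c"]}
      # i.e. the jq filters extract the NEG name registered for port 8000 and the zone list.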
      - name: Fetch the associated NEG (Network Endpoint Groups)
        id: fetch_neg
        if: ${{ always() }}
        shell: bash
        run: |
          #
          # posthog-events
          #
          POSTHOG_EVENTS_NEG_INFO=$(kubectl get svc -n posthog posthog-events -o json | jq --raw-output '.["metadata"]["annotations"]["cloud.google.com/neg-status"]')
          POSTHOG_EVENTS_NEG_NAME=$(echo "$POSTHOG_EVENTS_NEG_INFO" | jq --raw-output '.["network_endpoint_groups"]["8000"]')
          IFS=" " read -r -a POSTHOG_EVENTS_NEG_ZONES <<< "$(echo "$POSTHOG_EVENTS_NEG_INFO" | jq --raw-output '.["zones"] | join(",")')"
          echo "posthog_events_neg_name=${POSTHOG_EVENTS_NEG_NAME}" >> "$GITHUB_OUTPUT"
          echo "posthog_events_neg_zones=${POSTHOG_EVENTS_NEG_ZONES[@]}" >> "$GITHUB_OUTPUT"
          #
          # posthog-web
          #
          POSTHOG_WEB_NEG_INFO=$(kubectl get svc -n posthog posthog-web -o json | jq --raw-output '.["metadata"]["annotations"]["cloud.google.com/neg-status"]')
          POSTHOG_WEB_NEG_NAME=$(echo "$POSTHOG_WEB_NEG_INFO" | jq --raw-output '.["network_endpoint_groups"]["8000"]')
          IFS=" " read -r -a POSTHOG_WEB_NEG_ZONES <<< "$(echo "$POSTHOG_WEB_NEG_INFO" | jq --raw-output '.["zones"] | join(",")')"
          echo "posthog_web_neg_name=${POSTHOG_WEB_NEG_NAME}" >> "$GITHUB_OUTPUT"
          echo "posthog_web_neg_zones=${POSTHOG_WEB_NEG_ZONES[@]}" >> "$GITHUB_OUTPUT"
      - name: Delete the k8s cluster and all the associated resources
        if: ${{ always() && steps.k8s_cluster_creation.outcome == 'success' }}
        run: |
          gcloud container clusters delete \
            --project ${{ secrets.GCP_PROJECT_ID }} \
            --region us-central1 \
            --quiet \
            ${{ steps.vars.outputs.k8s_cluster_name }}
      - name: Delete the associated NEG (Network Endpoint Groups)
        if: ${{ always() && steps.fetch_neg.outcome == 'success' }}
        shell: bash
        run: |
          delete_neg() {
            local NEG_NAME="$1"
            local NEG_ZONES
            IFS=',' read -r -a NEG_ZONES <<< "$2"
            for NEG_ZONE in "${NEG_ZONES[@]}"
            do
              gcloud compute network-endpoint-groups delete \
                --project ${{ secrets.GCP_PROJECT_ID }} \
                --zone "$NEG_ZONE" \
                --quiet \
                "$NEG_NAME"
            done
          }
          delete_neg "${{ steps.fetch_neg.outputs.posthog_events_neg_name }}" "${{ steps.fetch_neg.outputs.posthog_events_neg_zones }}"
          delete_neg "${{ steps.fetch_neg.outputs.posthog_web_neg_name }}" "${{ steps.fetch_neg.outputs.posthog_web_neg_zones }}"
      - name: Delete the global static IP address
        if: ${{ always() && steps.static_ip_creation.outcome == 'success' }}
        run: |
          gcloud compute addresses delete \
            --project ${{ secrets.GCP_PROJECT_ID }} \
            --global \
            --quiet \
            ${{ steps.vars.outputs.dns_record }}
      - name: Delete the DNS record
        if: ${{ always() && steps.dns_creation.outcome == 'success' }}
        run: |
          DNS_RECORD_ID=$(doctl compute domain records list posthog.cc --no-header --format ID,Name | grep ${{ steps.vars.outputs.dns_record }} | awk '{print $1}')
          doctl compute domain records delete \
            posthog.cc \
            --force \
            "$DNS_RECORD_ID"
      - name: Delete the Google-managed TLS certificate
        if: ${{ always() }}
        run: |
          TLS_CERTIFICATE_NAME=$(gcloud compute ssl-certificates list --project ${{ secrets.GCP_PROJECT_ID }} --global --filter=${{ steps.vars.outputs.dns_record }} --format="value(NAME)")
          if [ -n "$TLS_CERTIFICATE_NAME" ];
          then
            gcloud compute ssl-certificates delete \
              --project ${{ secrets.GCP_PROJECT_ID }} \
              --quiet \
              "$TLS_CERTIFICATE_NAME"
          fi