#
# This is an e2e test to deploy PostHog on Google Cloud Platform using Helm.
#
# TODO:
# - run k8s spec test
#
name: e2e - Google Cloud Platform (install)
on:
workflow_dispatch:
# Please do not add 'pull_request' here: without the proper GitHub settings,
# it could let third-party users run commands against our cloud account
# while testing
push:
branches:
- main
env:
# This is needed until k8s v1.25
# https://cloud.google.com/blog/products/containers-kubernetes/kubectl-auth-changes-in-gke
USE_GKE_GCLOUD_AUTH_PLUGIN: True
jobs:
gcp-install:
runs-on: ubuntu-20.04
if: github.repository == 'PostHog/charts-clickhouse'
#
# These permissions are needed to interact with GitHub's OIDC Token endpoint.
# We use OpenID Connect (OIDC) to allow this GitHub Action to access and manage
# GCP resources without needing to store the GCP credentials as long-lived GitHub secrets.
#
# see: https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/configuring-openid-connect-in-google-cloud-platform
#
permissions:
id-token: write
contents: write
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Authenticate to Google Cloud
uses: google-github-actions/auth@v0
with:
workload_identity_provider: 'projects/494532703488/locations/global/workloadIdentityPools/github/providers/github'
service_account: 'github@posthog-helm-chart-testing.iam.gserviceaccount.com'
access_token_lifetime: '7200s'
- name: Set up Google Cloud SDK
uses: google-github-actions/setup-gcloud@v0
with:
install_components: 'gke-gcloud-auth-plugin'
- name: Install doctl to manage 'posthog.cc' DNS
uses: digitalocean/action-doctl@v2
with:
token: ${{ secrets.DIGITALOCEAN_ACCESS_TOKEN }}
- name: Declare variables that we can share across steps
id: vars
run: |
TEST_NAME="helm-test-e2e-gcp-$(date '+%F')-$(git rev-parse --short HEAD)"
echo "k8s_cluster_name=${TEST_NAME}" >> $GITHUB_OUTPUT
echo "dns_record=${TEST_NAME}" >> $GITHUB_OUTPUT
echo "fqdn_record=${TEST_NAME}.posthog.cc" >> $GITHUB_OUTPUT
- name: Deploy a new k8s cluster
id: k8s_cluster_creation
run: |
gcloud container clusters create \
${{ steps.vars.outputs.k8s_cluster_name }} \
--project ${{ secrets.GCP_PROJECT_ID }} \
--region us-central1 \
--cluster-version 1.24 \
--labels="provisioned_by=github_action" \
--machine-type e2-standard-2 \
--num-nodes 2
# note: --num-nodes is the number of nodes created in each zone, so specifying
# --num-nodes=4 with two node locations creates 8 nodes in total.
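# As a worked example (hedged, since GKE's default zone selection can vary):
# a regional cluster in us-central1 typically spans three zones, so the
# --num-nodes 2 used above ends up with roughly 2 x 3 = 6 e2-standard-2 nodes.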
- name: Create new GCP global static IP address
id: static_ip_creation
# note: we need to create the IP address first in order
# to get the load balancer successfully provisioned
run: |
gcloud compute addresses create \
--project ${{ secrets.GCP_PROJECT_ID }} \
--global \
${{ steps.vars.outputs.dns_record }}
- name: Create the DNS record
id: dns_creation
run: |
# Get the global static IP address
global_static_ip=$(gcloud compute addresses list --project ${{ secrets.GCP_PROJECT_ID }} --global --filter=name:${{ steps.vars.outputs.dns_record }} --format="value(ADDRESS)")
# Create the DNS record
doctl compute domain records create \
posthog.cc \
--record-type A \
--record-ttl 60 \
--record-name "${{ steps.vars.outputs.dns_record }}" \
--record-data "$global_static_ip"
- name: Install PostHog using the Helm chart
id: helm_install
run: |
helm upgrade --install \
-f ci/values/google_cloud_platform.yaml \
--set "ingress.hostname=${{ steps.vars.outputs.fqdn_record }}" \
--set "ingress.gcp.ip_name=${{ steps.vars.outputs.dns_record }}" \
--timeout 30m \
--create-namespace \
--namespace posthog \
posthog ./charts/posthog \
--wait-for-jobs \
--wait
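# If the install needs debugging, a quick manual check (illustrative, not part of
# the workflow) is 'helm status posthog --namespace posthog', which reports the
# state of the release.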
#
# Wait for all k8s resources to be ready.
#
# Despite the --wait flag used in the command above
# there is no guarantee that all the resources will be deployed
# when the command returns.
#
# Why can't we use the 'action-k8s-await-workloads' step below directly?
# Because it doesn't work for this use case
#
# ref: https://github.com/jupyterhub/action-k8s-await-workloads/issues/38
#
- name: Workaround - wait for all the PostHog resources in k8s to be ready
timeout-minutes: 15
run: ./ci/wait_for_all_the_posthog_resources_to_be_ready.sh
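# A minimal sketch of the kind of wait the script performs (an assumption for
# illustration; the actual script lives in ./ci and may differ):
#   kubectl wait --namespace posthog --for=condition=Ready pod --all --timeout=900s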
- name: Workaround - wait for the GCP load balancer to be ready
timeout-minutes: 15
run: |
echo "Waiting for the GCP Load Balancer to be ready..."
load_balancer_external_ip=""
while [ -z "$load_balancer_external_ip" ];
do
echo " sleeping 10 seconds" && sleep 10
load_balancer_external_ip=$(kubectl get ingress -n posthog posthog -o jsonpath="{.status.loadBalancer.ingress[0].ip}")
done
echo "The GCP Load Balancer is now ready!"
- name: Wait until all the resources are fully deployed in k8s
uses: jupyterhub/action-k8s-await-workloads@main
with:
namespace: "posthog"
timeout: 300
max-restarts: 10
- name: Wait for the Google-managed TLS certificate to be issued and deployed
id: tls_certificate_creation
timeout-minutes: 60
run: |
echo "Wait for the Google-managed TLS certificate to be issued and deployed..."
certificate_status=""
while [ "$certificate_status" != "Active" ];
do
echo " sleeping 10 seconds" && sleep 10
certificate_status=$(kubectl get managedcertificate -n posthog posthog-gke-cert -o jsonpath="{.status.certificateStatus}")
done
echo "The certificate has been issued and it has been deployed!"
- name: Setup PostHog for the ingestion test
run: ./ci/setup_ingestion_test.sh
- name: Set PostHog endpoints to use for the ingestion test
run: |
echo "POSTHOG_API_ENDPOINT=https://${{ steps.vars.outputs.fqdn_record }}" | tee -a "$GITHUB_ENV"
echo "POSTHOG_EVENT_ENDPOINT=https://${{ steps.vars.outputs.fqdn_record }}" | tee -a "$GITHUB_ENV"
#
# TODO: the GCE Ingress is not picking up the health check from the readiness probe
# definition and uses '/' instead. We need to fix this before we can enable the k6
# ingestion test.
# See WIP at https://github.com/PostHog/charts-clickhouse/pull/209
#
# - name: Run ingestion test using k6
# uses: k6io/action@v0.2.0
# with:
# filename: ci/k6/ingestion-test.js
- name: Emit namespace report
uses: jupyterhub/action-k8s-namespace-report@v1
if: always()
with:
namespace: posthog
#
# Due to what I consider a bad design choice from Google, standalone NEGs are not
# deleted when a GKE cluster is deleted, so we need to do the cleanup ourselves.
#
# An additional problem is that we can't delete them while they are in use, and
# there's also no easy way to get the NEGs associated with a specific GKE cluster
# using the 'gcloud' CLI. So we fetch the NEG info via 'kubectl' in this step,
# delete the cluster in another and then, using the output of this step, finally
# delete the NEGs.
#
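# For reference, the 'cloud.google.com/neg-status' annotation parsed below is JSON
# roughly of this shape (illustrative names and zones):
# {"network_endpoint_groups":{"8000":"k8s1-abcdef-posthog-posthog-events-8000-xyz"},"zones":["us-central1-a","us-central1-b"]}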
- name: Fetch the associated NEG (Network Endpoint Groups)
id: fetch_neg
if: ${{ always() }}
shell: bash
run: |
#
# posthog-events
#
POSTHOG_EVENTS_NEG_INFO=$(kubectl get svc -n posthog posthog-events -o json | jq --raw-output '.["metadata"]["annotations"]["cloud.google.com/neg-status"]')
POSTHOG_EVENTS_NEG_NAME=$(echo "$POSTHOG_EVENTS_NEG_INFO" | jq --raw-output '.["network_endpoint_groups"]["8000"]')
IFS=" " read -r -a POSTHOG_EVENTS_NEG_ZONES <<< "$(echo "$POSTHOG_EVENTS_NEG_INFO" | jq --raw-output '.["zones"] | join(",")')"
echo "posthog_events_neg_name=${POSTHOG_EVENTS_NEG_NAME}"
echo "posthog_events_neg_zones=${POSTHOG_EVENTS_NEG_ZONES[@]}"
#
# posthog-web
#
POSTHOG_WEB_NEG_INFO=$(kubectl get svc -n posthog posthog-web -o json | jq --raw-output '.["metadata"]["annotations"]["cloud.google.com/neg-status"]')
POSTHOG_WEB_NEG_NAME=$(echo "$POSTHOG_WEB_NEG_INFO" | jq --raw-output '.["network_endpoint_groups"]["8000"]')
IFS=" " read -r -a POSTHOG_WEB_NEG_ZONES <<< "$(echo "$POSTHOG_WEB_NEG_INFO" | jq --raw-output '.["zones"] | join(",")')"
echo "posthog_web_neg_name=${POSTHOG_WEB_NEG_NAME}"
echo "posthog_web_neg_zones=${POSTHOG_WEB_NEG_ZONES[@]}"
- name: Delete the k8s cluster and all the associated resources
if: ${{ always() && steps.k8s_cluster_creation.outcome == 'success' }}
run: |
gcloud container clusters delete \
--project ${{ secrets.GCP_PROJECT_ID }} \
--region us-central1 \
--quiet \
${{ steps.vars.outputs.k8s_cluster_name }}
- name: Delete the associated NEG (Network Endpoint Groups)
if: ${{ always() && steps.fetch_neg.outcome == 'success' }}
shell: bash
run: |
delete_neg() {
local NEG_NAME="$1"
local NEG_ZONES
IFS=',' read -r -a NEG_ZONES <<< "$2"
for NEG_ZONE in "${NEG_ZONES[@]}"
do
gcloud compute network-endpoint-groups delete \
--project ${{ secrets.GCP_PROJECT_ID }} \
--zone "$NEG_ZONE" \
--quiet \
"$NEG_NAME"
done
}
delete_neg "${{ steps.vars.outputs.posthog_events_neg_name }}" "${{ steps.vars.outputs.posthog_events_neg_zones }}"
delete_neg "${{ steps.vars.outputs.posthog_web_neg_name }}" "${{ steps.vars.outputs.posthog_web_neg_zones }}"
- name: Delete the global static IP address
if: ${{ always() && steps.static_ip_creation.outcome == 'success' }}
run: |
gcloud compute addresses delete \
--project ${{ secrets.GCP_PROJECT_ID }} \
--global \
--quiet \
${{ steps.vars.outputs.dns_record }}
- name: Delete the DNS record
if: ${{ always() && steps.dns_creation.outcome == 'success' }}
run: |
DNS_RECORD_ID=$(doctl compute domain records list posthog.cc --no-header --format ID,Name | grep ${{ steps.vars.outputs.dns_record }} | awk '{print $1}')
doctl compute domain records delete \
posthog.cc \
--force \
"$DNS_RECORD_ID"
- name: Delete the Google-managed TLS certificate
if: ${{ always() }}
run: |
TLS_CERTIFICATE_NAME=$(gcloud compute ssl-certificates list --project ${{ secrets.GCP_PROJECT_ID }} --global --filter=${{ steps.vars.outputs.dns_record }} --format="value(NAME)")
if [ -n "$TLS_CERTIFICATE_NAME" ];
then
gcloud compute ssl-certificates delete \
--project ${{ secrets.GCP_PROJECT_ID }} \
--quiet \
"$TLS_CERTIFICATE_NAME"
fi