# Run #3914: "Improve stability of system tests (#6486)"
# Workflow file for this run:

name: System tests

# Triggers: manual dispatch (optionally choosing the test and the log level)
# and pushes to the `staging` / `trying` branches.
on:
  # Allow manually triggering this workflow
  workflow_dispatch:
    inputs:
      # Forwarded to `make -C systest run test_name=...` by the systest job.
      test_name:
        description: "Test to run"
        default: "."
      # Forwarded to the test run as the `level` environment variable.
      log_level:
        description: "Log level"
        default: "debug"
  push:
    branches:
      - staging
      - trying
# Workflow-level environment: these variables are visible to every step of
# every job below. All values except the last come from repository secrets.
env:
  GCLOUD_KEY: ${{ secrets.GCLOUD_KEY }}
  PROJECT_NAME: ${{ secrets.PROJECT_NAME }}
  CLUSTER_NAME: ${{ secrets.CLUSTER_NAME }}
  CLUSTER_ZONE: ${{ secrets.CLUSTER_ZONE }}
  DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
  DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
  # Read by the `systest-status` job to decide whether to notify Slack.
  SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
  ES_USER: ${{ secrets.ES_USER }}
  ES_PASS: ${{ secrets.ES_PASS }}
  MAIN_ES_IP: ${{ secrets.MAIN_ES_IP }}
  TD_QUEUE_NAME: ${{ secrets.TD_QUEUE_NAME }}
  TD_QUEUE_ZONE: ${{ secrets.TD_QUEUE_ZONE }}
  DUMP_QUEUE_NAME: ${{ secrets.DUMP_QUEUE_NAME }}
  DUMP_QUEUE_ZONE: ${{ secrets.DUMP_QUEUE_ZONE }}
  CI_CLUSTER_NAME: ${{ secrets.CI_CLUSTER_NAME }}
  CI_GCP_CREDENTIALS: ${{ secrets.CI_GCP_CREDENTIALS }}
  CI_GCP_PROJECT_ID: ${{ secrets.CI_GCP_PROJECT_ID }}
  CI_REGION_NAME: ${{ secrets.CI_REGION_NAME }}
  # Quoted on purpose: an unquoted `True` is a boolean-looking scalar, and the
  # consumer reads this as an environment string. Quoting keeps the exact
  # spelling "True" regardless of how the YAML loader types plain scalars.
  USE_GKE_GCLOUD_AUTH_PLUGIN: "True"
# Concurrency control: runs whose base ref is `staging` share the fixed group
# `smci-staging`; every other run is grouped per workflow and ref.
#
# NOTE(review): `github.base_ref` is only populated for pull-request events,
# while this workflow is triggered by `push` and `workflow_dispatch`. As
# written the `staging` branch therefore never takes the special path. If
# pushes to `staging` are meant to be protected from cancellation, compare
# `github.ref_name` instead - confirm the intent before changing the group.
concurrency:
  group: ${{ github.base_ref == 'staging' && 'smci-staging' || format('smci-{0}-{1}', github.workflow, github.ref) }}
  # The previous form `cond && false || true` always evaluated to true,
  # because `cond && false` is falsy and falls through to `|| true`.
  # A direct comparison yields false for staging and true otherwise.
  cancel-in-progress: ${{ github.base_ref != 'staging' }}
jobs:
  # Reports whether the change touches anything other than Markdown files, so
  # the system tests can be skipped for documentation-only changes.
  filter-changes:
    runs-on: ubuntu-22.04
    outputs:
      nondocchanges: ${{ steps.filter.outputs.nondoc }}
    steps:
      - uses: actions/checkout@v4
      - uses: dorny/paths-filter@v3
        id: filter
        with:
          filters: |
            nondoc:
              - '!**/*.md'

  # Builds and pushes the images, creates a dedicated GKE node pool for this
  # run, executes the system tests on it and tears everything down again.
  systest:
    runs-on: ubuntu-22.04
    if: ${{ needs.filter-changes.outputs.nondocchanges == 'true' }}
    needs:
      - filter-changes
    timeout-minutes: 70
    permissions:
      contents: 'read'
      # Needed by the workload-identity based gcloud authentication below.
      id-token: 'write'
    steps:
      - uses: actions/checkout@v4
        with:
          ssh-key: ${{ secrets.GH_ACTION_PRIVATE_KEY }}

      - name: Setup kubectl
        id: install
        uses: azure/setup-kubectl@v4
        with:
          version: "v1.27.16"

      - name: Setup gcloud authentication
        uses: google-github-actions/auth@v2
        with:
          project_id: ${{ secrets.GCP_WI_PROJECT_ID }}
          workload_identity_provider: ${{ secrets.GCP_WI_PROVIDER_SA }}
          service_account: ${{ secrets.GCP_WI_SA }}
          token_format: access_token

      - name: Configure gcloud
        uses: google-github-actions/setup-gcloud@v2
        with:
          version: "469.0.0"

      - name: Configure gke authentication plugin
        run: gcloud components install gke-gcloud-auth-plugin --quiet

      - name: Configure kubectl
        run: gcloud container clusters get-credentials ${{ secrets.CI_CLUSTER_NAME }} --region ${{ secrets.CI_REGION_NAME }} --project ${{ secrets.CI_GCP_PROJECT_ID }}

      # The pool is named after the run id and labelled `pipeline-id=<run id>`;
      # the "Run tests" step selects it through `node_selector`.
      - name: Create Node Pool
        run: |
          NODE_POOL_NAME="systemtest-${{ github.run_id }}"
          gcloud container node-pools create $NODE_POOL_NAME \
            --cluster ${{ secrets.CI_CLUSTER_NAME }} \
            --num-nodes 2 \
            --preemptible \
            --location ${{ secrets.CI_REGION_NAME }} \
            --machine-type ${{ secrets.CI_NODE_MACHINE_TYPE }} \
            --disk-type pd-ssd \
            --disk-size 300GB \
            --image-type COS_CONTAINERD \
            --enable-autorepair \
            --no-enable-autoupgrade \
            --node-labels env=dev,cluster=${{ secrets.CI_CLUSTER_NAME }},pipeline-id=${{ github.run_id }} \
            --metadata disable-legacy-endpoints=true \
            --service-account ${{ secrets.CI_GKE_NODEPOOL_SA }} \
            --project ${{ secrets.CI_GCP_PROJECT_ID }} \
            --quiet
          echo "Node pool created: $NODE_POOL_NAME"

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      # Only runs when the GOPRIVATE configuration variable is set.
      - uses: extractions/netrc@v2
        with:
          machine: github.com
          username: ${{ secrets.GH_ACTION_TOKEN_USER }}
          password: ${{ secrets.GH_ACTION_TOKEN }}
        if: vars.GOPRIVATE

      - name: Push go-spacemesh build to docker hub
        run: make dockerpush

      - name: Push go-bootstrapper build to docker hub
        run: make dockerpush-bs

      # The short SHA is used to build the `test_id` of the run below.
      - name: Get commit hash
        id: vars
        shell: bash
        run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT

      # NOTE(review): identical to the netrc step above and presumably
      # redundant - confirm nothing in between resets ~/.netrc before removing.
      - uses: extractions/netrc@v2
        with:
          machine: github.com
          username: ${{ secrets.GH_ACTION_TOKEN_USER }}
          password: ${{ secrets.GH_ACTION_TOKEN }}
        if: vars.GOPRIVATE

      - name: Build tests docker image
        run: make -C systest docker

      - name: Push tests docker images
        run: make -C systest push

      - name: set up go
        uses: actions/setup-go@v5
        with:
          check-latest: true
          go-version-file: "go.mod"

      # NOTE(review): `inputs.*` is empty for `push` events, so the defaults
      # declared under `workflow_dispatch` only apply to manual runs - verify
      # that the systest Makefile copes with empty `level` / `test_name`.
      - name: Run tests
        env:
          test_id: systest-${{ steps.vars.outputs.sha_short }}
          storage: premium-rwo=10Gi
          node_selector: pipeline-id=${{ github.run_id }}
          size: 20
          bootstrap: 4m
          level: ${{ inputs.log_level }}
          clusters: 4
          norbac: 1
          # Handed over through the environment instead of being expanded
          # into the script text: a user-supplied input interpolated directly
          # into `run:` can inject arbitrary shell and is split on whitespace.
          test_name: ${{ inputs.test_name }}
        run: make -C systest run test_name="$test_name"

      # Cleanup steps run regardless of the outcome of the steps above.
      - name: Delete pod
        if: always()
        env:
          test_id: systest-${{ steps.vars.outputs.sha_short }}
        run: make -C systest clean

      - name: Delete Node Pool
        if: always()
        run: |
          NODE_POOL_NAME="systemtest-${{ github.run_id }}"
          gcloud container node-pools delete $NODE_POOL_NAME \
            --cluster ${{ secrets.CI_CLUSTER_NAME }} \
            --location ${{ secrets.CI_REGION_NAME }} \
            --project ${{ secrets.CI_GCP_PROJECT_ID }} \
            --quiet
          echo "Node pool deleted: $NODE_POOL_NAME"

  # Always runs and folds the results of the two jobs above into a single
  # success/failure status, optionally announced on Slack.
  systest-status:
    if: always()
    needs:
      - filter-changes
      - systest
    runs-on: ubuntu-22.04
    env:
      # short-circuit success if no non-doc files were modified
      status: ${{ (needs.filter-changes.outputs.nondocchanges == 'false' || needs.systest.result == 'success') && 'success' || 'failure' }}
    steps:
      # print a single, clean status update to slack
      - uses: act10ns/slack@v2
        name: Slack notification
        # skip if the secret is not accessible
        if: env.SLACK_WEBHOOK_URL
        with:
          status: ${{ env.status }}
      - name: Mark the job as succeeded
        if: env.status == 'success'
        run: exit 0
      - name: Mark the job as failed
        if: env.status != 'success'
        run: exit 1