Improve stability of system tests (#6486) #3914
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: System tests | |
on: | |
# Allow manually triggering this workflow | |
workflow_dispatch: | |
inputs: | |
test_name: | |
description: "Test to run" | |
default: "." | |
log_level: | |
description: "Log level" | |
default: "debug" | |
push: | |
branches: | |
- staging | |
- trying | |
env: | |
GCLOUD_KEY: ${{ secrets.GCLOUD_KEY }} | |
PROJECT_NAME: ${{ secrets.PROJECT_NAME }} | |
CLUSTER_NAME: ${{ secrets.CLUSTER_NAME }} | |
CLUSTER_ZONE: ${{ secrets.CLUSTER_ZONE }} | |
DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} | |
DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} | |
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} | |
ES_USER: ${{ secrets.ES_USER }} | |
ES_PASS: ${{ secrets.ES_PASS }} | |
MAIN_ES_IP: ${{ secrets.MAIN_ES_IP }} | |
TD_QUEUE_NAME: ${{ secrets.TD_QUEUE_NAME }} | |
TD_QUEUE_ZONE: ${{ secrets.TD_QUEUE_ZONE }} | |
DUMP_QUEUE_NAME: ${{ secrets.DUMP_QUEUE_NAME }} | |
DUMP_QUEUE_ZONE: ${{ secrets.DUMP_QUEUE_ZONE }} | |
CI_CLUSTER_NAME: ${{ secrets.CI_CLUSTER_NAME }} | |
CI_GCP_CREDENTIALS: ${{ secrets.CI_GCP_CREDENTIALS }} | |
CI_GCP_PROJECT_ID: ${{ secrets.CI_GCP_PROJECT_ID }} | |
CI_REGION_NAME: ${{ secrets.CI_REGION_NAME }} | |
USE_GKE_GCLOUD_AUTH_PLUGIN: True | |
concurrency: | |
group: ${{ github.base_ref == 'staging' && 'smci-staging' || format('smci-{0}-{1}', github.workflow, github.ref) }} | |
cancel-in-progress: ${{ github.base_ref == 'staging' && false || true }} | |
jobs: | |
filter-changes: | |
runs-on: ubuntu-22.04 | |
outputs: | |
nondocchanges: ${{ steps.filter.outputs.nondoc }} | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: dorny/paths-filter@v3 | |
id: filter | |
with: | |
filters: | | |
nondoc: | |
- '!**/*.md' | |
systest: | |
runs-on: ubuntu-22.04 | |
if: ${{ needs.filter-changes.outputs.nondocchanges == 'true' }} | |
needs: | |
- filter-changes | |
timeout-minutes: 70 | |
permissions: | |
contents: 'read' | |
id-token: 'write' | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
ssh-key: ${{ secrets.GH_ACTION_PRIVATE_KEY }} | |
- name: Setup kubectl | |
id: install | |
uses: azure/setup-kubectl@v4 | |
with: | |
version: "v1.27.16" | |
- name: Setup gcloud authentication | |
uses: google-github-actions/auth@v2 | |
with: | |
project_id: ${{ secrets.GCP_WI_PROJECT_ID }} | |
workload_identity_provider: ${{ secrets.GCP_WI_PROVIDER_SA }} | |
service_account: ${{ secrets.GCP_WI_SA }} | |
token_format: access_token | |
- name: Configure gcloud | |
uses: google-github-actions/setup-gcloud@v2 | |
with: | |
version: "469.0.0" | |
- name: Configure gke authentication plugin | |
run: gcloud components install gke-gcloud-auth-plugin --quiet | |
- name: Configure kubectl | |
run: gcloud container clusters get-credentials ${{ secrets.CI_CLUSTER_NAME }} --region ${{ secrets.CI_REGION_NAME }} --project ${{ secrets.CI_GCP_PROJECT_ID }} | |
- name: Create Node Pool | |
run: | | |
NODE_POOL_NAME="systemtest-${{ github.run_id }}" | |
gcloud container node-pools create $NODE_POOL_NAME \ | |
--cluster ${{ secrets.CI_CLUSTER_NAME }} \ | |
--num-nodes 2 \ | |
--preemptible \ | |
--location ${{ secrets.CI_REGION_NAME }} \ | |
--machine-type ${{ secrets.CI_NODE_MACHINE_TYPE }} \ | |
--disk-type pd-ssd \ | |
--disk-size 300GB \ | |
--image-type COS_CONTAINERD \ | |
--enable-autorepair \ | |
--no-enable-autoupgrade \ | |
--node-labels env=dev,cluster=${{ secrets.CI_CLUSTER_NAME }},pipeline-id=${{ github.run_id }} \ | |
--metadata disable-legacy-endpoints=true \ | |
--service-account ${{ secrets.CI_GKE_NODEPOOL_SA }} \ | |
--project ${{ secrets.CI_GCP_PROJECT_ID }} \ | |
--quiet | |
echo "Node pool created: $NODE_POOL_NAME" | |
- name: Login to Docker Hub | |
uses: docker/login-action@v3 | |
with: | |
username: ${{ secrets.DOCKER_USERNAME }} | |
password: ${{ secrets.DOCKER_PASSWORD }} | |
- uses: extractions/netrc@v2 | |
with: | |
machine: github.com | |
username: ${{ secrets.GH_ACTION_TOKEN_USER }} | |
password: ${{ secrets.GH_ACTION_TOKEN }} | |
if: vars.GOPRIVATE | |
- name: Push go-spacemesh build to docker hub | |
run: make dockerpush | |
- name: Push go-bootstrapper build to docker hub | |
run: make dockerpush-bs | |
- name: Get commit hash | |
id: vars | |
shell: bash | |
run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT | |
- uses: extractions/netrc@v2 | |
with: | |
machine: github.com | |
username: ${{ secrets.GH_ACTION_TOKEN_USER }} | |
password: ${{ secrets.GH_ACTION_TOKEN }} | |
if: vars.GOPRIVATE | |
- name: Build tests docker image | |
run: make -C systest docker | |
- name: Push tests docker images | |
run: make -C systest push | |
- name: set up go | |
uses: actions/setup-go@v5 | |
with: | |
check-latest: true | |
go-version-file: "go.mod" | |
- name: Run tests | |
env: | |
test_id: systest-${{ steps.vars.outputs.sha_short }} | |
storage: premium-rwo=10Gi | |
node_selector: pipeline-id=${{ github.run_id }} | |
size: 20 | |
bootstrap: 4m | |
level: ${{ inputs.log_level }} | |
clusters: 4 | |
norbac: 1 | |
run: make -C systest run test_name=${{ inputs.test_name }} | |
- name: Delete pod | |
if: always() | |
env: | |
test_id: systest-${{ steps.vars.outputs.sha_short }} | |
run: make -C systest clean | |
- name: Delete Node Pool | |
if: always() | |
run: | | |
NODE_POOL_NAME="systemtest-${{ github.run_id }}" | |
gcloud container node-pools delete $NODE_POOL_NAME \ | |
--cluster ${{ secrets.CI_CLUSTER_NAME }} \ | |
--location ${{ secrets.CI_REGION_NAME }} \ | |
--project ${{ secrets.CI_GCP_PROJECT_ID }} \ | |
--quiet | |
echo "Node pool deleted: $NODE_POOL_NAME" | |
systest-status: | |
if: always() | |
needs: | |
- filter-changes | |
- systest | |
runs-on: ubuntu-22.04 | |
env: | |
# short-circuit success if no non-doc files were modified | |
status: ${{ (needs.filter-changes.outputs.nondocchanges == 'false' || needs.systest.result == 'success') && 'success' || 'failure' }} | |
steps: | |
# print a single, clean status update to slack | |
- uses: act10ns/slack@v2 | |
name: Slack notification | |
# skip if the secret is not accessible | |
if: env.SLACK_WEBHOOK_URL | |
with: | |
status: ${{ env.status }} | |
- name: Mark the job as succeeded | |
if: env.status == 'success' | |
run: exit 0 | |
- name: Mark the job as failed | |
if: env.status != 'success' | |
run: exit 1 |