Skip to content
This repository has been archived by the owner on Nov 24, 2023. It is now read-only.

Commit

Permalink
*: run chaos-mesh operations with GitHub action (#930)
Browse files Browse the repository at this point in the history
  • Loading branch information
csuzhangxc authored Sep 23, 2020
1 parent 4400218 commit 470e902
Show file tree
Hide file tree
Showing 24 changed files with 1,723 additions and 8 deletions.
265 changes: 265 additions & 0 deletions .github/workflows/chaos-mesh.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
name: chaos

# Trigger the workflow on pull request events, but only for pull requests
# that target the master or release-2.0 branches.
on:
  pull_request:
    branches:
      - master
      - release-2.0

# Jobs that make up a workflow run; jobs can run sequentially or in parallel.
jobs:
  # "base" is the only job in this workflow.
  base:
    # Runner image the job executes on.
    runs-on: ubuntu-18.04

    # Ordered list of tasks executed as part of the job.
    steps:
# Install the Go toolchain used to build DM (currently v1.13).
- name: Set up Go 1.13
  uses: actions/setup-go@v2
  with:
    # Quoted so that YAML keeps the version as a string; an unquoted version
    # such as 1.20 would be parsed as the float 1.2.
    go-version: '1.13'
- name: Print Go version
  run: go version

# Check out the repository under $GITHUB_WORKSPACE so later steps can use it.
- name: Check out code
  uses: actions/checkout@v2

# Kubernetes IN Docker (kind) variant, kept for reference:
# - name: Set up kind cluster
#   uses: helm/kind-action@v1.0.0
#   with:
#     cluster_name: dm-chaos
# Install K3s v1.18.9+k3s1 with metrics-server and traefik disabled.
# `${k3s_disable_command:---disable}` expands to `--disable` whenever
# k3s_disable_command is unset or empty.
- name: Set up K3s cluster
  shell: bash
  run: |
    curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=v1.18.9+k3s1 sh -s - \
      --write-kubeconfig-mode=644 \
      "${k3s_disable_command:---disable}" metrics-server \
      "${k3s_disable_command:---disable}" traefik \
      --flannel-backend=none \
      --docker
# Block until the coredns deployment has rolled out (times out after 300s).
- name: Wait for coredns
  env:
    KUBECONFIG: /etc/rancher/k3s/k3s.yaml
  shell: bash
  run: |
    kubectl rollout status --watch --timeout 300s deployment/coredns -n kube-system
# Make KUBECONFIG available to all subsequent steps. The assignment is
# appended to the $GITHUB_ENV environment file because the `::set-env`
# workflow command has been disabled by GitHub.
- name: Export KUBECONFIG environment variable
  run: echo "KUBECONFIG=/etc/rancher/k3s/k3s.yaml" >> "$GITHUB_ENV"
  shell: bash
# Dump the kubeconfig, cluster endpoints, nodes, system pods, storage classes
# and client/server versions into the job log.
- name: Print cluster information
  run: |
    kubectl config view
    kubectl cluster-info
    kubectl get nodes
    kubectl get pods -n kube-system
    kubectl get sc
    kubectl version
# Turn off the AppArmor profile of mysqld, see
# https://github.com/moby/moby/issues/7512#issuecomment-61787845
- name: Disable AppArmor for MySQL
  run: |
    sudo ln -s /etc/apparmor.d/usr.sbin.mysqld /etc/apparmor.d/disable/
    sudo apparmor_parser -R /etc/apparmor.d/usr.sbin.mysqld
# Build the DM binaries and the chaos-case binary through the Makefile.
- name: Build DM binary
  run: make dm-master dm-worker dmctl chaos-case

# NOTE: the config files are copied into the `bin` directory as well, so that
# `bin` alone can be sent to the Docker daemon as the build context.
- name: Build DM docker image
  run: |
    cp $GITHUB_WORKSPACE/chaos/cases/conf/source1.yaml $GITHUB_WORKSPACE/bin/source1.yaml
    cp $GITHUB_WORKSPACE/chaos/cases/conf/source2.yaml $GITHUB_WORKSPACE/bin/source2.yaml
    cp $GITHUB_WORKSPACE/chaos/cases/conf/task-single.yaml $GITHUB_WORKSPACE/bin/task-single.yaml
    docker build -f $GITHUB_WORKSPACE/chaos/manifests/Dockerfile -t dm:chaos $GITHUB_WORKSPACE/bin
    docker image list
# Loading the DM docker image into KIND (kept for reference), see
# https://kind.sigs.k8s.io/docs/user/quick-start/#loading-an-image-into-your-cluster
# - name: Load DM docker image into KIND
#   run: |
#     kind load docker-image dm:chaos --name dm-chaos

# Deploy the upstream MySQL instances.
- name: Set up MySQL
  run: |
    kubectl apply -f $GITHUB_WORKSPACE/chaos/manifests/mysql.yaml
    kubectl get -f $GITHUB_WORKSPACE/chaos/manifests/mysql.yaml
    kubectl describe -f $GITHUB_WORKSPACE/chaos/manifests/mysql.yaml
# `kubectl wait --all` was not working, so each pod is waited for by name.
- name: Wait for MySQL ready
  run: |
    # Best-effort waits: a timeout is tolerated here so that the diagnostics
    # below are always printed.
    kubectl wait --for=condition=Ready pod/mysql-0 --timeout=120s || true
    sleep 10
    kubectl wait --for=condition=Ready pod/mysql-1 --timeout=120s || true
    echo show pvc
    kubectl get pvc -l app=mysql -o wide
    echo show pv
    kubectl get pv -o wide
    echo show svc
    kubectl get svc -l app=mysql -o wide
    echo show sts
    kubectl get sts -l app=mysql -o wide
    echo show po
    kubectl get po -l app=mysql -o wide
    echo describe po
    kubectl describe po -l app=mysql
    echo describe pvc
    kubectl describe pvc -l app=mysql
    # Final check without `|| true`: the step fails if a pod is not Ready.
    kubectl wait --for=condition=Ready pod/mysql-0 --timeout=0s
    kubectl wait --for=condition=Ready pod/mysql-1 --timeout=0s
# Deploy the downstream TiDB instance (a TiDB with mockTiKV, not a TidbCluster
# managed by TiDB-operator).
- name: Set up TiDB
  run: |
    kubectl apply -f $GITHUB_WORKSPACE/chaos/manifests/tidb.yaml
    kubectl get -f $GITHUB_WORKSPACE/chaos/manifests/tidb.yaml
    kubectl describe -f $GITHUB_WORKSPACE/chaos/manifests/tidb.yaml
- name: Wait for TiDB ready
  run: |
    # Best-effort wait: a timeout is tolerated here so that the diagnostics
    # below are always printed.
    kubectl wait --for=condition=Ready pod/tidb-0 --timeout=120s || true
    echo show pvc
    kubectl get pvc -l app=tidb -o wide
    echo show pv
    kubectl get pv -o wide
    echo show svc
    kubectl get svc -l app=tidb -o wide
    echo show sts
    kubectl get sts -l app=tidb -o wide
    echo show po
    kubectl get po -l app=tidb -o wide
    echo describe po
    kubectl describe po -l app=tidb
    echo describe pvc
    kubectl describe pvc -l app=tidb
    # Final check without `|| true`: the step fails if the pod is not Ready.
    kubectl wait --for=condition=Ready pod/tidb-0 --timeout=0s
# Deploy the DM-master members.
- name: Set up DM-master
  run: |
    kubectl apply -f $GITHUB_WORKSPACE/chaos/manifests/dm-master.yaml
    kubectl get -f $GITHUB_WORKSPACE/chaos/manifests/dm-master.yaml
    kubectl describe -f $GITHUB_WORKSPACE/chaos/manifests/dm-master.yaml
# NOTE: even if some DM-master instances are not ready, we still continue and
# let the chaos test cases check again.
- name: Wait for DM-master ready
  run: |
    sleep 10
    kubectl wait --for=condition=Ready pod -l app=dm-master --all --timeout=120s || true
    echo "<<<<< show pvc >>>>>"
    kubectl get pvc -l app=dm-master -o wide
    echo "<<<<< show pv >>>>>"
    kubectl get pv -o wide
    echo "<<<<< show svc >>>>>"
    kubectl get svc -l app=dm-master -o wide
    echo "<<<<< show sts >>>>>"
    kubectl get sts -l app=dm-master -o wide
    echo "<<<<< show po >>>>>"
    kubectl get po -l app=dm-master -o wide
    echo "<<<<< describe po >>>>>"
    kubectl describe po -l app=dm-master
    echo "<<<<< describe pvc >>>>>"
    kubectl describe pvc -l app=dm-master
    # Print the current and the previous container log of dm-master-0..2;
    # a failing `kubectl logs` must not fail the step.
    for i in 0 1 2; do
      echo "<<<<< show current log for dm-master-${i} >>>>>"
      kubectl logs "dm-master-${i}" || true
      echo "<<<<< show previous log for dm-master-${i} >>>>>"
      kubectl logs "dm-master-${i}" -p || true
    done
# Deploy the DM-worker members.
- name: Set up DM-worker
  run: |
    kubectl apply -f $GITHUB_WORKSPACE/chaos/manifests/dm-worker.yaml
    kubectl get -f $GITHUB_WORKSPACE/chaos/manifests/dm-worker.yaml
    kubectl describe -f $GITHUB_WORKSPACE/chaos/manifests/dm-worker.yaml
# NOTE: even if some DM-worker instances are not ready, we still continue and
# let the chaos test cases check again.
- name: Wait for DM-worker ready
  run: |
    sleep 10
    kubectl wait --for=condition=Ready pod -l app=dm-worker --all --timeout=120s || true
    echo "<<<<< show pvc >>>>>"
    kubectl get pvc -l app=dm-worker -o wide
    echo "<<<<< show pv >>>>>"
    kubectl get pv -o wide
    echo "<<<<< show svc >>>>>"
    kubectl get svc -l app=dm-worker -o wide
    echo "<<<<< show sts >>>>>"
    kubectl get sts -l app=dm-worker -o wide
    echo "<<<<< show po >>>>>"
    kubectl get po -l app=dm-worker -o wide
    echo "<<<<< describe po >>>>>"
    kubectl describe po -l app=dm-worker
    echo "<<<<< describe pvc >>>>>"
    kubectl describe pvc -l app=dm-worker
    # Print the current and the previous container log of dm-worker-0..2;
    # a failing `kubectl logs` must not fail the step. Generating the banner
    # from the loop variable keeps every banner in sync with the pod whose
    # log follows it (the previous-log banner of dm-worker-1 used to read
    # "worker-master-1").
    for i in 0 1 2; do
      echo "<<<<< show current log for dm-worker-${i} >>>>>"
      kubectl logs "dm-worker-${i}" || true
      echo "<<<<< show previous log for dm-worker-${i} >>>>>"
      kubectl logs "dm-worker-${i}" -p || true
    done
# NOTE: the test cases wait a while when checking that the members are ready,
# before any chaos operations are applied.
- name: Set up chaos test cases
  run: |
    kubectl apply -f $GITHUB_WORKSPACE/chaos/manifests/cases.yaml
    kubectl get -f $GITHUB_WORKSPACE/chaos/manifests/cases.yaml
    kubectl describe -f $GITHUB_WORKSPACE/chaos/manifests/cases.yaml
    sleep 60
# Base64-encode the chaos manifest on a single line (`-w 0` disables line
# wrapping) and export it as CFG_BASE64 for the following steps. The
# assignment is appended to the $GITHUB_ENV environment file because the
# `::set-env` workflow command has been disabled by GitHub.
- name: Encode chaos-mesh action
  run: |
    echo "CFG_BASE64=$(base64 -w 0 $GITHUB_WORKSPACE/chaos/manifests/pod-failure-dm.yaml)" >> "$GITHUB_ENV"
# Hand the base64-encoded chaos configuration to the chaos-mesh action.
- name: Run chaos mesh action
  env:
    CFG_BASE64: ${{ env.CFG_BASE64 }}
  uses: chaos-mesh/chaos-mesh-action@master

# TODO: fail ASAP for test cases.
# Wait up to 21 minutes for the chaos-test-case job to complete.
- name: Wait for chaos test case complete
  run: |
    kubectl wait --for=condition=complete job/chaos-test-case --timeout=21m
# Always runs, even after a failure: copy the container logs (following
# symlinks via `-L`) into ./logs and hand ownership to the `runner` user,
# presumably so that the upload step below can read them.
- name: Copy logs to hack permission
  if: ${{ always() }}
  run: |
    mkdir ./logs
    sudo cp -r -L /var/log/containers/. ./logs
    sudo chown -R runner ./logs
# Always runs: publish ./logs as an artifact, leaving out the coredns and
# local-path-provisioner logs.
- name: Upload logs
  if: ${{ always() }}
  uses: actions/upload-artifact@v2
  with:
    name: chaos-base-logs
    path: |
      ./logs
      !./logs/coredns-*
      !./logs/local-path-provisioner-*
# Send a Slack notification if a previous step failed.
- name: Slack notification
  if: ${{ failure() }}
  uses: Ilshidur/action-slack@2.1.0
  with:
    args: "chaos job failed, see https://github.com/pingcap/dm/actions/runs/{{ GITHUB_RUN_ID }}"
  env:
    SLACK_WEBHOOK: ${{ secrets.SLACK_NOTIFY }}

# Open a tmate session for debugging via SSH if a previous step failed.
- name: Set up tmate session
  if: ${{ failure() }}
  uses: mxschmitt/action-tmate@v2
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ else
endif

.PHONY: build retool_setup test unit_test dm_integration_test_build integration_test \
coverage check dm-worker dm-master dm-tracer dmctl debug-tools
coverage check dm-worker dm-master dm-tracer chaos-case dmctl debug-tools

build: check dm-worker dm-master dm-tracer dmctl dm-portal dm-syncer

Expand All @@ -62,6 +62,9 @@ dm-master:
dm-tracer:
$(GOBUILD) -ldflags '$(LDFLAGS)' -o bin/dm-tracer ./cmd/dm-tracer

# Build the chaos test cases from ./chaos/cases into bin/chaos-case.
chaos-case:
	$(GOBUILD) -ldflags '$(LDFLAGS)' -o bin/chaos-case ./chaos/cases

dmctl:
$(GOBUILD) -ldflags '$(LDFLAGS)' -o bin/dmctl ./cmd/dm-ctl

Expand Down
7 changes: 7 additions & 0 deletions chaos/cases/conf/source1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# ID of this upstream source.
source-id: "mysql-replica-01"

# Connection settings of the upstream MySQL instance.
from:
  host: "mysql-0.mysql"  # same namespace as MySQL
  user: "root"
  password: ""
  port: 3306
7 changes: 7 additions & 0 deletions chaos/cases/conf/source2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# ID of this upstream source.
source-id: "mysql-replica-02"

# Connection settings of the upstream MySQL instance.
from:
  host: "mysql-1.mysql"  # same namespace as MySQL
  user: "root"
  password: ""
  port: 3306
21 changes: 21 additions & 0 deletions chaos/cases/conf/task-single.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
# Task definition used by the chaos test case.
name: "task_single"
task-mode: all

# Connection settings of the downstream database.
target-database:
  host: "tidb-0.tidb"
  port: 4000
  user: "root"
  password: ""

# Upstream sources handled by this task; `black-white-list` refers to the
# rule set named "instance" defined below.
mysql-instances:
  - source-id: "mysql-replica-01"
    black-white-list: "instance"
    mydumper-thread: 4
    loader-thread: 16
    syncer-thread: 16

# Named filter rule sets; "instance" only lists the db_single schema.
black-white-list:
  instance:
    do-dbs: ["db_single"]
Loading

0 comments on commit 470e902

Please sign in to comment.