From 1898ec3f7834ec679964e51b4589ef85519a6802 Mon Sep 17 00:00:00 2001
From: Rintaro Okamura
Date: Tue, 22 Dec 2020 10:28:03 +0900
Subject: [PATCH] :construction_worker: Add E2E Chaos tests running on Actions

Signed-off-by: Rintaro Okamura
---
 .github/helm/values/values-chaos.yaml         | 124 +++++
 .github/workflows/e2e-chaos.yaml              | 494 ++++++++++++++++++
 tests/chaos/chart/.helmignore                 |  23 +
 tests/chaos/chart/Chart.yaml                  |  22 +
 tests/chaos/chart/templates/NOTES.txt         |   0
 tests/chaos/chart/templates/_helpers.tpl      |  51 ++
 .../chart/templates/network/partition.yaml    |  30 ++
 tests/chaos/chart/templates/pod/failure.yaml  |  31 ++
 tests/chaos/chart/templates/pod/kill.yaml     |  30 ++
 tests/chaos/chart/values.yaml                 |  51 ++
 10 files changed, 856 insertions(+)
 create mode 100644 .github/helm/values/values-chaos.yaml
 create mode 100644 .github/workflows/e2e-chaos.yaml
 create mode 100644 tests/chaos/chart/.helmignore
 create mode 100644 tests/chaos/chart/Chart.yaml
 create mode 100644 tests/chaos/chart/templates/NOTES.txt
 create mode 100644 tests/chaos/chart/templates/_helpers.tpl
 create mode 100644 tests/chaos/chart/templates/network/partition.yaml
 create mode 100644 tests/chaos/chart/templates/pod/failure.yaml
 create mode 100644 tests/chaos/chart/templates/pod/kill.yaml
 create mode 100644 tests/chaos/chart/values.yaml

diff --git a/.github/helm/values/values-chaos.yaml b/.github/helm/values/values-chaos.yaml
new file mode 100644
index 00000000000..a4299d730a8
--- /dev/null
+++ b/.github/helm/values/values-chaos.yaml
@@ -0,0 +1,124 @@
+#
+# Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+gateway:
+  minReplicas: 3
+  hpa:
+    enabled: false
+  resources:
+    requests:
+      cpu: 100m
+      memory: 50Mi
+  gateway_config:
+    index_replica: 3
+
+agent:
+  minReplicas: 3
+  maxReplicas: 10
+  podManagementPolicy: Parallel
+  hpa:
+    enabled: false
+  resources:
+    requests:
+      cpu: 100m
+      memory: 50Mi
+  ngt:
+    auto_index_duration_limit: 3m
+    auto_index_check_duration: 1m
+    auto_index_length: 1000
+    dimension: 784
+
+discoverer:
+  minReplicas: 3
+  hpa:
+    enabled: false
+  resources:
+    requests:
+      cpu: 100m
+      memory: 50Mi
+
+compressor:
+  minReplicas: 3
+  hpa:
+    enabled: false
+  resources:
+    requests:
+      cpu: 100m
+      memory: 50Mi
+  compress:
+    compress_algorithm: gob
+
+backupManager:
+  minReplicas: 3
+  hpa:
+    enabled: false
+  resources:
+    requests:
+      cpu: 100m
+      memory: 30Mi
+  image:
+    repository: vdaas/vald-manager-backup-cassandra
+  initContainers:
+    - type: wait-for-cassandra
+      name: wait-for-scylla
+      image: cassandra:latest
+      cassandra:
+        hosts:
+          - scylla-0.scylla.default.svc.cluster.local
+      sleepDuration: 2
+  env: []
+  mysql:
+    enabled: false
+  cassandra:
+    enabled: true
+    config:
+      hosts:
+        - scylla-0.scylla.default.svc.cluster.local
+      consistency: one
+
+indexManager:
+  replicas: 3
+  resources:
+    requests:
+      cpu: 100m
+      memory: 30Mi
+
+meta:
+  minReplicas: 3
+  hpa:
+    enabled: false
+  resources:
+    requests:
+      cpu: 100m
+      memory: 30Mi
+  image:
+    repository: vdaas/vald-meta-cassandra
+  initContainers:
+    - type: wait-for-cassandra
+      name: wait-for-scylla
+      image: cassandra:latest
+      cassandra:
+        hosts:
+          - scylla-0.scylla.default.svc.cluster.local
+      sleepDuration: 2
+  env: []
+  redis:
+    enabled: false
+  cassandra:
+    enabled: true
+    config:
+      hosts:
+        - scylla-0.scylla.default.svc.cluster.local
+      consistency: one
diff --git a/.github/workflows/e2e-chaos.yaml b/.github/workflows/e2e-chaos.yaml
new file mode 100644
index 00000000000..fdf84e7a447
--- /dev/null
+++ b/.github/workflows/e2e-chaos.yaml
@@ -0,0 +1,494 @@
+# Runs the Vald E2E tests on a k3d cluster while Chaos Mesh injects faults.
+# Triggered by version-like tag pushes or by labelling a pull request.
+name: "Run E2E chaos test"
+on:
+  push:
+    tags:
+      - "*.*.*"
+      - "v*.*.*"
+      - "*.*.*-*"
+      - "v*.*.*-*"
+  pull_request:
+    types:
+      - "labeled"
+
+jobs:
+  agent-failure:
+    name: "E2E chaos test (Agent failure)"
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    if: startsWith( github.ref, 'refs/tags/') || github.event.action == 'labeled' && github.event.label.name == 'actions/e2e-chaos'
+    steps:
+      - uses: actions/checkout@v2
+      - name: wait for dockers
+        if: startsWith( github.ref, 'refs/tags/')
+        run: |
+          tag=$(cat versions/VALD_VERSION)
+          for image in \
+            vdaas/vald-agent-ngt \
+            vdaas/vald-discoverer-k8s \
+            vdaas/vald-manager-compressor \
+            vdaas/vald-meta-cassandra \
+            vdaas/vald-manager-backup-cassandra \
+            vdaas/vald-gateway \
+            vdaas/vald-manager-index
+          do
+            echo "searching ${image}:${tag}"
+            until curl -s "https://registry.hub.docker.com/v2/repositories/${image}/tags/${tag}" | jq '.name' | grep -v "null"; do
+              echo "waiting for ${image}:${tag} to be uploaded..."
+              sleep 2
+            done
+          done
+      - name: Specify container versions
+        if: github.event.action == 'labeled' && github.event.label.name == 'actions/e2e-chaos'
+        run: |
+          pr_num=`cat $GITHUB_EVENT_PATH | jq -r ".number"`
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-agent-ngt/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set agent.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-discoverer-k8s/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set discoverer.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-manager-compressor/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set compressor.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-meta-cassandra/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set meta.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-manager-backup-cassandra/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set backupManager.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-gateway/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set gateway.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-manager-index/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set indexManager.image.tag=pr-${pr_num}"
+          fi
+          echo "HELM_EXTRA_OPTIONS=${HELM_EXTRA_OPTIONS}"
+          echo "HELM_EXTRA_OPTIONS=${HELM_EXTRA_OPTIONS}" >> $GITHUB_ENV
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y libhdf5-dev
+      - name: Fetch Helm version
+        run: |
+          HELM_VERSION=`make version/helm`
+          # Publish the version as the `helm` output of this step (id: version).
+          echo "helm=${HELM_VERSION}" >> $GITHUB_OUTPUT
+        id: version
+      - uses: rinx/setup-k3d@v0.0.2
+        with:
+          version: latest
+          name: vald
+          agents: 3
+      - name: check k3d
+        run: |
+          kubectl cluster-info
+      - uses: azure/setup-helm@v1
+        with:
+          version: ${{ steps.version.outputs.helm }}
+      - name: Helm version
+        run: |
+          helm version
+      - name: deploy vald
+        run: |
+          make k8s/external/scylla/deploy
+          jq_query='.items[] | select( ([ .status.conditions[] | select( .type == "Ready" and .status == "True" ) ] | length ) == 1) | .metadata.namespace + "/" + .metadata.name'
+          until [ $(kubectl get pod -o json | jq -r "$jq_query" | wc -l) -ge 1 ]
+          do
+            echo "waiting for databases to be ready..."
+            kubectl get pods
+            sleep 2
+          done
+          helm repo add vald https://vald.vdaas.org/charts
+          tag=$(cat versions/VALD_VERSION)
+          helm install \
+            --values .github/helm/values/values-chaos.yaml \
+            --set defaults.image.tag=${tag} \
+            ${HELM_EXTRA_OPTIONS} \
+            vald-cluster vald/vald
+          until [ $(kubectl get pod -o json | jq -r "$jq_query" | wc -l) -ge 22 ]
+          do
+            echo "waiting for Vald to be ready..."
+            kubectl get pods
+            sleep 2
+          done
+          kubectl get pods
+      - name: run Insert job
+        run: |
+          make hack/benchmark/assets/dataset/${DATASET}
+          podname=`kubectl get pods --selector=app=vald-gateway | tail -1 | awk '{print $1}'`
+          go test \
+            -v tests/e2e/crud_test.go \
+            -tags "e2e" \
+            -run "TestE2EInsert" \
+            -timeout 15m \
+            -host=localhost \
+            -port=8081 \
+            -dataset=`pwd`/hack/benchmark/assets/dataset/${DATASET} \
+            -insert-num=1000 \
+            -wait-after-insert=2m \
+            -portforward \
+            -portforward-ns=default \
+            -portforward-pod-name=${podname} \
+            -portforward-pod-port=8081 \
+            -kubeconfig=${KUBECONFIG}
+        env:
+          DATASET: fashion-mnist-784-euclidean.hdf5
+      - name: deploy Chaos Mesh
+        run: |
+          helm repo add chaos-mesh https://charts.chaos-mesh.org
+          curl -sSL https://mirrors.chaos-mesh.org/v1.0.3/crd.yaml | kubectl apply -f -
+          kubectl create ns chaos-testing
+          helm install \
+            --namespace=chaos-testing \
+            --set chaosDaemon.runtime=containerd \
+            --set chaosDaemon.socketPath=/run/k3s/containerd/containerd.sock \
+            --set webhook.certManager.enabled=true \
+            chaos-mesh chaos-mesh/chaos-mesh
+          helm install \
+            --set podChaos.failure.enabled=true \
+            vald-chaos-test tests/chaos/chart
+      - name: run Insert and Search jobs
+        run: |
+          make hack/benchmark/assets/dataset/${DATASET}
+          podname=`kubectl get pods --selector=app=vald-gateway | tail -1 | awk '{print $1}'`
+          go test \
+            -v tests/e2e/crud_test.go \
+            -tags "e2e" \
+            -run "TestE2EInsert|TestE2ESearch" \
+            -timeout 15m \
+            -host=localhost \
+            -port=8081 \
+            -dataset=`pwd`/hack/benchmark/assets/dataset/${DATASET} \
+            -insert-from=1000 \
+            -insert-num=10000 \
+            -search-num=10000 \
+            -wait-after-insert=2m \
+            -portforward \
+            -portforward-ns=default \
+            -portforward-pod-name=${podname} \
+            -portforward-pod-port=8081 \
+            -kubeconfig=${KUBECONFIG}
+        env:
+          DATASET: fashion-mnist-784-euclidean.hdf5
+  random-pod-failure:
+    name: "E2E chaos test (random Pod failure)"
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    if: startsWith( github.ref, 'refs/tags/') || github.event.action == 'labeled' && github.event.label.name == 'actions/e2e-chaos'
+    steps:
+      - uses: actions/checkout@v2
+      - name: wait for dockers
+        if: startsWith( github.ref, 'refs/tags/')
+        run: |
+          tag=$(cat versions/VALD_VERSION)
+          for image in \
+            vdaas/vald-agent-ngt \
+            vdaas/vald-discoverer-k8s \
+            vdaas/vald-manager-compressor \
+            vdaas/vald-meta-cassandra \
+            vdaas/vald-manager-backup-cassandra \
+            vdaas/vald-gateway \
+            vdaas/vald-manager-index
+          do
+            echo "searching ${image}:${tag}"
+            until curl -s "https://registry.hub.docker.com/v2/repositories/${image}/tags/${tag}" | jq '.name' | grep -v "null"; do
+              echo "waiting for ${image}:${tag} to be uploaded..."
+              sleep 2
+            done
+          done
+      - name: Specify container versions
+        if: github.event.action == 'labeled' && github.event.label.name == 'actions/e2e-chaos'
+        run: |
+          pr_num=`cat $GITHUB_EVENT_PATH | jq -r ".number"`
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-agent-ngt/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set agent.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-discoverer-k8s/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set discoverer.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-manager-compressor/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set compressor.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-meta-cassandra/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set meta.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-manager-backup-cassandra/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set backupManager.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-gateway/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set gateway.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-manager-index/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set indexManager.image.tag=pr-${pr_num}"
+          fi
+          echo "HELM_EXTRA_OPTIONS=${HELM_EXTRA_OPTIONS}"
+          echo "HELM_EXTRA_OPTIONS=${HELM_EXTRA_OPTIONS}" >> $GITHUB_ENV
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y libhdf5-dev
+      - name: Fetch Helm version
+        run: |
+          HELM_VERSION=`make version/helm`
+          # Publish the version as the `helm` output of this step (id: version).
+          echo "helm=${HELM_VERSION}" >> $GITHUB_OUTPUT
+        id: version
+      - uses: rinx/setup-k3d@v0.0.2
+        with:
+          version: latest
+          name: vald
+          agents: 3
+      - name: check k3d
+        run: |
+          kubectl cluster-info
+      - uses: azure/setup-helm@v1
+        with:
+          version: ${{ steps.version.outputs.helm }}
+      - name: Helm version
+        run: |
+          helm version
+      - name: deploy vald
+        run: |
+          make k8s/external/scylla/deploy
+          jq_query='.items[] | select( ([ .status.conditions[] | select( .type == "Ready" and .status == "True" ) ] | length ) == 1) | .metadata.namespace + "/" + .metadata.name'
+          until [ $(kubectl get pod -o json | jq -r "$jq_query" | wc -l) -ge 1 ]
+          do
+            echo "waiting for databases to be ready..."
+            kubectl get pods
+            sleep 2
+          done
+          helm repo add vald https://vald.vdaas.org/charts
+          tag=$(cat versions/VALD_VERSION)
+          helm install \
+            --values .github/helm/values/values-chaos.yaml \
+            --set defaults.image.tag=${tag} \
+            ${HELM_EXTRA_OPTIONS} \
+            vald-cluster vald/vald
+          until [ $(kubectl get pod -o json | jq -r "$jq_query" | wc -l) -ge 22 ]
+          do
+            echo "waiting for Vald to be ready..."
+            kubectl get pods
+            sleep 2
+          done
+          kubectl get pods
+      - name: run Insert job
+        run: |
+          make hack/benchmark/assets/dataset/${DATASET}
+          podname=`kubectl get pods --selector=app=vald-gateway | tail -1 | awk '{print $1}'`
+          go test \
+            -v tests/e2e/crud_test.go \
+            -tags "e2e" \
+            -run "TestE2EInsert" \
+            -timeout 15m \
+            -host=localhost \
+            -port=8081 \
+            -dataset=`pwd`/hack/benchmark/assets/dataset/${DATASET} \
+            -insert-num=1000 \
+            -wait-after-insert=2m \
+            -portforward \
+            -portforward-ns=default \
+            -portforward-pod-name=${podname} \
+            -portforward-pod-port=8081 \
+            -kubeconfig=${KUBECONFIG}
+        env:
+          DATASET: fashion-mnist-784-euclidean.hdf5
+      - name: deploy Chaos Mesh
+        run: |
+          helm repo add chaos-mesh https://charts.chaos-mesh.org
+          curl -sSL https://mirrors.chaos-mesh.org/v1.0.3/crd.yaml | kubectl apply -f -
+          kubectl create ns chaos-testing
+          helm install \
+            --namespace=chaos-testing \
+            --set chaosDaemon.runtime=containerd \
+            --set chaosDaemon.socketPath=/run/k3s/containerd/containerd.sock \
+            --set webhook.certManager.enabled=true \
+            chaos-mesh chaos-mesh/chaos-mesh
+          # Setting the chart's default `component` label to null removes it from
+          # the selector, so the failure can hit any Vald Pod in `default`.
+          helm install \
+            --set podChaos.failure.enabled=true \
+            --set 'podChaos.failure.selector.labelSelectors.app\.kubernetes\.io/component=null' \
+            vald-chaos-test tests/chaos/chart
+      - name: run Insert and Search jobs
+        run: |
+          make hack/benchmark/assets/dataset/${DATASET}
+          podname=`kubectl get pods --selector=app=vald-gateway | tail -1 | awk '{print $1}'`
+          go test \
+            -v tests/e2e/crud_test.go \
+            -tags "e2e" \
+            -run "TestE2EInsert|TestE2ESearch" \
+            -timeout 15m \
+            -host=localhost \
+            -port=8081 \
+            -dataset=`pwd`/hack/benchmark/assets/dataset/${DATASET} \
+            -insert-from=1000 \
+            -insert-num=10000 \
+            -search-num=10000 \
+            -wait-after-insert=2m \
+            -portforward \
+            -portforward-ns=default \
+            -portforward-pod-name=${podname} \
+            -portforward-pod-port=8081 \
+            -kubeconfig=${KUBECONFIG}
+        env:
+          DATASET: fashion-mnist-784-euclidean.hdf5
+  meta-network-partition:
+    name: "E2E chaos test (meta network partition)"
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    if: startsWith( github.ref, 'refs/tags/') || github.event.action == 'labeled' && github.event.label.name == 'actions/e2e-chaos'
+    steps:
+      - uses: actions/checkout@v2
+      - name: wait for dockers
+        if: startsWith( github.ref, 'refs/tags/')
+        run: |
+          tag=$(cat versions/VALD_VERSION)
+          for image in \
+            vdaas/vald-agent-ngt \
+            vdaas/vald-discoverer-k8s \
+            vdaas/vald-manager-compressor \
+            vdaas/vald-meta-cassandra \
+            vdaas/vald-manager-backup-cassandra \
+            vdaas/vald-gateway \
+            vdaas/vald-manager-index
+          do
+            echo "searching ${image}:${tag}"
+            until curl -s "https://registry.hub.docker.com/v2/repositories/${image}/tags/${tag}" | jq '.name' | grep -v "null"; do
+              echo "waiting for ${image}:${tag} to be uploaded..."
+              sleep 2
+            done
+          done
+      - name: Specify container versions
+        if: github.event.action == 'labeled' && github.event.label.name == 'actions/e2e-chaos'
+        run: |
+          pr_num=`cat $GITHUB_EVENT_PATH | jq -r ".number"`
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-agent-ngt/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set agent.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-discoverer-k8s/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set discoverer.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-manager-compressor/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set compressor.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-meta-cassandra/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set meta.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-manager-backup-cassandra/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set backupManager.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-gateway/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set gateway.image.tag=pr-${pr_num}"
+          fi
+          if curl -s "https://registry.hub.docker.com/v2/repositories/vdaas/vald-manager-index/tags/pr-${pr_num}" | jq '.name' | grep -v "null"; then
+            export HELM_EXTRA_OPTIONS="${HELM_EXTRA_OPTIONS} --set indexManager.image.tag=pr-${pr_num}"
+          fi
+          echo "HELM_EXTRA_OPTIONS=${HELM_EXTRA_OPTIONS}"
+          echo "HELM_EXTRA_OPTIONS=${HELM_EXTRA_OPTIONS}" >> $GITHUB_ENV
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y libhdf5-dev
+      - name: Fetch Helm version
+        run: |
+          HELM_VERSION=`make version/helm`
+          # Publish the version as the `helm` output of this step (id: version).
+          echo "helm=${HELM_VERSION}" >> $GITHUB_OUTPUT
+        id: version
+      - uses: rinx/setup-k3d@v0.0.2
+        with:
+          version: latest
+          name: vald
+          agents: 3
+      - name: check k3d
+        run: |
+          kubectl cluster-info
+      - uses: azure/setup-helm@v1
+        with:
+          version: ${{ steps.version.outputs.helm }}
+      - name: Helm version
+        run: |
+          helm version
+      - name: deploy vald
+        run: |
+          make k8s/external/scylla/deploy
+          jq_query='.items[] | select( ([ .status.conditions[] | select( .type == "Ready" and .status == "True" ) ] | length ) == 1) | .metadata.namespace + "/" + .metadata.name'
+          until [ $(kubectl get pod -o json | jq -r "$jq_query" | wc -l) -ge 1 ]
+          do
+            echo "waiting for databases to be ready..."
+            kubectl get pods
+            sleep 2
+          done
+          helm repo add vald https://vald.vdaas.org/charts
+          tag=$(cat versions/VALD_VERSION)
+          helm install \
+            --values .github/helm/values/values-chaos.yaml \
+            --set defaults.image.tag=${tag} \
+            ${HELM_EXTRA_OPTIONS} \
+            vald-cluster vald/vald
+          until [ $(kubectl get pod -o json | jq -r "$jq_query" | wc -l) -ge 22 ]
+          do
+            echo "waiting for Vald to be ready..."
+            kubectl get pods
+            sleep 2
+          done
+          kubectl get pods
+      - name: deploy Chaos Mesh
+        run: |
+          helm repo add chaos-mesh https://charts.chaos-mesh.org
+          curl -sSL https://mirrors.chaos-mesh.org/v1.0.3/crd.yaml | kubectl apply -f -
+          kubectl create ns chaos-testing
+          helm install \
+            --namespace=chaos-testing \
+            --set chaosDaemon.runtime=containerd \
+            --set chaosDaemon.socketPath=/run/k3s/containerd/containerd.sock \
+            --set webhook.certManager.enabled=true \
+            chaos-mesh chaos-mesh/chaos-mesh
+          helm install \
+            --set networkChaos.partition.enabled=true \
+            vald-chaos-test tests/chaos/chart
+      - name: run Insert and Search jobs
+        run: |
+          make hack/benchmark/assets/dataset/${DATASET}
+          podname=`kubectl get pods --selector=app=vald-gateway | tail -1 | awk '{print $1}'`
+          go test \
+            -v tests/e2e/crud_test.go \
+            -tags "e2e" \
+            -run "TestE2EInsert|TestE2ESearch" \
+            -timeout 15m \
+            -host=localhost \
+            -port=8081 \
+            -dataset=`pwd`/hack/benchmark/assets/dataset/${DATASET} \
+            -insert-num=10000 \
+            -search-num=10000 \
+            -wait-after-insert=2m \
+            -portforward \
+            -portforward-ns=default \
+            -portforward-pod-name=${podname} \
+            -portforward-pod-port=8081 \
+            -kubeconfig=${KUBECONFIG}
+        env:
+          DATASET: fashion-mnist-784-euclidean.hdf5
+  slack-notification:
+    name: "Slack notification"
+    needs:
+      - agent-failure
+      - random-pod-failure
+      - meta-network-partition
+    runs-on: ubuntu-latest
+    # always() keeps this job from being skipped when one of the needed jobs fails.
+    if: always() && startsWith( github.ref, 'refs/tags/')
+    steps:
+      - uses: technote-space/workflow-conclusion-action@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - uses: 8398a7/action-slack@v2
+        with:
+          author_name: "E2E chaos test"
+          status: ${{ env.WORKFLOW_CONCLUSION }}
+          only_mention_fail: channel
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_NOTIFY_WEBHOOK_URL }}
diff --git a/tests/chaos/chart/.helmignore b/tests/chaos/chart/.helmignore
new file mode 100644
index 00000000000..0e8a0eb36f4
--- /dev/null
+++ b/tests/chaos/chart/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/tests/chaos/chart/Chart.yaml b/tests/chaos/chart/Chart.yaml
new file mode 100644
index 00000000000..68d1499e1a8
--- /dev/null
+++ b/tests/chaos/chart/Chart.yaml
@@ -0,0 +1,22 @@
+#
+# Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+apiVersion: v2
+name: vald-chaos-test
+description: A Helm chart for testing Vald using Chaos Mesh.
+type: application
+version: 1.0.0
+appVersion: 1.0.0
diff --git a/tests/chaos/chart/templates/NOTES.txt b/tests/chaos/chart/templates/NOTES.txt
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/chaos/chart/templates/_helpers.tpl b/tests/chaos/chart/templates/_helpers.tpl
new file mode 100644
index 00000000000..b4c38025b99
--- /dev/null
+++ b/tests/chaos/chart/templates/_helpers.tpl
@@ -0,0 +1,51 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "chart.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "chart.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "chart.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "chart.labels" -}}
+helm.sh/chart: {{ include "chart.chart" . }}
+{{ include "chart.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "chart.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "chart.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
diff --git a/tests/chaos/chart/templates/network/partition.yaml b/tests/chaos/chart/templates/network/partition.yaml
new file mode 100644
index 00000000000..3bd04270a0a
--- /dev/null
+++ b/tests/chaos/chart/templates/network/partition.yaml
@@ -0,0 +1,30 @@
+#
+# Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+{{- if .Values.networkChaos.partition.enabled }}
+apiVersion: chaos-mesh.org/v1alpha1
+kind: NetworkChaos
+metadata:
+  name: {{ .Values.networkChaos.partition.name }}-{{ .Release.Name }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  action: partition
+  mode: one
+  duration: "{{ .Values.networkChaos.partition.duration }}"
+  selector:
+    {{- toYaml .Values.networkChaos.partition.selector | nindent 4 }}
+  scheduler:
+    cron: "{{ .Values.networkChaos.partition.cron }}"
+{{- end }}
diff --git a/tests/chaos/chart/templates/pod/failure.yaml b/tests/chaos/chart/templates/pod/failure.yaml
new file mode 100644
index 00000000000..04c6f8728ac
--- /dev/null
+++ b/tests/chaos/chart/templates/pod/failure.yaml
@@ -0,0 +1,31 @@
+#
+# Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+{{- if .Values.podChaos.failure.enabled }}
+apiVersion: chaos-mesh.org/v1alpha1
+kind: PodChaos
+metadata:
+  name: {{ .Values.podChaos.failure.name }}-{{ .Release.Name }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  action: pod-failure
+  mode: one
+  value: ''
+  duration: "{{ .Values.podChaos.failure.duration }}"
+  selector:
+    {{- toYaml .Values.podChaos.failure.selector | nindent 4 }}
+  scheduler:
+    cron: "{{ .Values.podChaos.failure.cron }}"
+{{- end }}
diff --git a/tests/chaos/chart/templates/pod/kill.yaml b/tests/chaos/chart/templates/pod/kill.yaml
new file mode 100644
index 00000000000..ae8ca9f3aee
--- /dev/null
+++ b/tests/chaos/chart/templates/pod/kill.yaml
@@ -0,0 +1,30 @@
+#
+# Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+{{- if .Values.podChaos.kill.enabled }}
+apiVersion: chaos-mesh.org/v1alpha1
+kind: PodChaos
+metadata:
+  name: {{ .Values.podChaos.kill.name }}-{{ .Release.Name }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  action: pod-kill
+  mode: one
+  value: ''
+  selector:
+    {{- toYaml .Values.podChaos.kill.selector | nindent 4 }}
+  scheduler:
+    cron: "{{ .Values.podChaos.kill.cron }}"
+{{- end }}
diff --git a/tests/chaos/chart/values.yaml b/tests/chaos/chart/values.yaml
new file mode 100644
index 00000000000..5bf5332c26e
--- /dev/null
+++ b/tests/chaos/chart/values.yaml
@@ -0,0 +1,51 @@
+#
+# Copyright (C) 2019-2020 Vdaas.org Vald team ( kpango, rinx, kmrmt )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+podChaos:
+  failure:
+    enabled: false
+    name: vald-pod-failure
+    selector:
+      labelSelectors:
+        app.kubernetes.io/name: vald
+        app.kubernetes.io/component: agent
+      namespaces:
+        - default
+    duration: "10s"
+    cron: "@every 20s"
+  kill:
+    enabled: false
+    name: vald-pod-kill
+    selector:
+      labelSelectors:
+        app.kubernetes.io/name: vald
+        app.kubernetes.io/component: agent
+      namespaces:
+        - default
+    cron: "@every 2m"
+
+networkChaos:
+  partition:
+    enabled: false
+    name: vald-network-partition
+    selector:
+      labelSelectors:
+        app.kubernetes.io/name: vald
+        app.kubernetes.io/component: meta
+      namespaces:
+        - default
+    duration: "100ms"
+    cron: "@every 3s"