Skip to content

Commit

Permalink
Merge pull request #27 from caesarxuchao/chao-testing
Browse files Browse the repository at this point in the history
Chaos testing
  • Loading branch information
k8s-ci-robot committed May 1, 2019
2 parents a8e4a15 + 70a5487 commit c355c28
Show file tree
Hide file tree
Showing 6 changed files with 511 additions and 32 deletions.
158 changes: 158 additions & 0 deletions test/e2e/chaosmonkey/chaosmonkey.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Copied from k8s.io/kubernetes/test/e2e/chaosmonkey/chaosmonkey.go

package chaosmonkey

import . "github.com/onsi/ginkgo"

// Disruption is the type to construct a chaosmonkey with: a function that takes no arguments and
// returns nothing. Do invokes it once, after every registered Test has either signalled that it
// is ready or has already returned. See Do for more information.
type Disruption func()

// Test is the type to register with a chaosmonkey. A Test runs asynchronously (in its own
// goroutine) across the chaosmonkey's Disruption. It receives a Semaphore and should:
//
//  1. call sem.Ready() once it is ready for the disruption to start;
//  2. wait until sem.StopCh (a <-chan struct{}) is closed, which signals that the disruption
//     is over;
//  3. clean up and return.
//
// See Do and Semaphore for more information.
type Test func(sem *Semaphore)

// Interface can be implemented if you prefer to define tests without dealing with a Semaphore.
// Define a type that implements Interface's three methods (Setup, Test, and Teardown) and pass it
// to RegisterInterface, which wraps it in a Test. See RegisterInterface for more information.
type Interface interface {
// Setup is called first, before the test is reported as ready for the disruption.
Setup()
// Test is called after Setup; stopCh is closed once the disruption is over.
Test(stopCh <-chan struct{})
// Teardown is called after Test returns.
Teardown()
}

// chaosmonkey pairs a single Disruption with the Tests that should run across it. Create one
// with New, add tests with Register or RegisterInterface, then call Do.
type chaosmonkey struct {
// disruption is the function Do runs once the registered tests are ready.
disruption Disruption
// tests holds the registered Tests in registration order.
tests []Test
}

// New builds a chaosmonkey around the given disruption, starting with an empty (non-nil) list of
// tests. The caller is expected to register Tests on the result and then call Do; see Do for
// more information.
func New(disruption Disruption) *chaosmonkey {
	cm := &chaosmonkey{
		disruption: disruption,
		tests:      []Test{},
	}
	return cm
}

// Register registers the given Test with the chaosmonkey, so that the test will run over the
// Disruption. Tests are appended to the chaosmonkey's list in the order they are registered.
// Register does no locking of its own.
func (cm *chaosmonkey) Register(test Test) {
cm.tests = append(cm.tests, test)
}

// RegisterInterface adapts the given Interface into a Test and registers it with the chaosmonkey.
// The resulting Test runs in.Setup, signals readiness through the Semaphore, runs in.Test with the
// Semaphore's StopCh (closed once the Disruption is finished), and finally runs in.Teardown.
//
// The steps run strictly in sequence, so if Setup or Test panics, the later steps (including
// Teardown) are not executed.
func (cm *chaosmonkey) RegisterInterface(in Interface) {
	wrapped := func(sem *Semaphore) {
		in.Setup()
		sem.Ready()
		in.Test(sem.StopCh)
		in.Teardown()
	}
	cm.Register(wrapped)
}

// Do performs the Disruption while the registered Tests are running. Call it once all Tests have
// been registered. Do proceeds in four phases:
//
//  1. start every registered Test in its own goroutine;
//  2. wait until each Test has either called sem.Ready() or returned;
//  3. run the Disruption;
//  4. close the shared StopCh to tell the Tests the Disruption is over, then wait for every Test
//     to return.
func (cm *chaosmonkey) Do() {
	// A single stop channel is shared by all semaphores, so one close notifies every test.
	stopCh := make(chan struct{})
	sems := []*Semaphore{}

	for _, test := range cm.tests {
		sem := newSemaphore(stopCh)
		sems = append(sems, sem)
		// The per-iteration values are handed over as arguments so each goroutine works on its
		// own test and semaphore.
		go func(run Test, s *Semaphore) {
			defer GinkgoRecover()
			defer s.done()
			run(s)
		}(test, sem)
	}

	By("Waiting for all async tests to be ready")
	for i := range sems {
		// Wait for ready *or done*: a test may panic before signaling that it's ready, and we
		// must not block forever on it. Because done() is deferred in the goroutine above, a
		// test that panics is still marked as done.
		sems[i].waitForReadyOrDone()
	}

	// Deferred so that the tests are released and awaited even when the disruption panics.
	defer func() {
		close(stopCh)
		By("Waiting for async validations to complete")
		for i := range sems {
			sems[i].waitForDone()
		}
	}()

	By("Starting disruption")
	cm.disruption()
	By("Disruption complete; stopping async validations")
}

// Semaphore is taken by a Test and provides: Ready(), for the Test to call when it's ready for the
// disruption to start; and StopCh, the closure of which signals to the Test that the disruption is
// finished.
type Semaphore struct {
// readyCh is closed by Ready to signal that the Test is ready for the disruption.
readyCh chan struct{}
// StopCh is the receive-only stop channel handed to newSemaphore; Do closes it once the
// disruption is over.
StopCh <-chan struct{}
// doneCh is closed by done to signal that the Test's goroutine has finished.
doneCh chan struct{}
}

// newSemaphore returns a Semaphore whose StopCh is the given stopCh and whose ready and done
// channels are freshly allocated.
//
// The ready and done channels are unbuffered on purpose: Ready() and done() only ever close them,
// and closing a channel never blocks, so a buffer would serve no purpose.
func newSemaphore(stopCh <-chan struct{}) *Semaphore {
	return &Semaphore{
		readyCh: make(chan struct{}),
		StopCh:  stopCh,
		doneCh:  make(chan struct{}),
	}
}

// Ready is called by the Test to signal that the Test is ready for the disruption to start. It
// does so by closing the Semaphore's ready channel, so it must be called at most once per
// Semaphore: a second call would close an already-closed channel, which panics.
func (sem *Semaphore) Ready() {
close(sem.readyCh)
}

// done is an internal method that Do defers in each Test's goroutine. Closing the done channel
// serves two purposes: it lets Do wait for all tests to return, and it lets Do detect a test that
// panicked before calling Ready. See waitForReadyOrDone.
func (sem *Semaphore) done() {
close(sem.doneCh)
}

// waitForReadyOrDone blocks until the Test has either called Ready() or returned.
//
// We would like to just check if all tests are ready, but if they fail (which Ginkgo implements as
// a panic), they may not have called Ready(). We check done as well to see if the function has
// already returned; if it has, we don't care if it's ready, and just continue.
func (sem *Semaphore) waitForReadyOrDone() {
select {
case <-sem.readyCh:
case <-sem.doneCh:
}
}

// waitForDone is an internal method that blocks until the Test has returned, i.e. until done()
// has closed the done channel. Do uses it to wait on all Tests returning.
func (sem *Semaphore) waitForDone() {
<-sem.doneCh
}
15 changes: 13 additions & 2 deletions test/e2e/test-cmd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ REGISTRY=""
VERSION=""

TESTFILE="v1beta2-controllerrevision.proto"
# for etcd server that has enabled mTLS
TLS_ARGS="--cacert /etc/srv/kubernetes/pki/etcd-apiserver-ca.crt --cert /etc/srv/kubernetes/pki/etcd-apiserver-client.crt --key /etc/srv/kubernetes/pki/etcd-apiserver-client.key"


function wait-for-migration()
{
Expand Down Expand Up @@ -78,7 +81,7 @@ function wait-for-migration()
verify-version()
{
version=$(gcloud compute --project "${PROJECT}" ssh --zone "${KUBE_GCE_ZONE}" "${CLUSTER_NAME}-master" --command \
"docker exec $1 /bin/sh -c \"ETCDCTL_API=3 etcdctl get /registry/controllerrevisions/default/sample\" | grep -a apps")
"docker exec $1 /bin/sh -c \"ETCDCTL_API=3 etcdctl ${TLS_ARGS} get /registry/controllerrevisions/default/sample\" | grep -a apps")
# Remove the trailing non-printable character. The data is encoded in proto, so
# it has non-printable characters.
version=$(tr -dc '[[:print:]]' <<< "${version}")
Expand Down Expand Up @@ -120,6 +123,7 @@ gcloud auth configure-docker
# create the object via the apiserver, because the apiserver always encodes the
# object to the default storage version before storing in etcd.


# Copy the pre-made proto file of the object to the master machine.
user_name=$(gcloud compute --project "${PROJECT}" ssh --zone "${KUBE_GCE_ZONE}" "${CLUSTER_NAME}-master" --command "whoami")
gcloud compute scp "${MIGRATORROOT}/test/e2e/${TESTFILE}" "${user_name}@${CLUSTER_NAME}-master:~/" --project "${PROJECT}" --zone "${KUBE_GCE_ZONE}"
Expand All @@ -133,9 +137,16 @@ etcd_container=$(echo "${result}" | grep "etcd-server-${CLUSTER_NAME}-master" |
gcloud compute --project "${PROJECT}" ssh --zone "${KUBE_GCE_ZONE}" "${CLUSTER_NAME}-master" --command \
"docker cp ${TESTFILE} ${etcd_container}:/"

# Check if etcd tls is enabled
gcloud compute --project "${PROJECT}" ssh --zone "${KUBE_GCE_ZONE}" "${CLUSTER_NAME}-master" --command \
"cat /etc/kubernetes/manifests/etcd.manifest | grep '\-\-listen-client-urls https:'" && rc=$? || rc=$?
if [[ $rc -ne 0 ]]; then
TLS_ARGS=""
fi

# Create the object via etcdctl
gcloud compute --project "${PROJECT}" ssh --zone "${KUBE_GCE_ZONE}" "${CLUSTER_NAME}-master" --command \
"docker exec ${etcd_container} /bin/sh -c \"cat /${TESTFILE} | ETCDCTL_API=3 etcdctl put /registry/controllerrevisions/default/sample\""
"docker exec ${etcd_container} /bin/sh -c \"cat /${TESTFILE} | ETCDCTL_API=3 etcdctl ${TLS_ARGS} put /registry/controllerrevisions/default/sample\""

#TODO: remove
# Verify that the ControllerRevision is encoded as apps/v1beta2.
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/test-fully-automated.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,5 @@ popd

pushd "${MIGRATOR_ROOT}"
make e2e-test
"${ginkgo}" "${MIGRATOR_ROOT}/test/e2e/e2e.test"
"${ginkgo}" -v "$@" "${MIGRATOR_ROOT}/test/e2e/e2e.test"
popd
Loading

0 comments on commit c355c28

Please sign in to comment.