Merge pull request #27 from caesarxuchao/chao-testing

Chaos testing
kubernetes-sigs · May 1, 2019 · c355c28 · c355c28
2 parents a8e4a15 + 70a5487
commit c355c28
Show file tree

Hide file tree

Showing 6 changed files with 511 additions and 32 deletions.
diff --git a/test/e2e/chaosmonkey/chaosmonkey.go b/test/e2e/chaosmonkey/chaosmonkey.go
@@ -0,0 +1,158 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Copied from k8s.io/kubernetes/test/e2e/chaosmonkey/chaosmonkey.go
+
+package chaosmonkey
+
+import . "github.com/onsi/ginkgo"
+
+// Disruption is the function a chaosmonkey is constructed with (see New); Do invokes it once.
+type Disruption func()
+
+// Test is the type to register with a chaosmonkey.  Do runs each registered Test in its own
+// goroutine, concurrently with the chaosmonkey's Disruption.  A Test takes a Semaphore as an
+// argument.  It should call sem.Ready() once it's ready for the disruption to start, then wait
+// until sem.StopCh (a <-chan struct{}) is closed, which signals that the disruption is over, and
+// finally clean up and return.  See Do and Semaphore for more information.
+type Test func(sem *Semaphore)
+
+// Interface can be implemented if you prefer to define tests without dealing with a Semaphore.  You
+// may define a struct that implements Interface's three methods (Setup, Test, and Teardown) and
+// pass it to RegisterInterface.  See RegisterInterface for more information.
+type Interface interface {
+	Setup()
+	Test(stopCh <-chan struct{})
+	Teardown()
+}
+
+type chaosmonkey struct {
+	disruption Disruption // run by Do once every test is ready or has already returned
+	tests      []Test     // appended to by Register; each one is started by Do
+}
+
+// New creates and returns a chaosmonkey for the given disruption with no Tests registered.  The
+// caller should register Tests with it and then call Do; see Do for more information.
+func New(disruption Disruption) *chaosmonkey {
+	return &chaosmonkey{
+		disruption,
+		[]Test{},
+	}
+}
+
+// Register appends the given Test to the chaosmonkey's list of tests, so that Do will run it
+// across the Disruption.
+func (cm *chaosmonkey) Register(test Test) {
+	cm.tests = append(cm.tests, test)
+}
+
+// RegisterInterface registers the given Interface with the chaosmonkey by wrapping it in a Test
+// that calls Setup, signals sem.Ready(), calls Test with sem.StopCh, and then calls Teardown.
+// Test can tell that the Disruption is finished when stopCh is closed.
+func (cm *chaosmonkey) RegisterInterface(in Interface) {
+	cm.Register(func(sem *Semaphore) {
+		in.Setup()
+		sem.Ready()
+		in.Test(sem.StopCh)
+		in.Teardown() // not deferred: skipped if Setup or Test panics
+	})
+}
+
+// Do performs the Disruption while testing the registered Tests.  Once the caller has registered
+// all Tests with the chaosmonkey, they call Do.  Do starts each registered test asynchronously and
+// waits for each test to signal that it is ready by calling sem.Ready().  Do will then do the
+// Disruption, and when it's complete, close sem.StopCh to signal to the registered Tests that the
+// Disruption is over, and wait for all Tests to return.
+func (cm *chaosmonkey) Do() {
+	sems := []*Semaphore{}
+	// All semaphores have the same StopCh.
+	stopCh := make(chan struct{})
+
+	for _, test := range cm.tests {
+		test := test // per-iteration copy captured by the goroutine below
+		sem := newSemaphore(stopCh)
+		sems = append(sems, sem)
+		go func() {
+			defer GinkgoRecover()
+			defer sem.done()
+			test(sem)
+		}()
+	}
+
+	By("Waiting for all async tests to be ready")
+	for _, sem := range sems {
+		// Wait for test to be ready.  We have to wait for ready *or done* because a test
+		// may panic before signaling that it's ready, and we shouldn't block.  Since we
+		// deferred sem.done() above, if a test panics, it's marked as done.
+		sem.waitForReadyOrDone()
+	}
+
+	defer func() { // deferred, so tests are stopped and awaited even if the disruption panics
+		close(stopCh)
+		By("Waiting for async validations to complete")
+		for _, sem := range sems {
+			sem.waitForDone()
+		}
+	}()
+
+	By("Starting disruption")
+	cm.disruption()
+	By("Disruption complete; stopping async validations")
+}
+
+// Semaphore is taken by a Test and provides: Ready(), for the Test to call when it's ready for the
+// disruption to start; and StopCh, the closure of which signals to the Test that the disruption is
+// finished.
+type Semaphore struct {
+	readyCh chan struct{}   // closed by Ready()
+	StopCh  <-chan struct{} // closed by Do once the disruption is over
+	doneCh  chan struct{}   // closed by done() when the Test's goroutine exits
+}
+
+func newSemaphore(stopCh <-chan struct{}) *Semaphore {
+	// We don't want to block on Ready() or done(), so readyCh and doneCh are buffered.
+	return &Semaphore{
+		make(chan struct{}, 1), // readyCh
+		stopCh,                 // StopCh
+		make(chan struct{}, 1), // doneCh
+	}
+}
+
+// Ready is called by the Test to signal that the Test is ready for the disruption to start.
+func (sem *Semaphore) Ready() {
+	close(sem.readyCh) // a second call would panic: close of closed channel
+}
+
+// done is an internal method that Do defers in each test goroutine; closing doneCh lets Do wait
+// for it to return and sense a panic before Ready was called.  See waitForReadyOrDone.
+func (sem *Semaphore) done() {
+	close(sem.doneCh)
+}
+
+// waitForReadyOrDone blocks until the test has called Ready() or has returned.  Tests that fail
+// (which Ginkgo implements as a panic) may never call Ready(), so we also check done: if the
+// function has already returned, we don't care whether it's ready, and just continue.
+func (sem *Semaphore) waitForReadyOrDone() {
+	select {
+	case <-sem.readyCh:
+	case <-sem.doneCh:
+	}
+}
+
+// waitForDone is an internal method for Do to block until this Semaphore's Test has returned.
+func (sem *Semaphore) waitForDone() {
+	<-sem.doneCh
+}
diff --git a/test/e2e/test-cmd.sh b/test/e2e/test-cmd.sh
@@ -26,6 +26,9 @@ REGISTRY=""
 VERSION=""
 
 TESTFILE="v1beta2-controllerrevision.proto"
+# etcdctl flags for an etcd server that has mTLS enabled; cleared below if TLS is off.
+TLS_ARGS="--cacert /etc/srv/kubernetes/pki/etcd-apiserver-ca.crt --cert /etc/srv/kubernetes/pki/etcd-apiserver-client.crt --key /etc/srv/kubernetes/pki/etcd-apiserver-client.key"
+
 
 function wait-for-migration()
 {
@@ -78,7 +81,7 @@ function wait-for-migration()
 verify-version()
 {
   version=$(gcloud compute --project "${PROJECT}" ssh --zone "${KUBE_GCE_ZONE}" "${CLUSTER_NAME}-master" --command \
-    "docker exec $1 /bin/sh -c \"ETCDCTL_API=3 etcdctl get /registry/controllerrevisions/default/sample\" | grep -a apps")
+    "docker exec $1 /bin/sh -c \"ETCDCTL_API=3 etcdctl ${TLS_ARGS} get /registry/controllerrevisions/default/sample\" | grep -a apps")
   # Remove the trailing non-printable character. The data is encoded in proto, so
   # it has non-printable characters.
   version=$(tr -dc '[[:print:]]' <<< "${version}")
@@ -120,6 +123,7 @@ gcloud auth configure-docker
 # create the object via the apiserver, because apiserver always encode the
 # object to the default storage version before storing in etcd.
 
+
 # Copy the pre-made proto file of the object to the master machine.
 user_name=$(gcloud compute --project "${PROJECT}" ssh --zone "${KUBE_GCE_ZONE}" "${CLUSTER_NAME}-master" --command "whoami")
 gcloud compute scp "${MIGRATORROOT}/test/e2e/${TESTFILE}" "${user_name}@${CLUSTER_NAME}-master:~/" --project "${PROJECT}" --zone "${KUBE_GCE_ZONE}"
@@ -133,9 +137,16 @@ etcd_container=$(echo "${result}" | grep "etcd-server-${CLUSTER_NAME}-master" |
 gcloud compute --project "${PROJECT}" ssh --zone "${KUBE_GCE_ZONE}" "${CLUSTER_NAME}-master" --command \
   "docker cp ${TESTFILE} ${etcd_container}:/"
 
+# Check whether etcd TLS is enabled; if it is not, drop the TLS flags.
+gcloud compute --project "${PROJECT}" ssh --zone "${KUBE_GCE_ZONE}" "${CLUSTER_NAME}-master" --command \
+  "cat /etc/kubernetes/manifests/etcd.manifest | grep '\-\-listen-client-urls https:'" && rc=$? || rc=$?
+if [[ $rc -ne 0 ]]; then
+  TLS_ARGS=""
+fi
+
 # Create the object via etcdctl
 gcloud compute --project "${PROJECT}" ssh --zone "${KUBE_GCE_ZONE}" "${CLUSTER_NAME}-master" --command \
-  "docker exec ${etcd_container} /bin/sh -c \"cat /${TESTFILE} | ETCDCTL_API=3 etcdctl put /registry/controllerrevisions/default/sample\""
+  "docker exec ${etcd_container} /bin/sh -c \"cat /${TESTFILE} | ETCDCTL_API=3 etcdctl ${TLS_ARGS} put /registry/controllerrevisions/default/sample\""
 
 #TODO: remove
 # Verify that the ControllerRevision is encoded as apps/v1beta2.

diff --git a/test/e2e/test-fully-automated.sh b/test/e2e/test-fully-automated.sh
@@ -57,5 +57,5 @@ popd
 
 pushd "${MIGRATOR_ROOT}"
   make e2e-test
-  "${ginkgo}" "${MIGRATOR_ROOT}/test/e2e/e2e.test"
+  "${ginkgo}" -v "$@" "${MIGRATOR_ROOT}/test/e2e/e2e.test"
 popd