
Merge pull request #1275 from coreos/flaketest
e2e: use absolute retry attempts to avoid rounding
hongchaodeng authored Jul 12, 2017
2 parents 3880d1b + 91a12d4 commit f7ae56b
Showing 12 changed files with 110 additions and 111 deletions.
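The change is mechanical across the e2e helpers: every `timeout time.Duration` parameter becomes an absolute `retries int` count, evaluated against a shared 10-second `retryInterval` (so the old 60-second timeouts become 6 retries, 240 seconds becomes 24, and so on). The motivation is that the old code derived the retry count as `int(timeout/interval)`, and integer division truncates. A minimal standalone sketch of the pitfall, using illustrative values not taken from the diff:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	interval := 10 * time.Second

	// Old style: retries derived from a timeout by integer division.
	timeout := 15 * time.Second
	fmt.Println(int(timeout / interval)) // 1: the 5s remainder is silently dropped

	timeout = 5 * time.Second
	fmt.Println(int(timeout / interval)) // 0: the retry loop would never run at all

	// New style: an absolute retry count makes the total wait explicit.
	const retries = 6
	fmt.Println(time.Duration(retries) * interval) // exactly 1m0s
}
```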
14 changes: 7 additions & 7 deletions test/e2e/basic_test.go
@@ -40,7 +40,7 @@ func TestCreateCluster(t *testing.T) {
}
}()

- if _, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 60*time.Second, testEtcd); err != nil {
+ if _, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 6, testEtcd); err != nil {
t.Fatalf("failed to create 3 members etcd cluster: %v", err)
}
}
@@ -63,7 +63,7 @@ func TestPauseControl(t *testing.T) {
}
}()

- names, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 60*time.Second, testEtcd)
+ names, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 6, testEtcd)
if err != nil {
t.Fatalf("failed to create 3 members etcd cluster: %v", err)
}
@@ -82,10 +82,10 @@ func TestPauseControl(t *testing.T) {
if err := e2eutil.KillMembers(f.KubeClient, f.Namespace, names[0]); err != nil {
t.Fatal(err)
}
- if _, err := e2eutil.WaitUntilPodSizeReached(t, f.KubeClient, 2, 10*time.Second, testEtcd); err != nil {
+ if _, err := e2eutil.WaitUntilPodSizeReached(t, f.KubeClient, 2, 1, testEtcd); err != nil {
t.Fatalf("failed to wait for killed member to die: %v", err)
}
- if _, err := e2eutil.WaitUntilPodSizeReached(t, f.KubeClient, 3, 10*time.Second, testEtcd); err == nil {
+ if _, err := e2eutil.WaitUntilPodSizeReached(t, f.KubeClient, 3, 1, testEtcd); err == nil {
t.Fatalf("cluster should not be recovered: control is paused")
}

@@ -96,7 +96,7 @@ func TestPauseControl(t *testing.T) {
t.Fatalf("failed to resume control: %v", err)
}

- if _, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 60*time.Second, testEtcd); err != nil {
+ if _, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 6, testEtcd); err != nil {
t.Fatalf("failed to resize to 3 members etcd cluster: %v", err)
}
}
@@ -119,7 +119,7 @@ func TestEtcdUpgrade(t *testing.T) {
}
}()

- err = e2eutil.WaitSizeAndVersionReached(t, f.KubeClient, "3.0.16", 3, 60*time.Second, testEtcd)
+ err = e2eutil.WaitSizeAndVersionReached(t, f.KubeClient, "3.0.16", 3, 6, testEtcd)
if err != nil {
t.Fatalf("failed to create 3 members etcd cluster: %v", err)
}
@@ -131,7 +131,7 @@ func TestEtcdUpgrade(t *testing.T) {
t.Fatalf("fail to update cluster version: %v", err)
}

- err = e2eutil.WaitSizeAndVersionReached(t, f.KubeClient, "3.1.8", 3, 60*time.Second, testEtcd)
+ err = e2eutil.WaitSizeAndVersionReached(t, f.KubeClient, "3.1.8", 3, 6, testEtcd)
if err != nil {
t.Fatalf("failed to wait new version etcd cluster: %v", err)
}
6 changes: 3 additions & 3 deletions test/e2e/cluster_status_test.go
@@ -44,7 +44,7 @@ func TestReadyMembersStatus(t *testing.T) {
}
}()

- if _, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, size, 30*time.Second, testEtcd); err != nil {
+ if _, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, size, 3, testEtcd); err != nil {
t.Fatalf("failed to create %d members etcd cluster: %v", size, err)
}

@@ -94,11 +94,11 @@ func TestBackupStatus(t *testing.T) {
}
}()

- _, err = e2eutil.WaitUntilSizeReached(t, f.KubeClient, 1, 60*time.Second, testEtcd)
+ _, err = e2eutil.WaitUntilSizeReached(t, f.KubeClient, 1, 6, testEtcd)
if err != nil {
t.Fatalf("failed to create 1 members etcd cluster: %v", err)
}
- err = e2eutil.WaitBackupPodUp(t, f.KubeClient, f.Namespace, testEtcd.Metadata.Name, 60*time.Second)
+ err = e2eutil.WaitBackupPodUp(t, f.KubeClient, f.Namespace, testEtcd.Metadata.Name, 6)
if err != nil {
t.Fatalf("failed to create backup pod: %v", err)
}
9 changes: 4 additions & 5 deletions test/e2e/e2eslow/disruptive_test.go
@@ -16,7 +16,6 @@ package e2eslow

import (
"testing"
"time"

"github.com/coreos/etcd-operator/test/e2e/e2eutil"
"github.com/coreos/etcd-operator/test/e2e/framework"
@@ -35,7 +34,7 @@ func TestRestartOperator(t *testing.T) {
}
}()

- names, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 60*time.Second, testEtcd)
+ names, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 6, testEtcd)
if err != nil {
t.Fatalf("failed to create 3 members etcd cluster: %v", err)
}
@@ -47,18 +46,18 @@
if err := e2eutil.KillMembers(f.KubeClient, f.Namespace, names[0]); err != nil {
t.Fatal(err)
}
- if _, err := e2eutil.WaitUntilPodSizeReached(t, f.KubeClient, 2, 10*time.Second, testEtcd); err != nil {
+ if _, err := e2eutil.WaitUntilPodSizeReached(t, f.KubeClient, 2, 1, testEtcd); err != nil {
t.Fatalf("failed to wait for killed member to die: %v", err)
}
- if _, err := e2eutil.WaitUntilPodSizeReached(t, f.KubeClient, 3, 10*time.Second, testEtcd); err == nil {
+ if _, err := e2eutil.WaitUntilPodSizeReached(t, f.KubeClient, 3, 1, testEtcd); err == nil {
t.Fatalf("cluster should not be recovered: operator is deleted")
}

if err := f.SetupEtcdOperator(); err != nil {
t.Fatalf("fail to restart etcd operator: %v", err)
}

- if _, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 60*time.Second, testEtcd); err != nil {
+ if _, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 6, testEtcd); err != nil {
t.Fatalf("failed to resize to 3 members etcd cluster: %v", err)
}
}
8 changes: 4 additions & 4 deletions test/e2e/e2eslow/self_hosted_test.go
@@ -54,7 +54,7 @@ func testCreateSelfHostedCluster(t *testing.T) {
}
}()

- if _, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 240*time.Second, testEtcd); err != nil {
+ if _, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 24, testEtcd); err != nil {
t.Fatalf("failed to create 3 members self-hosted etcd cluster: %v", err)
}
}
@@ -87,7 +87,7 @@ func testCreateSelfHostedClusterWithBootMember(t *testing.T) {
}
}()

- if _, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 120*time.Second, testEtcd); err != nil {
+ if _, err := e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 12, testEtcd); err != nil {
t.Fatalf("failed to create 3 members etcd cluster: %v", err)
}
}
@@ -149,12 +149,12 @@ func testSelfHostedClusterWithBackup(t *testing.T) {
}
}()

- _, err = e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 60*time.Second, testEtcd)
+ _, err = e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 6, testEtcd)
if err != nil {
t.Fatalf("failed to create 3 members etcd cluster: %v", err)
}
fmt.Println("reached to 3 members cluster")
- err = e2eutil.WaitBackupPodUp(t, f.KubeClient, f.Namespace, testEtcd.Metadata.Name, 60*time.Second)
+ err = e2eutil.WaitBackupPodUp(t, f.KubeClient, f.Namespace, testEtcd.Metadata.Name, 6)
if err != nil {
t.Fatalf("failed to create backup pod: %v", err)
}
3 changes: 1 addition & 2 deletions test/e2e/e2eslow/tls_test.go
@@ -20,7 +20,6 @@ import (
"math/rand"
"os"
"testing"
"time"

"github.com/coreos/etcd-operator/pkg/spec"
"github.com/coreos/etcd-operator/test/e2e/e2eutil"
@@ -85,7 +84,7 @@ func testTLS(t *testing.T, selfHosted bool) {
}
}()

- _, err = e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 60*time.Second, c)
+ _, err = e2eutil.WaitUntilSizeReached(t, f.KubeClient, 3, 6, c)
if err != nil {
t.Fatalf("failed to create 3 members etcd cluster: %v", err)
}
20 changes: 0 additions & 20 deletions test/e2e/e2eutil/util.go
@@ -25,7 +25,6 @@ import (
"github.com/coreos/etcd-operator/client/experimentalclient"
"github.com/coreos/etcd-operator/pkg/util/constants"
"github.com/coreos/etcd-operator/pkg/util/k8sutil"
"github.com/coreos/etcd-operator/pkg/util/retryutil"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
@@ -54,25 +53,6 @@ func KillMembers(kubecli kubernetes.Interface, namespace string, names ...string
return nil
}

- func WaitBackupPodUp(t *testing.T, kubecli kubernetes.Interface, ns, clusterName string, timeout time.Duration) error {
- ls := labels.SelectorFromSet(k8sutil.BackupSidecarLabels(clusterName))
- return retryutil.Retry(5*time.Second, int(timeout/(5*time.Second)), func() (done bool, err error) {
- podList, err := kubecli.CoreV1().Pods(ns).List(metav1.ListOptions{
- LabelSelector: ls.String(),
- })
- if err != nil {
- return false, err
- }
- for i := range podList.Items {
- if podList.Items[i].Status.Phase == v1.PodRunning {
- LogfWithTimestamp(t, "backup pod (%s) is running", podList.Items[i].Name)
- return true, nil
- }
- }
- return false, nil
- })
- }

func MakeBackup(kubecli kubernetes.Interface, ns, clusterName string) error {
ls := labels.SelectorFromSet(k8sutil.BackupSidecarLabels(clusterName))
podList, err := kubecli.CoreV1().Pods(ns).List(metav1.ListOptions{
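(Note that WaitBackupPodUp is not dropped: it reappears in wait_util.go below, rewritten to take an absolute retry count instead of a timeout.)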
78 changes: 50 additions & 28 deletions test/e2e/e2eutil/wait_util.go
@@ -34,21 +34,23 @@ import (
"k8s.io/client-go/pkg/api/v1"
)

+ var retryInterval = 10 * time.Second

type acceptFunc func(*spec.Cluster) bool
type filterFunc func(*v1.Pod) bool

- func CalculateRestoreWaitTime(needDataClone bool) time.Duration {
- waitTime := 240 * time.Second
+ func CalculateRestoreWaitTime(needDataClone bool) int {
+ waitTime := 24
if needDataClone {
// Take additional time to clone the data.
- waitTime += 60 * time.Second
+ waitTime += 6
}
return waitTime
}

- func WaitUntilPodSizeReached(t *testing.T, kubeClient kubernetes.Interface, size int, timeout time.Duration, cl *spec.Cluster) ([]string, error) {
+ func WaitUntilPodSizeReached(t *testing.T, kubeClient kubernetes.Interface, size, retries int, cl *spec.Cluster) ([]string, error) {
var names []string
- err := retryutil.Retry(10*time.Second, int(timeout/(10*time.Second)), func() (done bool, err error) {
+ err := retryutil.Retry(retryInterval, retries, func() (done bool, err error) {
podList, err := kubeClient.Core().Pods(cl.Metadata.Namespace).List(k8sutil.ClusterListOpt(cl.Metadata.Name))
if err != nil {
return false, err
@@ -75,12 +77,12 @@ func WaitUntilPodSizeReached(t *testing.T, kubeClient kubernetes.Interface, size
return names, nil
}

- func WaitUntilSizeReached(t *testing.T, kubeClient kubernetes.Interface, size int, timeout time.Duration, cl *spec.Cluster) ([]string, error) {
- return waitSizeReachedWithAccept(t, kubeClient, size, timeout, cl)
+ func WaitUntilSizeReached(t *testing.T, kubeClient kubernetes.Interface, size, retries int, cl *spec.Cluster) ([]string, error) {
+ return waitSizeReachedWithAccept(t, kubeClient, size, retries, cl)
}

- func WaitSizeAndVersionReached(t *testing.T, kubeClient kubernetes.Interface, version string, size int, timeout time.Duration, cl *spec.Cluster) error {
- return retryutil.Retry(10*time.Second, int(timeout/(10*time.Second)), func() (done bool, err error) {
+ func WaitSizeAndVersionReached(t *testing.T, kubeClient kubernetes.Interface, version string, size, retries int, cl *spec.Cluster) error {
+ return retryutil.Retry(retryInterval, retries, func() (done bool, err error) {
var names []string
podList, err := kubeClient.Core().Pods(cl.Metadata.Namespace).List(k8sutil.ClusterListOpt(cl.Metadata.Name))
if err != nil {
@@ -115,9 +117,9 @@ func getVersionFromImage(image string) string {
return strings.Split(image, ":v")[1]
}

- func waitSizeReachedWithAccept(t *testing.T, kubeClient kubernetes.Interface, size int, timeout time.Duration, cl *spec.Cluster, accepts ...acceptFunc) ([]string, error) {
+ func waitSizeReachedWithAccept(t *testing.T, kubeClient kubernetes.Interface, size, retries int, cl *spec.Cluster, accepts ...acceptFunc) ([]string, error) {
var names []string
- err := retryutil.Retry(10*time.Second, int(timeout/(10*time.Second)), func() (done bool, err error) {
+ err := retryutil.Retry(retryInterval, retries, func() (done bool, err error) {
currCluster, err := k8sutil.GetClusterTPRObject(kubeClient.CoreV1().RESTClient(), cl.Metadata.Namespace, cl.Metadata.Name)
if err != nil {
return false, err
@@ -142,9 +144,9 @@ func waitSizeReachedWithAccept(t *testing.T, kubeClient kubernetes.Interface, si
return names, nil
}

- func WaitUntilMembersWithNamesDeleted(t *testing.T, kubeClient kubernetes.Interface, timeout time.Duration, cl *spec.Cluster, targetNames ...string) ([]string, error) {
+ func WaitUntilMembersWithNamesDeleted(t *testing.T, kubeClient kubernetes.Interface, retries int, cl *spec.Cluster, targetNames ...string) ([]string, error) {
var remaining []string
- err := retryutil.Retry(10*time.Second, int(timeout/(10*time.Second)), func() (done bool, err error) {
+ err := retryutil.Retry(retryInterval, retries, func() (done bool, err error) {
currCluster, err := k8sutil.GetClusterTPRObject(kubeClient.CoreV1().RESTClient(), cl.Metadata.Namespace, cl.Metadata.Name)
if err != nil {
return false, err
@@ -179,8 +181,27 @@ func presentIn(a string, list []string) bool {
return false
}

+ func WaitBackupPodUp(t *testing.T, kubecli kubernetes.Interface, ns, clusterName string, retries int) error {
+ ls := labels.SelectorFromSet(k8sutil.BackupSidecarLabels(clusterName))
+ return retryutil.Retry(retryInterval, retries, func() (done bool, err error) {
+ podList, err := kubecli.CoreV1().Pods(ns).List(metav1.ListOptions{
+ LabelSelector: ls.String(),
+ })
+ if err != nil {
+ return false, err
+ }
+ for i := range podList.Items {
+ if podList.Items[i].Status.Phase == v1.PodRunning {
+ LogfWithTimestamp(t, "backup pod (%s) is running", podList.Items[i].Name)
+ return true, nil
+ }
+ }
+ return false, nil
+ })
+ }

func waitResourcesDeleted(t *testing.T, kubeClient kubernetes.Interface, cl *spec.Cluster) error {
- undeletedPods, err := WaitPodsDeleted(kubeClient, cl.Metadata.Namespace, 30*time.Second, k8sutil.ClusterListOpt(cl.Metadata.Name))
+ undeletedPods, err := WaitPodsDeleted(kubeClient, cl.Metadata.Namespace, 3, k8sutil.ClusterListOpt(cl.Metadata.Name))
if err != nil {
if retryutil.IsRetryFailure(err) && len(undeletedPods) > 0 {
p := undeletedPods[0]
@@ -197,7 +218,7 @@ func waitResourcesDeleted(t *testing.T, kubeClient kubernetes.Interface, cl *spe
return fmt.Errorf("fail to wait pods deleted: %v", err)
}

- err = retryutil.Retry(5*time.Second, 5, func() (done bool, err error) {
+ err = retryutil.Retry(retryInterval, 3, func() (done bool, err error) {
list, err := kubeClient.CoreV1().Services(cl.Metadata.Namespace).List(k8sutil.ClusterListOpt(cl.Metadata.Name))
if err != nil {
return false, err
@@ -215,12 +236,13 @@ func waitResourcesDeleted(t *testing.T, kubeClient kubernetes.Interface, cl *spe
}

func WaitBackupDeleted(kubeClient kubernetes.Interface, cl *spec.Cluster, checkerOpt StorageCheckerOptions) error {
- retries := 5
+ retries := 3
if checkerOpt.DeletedFromAPI {
// Currently waiting deployment to be gone from API takes a lot of time.
// TODO: revisit this when we use "background propagate" deletion policy.
- retries = 60
+ retries = 30
}
- err := retryutil.Retry(5*time.Second, retries, func() (bool, error) {
+ err := retryutil.Retry(retryInterval, retries, func() (bool, error) {
d, err := kubeClient.AppsV1beta1().Deployments(cl.Metadata.Namespace).Get(k8sutil.BackupSidecarName(cl.Metadata.Name), metav1.GetOptions{})
// If we don't need to wait deployment to be completely gone, we can say it is deleted
// as long as DeletionTimestamp is not nil. Otherwise, we need to wait it is gone by checking not found error.
@@ -236,7 +258,7 @@ func WaitBackupDeleted(kubeClient kubernetes.Interface, cl *spec.Cluster, checke
return fmt.Errorf("failed to wait backup Deployment deleted: %v", err)
}

- _, err = WaitPodsDeleted(kubeClient, cl.Metadata.Namespace, 10*time.Second,
+ _, err = WaitPodsDeleted(kubeClient, cl.Metadata.Namespace, 2,
metav1.ListOptions{
LabelSelector: labels.SelectorFromSet(map[string]string{
"app": k8sutil.BackupPodSelectorAppField,
@@ -251,7 +273,7 @@ func WaitBackupDeleted(kubeClient kubernetes.Interface, cl *spec.Cluster, checke
if !cl.Spec.Backup.AutoDelete {
return nil
}
- err = retryutil.Retry(5*time.Second, 5, func() (done bool, err error) {
+ err = retryutil.Retry(retryInterval, 3, func() (done bool, err error) {
switch cl.Spec.Backup.StorageType {
case spec.BackupStorageTypePersistentVolume, spec.BackupStorageTypeDefault:
pl, err := kubeClient.CoreV1().PersistentVolumeClaims(cl.Metadata.Namespace).List(k8sutil.ClusterListOpt(cl.Metadata.Name))
@@ -279,8 +301,8 @@ func WaitBackupDeleted(kubeClient kubernetes.Interface, cl *spec.Cluster, checke
return nil
}

- func WaitPodsWithImageDeleted(kubecli kubernetes.Interface, namespace, image string, timeout time.Duration, lo metav1.ListOptions) ([]*v1.Pod, error) {
- return waitPodsDeleted(kubecli, namespace, timeout, lo, func(p *v1.Pod) bool {
+ func WaitPodsWithImageDeleted(kubecli kubernetes.Interface, namespace, image string, retries int, lo metav1.ListOptions) ([]*v1.Pod, error) {
+ return waitPodsDeleted(kubecli, namespace, retries, lo, func(p *v1.Pod) bool {
for _, c := range p.Spec.Containers {
if c.Image == image {
return false
@@ -290,18 +312,18 @@ func WaitPodsWithImageDeleted(kubecli kubernetes.Interface, namespace, image str
})
}

- func WaitPodsDeleted(kubecli kubernetes.Interface, namespace string, timeout time.Duration, lo metav1.ListOptions) ([]*v1.Pod, error) {
+ func WaitPodsDeleted(kubecli kubernetes.Interface, namespace string, retries int, lo metav1.ListOptions) ([]*v1.Pod, error) {
f := func(p *v1.Pod) bool { return p.DeletionTimestamp != nil }
- return waitPodsDeleted(kubecli, namespace, timeout, lo, f)
+ return waitPodsDeleted(kubecli, namespace, retries, lo, f)
}

- func WaitPodsDeletedCompletely(kubecli kubernetes.Interface, namespace string, timeout time.Duration, lo metav1.ListOptions) ([]*v1.Pod, error) {
- return waitPodsDeleted(kubecli, namespace, timeout, lo)
+ func WaitPodsDeletedCompletely(kubecli kubernetes.Interface, namespace string, retries int, lo metav1.ListOptions) ([]*v1.Pod, error) {
+ return waitPodsDeleted(kubecli, namespace, retries, lo)
}

- func waitPodsDeleted(kubecli kubernetes.Interface, namespace string, timeout time.Duration, lo metav1.ListOptions, filters ...filterFunc) ([]*v1.Pod, error) {
+ func waitPodsDeleted(kubecli kubernetes.Interface, namespace string, retries int, lo metav1.ListOptions, filters ...filterFunc) ([]*v1.Pod, error) {
var pods []*v1.Pod
- err := retryutil.Retry(5*time.Second, int(timeout/(5*time.Second)), func() (bool, error) {
+ err := retryutil.Retry(retryInterval, retries, func() (bool, error) {
podList, err := kubecli.CoreV1().Pods(namespace).List(lo)
if err != nil {
return false, err
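(The other five changed files in this commit are not shown above.)

All of these helpers funnel into `retryutil.Retry`. Its real implementation lives in `pkg/util/retryutil` and is not part of this diff; the sketch below is an assumption inferred only from the call sites above (`Retry(interval, maxRetries, func() (done bool, err error))`), included to show why an absolute attempt count gives a predictable worst-case wait of `maxRetries * interval` with no rounding step:

```go
package retryutil

import (
	"fmt"
	"time"
)

// ConditionFunc reports whether the awaited condition is met (done)
// or an unrecoverable error occurred.
type ConditionFunc func() (done bool, err error)

// Retry is a sketch only: it calls f up to maxRetries times, sleeping
// interval between attempts. The real package also exposes
// IsRetryFailure to distinguish exhausted retries from other errors;
// that detail is omitted here.
func Retry(interval time.Duration, maxRetries int, f ConditionFunc) error {
	if maxRetries <= 0 {
		return fmt.Errorf("maxRetries (%d) must be positive", maxRetries)
	}
	for i := 0; i < maxRetries; i++ {
		done, err := f()
		if err != nil {
			return err
		}
		if done {
			return nil
		}
		time.Sleep(interval)
	}
	return fmt.Errorf("condition still not met after %d retries", maxRetries)
}
```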