diff --git a/tests/actions.go b/tests/actions.go index f3ca75d0d1..caf320daf6 100644 --- a/tests/actions.go +++ b/tests/actions.go @@ -28,11 +28,6 @@ import ( "strings" "time" - _ "github.com/go-sql-driver/mysql" - "github.com/golang/glog" - pingcapErrors "github.com/pingcap/errors" - "github.com/pingcap/kvproto/pkg/metapb" - "github.com/pingcap/tidb-operator/tests/pkg/apimachinery" admissionV1beta1 "k8s.io/api/admissionregistration/v1beta1" "k8s.io/api/apps/v1beta1" batchv1 "k8s.io/api/batch/v1" @@ -44,14 +39,20 @@ import ( "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" + _ "github.com/go-sql-driver/mysql" + "github.com/golang/glog" + pingcapErrors "github.com/pingcap/errors" + "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1" "github.com/pingcap/tidb-operator/pkg/client/clientset/versioned" "github.com/pingcap/tidb-operator/pkg/controller" "github.com/pingcap/tidb-operator/pkg/label" + "github.com/pingcap/tidb-operator/tests/pkg/apimachinery" "github.com/pingcap/tidb-operator/tests/pkg/blockwriter" "github.com/pingcap/tidb-operator/tests/pkg/metrics" "github.com/pingcap/tidb-operator/tests/pkg/util" "github.com/pingcap/tidb-operator/tests/pkg/webhook" + "github.com/pingcap/tidb-operator/tests/slack" ) const ( @@ -306,7 +307,7 @@ func (oa *operatorActions) DeployOperator(info *OperatorConfig) error { func (oa *operatorActions) DeployOperatorOrDie(info *OperatorConfig) { if err := oa.DeployOperator(info); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } @@ -329,7 +330,7 @@ func (oa *operatorActions) CleanOperator(info *OperatorConfig) error { func (oa *operatorActions) CleanOperatorOrDie(info *OperatorConfig) { if err := oa.CleanOperator(info); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } @@ -385,7 +386,7 @@ func (oa *operatorActions) DeployTidbCluster(info *TidbClusterConfig) error { func (oa *operatorActions) DeployTidbClusterOrDie(info *TidbClusterConfig) { if err := oa.DeployTidbCluster(info); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } @@ -461,7 +462,7 @@ func (oa *operatorActions) CleanTidbCluster(info *TidbClusterConfig) error { func (oa *operatorActions) CleanTidbClusterOrDie(info *TidbClusterConfig) { if err := oa.CleanTidbCluster(info); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } @@ -529,7 +530,7 @@ func (oa *operatorActions) CheckTidbClusterStatus(info *TidbClusterConfig) error func (oa *operatorActions) CheckTidbClusterStatusOrDie(info *TidbClusterConfig) { if err := oa.CheckTidbClusterStatus(info); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } @@ -553,7 +554,7 @@ func (oa *operatorActions) BeginInsertDataTo(info *TidbClusterConfig) error { func (oa *operatorActions) BeginInsertDataToOrDie(info *TidbClusterConfig) { err := oa.BeginInsertDataTo(info) if err != nil { - panic(err) + slack.NotifyAndPanic(err) } } @@ -594,7 +595,7 @@ func (oa *operatorActions) ScaleTidbCluster(info *TidbClusterConfig) error { func (oa *operatorActions) ScaleTidbClusterOrDie(info *TidbClusterConfig) { if err := oa.ScaleTidbCluster(info); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } @@ -675,7 +676,7 @@ func (oa *operatorActions) UpgradeTidbCluster(info *TidbClusterConfig) error { func (oa *operatorActions) UpgradeTidbClusterOrDie(info *TidbClusterConfig) { if err := oa.UpgradeTidbCluster(info); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } @@ -1964,7 +1965,7 @@ func strPtr(s string) *string { return &s } func (oa *operatorActions) RegisterWebHookAndServiceOrDie(info *OperatorConfig) { if err := oa.RegisterWebHookAndService(info); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } @@ -2109,7 +2110,12 @@ func (oa *operatorActions) StartValidatingAdmissionWebhookServerOrDie(info *Oper } err = server.ListenAndServeTLS("", "") if err != nil { - glog.Errorf("fail to start webhook server err %v", err) + err = fmt.Errorf("fail to start webhook server err %v", err) + glog.Error(err) + sendErr := slack.SendErrMsg(err.Error()) + if sendErr != nil { + glog.Error(sendErr) + } os.Exit(4) } } diff --git a/tests/backup/backupcase.go b/tests/backup/backupcase.go index 649839e251..7d7d36fe13 100644 --- a/tests/backup/backupcase.go +++ b/tests/backup/backupcase.go @@ -17,6 +17,8 @@ import ( "fmt" "time" + "github.com/pingcap/tidb-operator/tests/slack" + "github.com/golang/glog" "github.com/pingcap/tidb-operator/tests" "k8s.io/apimachinery/pkg/util/wait" @@ -116,7 +118,7 @@ func (bc *BackupCase) Run() error { func (bc *BackupCase) RunOrDie() { if err := bc.Run(); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } diff --git a/tests/cmd/stability/main.go b/tests/cmd/stability/main.go index c6d1880a27..57d02811d2 100644 --- a/tests/cmd/stability/main.go +++ b/tests/cmd/stability/main.go @@ -19,6 +19,8 @@ import ( _ "net/http/pprof" "time" + "github.com/pingcap/tidb-operator/tests/slack" + "github.com/golang/glog" "github.com/jinzhu/copier" "github.com/pingcap/tidb-operator/tests" @@ -237,5 +239,6 @@ func main() { if err != nil { glog.Errorf("failed to clean temp dirs, this error can be ignored.") } - glog.Infof("\nFinished.") + + slack.NotifyAndCompleted("\nFinished.") } diff --git a/tests/config.go b/tests/config.go index 7d49673891..291801a5f4 100644 --- a/tests/config.go +++ b/tests/config.go @@ -7,6 +7,8 @@ import ( "os" "strings" + "github.com/pingcap/tidb-operator/tests/slack" + "github.com/pingcap/tidb-operator/tests/pkg/blockwriter" "github.com/golang/glog" @@ -67,6 +69,7 @@ func NewConfig() (*Config, error) { flag.StringVar(&cfg.OperatorTag, "operator-tag", "master", "operator tag used to choose charts") flag.StringVar(&cfg.OperatorImage, "operator-image", "pingcap/tidb-operator:latest", "operator image") flag.StringVar(&cfg.OperatorRepoDir, "operator-repo-dir", "/tidb-operator", "local directory to which tidb-operator cloned") + flag.StringVar(&slack.WebhookUrl, "slack-webhook-url", "", "slack webhook url") flag.Parse() operatorRepo, err := ioutil.TempDir("", "tidb-operator") @@ -86,10 +89,10 @@ func NewConfig() (*Config, error) { func ParseConfigOrDie() *Config { cfg, err := NewConfig() if err != nil { - panic(err) + slack.NotifyAndPanic(err) } if err := cfg.Parse(); err != nil { - panic(err) + slack.NotifyAndPanic(err) } glog.Infof("using config: %+v", cfg) @@ -138,7 +141,7 @@ func (c *Config) GetTiDBVersion() (string, error) { func (c *Config) GetTiDBVersionOrDie() string { v, err := c.GetTiDBVersion() if err != nil { - panic(err) + slack.NotifyAndPanic(err) } return v @@ -153,7 +156,7 @@ func (c *Config) GetUpgradeTidbVersions() []string { func (c *Config) GetUpgradeTidbVersionsOrDie() []string { versions := c.GetUpgradeTidbVersions() if len(versions) < 1 { - panic("upgrade tidb verions is empty") + slack.NotifyAndPanic(fmt.Errorf("upgrade tidb verions is empty")) } return versions diff --git a/tests/failover.go b/tests/failover.go index 35e4521195..2e9cd19f60 100644 --- a/tests/failover.go +++ b/tests/failover.go @@ -7,6 +7,8 @@ import ( "strings" "time" + "github.com/pingcap/tidb-operator/tests/slack" + _ "github.com/go-sql-driver/mysql" "github.com/golang/glog" "github.com/pingcap/errors" @@ -123,7 +125,7 @@ func (oa *operatorActions) TruncateSSTFileThenCheckFailover(info *TidbClusterCon func (oa *operatorActions) TruncateSSTFileThenCheckFailoverOrDie(info *TidbClusterConfig, tikvFailoverPeriod time.Duration) { if err := oa.TruncateSSTFileThenCheckFailover(info, tikvFailoverPeriod); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } @@ -197,7 +199,7 @@ func (oa *operatorActions) CheckFailoverPendingOrDie(clusters []*TidbClusterConf } return true, nil }); err != nil { - panic("failed to check failover pending") + slack.NotifyAndPanic(fmt.Errorf("failed to check failover pending")) } } @@ -278,7 +280,7 @@ func (oa *operatorActions) CheckFailoverOrDie(clusters []*TidbClusterConfig, fau } return true, nil }); err != nil { - panic("failed to check failover") + slack.NotifyAndPanic(fmt.Errorf("failed to check failover")) } } @@ -318,7 +320,7 @@ func (oa *operatorActions) CheckRecoverOrDie(clusters []*TidbClusterConfig) { } return true, nil }); err != nil { - panic("failed to check recover") + slack.NotifyAndPanic(fmt.Errorf("failed to check recover")) } } @@ -475,7 +477,7 @@ func (oa *operatorActions) CheckOneApiserverDownOrDie(operatorConfig *OperatorCo affectedPods := map[string]*corev1.Pod{} apiserverPod, err := GetApiserverPod(oa.kubeCli, faultNode) if err != nil { - panic(fmt.Errorf("can't find apiserver in node:%s", faultNode)) + slack.NotifyAndPanic(fmt.Errorf("can't find apiserver in node:%s", faultNode)) } if apiserverPod != nil { affectedPods[apiserverPod.GetName()] = apiserverPod @@ -496,7 +498,7 @@ func (oa *operatorActions) CheckOneApiserverDownOrDie(operatorConfig *OperatorCo } dnsPod, err := GetDnsPod(oa.kubeCli, faultNode) if err != nil { - panic(fmt.Errorf("can't find controller-manager in node:%s", faultNode)) + slack.NotifyAndPanic(fmt.Errorf("can't find controller-manager in node:%s", faultNode)) } if dnsPod != nil { affectedPods[dnsPod.GetName()] = dnsPod @@ -523,7 +525,7 @@ func (oa *operatorActions) CheckOneApiserverDownOrDie(operatorConfig *OperatorCo func (oa *operatorActions) CheckK8sAvailableOrDie(excludeNodes map[string]string, excludePods map[string]*corev1.Pod) { if err := oa.CheckK8sAvailable(excludeNodes, excludePods); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } diff --git a/tests/fault.go b/tests/fault.go index 0a98a55e38..f5982f7d58 100644 --- a/tests/fault.go +++ b/tests/fault.go @@ -6,6 +6,8 @@ import ( "os" "time" + "github.com/pingcap/tidb-operator/tests/slack" + "github.com/golang/glog" "github.com/pingcap/tidb-operator/pkg/client/clientset/versioned" "github.com/pingcap/tidb-operator/pkg/controller" @@ -154,7 +156,7 @@ func (fa *faultTriggerActions) StopNodeOrDie() (string, string, time.Time) { var err error var now time.Time if pn, n, now, err = fa.StopNode(); err != nil { - panic(err) + slack.NotifyAndPanic(err) } return pn, n, now } @@ -189,7 +191,7 @@ func (fa *faultTriggerActions) StartNode(physicalNode string, node string) error func (fa *faultTriggerActions) StartNodeOrDie(physicalNode string, node string) { if err := fa.StartNode(physicalNode, node); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } @@ -213,7 +215,7 @@ func (fa *faultTriggerActions) StopETCD(nodes ...string) error { func (fa *faultTriggerActions) StopETCDOrDie(nodes ...string) { if err := fa.StopETCD(nodes...); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } @@ -237,7 +239,7 @@ func (fa *faultTriggerActions) StartETCD(nodes ...string) error { func (fa *faultTriggerActions) StartETCDOrDie(nodes ...string) { if err := fa.StartETCD(nodes...); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } @@ -288,7 +290,7 @@ func (fa *faultTriggerActions) StopKubeAPIServer(node string) error { func (fa *faultTriggerActions) StopKubeAPIServerOrDie(node string) { if err := fa.StopKubeAPIServer(node); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } @@ -299,7 +301,7 @@ func (fa *faultTriggerActions) StartKubeAPIServer(node string) error { func (fa *faultTriggerActions) StartKubeAPIServerOrDie(node string) { if err := fa.StartKubeAPIServer(node); err != nil { - panic(err) + slack.NotifyAndPanic(err) } } diff --git a/tests/manifests/stability/stability.yaml b/tests/manifests/stability/stability.yaml index fcecfff6af..2228dd89d9 100644 --- a/tests/manifests/stability/stability.yaml +++ b/tests/manifests/stability/stability.yaml @@ -47,6 +47,7 @@ spec: command: - /usr/local/bin/stability-test - --config=/etc/tidb-operator-stability/config.yaml + - --slack-webhook-url="" volumeMounts: - mountPath: /logDir name: logdir diff --git a/tests/pkg/client/client.go b/tests/pkg/client/client.go index 080d07d79a..261f7a2f38 100644 --- a/tests/pkg/client/client.go +++ b/tests/pkg/client/client.go @@ -6,6 +6,8 @@ import ( "os" "time" + "github.com/pingcap/tidb-operator/tests/slack" + "github.com/juju/errors" "github.com/pingcap/tidb-operator/pkg/client/clientset/versioned" "github.com/pingcap/tidb-operator/pkg/client/clientset/versioned/typed/pingcap.com/v1alpha1" @@ -30,7 +32,7 @@ func init() { func NewCliOrDie() (versioned.Interface, kubernetes.Interface) { cfg, err := GetConfig() if err != nil { - panic(err) + slack.NotifyAndPanic(err) } return buildClientsOrDie(cfg) @@ -65,7 +67,7 @@ func Union(kube kubernetes.Interface, tidb versioned.Interface) Client { func NewOrDie() Client { cfg, err := clientcmd.BuildConfigFromFlags(masterUrl, kubeconfigPath) if err != nil { - panic(err) + slack.NotifyAndPanic(err) } return Union(kubernetes.NewForConfigOrDie(cfg), versioned.NewForConfigOrDie(cfg)) } @@ -96,12 +98,12 @@ func buildClientsOrDie(cfg *rest.Config) (versioned.Interface, kubernetes.Interf cfg.Timeout = 30 * time.Second cli, err := versioned.NewForConfig(cfg) if err != nil { - panic(err) + slack.NotifyAndPanic(err) } kubeCli, err := kubernetes.NewForConfig(cfg) if err != nil { - panic(err) + slack.NotifyAndPanic(err) } return cli, kubeCli diff --git a/tests/pkg/metrics/annotation_util.go b/tests/pkg/metrics/annotation_util.go index bcd7c59f8e..c2b6fbe805 100644 --- a/tests/pkg/metrics/annotation_util.go +++ b/tests/pkg/metrics/annotation_util.go @@ -25,6 +25,8 @@ import ( "path" "sync" + "github.com/pingcap/tidb-operator/tests/slack" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" ) @@ -93,7 +95,7 @@ func initFunc(port int) { l, err := net.Listen("tcp", fmt.Sprintf(":%d", port)) if err != nil { fmt.Fprintf(os.Stderr, "listening port %d failed, %v", port, err) - panic(err) + slack.NotifyAndPanic(err) } mux.Handle("/metrics", promhttp.Handler()) diff --git a/tests/pkg/webhook/pods.go b/tests/pkg/webhook/pods.go index 21c6cb56b7..c5d01702ac 100644 --- a/tests/pkg/webhook/pods.go +++ b/tests/pkg/webhook/pods.go @@ -7,6 +7,8 @@ import ( "strings" "time" + "github.com/pingcap/tidb-operator/tests/slack" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/golang/glog" @@ -84,7 +86,7 @@ func admitPods(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse { return &reviewResponse } - glog.Infof("delete %s pod [%s]", pod.Labels[label.ComponentLabelKey], pod.GetName()) + glog.Infof("delete pod %s", pod.Labels[label.ComponentLabelKey]) tc, err := versionCli.PingcapV1alpha1().TidbClusters(namespace).Get(pod.Labels[label.InstanceLabelKey], metav1.GetOptions{}) if err != nil { @@ -117,13 +119,19 @@ func admitPods(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse { if info.IsOwner && tc.Status.TiDB.StatefulSet.Replicas > 1 { time.Sleep(10 * time.Second) - glog.Errorf("tidb is ddl owner, can't be deleted namespace %s name %s", namespace, name) + err := fmt.Errorf("tidb is ddl owner, can't be deleted namespace %s name %s", namespace, name) + glog.Error(err) + sendErr := slack.SendErrMsg(err.Error()) + if sendErr != nil { + glog.Error(sendErr) + } os.Exit(3) } else { glog.Infof("savely delete pod namespace %s name %s isowner %t", namespace, name, info.IsOwner) } } else if pod.Labels[label.ComponentLabelKey] == "pd" { + leader, err := pdClient.GetPDLeader() if err != nil { glog.Errorf("fail to get pd leader %v", err) @@ -132,13 +140,19 @@ func admitPods(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse { if leader.Name == name && tc.Status.TiDB.StatefulSet.Replicas > 1 { time.Sleep(10 * time.Second) - glog.Errorf("pd is leader, can't be deleted namespace %s name %s", namespace, name) + err := fmt.Errorf("pd is leader, can't be deleted namespace %s name %s", namespace, name) + glog.Error(err) + sendErr := slack.SendErrMsg(err.Error()) + if sendErr != nil { + glog.Error(sendErr) + } os.Exit(3) } else { glog.Infof("savely delete pod namespace %s name %s leader name %s", namespace, name, leader.Name) } } else if pod.Labels[label.ComponentLabelKey] == "tikv" { + var storeID uint64 storeID = 0 for _, store := range tc.Status.TiKV.Stores { @@ -169,7 +183,12 @@ func admitPods(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse { if beforeCount != 0 && beforeCount <= afterCount && tc.Status.TiKV.StatefulSet.Replicas > 1 { time.Sleep(10 * time.Second) - glog.Errorf("kv leader is not zero, can't be deleted namespace %s name %s leaderCount %d", namespace, name, storeInfo.Status.LeaderCount) + err := fmt.Errorf("kv leader is not zero, can't be deleted namespace %s name %s leaderCount %d", namespace, name, storeInfo.Status.LeaderCount) + glog.Error(err) + sendErr := slack.SendErrMsg(err.Error()) + if sendErr != nil { + glog.Error(sendErr) + } os.Exit(3) } else { glog.Infof("savely delete pod namespace %s name %s before count %d after count %d", namespace, name, beforeCount, afterCount) diff --git a/tests/slack/slack.go b/tests/slack/slack.go index 54ccccf344..14a34dd62c 100644 --- a/tests/slack/slack.go +++ b/tests/slack/slack.go @@ -5,6 +5,9 @@ import ( "encoding/json" "fmt" "net/http" + "time" + + "github.com/golang/glog" ) var ( @@ -55,6 +58,9 @@ func (attachment *Attachment) AddField(field Field) *Attachment { } func Send(webhookUrl string, proxy string, payload Payload) error { + if webhookUrl == "" { + return fmt.Errorf("the webhookUrl have not set") + } body, err := json.Marshal(payload) if err != nil { return err @@ -138,3 +144,20 @@ func SendWarnMsg(msg string) error { } return nil } + +func NotifyAndPanic(err error) { + sendErr := SendErrMsg(err.Error()) + if sendErr != nil { + glog.Warningf("failed to notify slack[%s] the massage: %v,error: %v", WebhookUrl, err, sendErr) + } + time.Sleep(3 * time.Second) + panic(err) +} + +func NotifyAndCompleted(msg string) { + sendErr := SendGoodMsg(msg) + if sendErr != nil { + glog.Warningf("failed to notify slack[%s] the massage: %s,error: %v", WebhookUrl, msg, sendErr) + } + glog.Infof(msg) +} diff --git a/tests/util.go b/tests/util.go index 2531ea00d8..274045f8b0 100644 --- a/tests/util.go +++ b/tests/util.go @@ -17,6 +17,8 @@ import ( "math/rand" "time" + "github.com/pingcap/tidb-operator/tests/slack" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -32,7 +34,7 @@ func KeepOrDie(interval time.Duration, period time.Duration, fun func() error) { } err := fun() if err != nil { - panic(err) + slack.NotifyAndPanic(err) } time.Sleep(interval) }