Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change cert generate method and add pd and kv webhook #406

Merged
merged 11 commits into from
Apr 29, 2019
38 changes: 38 additions & 0 deletions pkg/controller/tidb_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package controller

import (
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
Expand All @@ -27,12 +28,18 @@ const (
NotDDLOwnerError = "This node is not a ddl owner, can't be resigned."
)

type dbInfo struct {
IsOwner bool `json:"is_owner"`
}

// TiDBControlInterface is the interface that knows how to manage tidb peers
type TiDBControlInterface interface {
// GetHealth returns tidb's health info
GetHealth(tc *v1alpha1.TidbCluster) map[string]bool
// ResignDDLOwner resigns the ddl owner of tidb, if the tidb node is not a ddl owner returns (true,nil),else returns (false,err)
ResignDDLOwner(tc *v1alpha1.TidbCluster, ordinal int32) (bool, error)
// Get TIDB info return tidb's dbInfo
GetInfo(tc *v1alpha1.TidbCluster, ordinal int32) (*dbInfo, error)
}

// defaultTiDBControl is default implementation of TiDBControlInterface.
Expand Down Expand Up @@ -89,6 +96,37 @@ func (tdc *defaultTiDBControl) ResignDDLOwner(tc *v1alpha1.TidbCluster, ordinal
return false, err2
}

func (tdc *defaultTiDBControl) GetInfo(tc *v1alpha1.TidbCluster, ordinal int32) (*dbInfo, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Open an issue to remember to add unit tests to cover this method.

tcName := tc.GetName()
ns := tc.GetNamespace()

hostName := fmt.Sprintf("%s-%d", TiDBMemberName(tcName), ordinal)
url := fmt.Sprintf("http://%s.%s.%s:10080/info", hostName, TiDBPeerMemberName(tcName), ns)
req, err := http.NewRequest("POST", url, nil)
if err != nil {
return nil, err
}
res, err := tdc.httpClient.Do(req)
if err != nil {
return nil, err
}
defer DeferClose(res.Body, &err)
if res.StatusCode != http.StatusOK {
errMsg := fmt.Errorf(fmt.Sprintf("Error response %v", res.StatusCode))
return nil, errMsg
}
body, err := ioutil.ReadAll(res.Body)
if err != nil {
return nil, err
}
info := dbInfo{}
err = json.Unmarshal(body, &info)
if err != nil {
return nil, err
}
return &info, nil
}

func (tdc *defaultTiDBControl) getBodyOK(apiURL string) ([]byte, error) {
res, err := tdc.httpClient.Get(apiURL)
if err != nil {
Expand Down
6 changes: 6 additions & 0 deletions tests/actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,12 @@ func (oa *operatorActions) CheckScaledCorrectly(info *TidbClusterConfig, podUIDs
}

func (oa *operatorActions) UpgradeTidbCluster(info *TidbClusterConfig) error {
// record tikv leader count in webhook first
err := webhook.GetAllKVLeaders(oa.cli, info.Namespace, info.ClusterName)
if err != nil {
return err
}

cmd := fmt.Sprintf("helm upgrade %s %s --set-string %s",
info.ClusterName, chartPath("tidb-cluster", info.OperatorTag), info.TidbClusterHelmSetString(nil))
glog.Info("[UPGRADE] " + cmd)
Expand Down
172 changes: 59 additions & 113 deletions tests/pkg/webhook/pods.go
Original file line number Diff line number Diff line change
@@ -1,94 +1,68 @@
package webhook

import (
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"os"
"strconv"
"strings"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/golang/glog"
"github.com/pingcap/kvproto/pkg/pdpb"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/client/clientset/versioned"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/label"
"github.com/pingcap/tidb-operator/tests/pkg/client"
"k8s.io/api/admission/v1beta1"
)

type dbInfo struct {
IsOwner bool `json:"is_owner"`
}

var (
kvLeaders map[string]int
kvShoot map[string]bool
// Pod name may the same in different namespaces
kvLeaderMap map[string]map[string]int
)

func HttpHandler(url string, method string, httpClient *http.Client) (content []byte, err error) {
req, err := http.NewRequest(method, url, nil)
if err != nil {
glog.Errorf("fail to generator request %v", err)
return nil, err
func GetAllKVLeaders(versionCli versioned.Interface, namespace string, clusterName string) error {

if kvLeaderMap == nil {
kvLeaderMap = make(map[string]map[string]int)
}

res, err := httpClient.Do(req)
if err != nil {
glog.Errorf("fail to send request %v", err)
return nil, err
if kvLeaderMap[namespace] == nil {
kvLeaderMap[namespace] = make(map[string]int)
}
defer res.Body.Close()

content, err = ioutil.ReadAll(res.Body)
tc, err := versionCli.PingcapV1alpha1().TidbClusters(namespace).Get(clusterName, metav1.GetOptions{})

if err != nil {
glog.Errorf("fail to read response %v", err)
return nil, err
glog.Infof("fail to get tc clustername %s namesapce %s %v", clusterName, namespace, err)
return err
}

return content, nil

}
pdClient := controller.NewDefaultPDControl().GetPDClient(tc)

func getAllKVLeaders(tc *v1alpha1.TidbCluster, httpClient *http.Client) (ret map[string]int, err error) {
ret = make(map[string]int)
podIP := tc.Status.PD.Leader.ClientURL
for _, store := range tc.Status.TiKV.Stores {
url := fmt.Sprintf("%s/pd/api/v1/store/%s", podIP, store.ID)

content, err := HttpHandler(url, "GET", httpClient)
storeID, err := strconv.ParseUint(store.ID, 10, 64)
if err != nil {
glog.Errorf("fail to read response %v", err)
return nil, err
glog.Errorf("fail to convert string to int while deleting TIKV err %v", err)
return err
}

storeInfo := &controller.StoreInfo{}
err = json.Unmarshal(content, storeInfo)
storeInfo, err := pdClient.GetStore(storeID)
if err != nil {
glog.Errorf("unmarshal failed, %v", err)
return nil, err
glog.Errorf("fail to read response %v", err)
return err
}

ret[store.PodName] = storeInfo.Status.LeaderCount
kvLeaderMap[namespace][store.PodName] = storeInfo.Status.LeaderCount
}

return ret, nil
return nil
}

// only allow pods to be delete when it is not ddlowner of tidb, not leader of pd and not
// master of tikv.
func admitPods(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse {
glog.Infof("admitting pods")

if kvShoot == nil {
kvShoot = make(map[string]bool)
}

glog.Infof("kvshoot [%#v]",kvShoot)

httpClient := &http.Client{}
podResource := metav1.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"}
if ar.Request.Resource != podResource {
err := fmt.Errorf("expect resource to be %s", podResource)
Expand All @@ -99,132 +73,104 @@ func admitPods(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse {
versionCli, kubeCli := client.NewCliOrDie()

name := ar.Request.Name
nameSpace := ar.Request.Namespace
shootName := fmt.Sprintf("%s:%s", nameSpace, name)
namespace := ar.Request.Namespace

reviewResponse := v1beta1.AdmissionResponse{}
reviewResponse.Allowed = true

pod, err := kubeCli.CoreV1().Pods(nameSpace).Get(name, metav1.GetOptions{})
pod, err := kubeCli.CoreV1().Pods(namespace).Get(name, metav1.GetOptions{})
if err != nil {
reviewResponse.Allowed = false
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why delete this line? reviewResponse.Allowed default is true.

glog.Infof("%v", err)
glog.Infof("api server send wrong pod info namespace %s name %s err %v", namespace, name, err)
return &reviewResponse
}

glog.Infof("delete pod %s", pod.Labels[label.ComponentLabelKey])

tc, err := versionCli.PingcapV1alpha1().TidbClusters(nameSpace).Get(pod.Labels["app.kubernetes.io/instance"], metav1.GetOptions{})
tc, err := versionCli.PingcapV1alpha1().TidbClusters(namespace).Get(pod.Labels[label.InstanceLabelKey], metav1.GetOptions{})
if err != nil {
reviewResponse.Allowed = false
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

glog.Infof("%v", err)
glog.Infof("fail to fetch tidbcluster info namespace %s clustername(instance) %s err %v", namespace, pod.Labels[label.InstanceLabelKey], err)
return &reviewResponse
}

pdClient := controller.NewDefaultPDControl().GetPDClient(tc)
tidbController := controller.NewDefaultTiDBControl()

if pod.Labels[label.ComponentLabelKey] == "tidb" {
podIP := pod.Status.PodIP
url := fmt.Sprintf("http://%s:10080/info", podIP)

content, err := HttpHandler(url, "POST", httpClient)
ordinal, err := strconv.ParseInt(strings.Split(name, "-")[len(strings.Split(name, "-"))-1], 10, 32)
if err != nil {
glog.Errorf("fail to read response %v", err)
glog.Errorf("fail to convert string to int while deleting TiDB err %v", err)
return &reviewResponse
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

}

info := dbInfo{}
err = json.Unmarshal(content, &info)
info, err := tidbController.GetInfo(tc, int32(ordinal))
if err != nil {
glog.Errorf("unmarshal failed,namespace %s name %s error:%v", nameSpace, name, err)
glog.Errorf("fail to get tidb info error:%v", err)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

return &reviewResponse
}

if info.IsOwner && tc.Status.TiDB.StatefulSet.Replicas > 1 {
time.Sleep(10 * time.Second)
glog.Errorf("tidb is ddl owner, can't be deleted namespace %s name %s", nameSpace, name)
glog.Errorf("tidb is ddl owner, can't be deleted namespace %s name %s", namespace, name)
os.Exit(3)
} else {
glog.Infof("savely delete pod namespace %s name %s content %s", nameSpace, name, string(content))
glog.Infof("savely delete pod namespace %s name %s isowner %t", namespace, name, info.IsOwner)
}

kvShoot[shootName] = true

} else if pod.Labels[label.ComponentLabelKey] == "pd" {
podIP := tc.Status.PD.Leader.ClientURL
url := fmt.Sprintf("%s/pd/api/v1/leader", podIP)

content, err := HttpHandler(url, "GET", httpClient)
leader, err := pdClient.GetPDLeader()
if err != nil {
glog.Errorf("fail to read response %v", err)
return &reviewResponse
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

}

leader := &pdpb.Member{}
err = json.Unmarshal(content, leader)
if err != nil {
glog.Errorf("unmarshal failed,namespace %s name %s error:%v", nameSpace, name, err)
glog.Errorf("fail to get pd leader %v", err)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

return &reviewResponse
}

if leader.Name == name && tc.Status.TiDB.StatefulSet.Replicas > 1 {
time.Sleep(10 * time.Second)
glog.Errorf("pd is leader, can't be deleted namespace %s name %s", nameSpace, name)
glog.Errorf("pd is leader, can't be deleted namespace %s name %s", namespace, name)
os.Exit(3)
} else {
glog.Infof("savely delete pod namespace %s name %s leader name %s", nameSpace, name, leader.Name)
glog.Infof("savely delete pod namespace %s name %s leader name %s", namespace, name, leader.Name)
}

kvShoot[shootName] = true

} else if pod.Labels[label.ComponentLabelKey] == "tikv" {

if _, ok := kvShoot[shootName]; ok {
return &reviewResponse
}

var storeID string
podIP := tc.Status.PD.Leader.ClientURL
var storeID uint64
storeID = 0
for _, store := range tc.Status.TiKV.Stores {
if store.PodName == name {
storeID = store.ID
storeID, err = strconv.ParseUint(store.ID, 10, 64)
if err != nil {
glog.Errorf("fail to convert string to int while deleting PD err %v", err)
return &reviewResponse
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

}
break
}
}

url := fmt.Sprintf("%s/pd/api/v1/store/%s", podIP, storeID)

content, err := HttpHandler(url, "GET", httpClient)
if err != nil {
glog.Errorf("fail to read response %v", err)
// Fail to get store in stores
if storeID == 0 {
glog.Errorf("fail to find store in TIKV.Stores podname %s", name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

return &reviewResponse
}

storeInfo := &controller.StoreInfo{}
err = json.Unmarshal(content, storeInfo)
storeInfo, err := pdClient.GetStore(storeID)
if err != nil {
glog.Errorf("unmarshal failed,namespace %s name %s error:%v", nameSpace, name, err)
glog.Errorf("fail to read storeID %d response %v", storeID, err)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

return &reviewResponse
}

beforeCount := kvLeaders[name]
beforeCount := kvLeaderMap[namespace][name]
afterCount := storeInfo.Status.LeaderCount
glog.Infof("before evict the leader is %d after evict the leader is %d", beforeCount, afterCount)

if beforeCount != 0 && afterCount >= beforeCount && tc.Status.TiKV.StatefulSet.Replicas > 1 {
if beforeCount != 0 && beforeCount <= afterCount && tc.Status.TiKV.StatefulSet.Replicas > 1 {
time.Sleep(10 * time.Second)
glog.Errorf("kv leader is not zero, can't be deleted namespace %s name %s leaderCount %d", nameSpace, name, storeInfo.Status.LeaderCount)
glog.Errorf("kv leader is not zero, can't be deleted namespace %s name %s leaderCount %d", namespace, name, storeInfo.Status.LeaderCount)
os.Exit(3)
} else {
glog.Infof("savely delete pod namespace %s name %s", nameSpace, name)
glog.Infof("savely delete pod namespace %s name %s before count %d after count %d", namespace, name, beforeCount, afterCount)
}

kvShoot[shootName] = true

}

if kvLeaders, err = getAllKVLeaders(tc, httpClient); err != nil {
glog.Errorf("fail to get kv infos #v", err)
}

glog.Infof("%#v", kvLeaders)

return &reviewResponse
}