Skip to content

Commit

Permalink
add more logs (#676)
Browse files Browse the repository at this point in the history
  • Loading branch information
weekface authored Jul 23, 2019
1 parent b4342d6 commit 41bceb5
Show file tree
Hide file tree
Showing 15 changed files with 120 additions and 33 deletions.
17 changes: 10 additions & 7 deletions pkg/apis/pingcap.com/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,11 @@ type PDMember struct {

// PDFailureMember is the pd failure member information
type PDFailureMember struct {
PodName string `json:"podName,omitempty"`
MemberID string `json:"memberID,omitempty"`
PVCUID types.UID `json:"pvcUID,omitempty"`
MemberDeleted bool `json:"memberDeleted,omitempty"`
PodName string `json:"podName,omitempty"`
MemberID string `json:"memberID,omitempty"`
PVCUID types.UID `json:"pvcUID,omitempty"`
MemberDeleted bool `json:"memberDeleted,omitempty"`
CreatedAt metav1.Time `json:"createdAt,omitempty"`
}

// TiDBStatus is TiDB status
Expand All @@ -226,7 +227,8 @@ type TiDBMember struct {

// TiDBFailureMember is the tidb failure member information
type TiDBFailureMember struct {
PodName string `json:"podName,omitempty"`
PodName string `json:"podName,omitempty"`
CreatedAt metav1.Time `json:"createdAt,omitempty"`
}

// TiKVStatus is TiKV status
Expand Down Expand Up @@ -254,6 +256,7 @@ type TiKVStore struct {

// TiKVFailureStore is the tikv failure store information
type TiKVFailureStore struct {
PodName string `json:"podName,omitempty"`
StoreID string `json:"storeID,omitempty"`
PodName string `json:"podName,omitempty"`
StoreID string `json:"storeID,omitempty"`
CreatedAt metav1.Time `json:"createdAt,omitempty"`
}
9 changes: 6 additions & 3 deletions pkg/apis/pingcap.com/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pkg/manager/member/orphan_pods_cleaner.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package member

import (
"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/label"
Expand Down Expand Up @@ -90,8 +91,10 @@ func (opc *orphanPodsCleaner) Clean(tc *v1alpha1.TidbCluster) (map[string]string

err = opc.podControl.DeletePod(tc, pod)
if err != nil {
glog.Errorf("orphan pods cleaner: failed to clean orphan pod: %s/%s, %v", ns, podName, err)
return skipReason, err
}
glog.Infof("orphan pods cleaner: clean orphan pod: %s/%s successfully", ns, podName)
}

return skipReason, nil
Expand Down
9 changes: 9 additions & 0 deletions pkg/manager/member/pd_failover.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ import (
"strconv"
"time"

"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/client/clientset/versioned"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/pdapi"
"github.com/pingcap/tidb-operator/pkg/util"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
corelisters "k8s.io/client-go/listers/core/v1"
)

Expand Down Expand Up @@ -99,6 +101,7 @@ func (pf *pdFailover) Failover(tc *v1alpha1.TidbCluster) error {

// Recover drops every recorded PD failure member from the cluster status and
// logs the namespace/name of the TidbCluster that was cleared.
func (pf *pdFailover) Recover(tc *v1alpha1.TidbCluster) {
	tc.Status.PD.FailureMembers = nil

	namespace, name := tc.GetNamespace(), tc.GetName()
	glog.Infof("pd failover: clearing pd failoverMembers, %s/%s", namespace, name)
}

func (pf *pdFailover) tryToMarkAPeerAsFailure(tc *v1alpha1.TidbCluster) error {
Expand Down Expand Up @@ -134,6 +137,7 @@ func (pf *pdFailover) tryToMarkAPeerAsFailure(tc *v1alpha1.TidbCluster) error {
MemberID: pdMember.ID,
PVCUID: pvc.UID,
MemberDeleted: false,
CreatedAt: metav1.Now(),
}
return controller.RequeueErrorf("marking Pod: %s/%s pd member: %s as failure", ns, podName, pdMember.Name)
}
Expand Down Expand Up @@ -165,8 +169,10 @@ func (pf *pdFailover) tryToDeleteAFailureMember(tc *v1alpha1.TidbCluster) error
// invoke deleteMember api to delete a member from the pd cluster
err = controller.GetPDClient(pf.pdControl, tc).DeleteMemberByID(memberID)
if err != nil {
glog.Errorf("pd failover: failed to delete member: %d, %v", memberID, err)
return err
}
glog.Infof("pd failover: delete member: %d successfully", memberID)

// The order of old PVC deleting and the new Pod creating is not guaranteed by Kubernetes.
// If new Pod is created before old PVC deleted, new Pod will reuse old PVC.
Expand Down Expand Up @@ -196,8 +202,10 @@ func (pf *pdFailover) tryToDeleteAFailureMember(tc *v1alpha1.TidbCluster) error
if pvc != nil && pvc.DeletionTimestamp == nil && pvc.GetUID() == failureMember.PVCUID {
err = pf.pvcControl.DeletePVC(tc, pvc)
if err != nil {
glog.Errorf("pd failover: failed to delete pvc: %s/%s, %v", ns, pvcName, err)
return err
}
glog.Infof("pd failover: pvc: %s/%s successfully", ns, pvcName)
}

setMemberDeleted(tc, failurePodName)
Expand All @@ -208,6 +216,7 @@ func setMemberDeleted(tc *v1alpha1.TidbCluster, podName string) {
failureMember := tc.Status.PD.FailureMembers[podName]
failureMember.MemberDeleted = true
tc.Status.PD.FailureMembers[podName] = failureMember
glog.Infof("pd failover: set pd member: %s/%s deleted", tc.GetName(), podName)
}

type fakePDFailover struct{}
Expand Down
8 changes: 5 additions & 3 deletions pkg/manager/member/pd_failover_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,9 +242,11 @@ func TestPDFailoverFailover(t *testing.T) {
expectFn: func(tc *v1alpha1.TidbCluster, _ *pdFailover) {
g.Expect(int(tc.Spec.PD.Replicas)).To(Equal(3))
g.Expect(len(tc.Status.PD.FailureMembers)).To(Equal(1))
g.Expect(tc.Status.PD.FailureMembers).To(Equal(map[string]v1alpha1.PDFailureMember{
"test-pd-1": {PodName: "test-pd-1", MemberID: "12891273174085095651", PVCUID: "pvc-1-uid", MemberDeleted: false},
}))
failureMembers := tc.Status.PD.FailureMembers["test-pd-1"]
g.Expect(failureMembers.PodName).To(Equal("test-pd-1"))
g.Expect(failureMembers.MemberID).To(Equal("12891273174085095651"))
g.Expect(string(failureMembers.PVCUID)).To(Equal("pvc-1-uid"))
g.Expect(failureMembers.MemberDeleted).To(BeFalse())
},
},
{
Expand Down
10 changes: 9 additions & 1 deletion pkg/manager/member/pd_scaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"time"

"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/label"
Expand Down Expand Up @@ -102,9 +103,11 @@ func (psd *pdScaler) ScaleIn(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet,

err := controller.GetPDClient(psd.pdControl, tc).DeleteMember(memberName)
if err != nil {
glog.Errorf("pd scale in: failed to delete member %s, %v", memberName, err)
resetReplicas(newSet, oldSet)
return err
}
glog.Infof("pd scale in: delete member %s successfully", memberName)

pvcName := ordinalPVCName(v1alpha1.PDMemberType, setName, ordinal)
pvc, err := psd.pvcLister.PersistentVolumeClaims(ns).Get(pvcName)
Expand All @@ -116,13 +119,18 @@ func (psd *pdScaler) ScaleIn(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet,
if pvc.Annotations == nil {
pvc.Annotations = map[string]string{}
}
pvc.Annotations[label.AnnPVCDeferDeleting] = time.Now().Format(time.RFC3339)
now := time.Now().Format(time.RFC3339)
pvc.Annotations[label.AnnPVCDeferDeleting] = now

_, err = psd.pvcControl.UpdatePVC(tc, pvc)
if err != nil {
glog.Errorf("pd scale in: failed to set pvc %s/%s annotation: %s to %s",
ns, pvcName, label.AnnPVCDeferDeleting, now)
resetReplicas(newSet, oldSet)
return err
}
glog.Infof("pd scale in: set pvc %s/%s annotation: %s to %s",
ns, pvcName, label.AnnPVCDeferDeleting, now)

decreaseReplicas(newSet, oldSet)
return nil
Expand Down
9 changes: 3 additions & 6 deletions pkg/manager/member/pd_upgrader.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package member
import (
"fmt"

"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/pdapi"
Expand Down Expand Up @@ -44,12 +45,6 @@ func (pu *pdUpgrader) Upgrade(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet
return pu.gracefulUpgrade(tc, oldSet, newSet)
}

// forceUpgrade records that PD is in the upgrade phase and sets the upgrade
// partition of newSet to 0. oldSet is not consulted. It always returns nil.
func (pu *pdUpgrader) forceUpgrade(tc *v1alpha1.TidbCluster, oldSet, newSet *apps.StatefulSet) error {
	const partition = 0

	tc.Status.PD.Phase = v1alpha1.UpgradePhase
	setUpgradePartition(newSet, partition)
	return nil
}

func (pu *pdUpgrader) gracefulUpgrade(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet, newSet *apps.StatefulSet) error {
ns := tc.GetNamespace()
tcName := tc.GetName()
Expand Down Expand Up @@ -102,8 +97,10 @@ func (pu *pdUpgrader) upgradePDPod(tc *v1alpha1.TidbCluster, ordinal int32, newS
}
err := pu.transferPDLeaderTo(tc, targetName)
if err != nil {
glog.Errorf("pd upgrader: failed to transfer pd leader to: %s, %v", targetName, err)
return err
}
glog.Infof("pd upgrader: transfer pd leader to: %s successfully", targetName)
return controller.RequeueErrorf("tidbcluster: [%s/%s]'s pd member: [%s] is transferring leader to pd member: [%s]", ns, tcName, upgradePodName, targetName)
}

Expand Down
14 changes: 13 additions & 1 deletion pkg/manager/member/scaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package member
import (
"fmt"

"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/label"
Expand Down Expand Up @@ -70,17 +71,28 @@ func (gs *generalScaler) deleteDeferDeletingPVC(tc *v1alpha1.TidbCluster,
return skipReason, nil
}

return skipReason, gs.pvcControl.DeletePVC(tc, pvc)
err = gs.pvcControl.DeletePVC(tc, pvc)
if err != nil {
glog.Errorf("scale out: failed to delete pvc %s/%s, %v", ns, pvcName, err)
return skipReason, err
}
glog.Infof("scale out: delete pvc %s/%s successfully", ns, pvcName)

return skipReason, nil
}

// resetReplicas overwrites newSet's replica count with the value currently
// held by oldSet.
func resetReplicas(newSet, oldSet *apps.StatefulSet) {
	current := *oldSet.Spec.Replicas
	*newSet.Spec.Replicas = current
}
// increaseReplicas sets newSet's replica count to one more than oldSet's and
// logs the resulting count together with the StatefulSet's namespace/name.
func increaseReplicas(newSet *apps.StatefulSet, oldSet *apps.StatefulSet) {
	*newSet.Spec.Replicas = *oldSet.Spec.Replicas + 1
	// Spec.Replicas is a pointer: dereference it so %d prints the replica
	// count rather than the pointer's address. The message is kept
	// component-neutral because this helper lives in the shared scaler file.
	glog.Infof("scale out: increase statefulset: %s/%s replicas to %d",
		newSet.GetNamespace(), newSet.GetName(), *newSet.Spec.Replicas)
}
// decreaseReplicas sets newSet's replica count to one less than oldSet's and
// logs the resulting count together with the StatefulSet's namespace/name.
func decreaseReplicas(newSet *apps.StatefulSet, oldSet *apps.StatefulSet) {
	*newSet.Spec.Replicas = *oldSet.Spec.Replicas - 1
	// Spec.Replicas is a pointer: dereference it so %d prints the replica
	// count rather than the pointer's address. The message is kept
	// component-neutral because both the PD and TiKV scalers call this helper.
	glog.Infof("scale in: decrease statefulset: %s/%s replicas to %d",
		newSet.GetNamespace(), newSet.GetName(), *newSet.Spec.Replicas)
}

func ordinalPVCName(memberType v1alpha1.MemberType, setName string, ordinal int32) string {
Expand Down
7 changes: 6 additions & 1 deletion pkg/manager/member/tidb_failover.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (

"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type tidbFailover struct {
Expand All @@ -40,6 +41,7 @@ func (tf *tidbFailover) Failover(tc *v1alpha1.TidbCluster) error {
_, exist := tc.Status.TiDB.FailureMembers[tidbMember.Name]
if exist && tidbMember.Health {
delete(tc.Status.TiDB.FailureMembers, tidbMember.Name)
glog.Infof("tidb failover: delete %s from tidb failoverMembers", tidbMember.Name)
}
}

Expand All @@ -51,7 +53,10 @@ func (tf *tidbFailover) Failover(tc *v1alpha1.TidbCluster) error {
_, exist := tc.Status.TiDB.FailureMembers[tidbMember.Name]
deadline := tidbMember.LastTransitionTime.Add(tf.tidbFailoverPeriod)
if !tidbMember.Health && time.Now().After(deadline) && !exist {
tc.Status.TiDB.FailureMembers[tidbMember.Name] = v1alpha1.TiDBFailureMember{PodName: tidbMember.Name}
tc.Status.TiDB.FailureMembers[tidbMember.Name] = v1alpha1.TiDBFailureMember{
PodName: tidbMember.Name,
CreatedAt: metav1.Now(),
}
break
}
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/manager/member/tidb_upgrader.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package member

import (
"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
apps "k8s.io/api/apps/v1beta1"
Expand Down Expand Up @@ -86,9 +87,11 @@ func (tdu *tidbUpgrader) upgradeTiDBPod(tc *v1alpha1.TidbCluster, ordinal int32,
if member, exist := tc.Status.TiDB.Members[tidbPodName(tcName, ordinal)]; exist && member.Health {
hasResign, err := tdu.tidbControl.ResignDDLOwner(tc, ordinal)
if (!hasResign || err != nil) && tc.Status.TiDB.ResignDDLOwnerRetryCount < MaxResignDDLOwnerCount {
glog.Errorf("tidb upgrader: failed to resign ddl owner to %s, %v", member.Name, err)
tc.Status.TiDB.ResignDDLOwnerRetryCount++
return err
}
glog.Infof("tidb upgrader: resign ddl owner to %s successfully", member.Name)
}
}

Expand Down
6 changes: 4 additions & 2 deletions pkg/manager/member/tikv_failover.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"time"

"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type tikvFailover struct {
Expand Down Expand Up @@ -47,8 +48,9 @@ func (tf *tikvFailover) Failover(tc *v1alpha1.TidbCluster) error {
tc.Status.TiKV.FailureStores = map[string]v1alpha1.TiKVFailureStore{}
}
tc.Status.TiKV.FailureStores[storeID] = v1alpha1.TiKVFailureStore{
PodName: podName,
StoreID: store.ID,
PodName: podName,
StoreID: store.ID,
CreatedAt: metav1.Now(),
}
}
}
Expand Down
9 changes: 8 additions & 1 deletion pkg/manager/member/tikv_scaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,11 @@ func (tsd *tikvScaler) ScaleIn(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSe
}
if state != v1alpha1.TiKVStateOffline {
if err := controller.GetPDClient(tsd.pdControl, tc).DeleteStore(id); err != nil {
glog.Errorf("tikv scale in: failed to delete store %d, %v", id, err)
resetReplicas(newSet, oldSet)
return err
}
glog.Infof("tikv scale in: delete store %d successfully", id)
}
resetReplicas(newSet, oldSet)
return controller.RequeueErrorf("TiKV %s/%s store %d still in cluster, state: %s", ns, podName, id, state)
Expand All @@ -116,12 +118,17 @@ func (tsd *tikvScaler) ScaleIn(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSe
if pvc.Annotations == nil {
pvc.Annotations = map[string]string{}
}
pvc.Annotations[label.AnnPVCDeferDeleting] = time.Now().Format(time.RFC3339)
now := time.Now().Format(time.RFC3339)
pvc.Annotations[label.AnnPVCDeferDeleting] = now
_, err = tsd.pvcControl.UpdatePVC(tc, pvc)
if err != nil {
glog.Errorf("tikv scale in: failed to set pvc %s/%s annotation: %s to %s",
ns, pvcName, label.AnnPVCDeferDeleting, now)
resetReplicas(newSet, oldSet)
return err
}
glog.Infof("tikv scale in: set pvc %s/%s annotation: %s to %s",
ns, pvcName, label.AnnPVCDeferDeleting, now)

decreaseReplicas(newSet, oldSet)
return nil
Expand Down
Loading

0 comments on commit 41bceb5

Please sign in to comment.