add more logs #676

Merged · 6 commits · Jul 23, 2019
Changes from 4 commits
17 changes: 10 additions & 7 deletions pkg/apis/pingcap.com/v1alpha1/types.go
@@ -199,10 +199,11 @@ type PDMember struct {

// PDFailureMember is the pd failure member information
type PDFailureMember struct {
PodName string `json:"podName,omitempty"`
MemberID string `json:"memberID,omitempty"`
PVCUID types.UID `json:"pvcUID,omitempty"`
MemberDeleted bool `json:"memberDeleted,omitempty"`
PodName string `json:"podName,omitempty"`
MemberID string `json:"memberID,omitempty"`
PVCUID types.UID `json:"pvcUID,omitempty"`
MemberDeleted bool `json:"memberDeleted,omitempty"`
CreatedAt metav1.Time `json:"createdAt,omitempty"`
}

// TiDBStatus is TiDB status
@@ -226,7 +227,8 @@ type TiDBMember struct {

// TiDBFailureMember is the tidb failure member information
type TiDBFailureMember struct {
PodName string `json:"podName,omitempty"`
PodName string `json:"podName,omitempty"`
CreatedAt metav1.Time `json:"createdAt,omitempty"`
}

// TiKVStatus is TiKV status
@@ -254,6 +256,7 @@ type TiKVStore struct {

// TiKVFailureStore is the tikv failure store information
type TiKVFailureStore struct {
PodName string `json:"podName,omitempty"`
StoreID string `json:"storeID,omitempty"`
PodName string `json:"podName,omitempty"`
StoreID string `json:"storeID,omitempty"`
CreatedAt metav1.Time `json:"createdAt,omitempty"`
}
9 changes: 6 additions & 3 deletions pkg/apis/pingcap.com/v1alpha1/zz_generated.deepcopy.go

Generated file; diff not rendered by default.

3 changes: 3 additions & 0 deletions pkg/manager/member/orphan_pods_cleaner.go
@@ -14,6 +14,7 @@
package member

import (
"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/label"
@@ -90,8 +91,10 @@ func (opc *orphanPodsCleaner) Clean(tc *v1alpha1.TidbCluster) (map[string]string

err = opc.podControl.DeletePod(tc, pod)
if err != nil {
glog.Errorf("orphan pods cleaner: failed to clean orphan pod: %s/%s, %v", ns, podName, err)
Contributor:

I generally prefer to annotate errors: that way there is a single error data structure with all the information, which can be logged in one spot and, if there is an API request, sent back in the response.

Contributor (author):

Yes, our error log style needs a lot of improvement. Can we discuss it in a new issue?

This PR tries to add detailed logs when something changes, to help us diagnose problems.

Contributor:

It is up to you. I think either way you should get a detailed log to help diagnose problems. If there is concurrent logging, annotation will be simpler to understand.

Contributor (author):

What is "logging annotation"?
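For context, "annotating errors" here means wrapping an error with extra information as it propagates, instead of logging at every call site. A minimal, self-contained Go sketch of that style (hypothetical, not part of this PR; the deletePod helper is a made-up stand-in for podControl.DeletePod, and github.com/pkg/errors is assumed for wrapping):

```go
// Hypothetical sketch of the "annotate errors" style (not part of this PR).
// Each layer wraps the error with its own context via errors.Wrapf, and the
// top-level caller logs the fully annotated error exactly once.
package main

import (
	"flag"

	"github.com/golang/glog"
	"github.com/pkg/errors"
)

// deletePod is a stand-in for a real pod-control call; it always fails here
// so that the annotated error path is exercised.
func deletePod(ns, name string) error {
	return errors.Errorf("pod %s/%s not found", ns, name)
}

func cleanOrphanPod(ns, podName string) error {
	if err := deletePod(ns, podName); err != nil {
		// Annotate instead of logging: the caller decides whether to log it,
		// retry, or send it back in an API response.
		return errors.Wrapf(err, "orphan pods cleaner: failed to clean orphan pod %s/%s", ns, podName)
	}
	return nil
}

func main() {
	flag.Parse()
	if err := cleanOrphanPod("default", "demo-pd-0"); err != nil {
		// A single log line carries all of the accumulated context.
		glog.Errorf("%v", err)
	}
	glog.Flush()
}
```

Either approach yields the detail needed to diagnose problems; the difference is whether the context is attached to the error once or logged at each call site with glog, as this PR does.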

return skipReason, err
}
glog.Infof("orphan pods cleaner: clean orphan pod: %s/%s successfully", ns, podName)
}

return skipReason, nil
9 changes: 9 additions & 0 deletions pkg/manager/member/pd_failover.go
@@ -18,12 +18,14 @@ import (
"strconv"
"time"

"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/client/clientset/versioned"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/pdapi"
"github.com/pingcap/tidb-operator/pkg/util"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
corelisters "k8s.io/client-go/listers/core/v1"
)

@@ -99,6 +101,7 @@ func (pf *pdFailover) Failover(tc *v1alpha1.TidbCluster) error {

func (pf *pdFailover) Recover(tc *v1alpha1.TidbCluster) {
tc.Status.PD.FailureMembers = nil
glog.Infof("pd failover: clearing pd failoverMembers, %s/%s", tc.GetNamespace(), tc.GetName())
}

func (pf *pdFailover) tryToMarkAPeerAsFailure(tc *v1alpha1.TidbCluster) error {
@@ -134,6 +137,7 @@ func (pf *pdFailover) tryToMarkAPeerAsFailure(tc *v1alpha1.TidbCluster) error {
MemberID: pdMember.ID,
PVCUID: pvc.UID,
MemberDeleted: false,
CreatedAt: metav1.Now(),
}
return controller.RequeueErrorf("marking Pod: %s/%s pd member: %s as failure", ns, podName, pdMember.Name)
}
@@ -165,8 +169,10 @@ func (pf *pdFailover) tryToDeleteAFailureMember(tc *v1alpha1.TidbCluster) error
// invoke deleteMember api to delete a member from the pd cluster
err = controller.GetPDClient(pf.pdControl, tc).DeleteMemberByID(memberID)
if err != nil {
glog.Errorf("pd failover: failed to delete member: %d, %v", memberID, err)
return err
}
glog.Infof("pd failover: delete member: %d successfully", memberID)

// The order of old PVC deleting and the new Pod creating is not guaranteed by Kubernetes.
// If new Pod is created before old PVC deleted, new Pod will reuse old PVC.
@@ -196,8 +202,10 @@
if pvc != nil && pvc.DeletionTimestamp == nil && pvc.GetUID() == failureMember.PVCUID {
err = pf.pvcControl.DeletePVC(tc, pvc)
if err != nil {
glog.Errorf("pd failover: failed to delete pvc: %s/%s, %v", ns, pvcName, err)
return err
}
glog.Infof("pd failover: pvc: %s/%s successfully", ns, pvcName)
}

setMemberDeleted(tc, failurePodName)
@@ -208,6 +216,7 @@ func setMemberDeleted(tc *v1alpha1.TidbCluster, podName string) {
failureMember := tc.Status.PD.FailureMembers[podName]
failureMember.MemberDeleted = true
tc.Status.PD.FailureMembers[podName] = failureMember
glog.Infof("pd failover: set pd member: %s/%s deleted", tc.GetName(), podName)
}

type fakePDFailover struct{}
8 changes: 5 additions & 3 deletions pkg/manager/member/pd_failover_test.go
@@ -242,9 +242,11 @@ func TestPDFailoverFailover(t *testing.T) {
expectFn: func(tc *v1alpha1.TidbCluster, _ *pdFailover) {
g.Expect(int(tc.Spec.PD.Replicas)).To(Equal(3))
g.Expect(len(tc.Status.PD.FailureMembers)).To(Equal(1))
g.Expect(tc.Status.PD.FailureMembers).To(Equal(map[string]v1alpha1.PDFailureMember{
"test-pd-1": {PodName: "test-pd-1", MemberID: "12891273174085095651", PVCUID: "pvc-1-uid", MemberDeleted: false},
}))
failureMembers := tc.Status.PD.FailureMembers["test-pd-1"]
g.Expect(failureMembers.PodName).To(Equal("test-pd-1"))
g.Expect(failureMembers.MemberID).To(Equal("12891273174085095651"))
g.Expect(string(failureMembers.PVCUID)).To(Equal("pvc-1-uid"))
g.Expect(failureMembers.MemberDeleted).To(BeFalse())
},
},
{
10 changes: 9 additions & 1 deletion pkg/manager/member/pd_scaler.go
@@ -17,6 +17,7 @@ import (
"fmt"
"time"

"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/label"
@@ -102,9 +103,11 @@ func (psd *pdScaler) ScaleIn(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet,

err := controller.GetPDClient(psd.pdControl, tc).DeleteMember(memberName)
if err != nil {
glog.Errorf("pd scale in: failed to delete member %s, %v", memberName, err)
resetReplicas(newSet, oldSet)
return err
}
glog.Infof("pd scale in: delete member %s successfully", memberName)

pvcName := ordinalPVCName(v1alpha1.PDMemberType, setName, ordinal)
pvc, err := psd.pvcLister.PersistentVolumeClaims(ns).Get(pvcName)
@@ -116,13 +119,18 @@ func (psd *pdScaler) ScaleIn(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet,
if pvc.Annotations == nil {
pvc.Annotations = map[string]string{}
}
pvc.Annotations[label.AnnPVCDeferDeleting] = time.Now().Format(time.RFC3339)
now := time.Now().Format(time.RFC3339)
pvc.Annotations[label.AnnPVCDeferDeleting] = now

_, err = psd.pvcControl.UpdatePVC(tc, pvc)
if err != nil {
glog.Errorf("pd scale in: failed to set pvc %s/%s annotation: %s to %s",
ns, pvcName, label.AnnPVCDeferDeleting, now)
resetReplicas(newSet, oldSet)
return err
}
glog.Infof("pd scale in: set pvc %s/%s annotation: %s to %s",
ns, pvcName, label.AnnPVCDeferDeleting, now)

decreaseReplicas(newSet, oldSet)
return nil
9 changes: 3 additions & 6 deletions pkg/manager/member/pd_upgrader.go
@@ -16,6 +16,7 @@ package member
import (
"fmt"

"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/pdapi"
@@ -44,12 +45,6 @@ func (pu *pdUpgrader) Upgrade(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet
return pu.gracefulUpgrade(tc, oldSet, newSet)
}

func (pu *pdUpgrader) forceUpgrade(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet, newSet *apps.StatefulSet) error {
tc.Status.PD.Phase = v1alpha1.UpgradePhase
setUpgradePartition(newSet, 0)
return nil
}

func (pu *pdUpgrader) gracefulUpgrade(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet, newSet *apps.StatefulSet) error {
ns := tc.GetNamespace()
tcName := tc.GetName()
@@ -102,8 +97,10 @@ func (pu *pdUpgrader) upgradePDPod(tc *v1alpha1.TidbCluster, ordinal int32, newS
}
err := pu.transferPDLeaderTo(tc, targetName)
if err != nil {
glog.Errorf("pd upgrader: failed to transfer pd leader to: %s, %v", targetName, err)
return err
}
glog.Infof("pd upgrader: transfer pd leader to: %s successfully", targetName)
return controller.RequeueErrorf("tidbcluster: [%s/%s]'s pd member: [%s] is transferring leader to pd member: [%s]", ns, tcName, upgradePodName, targetName)
}

14 changes: 13 additions & 1 deletion pkg/manager/member/scaler.go
@@ -16,6 +16,7 @@ package member
import (
"fmt"

"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/label"
@@ -70,17 +71,28 @@ func (gs *generalScaler) deleteDeferDeletingPVC(tc *v1alpha1.TidbCluster,
return skipReason, nil
}

return skipReason, gs.pvcControl.DeletePVC(tc, pvc)
err = gs.pvcControl.DeletePVC(tc, pvc)
if err != nil {
glog.Errorf("scale out: failed to delete pvc %s/%s, %v", ns, pvcName, err)
return skipReason, err
}
glog.Infof("scale out: delete pvc %s/%s successfully", ns, pvcName)

return skipReason, nil
}

func resetReplicas(newSet *apps.StatefulSet, oldSet *apps.StatefulSet) {
*newSet.Spec.Replicas = *oldSet.Spec.Replicas
}
func increaseReplicas(newSet *apps.StatefulSet, oldSet *apps.StatefulSet) {
*newSet.Spec.Replicas = *oldSet.Spec.Replicas + 1
glog.Infof("pd scale out: increase pd statefulset: %s/%s replicas to %d",
newSet.GetNamespace(), newSet.GetName(), newSet.Spec.Replicas)
}
func decreaseReplicas(newSet *apps.StatefulSet, oldSet *apps.StatefulSet) {
*newSet.Spec.Replicas = *oldSet.Spec.Replicas - 1
glog.Infof("pd scale in: decrease pd statefulset: %s/%s replicas to %d",
newSet.GetNamespace(), newSet.GetName(), newSet.Spec.Replicas)
}

func ordinalPVCName(memberType v1alpha1.MemberType, setName string, ordinal int32) string {
7 changes: 6 additions & 1 deletion pkg/manager/member/tidb_failover.go
@@ -18,6 +18,7 @@ import (

"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type tidbFailover struct {
@@ -40,6 +41,7 @@ func (tf *tidbFailover) Failover(tc *v1alpha1.TidbCluster) error {
_, exist := tc.Status.TiDB.FailureMembers[tidbMember.Name]
if exist && tidbMember.Health {
delete(tc.Status.TiDB.FailureMembers, tidbMember.Name)
glog.Infof("tidb failover: delete %s from tidb failoverMembers", tidbMember.Name)
}
}

@@ -51,7 +53,10 @@ func (tf *tidbFailover) Failover(tc *v1alpha1.TidbCluster) error {
_, exist := tc.Status.TiDB.FailureMembers[tidbMember.Name]
deadline := tidbMember.LastTransitionTime.Add(tf.tidbFailoverPeriod)
if !tidbMember.Health && time.Now().After(deadline) && !exist {
tc.Status.TiDB.FailureMembers[tidbMember.Name] = v1alpha1.TiDBFailureMember{PodName: tidbMember.Name}
tc.Status.TiDB.FailureMembers[tidbMember.Name] = v1alpha1.TiDBFailureMember{
PodName: tidbMember.Name,
CreatedAt: metav1.Now(),
}
break
}
}
3 changes: 3 additions & 0 deletions pkg/manager/member/tidb_upgrader.go
@@ -14,6 +14,7 @@
package member

import (
"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
apps "k8s.io/api/apps/v1beta1"
@@ -86,9 +87,11 @@ func (tdu *tidbUpgrader) upgradeTiDBPod(tc *v1alpha1.TidbCluster, ordinal int32,
if member, exist := tc.Status.TiDB.Members[tidbPodName(tcName, ordinal)]; exist && member.Health {
hasResign, err := tdu.tidbControl.ResignDDLOwner(tc, ordinal)
if (!hasResign || err != nil) && tc.Status.TiDB.ResignDDLOwnerRetryCount < MaxResignDDLOwnerCount {
glog.Errorf("tidb upgrader: failed to resign ddl owner to %s, %v", member.Name, err)
tc.Status.TiDB.ResignDDLOwnerRetryCount++
return err
}
glog.Infof("tidb upgrader: resign ddl owner to %s successfully", member.Name)
}
}

6 changes: 4 additions & 2 deletions pkg/manager/member/tikv_failover.go
@@ -17,6 +17,7 @@ import (
"time"

"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type tikvFailover struct {
@@ -47,8 +48,9 @@ func (tf *tikvFailover) Failover(tc *v1alpha1.TidbCluster) error {
tc.Status.TiKV.FailureStores = map[string]v1alpha1.TiKVFailureStore{}
}
tc.Status.TiKV.FailureStores[storeID] = v1alpha1.TiKVFailureStore{
PodName: podName,
StoreID: store.ID,
PodName: podName,
StoreID: store.ID,
CreatedAt: metav1.Now(),
}
}
}
9 changes: 8 additions & 1 deletion pkg/manager/member/tikv_scaler.go
@@ -88,9 +88,11 @@ func (tsd *tikvScaler) ScaleIn(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSe
}
if state != v1alpha1.TiKVStateOffline {
if err := controller.GetPDClient(tsd.pdControl, tc).DeleteStore(id); err != nil {
glog.Errorf("tikv scale in: failed to delete store %d, %v", id, err)
resetReplicas(newSet, oldSet)
return err
}
glog.Infof("tikv scale in: delete store %d successfully", id)
}
resetReplicas(newSet, oldSet)
return controller.RequeueErrorf("TiKV %s/%s store %d still in cluster, state: %s", ns, podName, id, state)
@@ -116,12 +118,17 @@ func (tsd *tikvScaler) ScaleIn(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSe
if pvc.Annotations == nil {
pvc.Annotations = map[string]string{}
}
pvc.Annotations[label.AnnPVCDeferDeleting] = time.Now().Format(time.RFC3339)
now := time.Now().Format(time.RFC3339)
pvc.Annotations[label.AnnPVCDeferDeleting] = now
_, err = tsd.pvcControl.UpdatePVC(tc, pvc)
if err != nil {
glog.Errorf("tikv scale in: failed to set pvc %s/%s annotation: %s to %s",
ns, pvcName, label.AnnPVCDeferDeleting, now)
resetReplicas(newSet, oldSet)
return err
}
glog.Infof("tikv scale in: set pvc %s/%s annotation: %s to %s",
ns, pvcName, label.AnnPVCDeferDeleting, now)

decreaseReplicas(newSet, oldSet)
return nil