From acf3e28ab136bf63cd431815ebc7c3492fafe7fa Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Tue, 21 May 2019 18:04:45 +0800 Subject: [PATCH] fix operator timeout metrics (#1541) Signed-off-by: Ryan Leung --- server/cluster.go | 2 +- server/schedule/operator.go | 1 - server/schedule/operator_controller.go | 10 +++++++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/server/cluster.go b/server/cluster.go index 0784d4f7032..0ea8baaf2ef 100644 --- a/server/cluster.go +++ b/server/cluster.go @@ -601,7 +601,7 @@ func (c *RaftCluster) checkOperators() { log.Info("operator timeout", zap.Uint64("region-id", op.RegionID()), zap.Stringer("operator", op)) - opController.RemoveOperator(op) + opController.RemoveTimeoutOperator(op) } } } diff --git a/server/schedule/operator.go b/server/schedule/operator.go index 3b1ed809662..00547159412 100644 --- a/server/schedule/operator.go +++ b/server/schedule/operator.go @@ -370,7 +370,6 @@ func (o *Operator) IsTimeout() bool { timeout = time.Since(o.createTime) > LeaderOperatorWaitTime } if timeout { - operatorCounter.WithLabelValues(o.Desc(), "timeout").Inc() return true } return false diff --git a/server/schedule/operator_controller.go b/server/schedule/operator_controller.go index adb78be4f20..21376701bf2 100644 --- a/server/schedule/operator_controller.go +++ b/server/schedule/operator_controller.go @@ -93,7 +93,7 @@ func (oc *OperatorController) Dispatch(region *core.RegionInfo, source string) { oc.RemoveOperator(op) } else if timeout { log.Info("operator timeout", zap.Uint64("region-id", region.GetID()), zap.Reflect("operator", op)) - oc.RemoveOperator(op) + oc.RemoveTimeoutOperator(op) oc.opRecords.Put(op, pdpb.OperatorStatus_TIMEOUT) } } @@ -238,6 +238,14 @@ func (oc *OperatorController) RemoveOperator(op *Operator) { oc.removeOperatorLocked(op) } +// RemoveTimeoutOperator removes a operator which is timeout from the running operators. +func (oc *OperatorController) RemoveTimeoutOperator(op *Operator) { + oc.Lock() + defer oc.Unlock() + operatorCounter.WithLabelValues(op.Desc(), "timeout").Inc() + oc.removeOperatorLocked(op) +} + // GetOperatorStatus gets the operator and its status with the specify id. func (oc *OperatorController) GetOperatorStatus(id uint64) *OperatorWithStatus { oc.Lock()