Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mcs: fix the scatter error #7241

Merged
merged 5 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions pkg/mcs/scheduling/server/grpc_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,11 @@ func (s *Service) ScatterRegions(ctx context.Context, request *schedulingpb.Scat

opsCount, failures, err := c.GetRegionScatterer().ScatterRegionsByID(request.GetRegionsId(), request.GetGroup(), int(request.GetRetryLimit()), request.GetSkipStoreLimit())
if err != nil {
return nil, err
header := s.errorHeader(&schedulingpb.Error{
Type: schedulingpb.ErrorType_UNKNOWN,
Message: err.Error(),
})
return &schedulingpb.ScatterRegionsResponse{Header: header}, nil
}
percentage := 100
if len(failures) > 0 {
Expand Down Expand Up @@ -243,7 +247,7 @@ func (s *Service) GetOperator(ctx context.Context, request *schedulingpb.GetOper
if r == nil {
header := s.errorHeader(&schedulingpb.Error{
Type: schedulingpb.ErrorType_UNKNOWN,
Message: "Not Found",
Message: "region not found",
})
return &schedulingpb.GetOperatorResponse{Header: header}, nil
}
Expand Down
15 changes: 12 additions & 3 deletions pkg/schedule/scatter/region_scatterer.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ var (
scatterUnnecessaryCounter = scatterCounter.WithLabelValues("unnecessary", "")
scatterFailCounter = scatterCounter.WithLabelValues("fail", "")
scatterSuccessCounter = scatterCounter.WithLabelValues("success", "")
errRegionNotFound = errors.New("region not found")
errEmptyRegion = errors.New("empty region")
)

const (
Expand Down Expand Up @@ -165,7 +167,7 @@ func (r *RegionScatterer) ScatterRegionsByRange(startKey, endKey []byte, group s
regions := r.cluster.ScanRegions(startKey, endKey, -1)
if len(regions) < 1 {
scatterSkipEmptyRegionCounter.Inc()
return 0, nil, errors.New("empty region")
return 0, nil, errEmptyRegion
}
failures := make(map[uint64]error, len(regions))
regionMap := make(map[uint64]*core.RegionInfo, len(regions))
Expand All @@ -184,7 +186,14 @@ func (r *RegionScatterer) ScatterRegionsByRange(startKey, endKey []byte, group s
func (r *RegionScatterer) ScatterRegionsByID(regionsID []uint64, group string, retryLimit int, skipStoreLimit bool) (int, map[uint64]error, error) {
if len(regionsID) < 1 {
scatterSkipEmptyRegionCounter.Inc()
return 0, nil, errors.New("empty region")
return 0, nil, errEmptyRegion
}
if len(regionsID) == 1 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's different from previous, do we need to check whether it is nil?

region := r.cluster.GetRegion(regionsID[0])
if region == nil {
scatterSkipNoRegionCounter.Inc()
return 0, nil, errRegionNotFound
}
}
failures := make(map[uint64]error, len(regionsID))
regions := make([]*core.RegionInfo, 0, len(regionsID))
Expand Down Expand Up @@ -219,7 +228,7 @@ func (r *RegionScatterer) ScatterRegionsByID(regionsID []uint64, group string, r
func (r *RegionScatterer) scatterRegions(regions map[uint64]*core.RegionInfo, failures map[uint64]error, group string, retryLimit int, skipStoreLimit bool) (int, error) {
if len(regions) < 1 {
scatterSkipEmptyRegionCounter.Inc()
return 0, errors.New("empty region")
return 0, errEmptyRegion
}
if retryLimit > maxRetryLimit {
retryLimit = maxRetryLimit
Expand Down
36 changes: 33 additions & 3 deletions server/grpc_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -1034,6 +1034,10 @@ func (s *GrpcServer) StoreHeartbeat(ctx context.Context, request *pdpb.StoreHear
func (s *GrpcServer) updateSchedulingClient(ctx context.Context) (schedulingpb.SchedulingClient, error) {
forwardedHost, _ := s.GetServicePrimaryAddr(ctx, utils.SchedulingServiceName)
pre := s.schedulingClient.Load()
// 1. forwardedHost is not empty and pre is empty, update the schedulingClient
// 2. forwardedHost is not empty and forwardedHost is not equal to pre, update the schedulingClient
// 3. forwardedHost is not empty and forwardedHost is equal to pre, return pre
// 4. forwardedHost is empty, return nil
if forwardedHost != "" && ((pre == nil) || (pre != nil && forwardedHost != pre.(*schedulingClient).getPrimaryAddr())) {
client, err := s.getDelegateClient(ctx, forwardedHost)
if err != nil {
Expand All @@ -1045,6 +1049,8 @@ func (s *GrpcServer) updateSchedulingClient(ctx context.Context) (schedulingpb.S
}
s.schedulingClient.Store(forwardCli)
return forwardCli.getClient(), nil
} else if forwardedHost != "" && (pre != nil && forwardedHost == pre.(*schedulingClient).getPrimaryAddr()) {
return pre.(*schedulingClient).getClient(), nil
}
return nil, ErrNotFoundSchedulingAddr
}
Expand Down Expand Up @@ -1627,8 +1633,13 @@ func (s *GrpcServer) AskSplit(ctx context.Context, request *pdpb.AskSplitRequest
// AskBatchSplit implements gRPC PDServer.
func (s *GrpcServer) AskBatchSplit(ctx context.Context, request *pdpb.AskBatchSplitRequest) (*pdpb.AskBatchSplitResponse, error) {
if s.IsAPIServiceMode() {
s.updateSchedulingClient(ctx)
if s.schedulingClient.Load() != nil {
forwardCli, err := s.updateSchedulingClient(ctx)
if err != nil {
return &pdpb.AskBatchSplitResponse{
Header: s.wrapErrorToHeader(pdpb.ErrorType_UNKNOWN, err.Error()),
}, nil
}
if forwardCli != nil {
req := &schedulingpb.AskBatchSplitRequest{
Header: &schedulingpb.RequestHeader{
ClusterId: request.GetHeader().GetClusterId(),
Expand Down Expand Up @@ -1802,7 +1813,14 @@ func (s *GrpcServer) ScatterRegion(ctx context.Context, request *pdpb.ScatterReg
}, nil
}
if forwardCli != nil {
regionsID := request.GetRegionsId()
var regionsID []uint64
// nolint
if request.GetRegionId() != 0 {
// nolint
regionsID = []uint64{request.GetRegionId()}
} else {
regionsID = request.GetRegionsId()
}
Comment on lines +1816 to +1823
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why need nolint?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GetRegionId is deprecated

if len(regionsID) == 0 {
return &pdpb.ScatterRegionResponse{
Header: s.invalidValue("regions id is required"),
Expand Down Expand Up @@ -2134,6 +2152,15 @@ func (s *GrpcServer) invalidValue(msg string) *pdpb.ResponseHeader {
func (s *GrpcServer) convertHeader(header *schedulingpb.ResponseHeader) *pdpb.ResponseHeader {
switch header.GetError().GetType() {
case schedulingpb.ErrorType_UNKNOWN:
if strings.Contains(header.GetError().GetMessage(), "region not found") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how about using const value for "region not found"

return &pdpb.ResponseHeader{
ClusterId: header.GetClusterId(),
Error: &pdpb.Error{
Type: pdpb.ErrorType_REGION_NOT_FOUND,
Message: header.GetError().GetMessage(),
},
}
}
return &pdpb.ResponseHeader{
ClusterId: header.GetClusterId(),
Error: &pdpb.Error{
Expand Down Expand Up @@ -2502,6 +2529,9 @@ func forwardRegionHeartbeatClientToServer(forwardStream pdpb.PD_RegionHeartbeatC
}

func (s *GrpcServer) createSchedulingStream(client *grpc.ClientConn) (schedulingpb.Scheduling_RegionHeartbeatClient, context.CancelFunc, error) {
if client == nil {
return nil, nil, errors.New("connection is not set")
}
done := make(chan struct{})
ctx, cancel := context.WithCancel(s.ctx)
go grpcutil.CheckStream(ctx, cancel, done)
Expand Down