Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

server: stop the server when writing binlog failed (#15324) #15339

Merged
merged 3 commits into from
Mar 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions metrics/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,7 @@ var (
EventStart = "start"
EventGracefulDown = "graceful_shutdown"
// EventKill occurs when the server.Kill() function is called.
EventKill = "kill"
// EventHang occurs when the server meets a critical error. It will close the listening port and hang forever.
EventHang = "hang"
EventKill = "kill"
EventClose = "close"
ServerEventCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Expand Down
7 changes: 1 addition & 6 deletions server/conn.go
Original file line number Diff line number Diff line change
Expand Up @@ -676,13 +676,8 @@ func (cc *clientConn) Run(ctx context.Context) {
logutil.Logger(ctx).Error("result undetermined, close this connection", zap.Error(err))
return
} else if terror.ErrCritical.Equal(err) {
logutil.Logger(ctx).Error("critical error, stop the server listener", zap.Error(err))
metrics.CriticalErrorCounter.Add(1)
select {
case cc.server.stopListenerCh <- struct{}{}:
default:
}
return
logutil.Logger(ctx).Fatal("critical error, stop the server", zap.Error(err))
}
logutil.Logger(ctx).Warn("dispatch error",
zap.String("connInfo", cc.String()),
Expand Down
30 changes: 1 addition & 29 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,7 @@ type Server struct {
concurrentLimiter *TokenLimiter
clients map[uint32]*clientConn
capability uint32

// stopListenerCh is used when a critical error has occurred. We don't want to exit the process, because there may be
// a supervisor that automatically restarts it; new client connections would then be created, but we can't serve them.
// So we just stop the listener and hang, to force clients to choose other TiDB servers.
stopListenerCh chan struct{}
statusServer *http.Server
statusServer *http.Server
}

// ConnectionCount gets current connection count.
Expand Down Expand Up @@ -200,7 +195,6 @@ func NewServer(cfg *config.Config, driver IDriver) (*Server, error) {
driver: driver,
concurrentLimiter: NewTokenLimiter(cfg.TokenLimit),
clients: make(map[uint32]*clientConn),
stopListenerCh: make(chan struct{}, 1),
}

tlsConfig, err := util.LoadTLSCertificates(s.cfg.Security.SSLCA, s.cfg.Security.SSLKey, s.cfg.Security.SSLCert)
Expand Down Expand Up @@ -293,11 +287,6 @@ func (s *Server) Run() error {
logutil.Logger(context.Background()).Error("accept failed", zap.Error(err))
return errors.Trace(err)
}
if s.shouldStopListener() {
err = conn.Close()
terror.Log(errors.Trace(err))
break
}

clientConn := s.newConn(conn)

Expand Down Expand Up @@ -325,23 +314,6 @@ func (s *Server) Run() error {

go s.onConn(clientConn)
}
err := s.listener.Close()
terror.Log(errors.Trace(err))
s.listener = nil
for {
metrics.ServerEventCounter.WithLabelValues(metrics.EventHang).Inc()
logutil.Logger(context.Background()).Error("listener stopped, waiting for manual kill.")
time.Sleep(time.Minute)
}
}

// shouldStopListener reports whether a stop request is pending on
// stopListenerCh. It never blocks: if a value can be received it is consumed
// and true is returned; otherwise the result is false.
func (s *Server) shouldStopListener() bool {
	requested := false
	select {
	case <-s.stopListenerCh:
		requested = true
	default:
		// Nothing pending; keep requested == false.
	}
	return requested
}

// Close closes the server.
Expand Down