Skip to content

Commit

Permalink
set the default charset at startup
Browse files Browse the repository at this point in the history
I am now running tidb with the arguments:

    -character-set-server utf8mb4
    -collation-server utf8mb4_unicode_ci

My understanding is that this doesn't change the behavior of TiDB (it is
equivalent to the current configuration).
However, it allows TiDB to be compatible with standard MySQL setups.
Previously, I would see this failure message when using an application:

    unsupported modify column charset utf8 not match origin utf8mb4

Using the above command line flags, the application now works.
There is still more to do to have proper charset support,
but this is enough to fix my use case.
  • Loading branch information
gregwebs committed Sep 20, 2018
1 parent 334e925 commit 69e4a38
Show file tree
Hide file tree
Showing 9 changed files with 103 additions and 44 deletions.
2 changes: 1 addition & 1 deletion ast/format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type testAstFormatSuite struct {
}

func getDefaultCharsetAndCollate() (string, string) {
return "utf8", "utf8_bin"
return mysql.DefaultCharset, mysql.DefaultCollationName
}

func (ts *testAstFormatSuite) TestAstFormat(c *C) {
Expand Down
5 changes: 5 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"time"

"github.com/BurntSushi/toml"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/util/logutil"
"github.com/pkg/errors"
tracing "github.com/uber/jaeger-client-go/config"
Expand Down Expand Up @@ -73,6 +74,8 @@ type Config struct {
TiKVClient TiKVClient `toml:"tikv-client" json:"tikv-client"`
Binlog Binlog `toml:"binlog" json:"binlog"`
CompatibleKillQuery bool `toml:"compatible-kill-query" json:"compatible-kill-query"`
CharacterSetServer string `toml:"character-set-server" json:"character-set-server"`
CollationServer string `toml:"collation-server" json:"collation-server"`
}

// Log is the log section of config.
Expand Down Expand Up @@ -320,6 +323,8 @@ var defaultConf = Config{
Binlog: Binlog{
WriteTimeout: "15s",
},
CharacterSetServer: mysql.DefaultCharset,
CollationServer: mysql.DefaultCollationName,
}

var globalConf = defaultConf
Expand Down
2 changes: 1 addition & 1 deletion ddl/ddl_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func getDefaultCharsetAndCollate() (string, string) {
// TODO: TableDefaultCharset-->DatabaseDefaultCharset-->SystemDefaultCharset.
// TODO: Change TableOption parser to parse collate.
// This is a tmp solution.
return "utf8", "utf8_bin"
return mysql.DefaultCharset, mysql.DefaultCollationName
}

func setColumnFlagWithConstraint(colMap map[string]*table.Column, v *ast.Constraint) {
Expand Down
4 changes: 2 additions & 2 deletions executor/ddl.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,11 @@ func (e *DDLExec) executeDropDatabase(s *ast.DropDatabaseStmt) error {
sessionVars := e.ctx.GetSessionVars()
if err == nil && strings.ToLower(sessionVars.CurrentDB) == dbName.L {
sessionVars.CurrentDB = ""
err = variable.SetSessionSystemVar(sessionVars, variable.CharsetDatabase, types.NewStringDatum("utf8"))
err = variable.SetSessionSystemVar(sessionVars, variable.CharsetDatabase, types.NewStringDatum(mysql.DefaultCharset))
if err != nil {
return errors.Trace(err)
}
err = variable.SetSessionSystemVar(sessionVars, variable.CollationDatabase, types.NewStringDatum("utf8_unicode_ci"))
err = variable.SetSessionSystemVar(sessionVars, variable.CollationDatabase, types.NewStringDatum(mysql.DefaultCollationName))
if err != nil {
return errors.Trace(err)
}
Expand Down
39 changes: 36 additions & 3 deletions mysql/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@

package mysql

import "unicode"
import (
"unicode"

"github.com/pkg/errors"
)

// CharsetIDs maps charset name to its default collation ID.
var CharsetIDs = map[string]uint8{
Expand Down Expand Up @@ -553,18 +557,47 @@ var CollationNames = map[string]uint8{
const (
UTF8Charset = "utf8"
UTF8MB4Charset = "utf8mb4"
DefaultCharset = UTF8Charset
DefaultCollationID = 83
BinaryCollationID = 63
UTF8DefaultCollation = "utf8_bin"
DefaultCollationName = UTF8DefaultCollation

// MaxBytesOfCharacter, is the max bytes length of a character,
// refer to RFC3629, in UTF-8, characters from the U+0000..U+10FFFF range
// (the UTF-16 accessible range) are encoded using sequences of 1 to 4 octets.
MaxBytesOfCharacter = 4
)

var (
// DefaultCharset is the default character set for the server.
// It can be changed through the server configuration.
DefaultCharset = UTF8Charset
// DefaultCollationName is the default collation for the server.
// It can be changed through the server configuration.
DefaultCollationName = UTF8DefaultCollation
)

// ValidateCharsetCollation checks if this charset and collation are accepted by TiDB.
//
// It returns an error when the charset is not a key of Charsets, when the
// collation is not a key of CollationNames, or when the collation does not
// belong to the charset. It returns nil when the pair is acceptable.
func ValidateCharsetCollation(charset string, collation string) error {
	if _, ok := Charsets[charset]; !ok {
		return errors.Errorf("not a valid charset: %s", charset)
	}
	if _, ok := CollationNames[collation]; !ok {
		return errors.Errorf("not a valid collation: %s", collation)
	}
	// Both names being known is not enough: the collation must be one of the
	// charset's own. Following the MySQL naming convention, a collation is
	// named "<charset>_<suffix>", or is the charset name itself (as for the
	// binary charset). Comparing against charset+"_" keeps "utf8mb4_bin" from
	// being accepted for charset "utf8".
	if collation != charset {
		prefix := charset + "_"
		if len(collation) <= len(prefix) || collation[:len(prefix)] != prefix {
			return errors.Errorf("collation %s is not valid for charset %s", collation, charset)
		}
	}
	return nil
}

// SetDefaultCharsetCollation replaces the package-level DefaultCharset and
// DefaultCollationName with the given values.
// The pair is first passed to ValidateCharsetCollation; if that fails, the
// error is returned and the defaults are left untouched.
// This should only be changed at startup.
func SetDefaultCharsetCollation(charset string, collation string) error {
	err := ValidateCharsetCollation(charset, collation)
	if err == nil {
		DefaultCharset, DefaultCollationName = charset, collation
	}
	return err
}

// IsUTF8Charset checks if charset is utf8 or utf8mb4
func IsUTF8Charset(charset string) bool {
return charset == UTF8Charset || charset == UTF8MB4Charset
Expand Down
4 changes: 2 additions & 2 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ var defaultSysVars = []*SysVar{
{ScopeGlobal, "innodb_max_undo_log_size", ""},
{ScopeGlobal | ScopeSession, "range_alloc_block_size", "4096"},
{ScopeGlobal, ConnectTimeout, "10"},
{ScopeGlobal | ScopeSession, "collation_server", charset.CollationUTF8},
{ScopeGlobal | ScopeSession, "collation_server", mysql.DefaultCollationName},
{ScopeNone, "have_rtree_keys", "YES"},
{ScopeGlobal, "innodb_old_blocks_pct", "37"},
{ScopeGlobal, "innodb_file_format", "Antelope"},
Expand Down Expand Up @@ -248,7 +248,7 @@ var defaultSysVars = []*SysVar{
{ScopeNone, "innodb_buffer_pool_instances", "8"},
{ScopeGlobal | ScopeSession, "block_encryption_mode", "aes-128-ecb"},
{ScopeGlobal | ScopeSession, "max_length_for_sort_data", "1024"},
{ScopeNone, "character_set_system", "utf8"},
{ScopeNone, "character_set_system", mysql.DefaultCharset},
{ScopeGlobal | ScopeSession, "interactive_timeout", "28800"},
{ScopeGlobal, "innodb_optimize_fulltext_only", "OFF"},
{ScopeNone, "character_sets_dir", "/usr/local/mysql-5.6.25-osx10.8-x86_64/share/charsets/"},
Expand Down
3 changes: 2 additions & 1 deletion table/tables/gen_expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ import (

"github.com/pingcap/tidb/ast"
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/parser"
"github.com/pkg/errors"
)

// getDefaultCharsetAndCollate is copied from ddl/ddl_api.go.
func getDefaultCharsetAndCollate() (string, string) {
return "utf8", "utf8_bin"
return mysql.DefaultCharset, mysql.DefaultCollationName
}

// nameResolver is the visitor to resolve table name and column name.
Expand Down
86 changes: 53 additions & 33 deletions tidb-server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/pingcap/tidb/domain"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/metrics"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/plan"
"github.com/pingcap/tidb/privilege/privileges"
"github.com/pingcap/tidb/server"
Expand All @@ -57,25 +58,27 @@ import (

// Flag Names
const (
nmVersion = "V"
nmConfig = "config"
nmStore = "store"
nmStorePath = "path"
nmHost = "host"
nmAdvertiseAddress = "advertise-address"
nmPort = "P"
nmSocket = "socket"
nmBinlogSocket = "binlog-socket"
nmRunDDL = "run-ddl"
nmLogLevel = "L"
nmLogFile = "log-file"
nmLogSlowQuery = "log-slow-query"
nmReportStatus = "report-status"
nmStatusPort = "status"
nmMetricsAddr = "metrics-addr"
nmMetricsInterval = "metrics-interval"
nmDdlLease = "lease"
nmTokenLimit = "token-limit"
nmVersion = "V"
nmConfig = "config"
nmStore = "store"
nmStorePath = "path"
nmHost = "host"
nmAdvertiseAddress = "advertise-address"
nmPort = "P"
nmSocket = "socket"
nmBinlogSocket = "binlog-socket"
nmRunDDL = "run-ddl"
nmLogLevel = "L"
nmLogFile = "log-file"
nmLogSlowQuery = "log-slow-query"
nmReportStatus = "report-status"
nmStatusPort = "status"
nmMetricsAddr = "metrics-addr"
nmMetricsInterval = "metrics-interval"
nmDdlLease = "lease"
nmTokenLimit = "token-limit"
nmCharacterSetServer = "character-set-server"
nmCollationServer = "collation-server"

nmProxyProtocolNetworks = "proxy-protocol-networks"
nmProxyProtocolHeaderTimeout = "proxy-protocol-header-timeout"
Expand All @@ -86,16 +89,18 @@ var (
configPath = flag.String(nmConfig, "", "config file path")

// Base
store = flag.String(nmStore, "mocktikv", "registered store name, [tikv, mocktikv]")
storePath = flag.String(nmStorePath, "/tmp/tidb", "tidb storage path")
host = flag.String(nmHost, "0.0.0.0", "tidb server host")
advertiseAddress = flag.String(nmAdvertiseAddress, "", "tidb server advertise IP")
port = flag.String(nmPort, "4000", "tidb server port")
socket = flag.String(nmSocket, "", "The socket file to use for connection.")
binlogSocket = flag.String(nmBinlogSocket, "", "socket file to write binlog")
runDDL = flagBoolean(nmRunDDL, true, "run ddl worker on this tidb-server")
ddlLease = flag.String(nmDdlLease, "45s", "schema lease duration, very dangerous to change only if you know what you do")
tokenLimit = flag.Int(nmTokenLimit, 1000, "the limit of concurrent executed sessions")
store = flag.String(nmStore, "mocktikv", "registered store name, [tikv, mocktikv]")
storePath = flag.String(nmStorePath, "/tmp/tidb", "tidb storage path")
host = flag.String(nmHost, "0.0.0.0", "tidb server host")
advertiseAddress = flag.String(nmAdvertiseAddress, "", "tidb server advertise IP")
port = flag.String(nmPort, "4000", "tidb server port")
socket = flag.String(nmSocket, "", "The socket file to use for connection.")
binlogSocket = flag.String(nmBinlogSocket, "", "socket file to write binlog")
runDDL = flagBoolean(nmRunDDL, true, "run ddl worker on this tidb-server")
ddlLease = flag.String(nmDdlLease, "45s", "schema lease duration, very dangerous to change only if you know what you do")
tokenLimit = flag.Int(nmTokenLimit, 1000, "the limit of concurrent executed sessions")
characterSetServer = flag.String(nmCharacterSetServer, mysql.DefaultCharset, "the default character set")
collationServer = flag.String(nmCollationServer, mysql.DefaultCollationName, "the default character collation")

// Log
logLevel = flag.String(nmLogLevel, "info", "log level: info, debug, warn, error, fatal")
Expand Down Expand Up @@ -128,12 +133,17 @@ func main() {
fmt.Println(printer.GetTiDBInfo())
os.Exit(0)
}
var err error
registerStores()
registerMetrics()
loadConfig()
overrideConfig()
validateConfig()
setGlobalVars()
if err = validateConfig(); err != nil {
terror.MustNil(err)
}
if err = setGlobalVars(); err != nil {
terror.MustNil(err)
}
setupLog()
setupTracing() // Should before createServer and after setup config.
printInfo()
Expand Down Expand Up @@ -299,6 +309,12 @@ func overrideConfig() {
if actualFlags[nmTokenLimit] {
cfg.TokenLimit = uint(*tokenLimit)
}
if actualFlags[nmCharacterSetServer] {
cfg.CharacterSetServer = *characterSetServer
}
if actualFlags[nmCollationServer] {
cfg.CollationServer = *collationServer
}

// Log
if actualFlags[nmLogLevel] {
Expand Down Expand Up @@ -337,7 +353,7 @@ func overrideConfig() {
}
}

func validateConfig() {
func validateConfig() error {
if cfg.Security.SkipGrantTable && !hasRootPrivilege() {
log.Error("TiDB run with skip-grant-table need root privilege.")
os.Exit(-1)
Expand Down Expand Up @@ -371,9 +387,11 @@ func validateConfig() {
log.Errorf("lower-case-table-names should be 0 or 1 or 2.")
os.Exit(-1)
}

return mysql.ValidateCharsetCollation(cfg.CharacterSetServer, cfg.CollationServer)
}

func setGlobalVars() {
func setGlobalVars() error {
ddlLeaseDuration := parseDuration(cfg.Lease)
session.SetSchemaLease(ddlLeaseDuration)
runtime.GOMAXPROCS(int(cfg.Performance.MaxProcs))
Expand Down Expand Up @@ -403,6 +421,8 @@ func setGlobalVars() {
tikv.GrpcKeepAliveTimeout = time.Duration(cfg.TiKVClient.GrpcKeepAliveTimeout) * time.Second

tikv.CommitMaxBackoff = int(parseDuration(cfg.TiKVClient.CommitTimeout).Seconds() * 1000)

return mysql.SetDefaultCharsetCollation(cfg.CharacterSetServer, cfg.CollationServer)
}

func setupLog() {
Expand Down
2 changes: 1 addition & 1 deletion util/charset/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func GetAllCharsets() []*Desc {
func ValidCharsetAndCollation(cs string, co string) bool {
// We will use utf8 as a default charset.
if cs == "" {
cs = "utf8"
cs = mysql.DefaultCharset
}
cs = strings.ToLower(cs)
c, ok := charsets[cs]
Expand Down

0 comments on commit 69e4a38

Please sign in to comment.