Skip to content

Commit

Permalink
*: fix utf8 charset upgrade compatibility (pingcap#9820)
Browse files Browse the repository at this point in the history
  • Loading branch information
crazycs520 committed Mar 25, 2019
1 parent 49c5972 commit a9f6a0e
Show file tree
Hide file tree
Showing 19 changed files with 474 additions and 50 deletions.
36 changes: 20 additions & 16 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,10 @@ type Config struct {
TiKVClient TiKVClient `toml:"tikv-client" json:"tikv-client"`
Binlog Binlog `toml:"binlog" json:"binlog"`
CompatibleKillQuery bool `toml:"compatible-kill-query" json:"compatible-kill-query"`
CheckMb4ValueInUtf8 bool `toml:"check-mb4-value-in-utf8" json:"check-mb4-value-in-utf8"`
CheckMb4ValueInUTF8 bool `toml:"check-mb4-value-in-utf8" json:"check-mb4-value-in-utf8"`
// TreatOldVersionUTF8AsUTF8MB4 is used to treat the UTF8 charset of old-version tables/columns as UTF8MB4. This is for compatibility.
// Dynamic modification is currently not supported, because it would require reloading all old-version schemas.
TreatOldVersionUTF8AsUTF8MB4 bool `toml:"treat-old-version-utf8-as-utf8mb4" json:"treat-old-version-utf8-as-utf8mb4"`
}

// Log is the log section of config.
Expand Down Expand Up @@ -252,22 +255,23 @@ type Binlog struct {
}

var defaultConf = Config{
Host: "0.0.0.0",
AdvertiseAddress: "",
Port: 4000,
Store: "mocktikv",
Path: "/tmp/tidb",
RunDDL: true,
SplitTable: true,
Lease: "45s",
TokenLimit: 1000,
OOMAction: "log",
MemQuotaQuery: 32 << 30,
EnableStreaming: false,
CheckMb4ValueInUtf8: true,
Host: "0.0.0.0",
AdvertiseAddress: "",
Port: 4000,
Store: "mocktikv",
Path: "/tmp/tidb",
RunDDL: true,
SplitTable: true,
Lease: "45s",
TokenLimit: 1000,
OOMAction: "log",
MemQuotaQuery: 32 << 30,
EnableStreaming: false,
CheckMb4ValueInUTF8: true,
TreatOldVersionUTF8AsUTF8MB4: true,
TxnLocalLatches: TxnLocalLatches{
Enabled: false,
Capacity: 10240000,
Enabled: true,
Capacity: 2048000,
},
LowerCaseTableNames: 2,
Log: Log{
Expand Down
3 changes: 3 additions & 0 deletions config/config.toml.example
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ compatible-kill-query = false
# check mb4 value in utf8 is used to control whether to check the mb4 characters when the charset is utf8.
check-mb4-value-in-utf8 = true

# treat-old-version-utf8-as-utf8mb4 is used for upgrade compatibility. Setting it to true treats the UTF8 charset of old-version tables/columns as UTF8MB4.
treat-old-version-utf8-as-utf8mb4 = true

[log]
# Log level: debug, info, warn, error, fatal.
level = "info"
Expand Down
2 changes: 1 addition & 1 deletion config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func (s *testConfigSuite) TestConfig(c *C) {
conf.Binlog.IgnoreError = true
conf.Binlog.BinlogSocket = "/tmp/socket"
conf.TiKVClient.CommitTimeout = "10s"
conf.CheckMb4ValueInUtf8 = true
conf.CheckMb4ValueInUTF8 = true
configFile := "config.toml"
_, localFile, _, _ := runtime.Caller(0)
configFile = path.Join(path.Dir(localFile), configFile)
Expand Down
4 changes: 0 additions & 4 deletions ddl/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -472,10 +472,6 @@ func generateOriginDefaultValue(col *model.ColumnInfo) (interface{}, error) {
if odValue == strings.ToUpper(ast.CurrentTimestamp) {
if col.Tp == mysql.TypeTimestamp {
odValue = time.Now().UTC().Format(types.TimeFormat)
// Version = 1: For OriginDefaultValue and DefaultValue of timestamp column will stores the default time in UTC time zone.
// This will fix bug in version 0.
// TODO: remove this version field after there is no old version 0.
col.Version = model.ColumnInfoVersion1
} else if col.Tp == mysql.TypeDatetime {
odValue = time.Now().Format(types.TimeFormat)
}
Expand Down
388 changes: 388 additions & 0 deletions ddl/db_integration_test.go

Large diffs are not rendered by default.

13 changes: 7 additions & 6 deletions ddl/ddl_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,10 +331,6 @@ func convertTimestampDefaultValToUTC(ctx sessionctx.Context, defaultVal interfac
return defaultVal, errors.Trace(err)
}
defaultVal = t.String()
// Version = 1: For OriginDefaultValue and DefaultValue of timestamp column will stores the default time in UTC time zone.
// This will fix bug in version 0.
// TODO: remove this version field after there is no old version 0.
col.Version = model.ColumnInfoVersion1
}
}
return defaultVal, nil
Expand All @@ -356,6 +352,8 @@ func columnDefToCol(ctx sessionctx.Context, offset int, colDef *ast.ColumnDef, o
Offset: offset,
Name: colDef.Name.Name,
FieldType: *colDef.Tp,
// TODO: remove this version field after there is no old version.
Version: model.CurrLatestColumnInfoVersion,
})

if !isExplicitTimeStamp() {
Expand Down Expand Up @@ -994,6 +992,7 @@ func (d *ddl) CreateTable(ctx sessionctx.Context, s *ast.CreateTableStmt) (err e
}
if err = checkDuplicateColumn(colDefs); err != nil {
return errors.Trace(err)

}
if err = checkGeneratedColumn(colDefs); err != nil {
return errors.Trace(err)
Expand Down Expand Up @@ -1866,6 +1865,7 @@ func (d *ddl) getModifiableColumnJob(ctx sessionctx.Context, ident ast.Ident, or
OriginDefaultValue: col.OriginDefaultValue,
FieldType: *specNewColumn.Tp,
Name: newColName,
Version: col.Version,
})

// TODO: Remove it when all table versions are greater than or equal to TableInfoVersion1.
Expand Down Expand Up @@ -2082,8 +2082,9 @@ func (d *ddl) AlterTableCharsetAndCollate(ctx sessionctx.Context, ident ast.Iden
return errors.Trace(err)
}
}

if origCharset == toCharset && origCollate == toCollate {
// The charset of an old-version schema may have been modified while loading the schema if TreatOldVersionUTF8AsUTF8MB4 is enabled.
// So even if origCharset equals toCharset, we still need to run the DDL for an old-version schema.
if origCharset == toCharset && origCollate == toCollate && tb.Meta().Version >= model.TableInfoVersion2 {
// nothing to do.
return nil
}
Expand Down
6 changes: 6 additions & 0 deletions domain/domain.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,12 @@ func (do *Domain) fetchSchemasWithTables(schemas []*model.DBInfo, m *meta.Meta,
done <- err
return
}
// If TreatOldVersionUTF8AsUTF8MB4 is enabled, the UTF8 charset of old-version schemas needs to be converted to UTF8MB4.
if config.GetGlobalConfig().TreatOldVersionUTF8AsUTF8MB4 {
for _, tbInfo := range tables {
infoschema.ConvertOldVersionUTF8ToUTF8MB4IfNeed(tbInfo)
}
}
di.Tables = make([]*model.TableInfo, 0, len(tables))
for _, tbl := range tables {
if tbl.State != model.StatePublic {
Expand Down
4 changes: 2 additions & 2 deletions executor/statement_context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@ func (s *testSuite) TestStatementContext(c *C) {
_, err = tk.Exec("insert t1 values (unhex('F0A48BAE'))")
c.Assert(err, NotNil)
c.Assert(terror.ErrorEqual(err, table.ErrTruncateWrongValue), IsTrue, Commentf("err %v", err))
config.GetGlobalConfig().CheckMb4ValueInUtf8 = false
config.GetGlobalConfig().CheckMb4ValueInUTF8 = false
tk.MustExec("insert t1 values (unhex('f09f8c80'))")
config.GetGlobalConfig().CheckMb4ValueInUtf8 = true
config.GetGlobalConfig().CheckMb4ValueInUTF8 = true
_, err = tk.Exec("insert t1 values (unhex('F0A48BAE'))")
c.Assert(err, NotNil)
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,4 @@ require (
gopkg.in/yaml.v2 v2.2.1 // indirect
)

replace github.com/pingcap/parser => github.com/lysu/parser v0.0.0-20190325074808-d880cf39390b
replace github.com/pingcap/parser => github.com/crazycs520/parser v0.0.0-20190325101044-851e66e9b3be
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ github.com/coreos/go-systemd v0.0.0-20180202092358-40e2722dffea h1:IHPWgevPcOUjT
github.com/coreos/go-systemd v0.0.0-20180202092358-40e2722dffea/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf h1:CAKfRE2YtTUIjjh1bkBtyYFaUT/WmOqsJjgtihT0vMI=
github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
github.com/crazycs520/parser v0.0.0-20190325101044-851e66e9b3be h1:iu84jJ3Pvv0uNyQIFuLxeJnOcsQUMWeHrd3A6twtHvo=
github.com/crazycs520/parser v0.0.0-20190325101044-851e66e9b3be/go.mod h1:xLjI+gnWYexq011WPMEvCNS8rFM9qe1vdojIEzSKPuc=
github.com/cznic/mathutil v0.0.0-20160613104831-78ad7f262603 h1:hhR9hTi0ligs11JjfGDBP332clNOJRdW0Ci5oHtEC+0=
github.com/cznic/mathutil v0.0.0-20160613104831-78ad7f262603/go.mod h1:e6NPNENfs9mPDVNRekM7lKScauxd5kXTr1Mfyig6TDM=
github.com/cznic/sortutil v0.0.0-20150617083342-4c7342852e65 h1:hxuZop6tSoOi0sxFzoGGYdRqNrPubyaIf9KoBG9tPiE=
Expand Down
21 changes: 21 additions & 0 deletions infoschema/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ import (
"sort"

"github.com/pingcap/errors"
"github.com/pingcap/parser/charset"
"github.com/pingcap/parser/model"
"github.com/pingcap/tidb/config"
"github.com/pingcap/tidb/meta"
"github.com/pingcap/tidb/meta/autoid"
"github.com/pingcap/tidb/perfschema"
Expand Down Expand Up @@ -171,6 +173,8 @@ func (b *Builder) applyCreateTable(m *meta.Meta, dbInfo *model.DBInfo, tableID i
fmt.Sprintf("(Table ID %d)", tableID),
)
}
ConvertOldVersionUTF8ToUTF8MB4IfNeed(tblInfo)

if alloc == nil {
schemaID := dbInfo.ID
alloc = autoid.NewAllocator(b.handle.store, tblInfo.GetDBID(schemaID), tblInfo.IsAutoIncColUnsigned())
Expand All @@ -194,6 +198,23 @@ func (b *Builder) applyCreateTable(m *meta.Meta, dbInfo *model.DBInfo, tableID i
return nil
}

// ConvertOldVersionUTF8ToUTF8MB4IfNeed rewrites, in place, the UTF8 charset of an
// old-version table and of its old-version columns to UTF8MB4.
//
// It is a no-op when config.TreatOldVersionUTF8AsUTF8MB4 is disabled, or when the
// table's Version is already model.TableInfoVersion2 or later. Columns are only
// touched when their own Version is below model.ColumnInfoVersion2.
func ConvertOldVersionUTF8ToUTF8MB4IfNeed(tbInfo *model.TableInfo) {
	// Check the switch first so tbInfo is never inspected when the feature is off.
	if !config.GetGlobalConfig().TreatOldVersionUTF8AsUTF8MB4 {
		return
	}
	if tbInfo.Version >= model.TableInfoVersion2 {
		return
	}

	// Table-level charset and collation.
	if tbInfo.Charset == charset.CharsetUTF8 {
		tbInfo.Charset, tbInfo.Collate = charset.CharsetUTF8MB4, charset.CollationUTF8MB4
	}

	// Column-level charset and collation; skip columns that are new enough or not UTF8.
	for i := range tbInfo.Columns {
		column := tbInfo.Columns[i]
		if column.Version >= model.ColumnInfoVersion2 || column.Charset != charset.CharsetUTF8 {
			continue
		}
		column.Charset, column.Collate = charset.CharsetUTF8MB4, charset.CollationUTF8MB4
	}
}

func (b *Builder) applyDropTable(dbInfo *model.DBInfo, tableID int64) {
bucketIdx := tableBucketIdx(tableID)
sortedTbls := b.is.sortedTablesBuckets[bucketIdx]
Expand Down
4 changes: 2 additions & 2 deletions server/http_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -581,9 +581,9 @@ func (h settingsHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
if checkMb4ValueInUtf8 := req.Form.Get("check_mb4_value_in_utf8"); checkMb4ValueInUtf8 != "" {
switch checkMb4ValueInUtf8 {
case "0":
config.GetGlobalConfig().CheckMb4ValueInUtf8 = false
config.GetGlobalConfig().CheckMb4ValueInUTF8 = false
case "1":
config.GetGlobalConfig().CheckMb4ValueInUtf8 = true
config.GetGlobalConfig().CheckMb4ValueInUTF8 = true
default:
writeError(w, errors.New("illegal argument"))
return
Expand Down
6 changes: 3 additions & 3 deletions server/http_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -656,20 +656,20 @@ func (ts *HTTPHandlerTestSuite) TestPostSettings(c *C) {
resp, err = http.PostForm("http://127.0.0.1:10090/settings", form)
c.Assert(err, IsNil)
c.Assert(resp.StatusCode, Equals, http.StatusOK)
c.Assert(config.GetGlobalConfig().CheckMb4ValueInUtf8, Equals, true)
c.Assert(config.GetGlobalConfig().CheckMb4ValueInUTF8, Equals, true)
txn1, err := dbt.db.Begin()
c.Assert(err, IsNil)
_, err = txn1.Exec("insert t2 values (unhex('F0A48BAE'));")
c.Assert(err, NotNil)
txn1.Commit()

// Disable CheckMb4ValueInUtf8.
// Disable CheckMb4ValueInUTF8.
form = make(url.Values)
form.Set("check_mb4_value_in_utf8", "0")
resp, err = http.PostForm("http://127.0.0.1:10090/settings", form)
c.Assert(err, IsNil)
c.Assert(resp.StatusCode, Equals, http.StatusOK)
c.Assert(config.GetGlobalConfig().CheckMb4ValueInUtf8, Equals, false)
c.Assert(config.GetGlobalConfig().CheckMb4ValueInUTF8, Equals, false)
dbt.mustExec("insert t2 values (unhex('f09f8c80'));")
}

Expand Down
4 changes: 2 additions & 2 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -606,8 +606,8 @@ func (s *SessionVars) SetSystemVar(name string, val string) error {
s.setDDLReorgPriority(val)
case TiDBForcePriority:
atomic.StoreInt32(&ForcePriority, int32(mysql.Str2Priority(val)))
case TiDBCheckMb4ValueInUtf8:
config.GetGlobalConfig().CheckMb4ValueInUtf8 = TiDBOptOn(val)
case TiDBCheckMb4ValueInUTF8:
config.GetGlobalConfig().CheckMb4ValueInUTF8 = TiDBOptOn(val)
}
s.systems[name] = val
return nil
Expand Down
2 changes: 1 addition & 1 deletion sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,7 @@ var defaultSysVars = []*SysVar{
{ScopeGlobal, TiDBDDLReorgBatchSize, strconv.Itoa(DefTiDBDDLReorgBatchSize)},
{ScopeSession, TiDBDDLReorgPriority, "PRIORITY_LOW"},
{ScopeSession, TiDBForcePriority, mysql.Priority2Str[DefTiDBForcePriority]},
{ScopeSession, TiDBCheckMb4ValueInUtf8, BoolToIntStr(config.GetGlobalConfig().CheckMb4ValueInUtf8)},
{ScopeSession, TiDBCheckMb4ValueInUTF8, BoolToIntStr(config.GetGlobalConfig().CheckMb4ValueInUTF8)},
}

// SynonymsSysVariables is synonyms of system variables.
Expand Down
4 changes: 2 additions & 2 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ const (
// tidb_enable_table_partition is used to enable table partition feature.
TiDBEnableTablePartition = "tidb_enable_table_partition"

// TiDBCheckMb4ValueInUtf8 is used to control whether to enable the check wrong utf8 value.
TiDBCheckMb4ValueInUtf8 = "tidb_check_mb4_value_in_utf8"
// TiDBCheckMb4ValueInUTF8 is used to control whether to enable the check wrong utf8 value.
TiDBCheckMb4ValueInUTF8 = "tidb_check_mb4_value_in_utf8"
)

// TiDB system variable names that both in session and global scope.
Expand Down
6 changes: 3 additions & 3 deletions sessionctx/variable/varsutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ func GetSessionOnlySysVars(s *SessionVars, key string) (string, bool, error) {
return strconv.FormatUint(atomic.LoadUint64(&config.GetGlobalConfig().Log.SlowThreshold), 10), true, nil
case TiDBQueryLogMaxLen:
return strconv.FormatUint(atomic.LoadUint64(&config.GetGlobalConfig().Log.QueryLogMaxLen), 10), true, nil
case TiDBCheckMb4ValueInUtf8:
return BoolToIntStr(config.GetGlobalConfig().CheckMb4ValueInUtf8), true, nil
case TiDBCheckMb4ValueInUTF8:
return BoolToIntStr(config.GetGlobalConfig().CheckMb4ValueInUTF8), true, nil
}
sVal, ok := s.systems[key]
if ok {
Expand Down Expand Up @@ -322,7 +322,7 @@ func ValidateSetSystemVar(vars *SessionVars, name string, value string) (string,
case AutocommitVar, TiDBSkipUTF8Check, TiDBOptAggPushDown,
TiDBOptInSubqUnFolding, TiDBEnableTablePartition,
TiDBBatchInsert, TiDBDisableTxnAutoRetry, TiDBEnableStreaming,
TiDBBatchDelete, TiDBCheckMb4ValueInUtf8:
TiDBBatchDelete, TiDBCheckMb4ValueInUTF8:
if strings.EqualFold(value, "ON") || value == "1" || strings.EqualFold(value, "OFF") || value == "0" {
return value, nil
}
Expand Down
14 changes: 8 additions & 6 deletions sessionctx/variable/varsutil_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,14 +221,16 @@ func (s *testVarsutilSuite) TestVarsutil(c *C) {
c.Assert(val, Equals, "1")
c.Assert(v.EnableTablePartition, IsTrue)

SetSessionSystemVar(v, TiDBCheckMb4ValueInUtf8, types.NewStringDatum("1"))
val, err = GetSessionSystemVar(v, TiDBCheckMb4ValueInUtf8)
err = SetSessionSystemVar(v, TiDBCheckMb4ValueInUTF8, types.NewStringDatum("1"))
c.Assert(err, IsNil)
val, err = GetSessionSystemVar(v, TiDBCheckMb4ValueInUTF8)
c.Assert(err, IsNil)
c.Assert(val, Equals, "1")
c.Assert(config.GetGlobalConfig().CheckMb4ValueInUtf8, Equals, true)
SetSessionSystemVar(v, TiDBCheckMb4ValueInUtf8, types.NewStringDatum("0"))
val, err = GetSessionSystemVar(v, TiDBCheckMb4ValueInUtf8)
c.Assert(config.GetGlobalConfig().CheckMb4ValueInUTF8, Equals, true)
err = SetSessionSystemVar(v, TiDBCheckMb4ValueInUTF8, types.NewStringDatum("0"))
c.Assert(err, IsNil)
val, err = GetSessionSystemVar(v, TiDBCheckMb4ValueInUTF8)
c.Assert(err, IsNil)
c.Assert(val, Equals, "0")
c.Assert(config.GetGlobalConfig().CheckMb4ValueInUtf8, Equals, false)
c.Assert(config.GetGlobalConfig().CheckMb4ValueInUTF8, Equals, false)
}
3 changes: 2 additions & 1 deletion table/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ func CastValue(ctx sessionctx.Context, val types.Datum, col *model.ColumnInfo) (
}
str := casted.GetString()
utf8Charset := col.Charset == mysql.UTF8Charset
doMB4CharCheck := utf8Charset && config.GetGlobalConfig().CheckMb4ValueInUTF8
for i, w := 0, 0; i < len(str); i += w {
runeValue, width := utf8.DecodeRuneInString(str[i:])
if runeValue == utf8.RuneError {
Expand All @@ -189,7 +190,7 @@ func CastValue(ctx sessionctx.Context, val types.Datum, col *model.ColumnInfo) (
}
casted, err = handleWrongUtf8Value(ctx, col, &casted, str, i)
break
} else if width > 3 && utf8Charset && config.GetGlobalConfig().CheckMb4ValueInUtf8 {
} else if width > 3 && doMB4CharCheck {
// Handle non-BMP characters.
casted, err = handleWrongUtf8Value(ctx, col, &casted, str, i)
break
Expand Down

0 comments on commit a9f6a0e

Please sign in to comment.