Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ddl: support altering the other charset to utf8 or utf8mb4 #8037

Merged
merged 28 commits into from
Dec 10, 2018
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e882210
ddl: support altering the other charset to utf8 or utf8mb4
winkyao Oct 24, 2018
c0ec35d
Merge branch 'master' into support_change_charset_utf8
winkyao Oct 24, 2018
a54cb1e
fix ci
winkyao Oct 24, 2018
7627a0a
Merge branch 'support_change_charset_utf8' of https://github.com/wink…
winkyao Oct 24, 2018
c1da1fa
Merge remote-tracking branch 'upstream/master' into support_change_ch…
winkyao Nov 5, 2018
1069ffe
remove mistakenly added files
winkyao Nov 5, 2018
d5421ad
Merge remote-tracking branch 'upstream/master' into support_change_ch…
winkyao Nov 12, 2018
e3fd359
replace go module
winkyao Nov 12, 2018
a80e568
Merge branch 'master' into support_change_charset_utf8
winkyao Nov 12, 2018
e479c8d
Merge branch 'master' into support_change_charset_utf8
winkyao Nov 12, 2018
2d991a4
Merge remote-tracking branch 'upstream/master' into support_change_ch…
winkyao Dec 3, 2018
bc7b5e3
Merge branch 'support_change_charset_utf8' of https://github.com/wink…
winkyao Dec 3, 2018
13ebfc5
Merge branch 'master' into support_change_charset_utf8
winkyao Dec 3, 2018
8032e69
Merge branch 'master' into support_change_charset_utf8
winkyao Dec 3, 2018
2869497
fix go mod tidy
winkyao Dec 3, 2018
3869585
Merge branch 'support_change_charset_utf8' of https://github.com/wink…
winkyao Dec 3, 2018
ba963c2
Merge remote-tracking branch 'upstream/master' into support_change_ch…
winkyao Dec 4, 2018
e6df64f
fix ci
winkyao Dec 4, 2018
5d2e89f
Merge branch 'master' into support_change_charset_utf8
winkyao Dec 4, 2018
aeba21e
fix ci
winkyao Dec 4, 2018
d4a9027
fix ci
winkyao Dec 4, 2018
b15d3b1
address comments
winkyao Dec 6, 2018
b4dce09
go mod tidy
winkyao Dec 6, 2018
c1e004a
handle once
winkyao Dec 6, 2018
9cb2d5d
Merge remote-tracking branch 'upstream/master' into support_change_ch…
winkyao Dec 7, 2018
36681fa
update parser to fix ci
winkyao Dec 7, 2018
a114ea4
address comment
winkyao Dec 7, 2018
aa53216
Merge branch 'master' into support_change_charset_utf8
winkyao Dec 10, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ddl/column_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -915,7 +915,7 @@ func (s *testColumnSuite) TestModifyColumn(c *C) {
{"int", "int unsigned", errUnsupportedModifyColumn.GenWithStackByArgs("length 10 is less than origin 11")},
{"varchar(10)", "text", nil},
{"varbinary(10)", "blob", nil},
{"text", "blob", errUnsupportedModifyColumn.GenWithStackByArgs("charset binary not match origin utf8mb4")},
{"text", "blob", errUnsupportedModifyCharset.GenWithStackByArgs("charset from utf8mb4 to binary")},
{"varchar(10)", "varchar(8)", errUnsupportedModifyColumn.GenWithStackByArgs("length 8 is less than origin 10")},
{"varchar(10)", "varchar(11)", nil},
{"varchar(10) character set utf8 collate utf8_bin", "varchar(10) character set utf8", nil},
Expand Down
2 changes: 1 addition & 1 deletion ddl/db_change_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ func (s *testStateChangeSuite) TestAppendEnum(c *C) {
c.Assert(err.Error(), Equals, "[ddl:203]unsupported modify column the number of enum column's elements is less than the original: 2")
failAlterTableSQL2 := "alter table t change c2 c2 int default 0"
_, err = s.se.Execute(context.Background(), failAlterTableSQL2)
c.Assert(err.Error(), Equals, "[ddl:203]unsupported modify column charset binary not match origin utf8mb4")
c.Assert(err.Error(), Equals, "[ddl:208]unsupported modify charset from utf8mb4 to binary")
alterTableSQL := "alter table t change c2 c2 enum('N','Y','A') DEFAULT 'A'"
_, err = s.se.Execute(context.Background(), alterTableSQL)
c.Assert(err, IsNil)
Expand Down
50 changes: 50 additions & 0 deletions ddl/db_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,56 @@ func (s *testIntegrationSuite) TestEndIncluded(c *C) {
tk.MustExec("admin check table t")
}

// TestChangingCharsetToUtf8 walks a single char(10) column through a series of
// MODIFY COLUMN statements that change its charset/collation, checking which
// ones are accepted and which ones are rejected.
func (s *testIntegrationSuite) TestChangingCharsetToUtf8(c *C) {
	tk := testkit.NewTestKit(c, s.store)

	// expectFailure runs a statement that has to be rejected, releasing any
	// result set it may have produced before asserting on the error.
	expectFailure := func(sql string) {
		rs, err := tk.Exec(sql)
		if rs != nil {
			rs.Close()
		}
		c.Assert(err, NotNil)
	}

	// latin1 -> latin1 (no-op) -> utf8 -> utf8mb4 must all succeed, in this order.
	for _, sql := range []string{
		"USE test",
		"create table t(a char(10) charset latin1)",
		"alter table t modify column a char(10) charset latin1",
		"alter table t modify column a char(10) charset utf8",
		"alter table t modify column a char(10) charset utf8mb4",
	} {
		tk.MustExec(sql)
	}

	// NOTE(review): presumably rejected because "utf8bin" is not a known collation name — confirm.
	expectFailure("alter table t modify column a char(10) charset utf8mb4 collate utf8bin")
	tk.MustExec("alter table t modify column a char(10) charset utf8mb4 collate utf8mb4_bin")

	// The column is utf8mb4 at this point, so going back to utf8 has to fail.
	expectFailure("alter table t modify column a char(10) charset utf8 collate utf8_bin")
	tk.MustExec("alter table t modify column a char(10) charset utf8mb4 collate utf8mb4_general_ci")
}

// TestChangingTableCharset checks which table-level charset/collation changes
// "ALTER TABLE ... CHARSET ... [COLLATE ...]" accepts and which ones it rejects.
func (s *testIntegrationSuite) TestChangingTableCharset(c *C) {
	tk := testkit.NewTestKit(c, s.store)

	tk.MustExec("USE test")
	tk.MustExec("create table t(a char(10)) charset latin1 collate latin1_bin")

	// An unknown charset must be reported with a dedicated message.
	rs, err := tk.Exec("alter table t charset gbk")
	if rs != nil {
		rs.Close()
	}
	// Assert the error exists before reading its text; otherwise an unexpectedly
	// successful statement would panic on err.Error() instead of failing cleanly.
	c.Assert(err, NotNil)
	c.Assert(err.Error(), Equals, "Unknown charset gbk")

	// latin1 -> utf8 is allowed, with or without an explicit matching collation.
	tk.MustExec("alter table t charset utf8")
	tk.MustExec("alter table t charset utf8 collate utf8_bin")

	// A collation that belongs to another charset must be rejected.
	rs, err = tk.Exec("alter table t charset utf8 collate latin1_bin")
	if rs != nil {
		rs.Close()
	}
	c.Assert(err, NotNil)

	// utf8 -> utf8mb4 is allowed.
	tk.MustExec("alter table t charset utf8mb4")
	tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_bin")

	// utf8mb4 -> utf8 is not allowed.
	rs, err = tk.Exec("alter table t charset utf8 collate utf8_bin")
	if rs != nil {
		rs.Close()
	}
	c.Assert(err, NotNil)
}

func newStoreWithBootstrap() (kv.Storage, *domain.Domain, error) {
store, err := mockstore.NewMockTikvStore()
if err != nil {
Expand Down
93 changes: 93 additions & 0 deletions ddl/db_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
. "github.com/pingcap/check"
"github.com/pingcap/errors"
"github.com/pingcap/parser/ast"
"github.com/pingcap/parser/charset"
"github.com/pingcap/parser/model"
"github.com/pingcap/parser/mysql"
tmysql "github.com/pingcap/parser/mysql"
Expand Down Expand Up @@ -3722,6 +3723,98 @@ func (s *testDBSuite) TestPartitionAddIndex(c *C) {
tk.MustExec("drop table partition_add_idx")
}

// TestAlterTableCharset verifies that ALTER TABLE ... CHARSET updates the
// charset and collation stored in the table meta, and that converting a
// utf8mb4 table back to utf8 is refused with the "unsupported modify charset"
// DDL error.
func (s *testDBSuite) TestAlterTableCharset(c *C) {
	tk := testkit.NewTestKit(c, s.store)
	tk.MustExec("create database test_charset")
	defer tk.MustExec("drop database test_charset")
	tk.MustExec("use test_charset")
	tk.MustExec("drop table if exists t")
	tk.MustExec("create table t(a int) charset latin1")
	ctx := tk.Se.(sessionctx.Context)

	// A freshly created latin1 table gets latin1's default collation.
	is := domain.GetDomain(ctx).InfoSchema()
	t, err := is.TableByName(model.NewCIStr("test_charset"), model.NewCIStr("t"))
	c.Assert(err, IsNil)
	c.Assert(t.Meta().Charset, Equals, "latin1")
	defCollate, err := charset.GetDefaultCollation("latin1")
	c.Assert(err, IsNil)
	c.Assert(t.Meta().Collate, Equals, defCollate)

	// Changing only the charset picks the default collation of the new charset.
	tk.MustExec("alter table t charset utf8")
	is = domain.GetDomain(ctx).InfoSchema()
	t, err = is.TableByName(model.NewCIStr("test_charset"), model.NewCIStr("t"))
	// Check the lookup error before dereferencing t.
	c.Assert(err, IsNil)
	c.Assert(t.Meta().Charset, Equals, "utf8")
	defCollate, err = charset.GetDefaultCollation("utf8")
	c.Assert(err, IsNil)
	c.Assert(t.Meta().Collate, Equals, defCollate)

	// An explicit collation is stored as given.
	tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_general_ci")
	is = domain.GetDomain(ctx).InfoSchema()
	t, err = is.TableByName(model.NewCIStr("test_charset"), model.NewCIStr("t"))
	c.Assert(err, IsNil)
	c.Assert(t.Meta().Charset, Equals, "utf8mb4")
	c.Assert(t.Meta().Collate, Equals, "utf8mb4_general_ci")

	// utf8mb4 -> utf8 must be rejected.
	rs, err := tk.Exec("alter table t charset utf8")
	if rs != nil {
		rs.Close()
	}
	// Guard against a nil error so an unexpected success fails the assertion
	// instead of panicking on err.Error().
	c.Assert(err, NotNil)
	// errUnsupportedModifyCharset is registered with codeUnsupportedModifyCharset (210);
	// 208 belongs to codeUnsupportedAddPartition.
	c.Assert(err.Error(), Equals, "[ddl:210]unsupported modify charset from utf8mb4 to utf8")
}

// TestAlterColumnCharset verifies that ALTER TABLE ... MODIFY COLUMN updates
// the charset and collation stored in the column meta, and that converting a
// utf8mb4 column back to utf8 is refused with the "unsupported modify charset"
// DDL error.
func (s *testDBSuite) TestAlterColumnCharset(c *C) {
	tk := testkit.NewTestKit(c, s.store)
	tk.MustExec("create database test_charset")
	defer tk.MustExec("drop database test_charset")
	tk.MustExec("use test_charset")
	tk.MustExec("drop table if exists t")
	tk.MustExec("create table t(a char(10) charset latin1)")
	ctx := tk.Se.(sessionctx.Context)

	// A freshly created latin1 column gets latin1's default collation.
	is := domain.GetDomain(ctx).InfoSchema()
	t, err := is.TableByName(model.NewCIStr("test_charset"), model.NewCIStr("t"))
	c.Assert(err, IsNil)
	col := model.FindColumnInfo(t.Meta().Columns, "a")
	c.Assert(col, NotNil)
	c.Assert(col.Charset, Equals, "latin1")
	defCollate, err := charset.GetDefaultCollation("latin1")
	c.Assert(err, IsNil)
	c.Assert(col.Collate, Equals, defCollate)

	// Changing only the charset picks the default collation of the new charset.
	tk.MustExec("alter table t modify column a char(10) charset utf8")
	is = domain.GetDomain(ctx).InfoSchema()
	t, err = is.TableByName(model.NewCIStr("test_charset"), model.NewCIStr("t"))
	c.Assert(err, IsNil)
	col = model.FindColumnInfo(t.Meta().Columns, "a")
	c.Assert(col, NotNil)
	c.Assert(col.Charset, Equals, "utf8")
	defCollate, err = charset.GetDefaultCollation("utf8")
	c.Assert(err, IsNil)
	c.Assert(col.Collate, Equals, defCollate)

	// An explicit collation within the same charset is stored as given.
	tk.MustExec("alter table t modify column a char(10) charset utf8 collate utf8_general_ci")
	is = domain.GetDomain(ctx).InfoSchema()
	t, err = is.TableByName(model.NewCIStr("test_charset"), model.NewCIStr("t"))
	c.Assert(err, IsNil)
	col = model.FindColumnInfo(t.Meta().Columns, "a")
	c.Assert(col, NotNil)
	c.Assert(col.Charset, Equals, "utf8")
	c.Assert(col.Collate, Equals, "utf8_general_ci")

	// utf8 -> utf8mb4 with an explicit collation.
	tk.MustExec("alter table t modify column a char(10) charset utf8mb4 collate utf8mb4_general_ci")
	is = domain.GetDomain(ctx).InfoSchema()
	t, err = is.TableByName(model.NewCIStr("test_charset"), model.NewCIStr("t"))
	c.Assert(err, IsNil)
	col = model.FindColumnInfo(t.Meta().Columns, "a")
	c.Assert(col, NotNil)
	c.Assert(col.Charset, Equals, "utf8mb4")
	c.Assert(col.Collate, Equals, "utf8mb4_general_ci")

	// utf8mb4 -> utf8 must be rejected.
	rs, err := tk.Exec("alter table t modify column a char(10) charset utf8")
	if rs != nil {
		rs.Close()
	}
	// Guard against a nil error so an unexpected success fails the assertion
	// instead of panicking on err.Error().
	c.Assert(err, NotNil)
	// errUnsupportedModifyCharset is registered with codeUnsupportedModifyCharset (210);
	// 208 belongs to codeUnsupportedAddPartition.
	c.Assert(err.Error(), Equals, "[ddl:210]unsupported modify charset from utf8mb4 to utf8")
}

func (s *testDBSuite) TestDropSchemaWithPartitionTable(c *C) {
s.tk = testkit.NewTestKit(c, s.store)
s.tk.MustExec("drop database if exists test_db_with_partition")
Expand Down
10 changes: 6 additions & 4 deletions ddl/ddl.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,11 @@ var (
errInvalidStoreVer = terror.ClassDDL.New(codeInvalidStoreVer, "invalid storage current version")

// We don't support dropping column with index covered now.
errCantDropColWithIndex = terror.ClassDDL.New(codeCantDropColWithIndex, "can't drop column with index")
errUnsupportedAddColumn = terror.ClassDDL.New(codeUnsupportedAddColumn, "unsupported add column")
errUnsupportedModifyColumn = terror.ClassDDL.New(codeUnsupportedModifyColumn, "unsupported modify column %s")
errUnsupportedPKHandle = terror.ClassDDL.New(codeUnsupportedDropPKHandle,
errCantDropColWithIndex = terror.ClassDDL.New(codeCantDropColWithIndex, "can't drop column with index")
errUnsupportedAddColumn = terror.ClassDDL.New(codeUnsupportedAddColumn, "unsupported add column")
errUnsupportedModifyColumn = terror.ClassDDL.New(codeUnsupportedModifyColumn, "unsupported modify column %s")
errUnsupportedModifyCharset = terror.ClassDDL.New(codeUnsupportedModifyCharset, "unsupported modify %s")
errUnsupportedPKHandle = terror.ClassDDL.New(codeUnsupportedDropPKHandle,
"unsupported drop integer primary key")
errUnsupportedCharset = terror.ClassDDL.New(codeUnsupportedCharset, "unsupported charset %s collate %s")

Expand Down Expand Up @@ -587,6 +588,7 @@ const (
codeUnsupportedShardRowIDBits = 207
codeUnsupportedAddPartition = 208
codeUnsupportedCoalescePartition = 209
codeUnsupportedModifyCharset = 210
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems the code above is TiDB-specific. I think we should take some time to move it somewhere else.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree.


codeFileNotFound = 1017
codeErrorOnRename = 1025
Expand Down
131 changes: 123 additions & 8 deletions ddl/ddl_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -1035,6 +1035,17 @@ func (d *ddl) handleAutoIncID(tbInfo *model.TableInfo, schemaID int64) error {
return nil
}

// setDefaultTableCharsetAndCollation fills in tbInfo.Charset and tbInfo.Collate
// when they are empty. An empty charset becomes mysql.DefaultCharset; an empty
// collation becomes whatever charset.GetDefaultCollation returns for the
// (possibly just-defaulted) charset. The error of that lookup is handed back
// through the named result err, which stays nil when no lookup is performed.
func setDefaultTableCharsetAndCollation(tbInfo *model.TableInfo) (err error) {
if len(tbInfo.Charset) == 0 {
tiancaiamao marked this conversation as resolved.
Show resolved Hide resolved
tbInfo.Charset = mysql.DefaultCharset
}

if len(tbInfo.Collate) == 0 {
// The default collation is derived from the charset, so the charset default above has to be applied first.
tbInfo.Collate, err = charset.GetDefaultCollation(tbInfo.Charset)
}
return
}

// handleTableOptions updates tableInfo according to table options.
func handleTableOptions(options []*ast.TableOption, tbInfo *model.TableInfo) error {
for _, op := range options {
Expand All @@ -1059,6 +1070,8 @@ func handleTableOptions(options []*ast.TableOption, tbInfo *model.TableInfo) err
}
}
}

setDefaultTableCharsetAndCollation(tbInfo)
return nil
}

Expand All @@ -1078,6 +1091,28 @@ func isIgnorableSpec(tp ast.AlterTableType) bool {
return tp == ast.AlterTableLock || tp == ast.AlterTableAlgorithm
}

// getCharsetAndCollateInTableOption scans options from startIdx to the end and
// returns the value of the last charset option and of the last collate option
// it finds; a result is the empty string when no option of that kind is present.
// Every charset/collate option visited is flagged as Skipped so that callers
// iterating over the same slice do not handle it a second time.
func getCharsetAndCollateInTableOption(startIdx int, options []*ast.TableOption) (charset, collate string) {
	for i := startIdx; i < len(options); i++ {
		opt := options[i]
		if opt.Tp != ast.TableOptionCharset && opt.Tp != ast.TableOptionCollate {
			continue
		}

		// Later options overwrite earlier ones. Example:
		//   alter table t charset latin1 charset utf8 collate utf8_bin;
		// yields charset utf8 and collate utf8_bin.
		if opt.Tp == ast.TableOptionCharset {
			charset = opt.StrValue
		} else {
			collate = opt.StrValue
		}

		// opt is a pointer into options, so this marks the shared option as
		// handled and lets it be skipped in the caller's next iteration.
		opt.Skipped = true
	}

	return charset, collate
}

func (d *ddl) AlterTable(ctx sessionctx.Context, ident ast.Ident, specs []*ast.AlterTableSpec) (err error) {
// Only handle valid specs.
validSpecs := make([]*ast.AlterTableSpec, 0, len(specs))
Expand Down Expand Up @@ -1141,7 +1176,11 @@ func (d *ddl) AlterTable(ctx sessionctx.Context, ident ast.Ident, specs []*ast.A
case ast.AlterTableRenameIndex:
err = d.RenameIndex(ctx, ident, spec)
case ast.AlterTableOption:
for _, opt := range spec.Options {
for i, opt := range spec.Options {
if opt.Skipped {
continue
}

switch opt.Tp {
case ast.TableOptionShardRowID:
if opt.UintValue > shardRowIDBitsMax {
Expand All @@ -1153,7 +1192,11 @@ func (d *ddl) AlterTable(ctx sessionctx.Context, ident ast.Ident, specs []*ast.A
case ast.TableOptionComment:
spec.Comment = opt.StrValue
err = d.AlterTableComment(ctx, ident, spec)
case ast.TableOptionCharset, ast.TableOptionCollate:
toCharset, toCollate := getCharsetAndCollateInTableOption(i, spec.Options)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code is weird to me.
Why not return a charset/collate array in getCharsetAndCollateInTableOption, and handle the array in a loop calling d.AlterTableCharsetAndCollate?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

addressed.

err = d.AlterTableCharsetAndCollate(ctx, ident, toCharset, toCollate)
}

if err != nil {
return errors.Trace(err)
}
Expand Down Expand Up @@ -1492,6 +1535,29 @@ func (d *ddl) DropColumn(ctx sessionctx.Context, ti ast.Ident, colName model.CIS
return errors.Trace(err)
}

// modifiableCharsetAndCollation reports, as an error, why a charset/collation
// pair (origCharset, origCollate) cannot be changed to (toCharset, toCollate).
// It returns nil when the change is allowed.
//
// The target pair must pass charset.ValidCharsetAndCollation. After that:
//   - any change to utf8mb4 is allowed;
//   - a change to utf8 is allowed unless the original charset is utf8mb4;
//   - otherwise both the charset and the collation have to stay the same.
func modifiableCharsetAndCollation(toCharset, toCollate, origCharset, origCollate string) error {
	if !charset.ValidCharsetAndCollation(toCharset, toCollate) {
		return ErrUnknownCharacterSet.GenWithStackByArgs(toCharset, toCollate)
	}

	switch {
	case toCharset == charset.CharsetUTF8MB4:
		// TiDB treats all the data as utf8mb4, so changing the charset to
		// utf8mb4 is always supported.
		return nil
	case toCharset == charset.CharsetUTF8 && origCharset != charset.CharsetUTF8MB4:
		// Changing to utf8 is fine too, except from utf8mb4: that direction is
		// not allowed and falls through to the mismatch checks below.
		return nil
	case toCharset != origCharset:
		return errUnsupportedModifyCharset.GenWithStackByArgs(
			fmt.Sprintf("charset from %s to %s", origCharset, toCharset))
	case toCollate != origCollate:
		return errUnsupportedModifyCharset.GenWithStackByArgs(
			fmt.Sprintf("collate from %s to %s", origCollate, toCollate))
	}
	return nil
}

// modifiable checks if the 'origin' type can be modified to 'to' type without the need to
// change or check existing data in the table.
// It returns true if the two types have the same Charset and Collation, the same sign, both are
Expand All @@ -1505,14 +1571,10 @@ func modifiable(origin *types.FieldType, to *types.FieldType) error {
msg := fmt.Sprintf("decimal %d is less than origin %d", to.Decimal, origin.Decimal)
return errUnsupportedModifyColumn.GenWithStackByArgs(msg)
}
if to.Charset != origin.Charset {
msg := fmt.Sprintf("charset %s not match origin %s", to.Charset, origin.Charset)
return errUnsupportedModifyColumn.GenWithStackByArgs(msg)
}
if to.Collate != origin.Collate {
msg := fmt.Sprintf("collate %s not match origin %s", to.Collate, origin.Collate)
return errUnsupportedModifyColumn.GenWithStackByArgs(msg)
if err := modifiableCharsetAndCollation(to.Charset, to.Collate, origin.Charset, origin.Collate); err != nil {
return errors.Trace(err)
}

toUnsigned := mysql.HasUnsignedFlag(to.Flag)
originUnsigned := mysql.HasUnsignedFlag(origin.Flag)
if originUnsigned != toUnsigned {
Expand Down Expand Up @@ -1875,6 +1937,59 @@ func (d *ddl) AlterTableComment(ctx sessionctx.Context, ident ast.Ident, spec *a
return errors.Trace(err)
}

// AlterTableCharsetAndCollate changes the table charset and collate.
//
// At least one of toCharset and toCollate must be non-empty. An empty toCharset
// means "keep the table's current charset"; an empty toCollate means "use the
// default collation of the resulting charset". When the resulting pair equals
// the table's current pair the call returns nil without submitting a DDL job.
// Otherwise the change is validated with modifiableCharsetAndCollation and an
// ActionModifyTableCharsetAndCollate job carrying (toCharset, toCollate) is run.
//
// It returns ErrDatabaseNotExists / ErrTableNotExists when the schema or table
// named by ident cannot be found, and otherwise the traced error of the failing
// step.
func (d *ddl) AlterTableCharsetAndCollate(ctx sessionctx.Context, ident ast.Ident, toCharset string, toCollate string) error {
// With neither a charset nor a collation there is nothing to alter.
if toCharset == "" && toCollate == "" {
return errors.Errorf("toCharset and toCollate can't be empty")
zimulala marked this conversation as resolved.
Show resolved Hide resolved
}

is := d.infoHandle.Get()
schema, ok := is.SchemaByName(ident.Schema)
if !ok {
return infoschema.ErrDatabaseNotExists.GenWithStackByArgs(ident.Schema)
}

tb, err := is.TableByName(ident.Schema, ident.Name)
if err != nil {
// Any lookup failure is reported as "table doesn't exist"; the original error is dropped.
return errors.Trace(infoschema.ErrTableNotExists.GenWithStackByArgs(ident.Schema, ident.Name))
}

origCharset := tb.Meta().Charset
origCollate := tb.Meta().Collate
if toCharset == "" {
// charset does not change.
toCharset = origCharset
}

if toCollate == "" {
// get the default collation of the charset.
toCollate, err = charset.GetDefaultCollation(toCharset)
if err != nil {
return errors.Trace(err)
}
}

if origCharset == toCharset && origCollate == toCollate {
// nothing to do.
return nil
}

if err = modifiableCharsetAndCollation(toCharset, toCollate, origCharset, origCollate); err != nil {
return errors.Trace(err)
}

// The resolved target charset and collation are passed to the job as its arguments.
job := &model.Job{
SchemaID: schema.ID,
TableID: tb.Meta().ID,
Type: model.ActionModifyTableCharsetAndCollate,
BinlogInfo: &model.HistoryInfo{},
Args: []interface{}{toCharset, toCollate},
}
err = d.doDDLJob(ctx, job)
// The job's result is passed through callHookOnChanged, whose return value becomes the final error.
err = d.callHookOnChanged(err)
return errors.Trace(err)
}

// RenameIndex renames an index.
// In TiDB, indexes are case-insensitive (so index 'a' and 'A' are considered the same index),
// but index names are case-sensitive (we can rename index 'a' to 'A')
Expand Down
Loading