Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parser, ddl: support decoding binary literal in set/enum #35822

Merged
merged 9 commits into from
Jun 30, 2022
Merged
30 changes: 30 additions & 0 deletions cmd/explaintest/r/new_character_set_invalid.result
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,33 @@ a b c
? ? ?
中文?中文 asdf?fdsa 字符集?字符集
@@ @@ @@
drop table t;
create table t(f set(0xD2BB, 0xC8FD), e enum(0xBAEC,0x6A59));
show create table t;
Table Create Table
t CREATE TABLE `t` (
`f` set('һ','??') DEFAULT NULL,
`e` enum('??','jY') DEFAULT NULL
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin
drop table t;
create table t( e enum(0xBAEC,0x6A59));
show create table t;
Table Create Table
t CREATE TABLE `t` (
`e` enum('??','jY') DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin
drop table t;
create table t(f set(0xD2BB, 0xC8FD), e enum(0xBAEC,0x6A59)) collate gbk_bin;
show create table t;
Table Create Table
t CREATE TABLE `t` (
`f` set('一','三') COLLATE gbk_bin DEFAULT NULL,
`e` enum('红','jY') COLLATE gbk_bin DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=gbk COLLATE=gbk_bin
drop table t;
create table t( e enum(0xBAEC,0x6A59)) collate gbk_bin;
show create table t;
Table Create Table
t CREATE TABLE `t` (
`e` enum('红','jY') COLLATE gbk_bin DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=gbk COLLATE=gbk_bin
13 changes: 13 additions & 0 deletions cmd/explaintest/t/new_character_set_invalid.test
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,16 @@ insert into t values ('À', 'ø', '😂');
insert into t values ('中文À中文', 'asdføfdsa', '字符集😂字符集');
insert into t values (0x4040ffff, 0x4040ffff, 0x4040ffff);
select * from t;

Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
drop table t;
create table t(f set(0xD2BB, 0xC8FD), e enum(0xBAEC,0x6A59));
show create table t;
drop table t;
create table t( e enum(0xBAEC,0x6A59));
show create table t;
drop table t;
create table t(f set(0xD2BB, 0xC8FD), e enum(0xBAEC,0x6A59)) collate gbk_bin;
show create table t;
drop table t;
create table t( e enum(0xBAEC,0x6A59)) collate gbk_bin;
show create table t;
20 changes: 20 additions & 0 deletions ddl/ddl_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ import (
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tidb/util/dbterror"
"github.com/pingcap/tidb/util/domainutil"
"github.com/pingcap/tidb/util/hack"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/mathutil"
"github.com/pingcap/tidb/util/mock"
Expand Down Expand Up @@ -820,6 +821,23 @@ func setCharsetCollationFlenDecimal(tp *types.FieldType, colName, colCharset, co
return checkTooBigFieldLengthAndTryAutoConvert(tp, colName, sessVars)
}

func decodeEnumSetBinaryLiteralToUTF8(tp *types.FieldType, chs string) {
if tp.GetType() != mysql.TypeEnum && tp.GetType() != mysql.TypeSet {
return
}
enc := charset.FindEncoding(chs)
for i, elem := range tp.GetElems() {
if !tp.GetElemIsBinaryLit(i) {
continue
}
s, err := enc.Transform(nil, hack.Slice(elem), charset.OpDecodeReplace)
if err != nil {
logutil.BgLogger().Warn("decode enum binary literal to utf-8 failed", zap.Error(err))
}
tp.SetElem(i, string(hack.String(s)))
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
}
}

// buildColumnAndConstraint builds table.Column and ast.Constraint from the parameters.
// outPriKeyConstraint is the primary key constraint out of column definition. For example:
// `create table t1 (id int , age int, primary key(id));`
Expand Down Expand Up @@ -852,6 +870,7 @@ func buildColumnAndConstraint(
if err := setCharsetCollationFlenDecimal(colDef.Tp, colDef.Name.Name.O, chs, coll, ctx.GetSessionVars()); err != nil {
return nil, nil, errors.Trace(err)
}
decodeEnumSetBinaryLiteralToUTF8(colDef.Tp, chs)
col, cts, err := columnDefToCol(ctx, offset, colDef, outPriKeyConstraint)
if err != nil {
return nil, nil, errors.Trace(err)
Expand Down Expand Up @@ -4521,6 +4540,7 @@ func (d *ddl) getModifiableColumnJob(ctx context.Context, sctx sessionctx.Contex
if err = setCharsetCollationFlenDecimal(&newCol.FieldType, newCol.Name.O, chs, coll, sctx.GetSessionVars()); err != nil {
return nil, errors.Trace(err)
}
decodeEnumSetBinaryLiteralToUTF8(&newCol.FieldType, chs)

// Check the column with foreign key, waiting for the default flen and decimal.
if fkInfo := getColumnForeignKeyInfo(originalColName.L, t.Meta().ForeignKeys); fkInfo != nil {
Expand Down
22 changes: 0 additions & 22 deletions parser/ast/misc.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (

"github.com/pingcap/errors"
"github.com/pingcap/tidb/parser/auth"
"github.com/pingcap/tidb/parser/charset"
"github.com/pingcap/tidb/parser/format"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
Expand Down Expand Up @@ -3619,27 +3618,6 @@ type TextString struct {
IsBinaryLiteral bool
}

// TransformTextStrings converts a slice of TextString to strings.
// This is only used by enum/set strings.
func TransformTextStrings(ts []*TextString, _ string) []string {
// The UTF-8 encoding rather than other encoding is used
// because parser is not possible to determine the "real"
// charset that a binary literal string should be converted to.
enc := charset.EncodingUTF8Impl
ret := make([]string, 0, len(ts))
for _, t := range ts {
if !t.IsBinaryLiteral {
ret = append(ret, t.Value)
} else {
// Validate the binary literal string.
// See https://github.com/pingcap/tidb/issues/30740.
r, _ := enc.Transform(nil, charset.HackSlice(t.Value), charset.OpDecodeNoErr)
ret = append(ret, charset.HackString(r))
}
}
return ret
}

type BinaryLiteral interface {
ToString() string
}
Expand Down
22 changes: 12 additions & 10 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -19881,12 +19881,13 @@ yynewstate:
tp := types.NewFieldType(mysql.TypeEnum)
elems := yyS[yypt-2].item.([]*ast.TextString)
opt := yyS[yypt-0].item.(*ast.OptBinary)
tp.SetElems(ast.TransformTextStrings(elems, opt.Charset))
tp.SetElems(make([]string, len(elems)))
fieldLen := -1 // enum_flen = max(ele_flen)
for i := range tp.GetElems() {
tp.SetElem(i, strings.TrimRight(tp.GetElem(i), " "))
if len(tp.GetElem(i)) > fieldLen {
fieldLen = len(tp.GetElem(i))
for i, e := range elems {
trimmed := strings.TrimRight(e.Value, " ")
tp.SetElemWithIsBinaryLit(i, trimmed, e.IsBinaryLiteral)
if len(trimmed) > fieldLen {
fieldLen = len(trimmed)
}
}
tp.SetFlen(fieldLen)
Expand All @@ -19901,11 +19902,12 @@ yynewstate:
tp := types.NewFieldType(mysql.TypeSet)
elems := yyS[yypt-2].item.([]*ast.TextString)
opt := yyS[yypt-0].item.(*ast.OptBinary)
tp.SetElems(ast.TransformTextStrings(elems, opt.Charset))
fieldLen := len(tp.GetElems()) - 1 // set_flen = sum(ele_flen) + number_of_ele - 1
for i := range tp.GetElems() {
tp.SetElem(i, strings.TrimRight(tp.GetElem(i), " "))
fieldLen += len(tp.GetElem(i))
tp.SetElems(make([]string, len(elems)))
fieldLen := len(elems) - 1 // set_flen = sum(ele_flen) + number_of_ele - 1
for i, e := range elems {
trimmed := strings.TrimRight(e.Value, " ")
tp.SetElemWithIsBinaryLit(i, trimmed, e.IsBinaryLiteral)
fieldLen += len(trimmed)
}
tp.SetFlen(fieldLen)
tp.SetCharset(opt.Charset)
Expand Down
22 changes: 12 additions & 10 deletions parser/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -11896,12 +11896,13 @@ StringType:
tp := types.NewFieldType(mysql.TypeEnum)
elems := $3.([]*ast.TextString)
opt := $5.(*ast.OptBinary)
tp.SetElems(ast.TransformTextStrings(elems, opt.Charset))
tp.SetElems(make([]string, len(elems)))
fieldLen := -1 // enum_flen = max(ele_flen)
for i := range tp.GetElems() {
tp.SetElem(i, strings.TrimRight(tp.GetElem(i), " "))
if len(tp.GetElem(i)) > fieldLen {
fieldLen = len(tp.GetElem(i))
for i, e := range elems {
trimmed := strings.TrimRight(e.Value, " ")
tp.SetElemWithIsBinaryLit(i, trimmed, e.IsBinaryLiteral)
if len(trimmed) > fieldLen {
fieldLen = len(trimmed)
}
}
tp.SetFlen(fieldLen)
Expand All @@ -11916,11 +11917,12 @@ StringType:
tp := types.NewFieldType(mysql.TypeSet)
elems := $3.([]*ast.TextString)
opt := $5.(*ast.OptBinary)
tp.SetElems(ast.TransformTextStrings(elems, opt.Charset))
fieldLen := len(tp.GetElems()) - 1 // set_flen = sum(ele_flen) + number_of_ele - 1
for i := range tp.GetElems() {
tp.SetElem(i, strings.TrimRight(tp.GetElem(i), " "))
fieldLen += len(tp.GetElem(i))
tp.SetElems(make([]string, len(elems)))
fieldLen := len(elems) - 1 // set_flen = sum(ele_flen) + number_of_ele - 1
for i, e := range elems {
trimmed := strings.TrimRight(e.Value, " ")
tp.SetElemWithIsBinaryLit(i, trimmed, e.IsBinaryLiteral)
fieldLen += len(trimmed)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why it is not len(tp.GetElem())?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are the same string.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I knew. My point is that tp.GetElem() makes less assumption about what SetElemWithIsBinaryLit is doing. I won't insist on the change.

Copy link
Contributor Author

@tangenta tangenta Jun 30, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually the fieldLen should not be calculated here because it may change after decoding. Let's remove it.

The one who changes SetElemWithIsBinaryLit should check all the usages.

}
tp.SetFlen(fieldLen)
tp.SetCharset(opt.Charset)
Expand Down
21 changes: 20 additions & 1 deletion parser/types/field_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ type FieldType struct {
// collate represent collate rules of the charset
collate string
// elems is the element list for enum and set type.
elems []string
elems []string
elemsIsBinaryLit []bool
xhebox marked this conversation as resolved.
Show resolved Hide resolved
}

// NewFieldType returns a FieldType,
Expand Down Expand Up @@ -180,10 +181,28 @@ func (ft *FieldType) SetElem(idx int, element string) {
ft.elems[idx] = element
}

func (ft *FieldType) SetElemWithIsBinaryLit(idx int, element string, isBinaryLit bool) {
ft.elems[idx] = element
if isBinaryLit {
// Create the binary literal flags lazily.
if ft.elemsIsBinaryLit == nil {
ft.elemsIsBinaryLit = make([]bool, len(ft.elems))
}
ft.elemsIsBinaryLit[idx] = true
}
}

func (ft *FieldType) GetElem(idx int) string {
return ft.elems[idx]
}

func (ft *FieldType) GetElemIsBinaryLit(idx int) bool {
if len(ft.elemsIsBinaryLit) == 0 {
return false
}
return ft.elemsIsBinaryLit[idx]
}

// Clone returns a copy of itself.
func (ft *FieldType) Clone() *FieldType {
ret := *ft
Expand Down