diff --git a/go/mysql/flavor_mysql.go b/go/mysql/flavor_mysql.go index 388986e96fe..820d7eb99ab 100644 --- a/go/mysql/flavor_mysql.go +++ b/go/mysql/flavor_mysql.go @@ -361,7 +361,7 @@ const TablesWithSize80 = `SELECT t.table_name, i.allocated_size FROM information_schema.tables t LEFT JOIN information_schema.innodb_tablespaces i - ON i.name = CONCAT(t.table_schema, '/', t.table_name) COLLATE utf8_general_ci + ON i.name = CONCAT(t.table_schema, '/', t.table_name) COLLATE utf8mb3_general_ci WHERE t.table_schema = database() AND not t.create_options <=> 'partitioned' UNION ALL @@ -374,7 +374,7 @@ UNION ALL SUM(i.allocated_size) FROM information_schema.tables t LEFT JOIN information_schema.innodb_tablespaces i - ON i.name LIKE (CONCAT(t.table_schema, '/', t.table_name, '#p#%') COLLATE utf8_general_ci ) + ON i.name LIKE (CONCAT(t.table_schema, '/', t.table_name, '#p#%') COLLATE utf8mb3_general_ci ) WHERE t.table_schema = database() AND t.create_options <=> 'partitioned' GROUP BY diff --git a/go/vt/dbconfigs/dbconfigs.go b/go/vt/dbconfigs/dbconfigs.go index 940652094c9..c904c273632 100644 --- a/go/vt/dbconfigs/dbconfigs.go +++ b/go/vt/dbconfigs/dbconfigs.go @@ -26,6 +26,8 @@ import ( "github.com/spf13/pflag" + "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/vt/servenv" "vitess.io/vitess/go/vt/vttls" @@ -416,6 +418,6 @@ func NewTestDBConfigs(genParams, appDebugParams mysql.ConnParams, dbname string) replParams: genParams, externalReplParams: genParams, DBName: dbname, - Charset: "utf8mb4_general_ci", + Charset: collations.Default().Get().Name(), } } diff --git a/go/vt/sqlparser/constants.go b/go/vt/sqlparser/constants.go index f9eeafb363d..450522fb8d5 100644 --- a/go/vt/sqlparser/constants.go +++ b/go/vt/sqlparser/constants.go @@ -211,7 +211,7 @@ const ( Utf16Str = "_utf16" Utf16leStr = "_utf16le" Utf32Str = "_utf32" - Utf8Str = "_utf8" + Utf8mb3Str = "_utf8mb3" Utf8mb4Str = "_utf8mb4" NStringStr = "N" diff --git a/go/vt/sqlparser/parse_test.go b/go/vt/sqlparser/parse_test.go index 516d3c28e53..24360c6b9c1 100644 --- a/go/vt/sqlparser/parse_test.go +++ b/go/vt/sqlparser/parse_test.go @@ -2599,7 +2599,8 @@ var ( }, { input: "select 1 from t where foo = _binary 'bar'", }, { - input: "select 1 from t where foo = _utf8 'bar' and bar = _latin1 'sjösjuk'", + input: "select 1 from t where foo = _utf8 'bar' and bar = _latin1 'sjösjuk'", + output: "select 1 from t where foo = _utf8mb3 'bar' and bar = _latin1 'sjösjuk'", }, { input: "select 1 from t where foo = _binary'bar'", output: "select 1 from t where foo = _binary 'bar'", @@ -2610,10 +2611,10 @@ var ( output: "select 1 from t where foo = _utf8mb4 'bar'", }, { input: "select 1 from t where foo = _utf8mb3 'bar'", - output: "select 1 from t where foo = _utf8 'bar'", + output: "select 1 from t where foo = _utf8mb3 'bar'", }, { - input: "select 1 from t where foo = _utf8mb3'bar'", - output: "select 1 from t where foo = _utf8 'bar'", + input: "select 1 from t where foo = _utf8'bar'", + output: "select 1 from t where foo = _utf8mb3 'bar'", }, { input: "select match(a) against ('foo') from t", }, { @@ -4036,13 +4037,13 @@ func TestIntroducers(t *testing.T) { output: "select _utf32 'x' from dual", }, { input: "select _utf8 'x'", - output: "select _utf8 'x' from dual", + output: "select _utf8mb3 'x' from dual", }, { input: "select _utf8mb4 'x'", output: "select _utf8mb4 'x' from dual", }, { input: "select _utf8mb3 'x'", - output: "select _utf8 'x' from dual", + output: "select _utf8mb3 'x' from dual", }} for _, tcase := range validSQL { t.Run(tcase.input, func(t *testing.T) { diff --git a/go/vt/sqlparser/sql.go b/go/vt/sqlparser/sql.go index eac852ae9e6..15500a26c7f 100644 --- a/go/vt/sqlparser/sql.go +++ b/go/vt/sqlparser/sql.go @@ -12042,7 +12042,7 @@ yydefault: yyDollar = yyS[yypt-1 : yypt+1] //line sql.y:1897 { - yyVAL.str = Utf8Str + yyVAL.str = Utf8mb3Str } case 291: yyDollar = yyS[yypt-1 : yypt+1] @@ -12054,7 +12054,7 @@ yydefault: yyDollar = yyS[yypt-1 : yypt+1] //line sql.y:1905 { - yyVAL.str = Utf8Str + yyVAL.str = Utf8mb3Str } case 295: yyDollar = yyS[yypt-1 : yypt+1] diff --git a/go/vt/sqlparser/sql.y b/go/vt/sqlparser/sql.y index a9b38ae03d6..99a2756173d 100644 --- a/go/vt/sqlparser/sql.y +++ b/go/vt/sqlparser/sql.y @@ -1895,7 +1895,7 @@ underscore_charsets: } | UNDERSCORE_UTF8 { - $$ = Utf8Str + $$ = Utf8mb3Str } | UNDERSCORE_UTF8MB4 { @@ -1903,7 +1903,7 @@ underscore_charsets: } | UNDERSCORE_UTF8MB3 { - $$ = Utf8Str + $$ = Utf8mb3Str } literal_or_null: diff --git a/go/vt/sqlparser/testdata/select_cases.txt b/go/vt/sqlparser/testdata/select_cases.txt index 0c62f5257d1..1112593cd13 100644 --- a/go/vt/sqlparser/testdata/select_cases.txt +++ b/go/vt/sqlparser/testdata/select_cases.txt @@ -8,7 +8,7 @@ INPUT select concat(a, if(b>10, _utf8 0xC3A6, _utf8 0xC3AF)) from t1; END OUTPUT -select concat(a, if(b > 10, _utf8 0xC3A6, _utf8 0xC3AF)) from t1 +select concat(a, if(b > 10, _utf8mb3 0xC3A6, _utf8mb3 0xC3AF)) from t1 END INPUT select a as 'x', t1.*, b as 'x' from t1; @@ -404,7 +404,7 @@ INPUT select locate(_utf8 0xD091, _utf8 0xD0B0D0B1D0B2 collate utf8_bin); END OUTPUT -select locate(_utf8 0xD091, _utf8 0xD0B0D0B1D0B2 collate utf8_bin) from dual +select locate(_utf8mb3 0xD091, _utf8mb3 0xD0B0D0B1D0B2 collate utf8_bin) from dual END INPUT select hex('a'), hex('a '); @@ -1004,7 +1004,7 @@ INPUT select soundex(_utf8 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB); END OUTPUT -select soundex(_utf8 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB) from dual +select soundex(_utf8mb3 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB) from dual END INPUT select t1.a, (case t1.a when 0 then 0 else t1.b end) d from t1 join t2 on t1.a=t2.c where b=11120436154190595086 order by d; @@ -1676,7 +1676,7 @@ INPUT select hex(soundex(_utf8 0xD091D092D093)); END OUTPUT -select hex(soundex(_utf8 0xD091D092D093)) from dual +select hex(soundex(_utf8mb3 0xD091D092D093)) from dual END INPUT select * from t1 where btn like "ff%"; @@ -2450,7 +2450,7 @@ INPUT select length(uuid()), charset(uuid()), length(unhex(replace(uuid(),_utf8'-',_utf8''))); END OUTPUT -select length(uuid()), charset(uuid()), length(unhex(replace(uuid(), _utf8 '-', _utf8 ''))) from dual +select length(uuid()), charset(uuid()), length(unhex(replace(uuid(), _utf8mb3 '-', _utf8mb3 ''))) from dual END INPUT select substring('hello', 4294967296, 4294967296); @@ -2588,7 +2588,7 @@ INPUT select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%'); END OUTPUT -select _utf8 0xD0B0D0B1D0B2 like concat(_utf8 '%', _utf8 0xD0B1, _utf8 '%') from dual +select _utf8mb3 0xD0B0D0B1D0B2 like concat(_utf8mb3 '%', _utf8mb3 0xD0B1, _utf8mb3 '%') from dual END INPUT select * from t1 where MATCH(a,b) AGAINST ("indexes"); @@ -2666,7 +2666,7 @@ INPUT select concat(a, if(b>10, _utf8'æ', _utf8'ß')) from t1; END OUTPUT -select concat(a, if(b > 10, _utf8 'æ', _utf8 'ß')) from t1 +select concat(a, if(b > 10, _utf8mb3 'æ', _utf8mb3 'ß')) from t1 END INPUT select hex(group_concat(a separator ',')) from t1; @@ -2702,7 +2702,7 @@ INPUT select hex(_utf8 X'616263FF'); END OUTPUT -select hex(_utf8 X'616263FF') from dual +select hex(_utf8mb3 X'616263FF') from dual END INPUT select t2.count, t1.name from t2 inner join t1 using (color); @@ -3098,7 +3098,7 @@ INPUT select locate(_utf8 0xD091, _utf8 0xD0B0D0B1D0B2); END OUTPUT -select locate(_utf8 0xD091, _utf8 0xD0B0D0B1D0B2) from dual +select locate(_utf8mb3 0xD091, _utf8mb3 0xD0B0D0B1D0B2) from dual END INPUT select group_concat(distinct a, c order by a desc, c desc) from t1; @@ -3308,7 +3308,7 @@ INPUT select i from t1 where a=repeat(_utf8 0xD0B1,200); END OUTPUT -select i from t1 where a = repeat(_utf8 0xD0B1, 200) +select i from t1 where a = repeat(_utf8mb3 0xD0B1, 200) END INPUT select @@read_rnd_buffer_size; @@ -3422,7 +3422,7 @@ INPUT select t1.*,t2.* from t1 left join t2 on (t1.b=t2.b) where charset(t2.a) = _utf8'binary' order by t1.a,t2.a; END OUTPUT -select t1.*, t2.* from t1 left join t2 on t1.b = t2.b where charset(t2.a) = _utf8 'binary' order by t1.a asc, t2.a asc +select t1.*, t2.* from t1 left join t2 on t1.b = t2.b where charset(t2.a) = _utf8mb3 'binary' order by t1.a asc, t2.a asc END INPUT select 1 from (select 1) as a; @@ -5846,7 +5846,7 @@ INPUT select concat(a, if(b>10, _utf8'x', _utf8'y')) from t1; END OUTPUT -select concat(a, if(b > 10, _utf8 'x', _utf8 'y')) from t1 +select concat(a, if(b > 10, _utf8mb3 'x', _utf8mb3 'y')) from t1 END INPUT select /lib32/ /libx32/ user, host, db, info from information_schema.processlist where state = 'User lock' and info = 'select get_lock('ee_16407_2', 60)'; @@ -6458,7 +6458,7 @@ INPUT select right(_utf8 0xD0B0D0B2D0B2,1); END OUTPUT -select right(_utf8 0xD0B0D0B2D0B2, 1) from dual +select right(_utf8mb3 0xD0B0D0B2D0B2, 1) from dual END INPUT select 5 div 2; @@ -7790,7 +7790,7 @@ INPUT select user() like _utf8"%@%"; END OUTPUT -select user() like _utf8 '%@%' from dual +select user() like _utf8mb3 '%@%' from dual END INPUT select st_distance(linestring(point(26,87),point(13,95)), geometrycollection(point(4.297374e+307,8.433875e+307), point(1e308, 1e308))) as dist; @@ -8462,7 +8462,7 @@ INPUT select locate(_utf8 0xD0B1, _utf8 0xD0B0D091D0B2); END OUTPUT -select locate(_utf8 0xD0B1, _utf8 0xD0B0D091D0B2) from dual +select locate(_utf8mb3 0xD0B1, _utf8mb3 0xD0B0D091D0B2) from dual END INPUT select 18446744073709551615, 18446744073709551615 DIV 1, 18446744073709551615 DIV 2; @@ -9182,7 +9182,7 @@ INPUT select locate(_utf8 0xD0B1, _utf8 0xD0B0D0B1D0B2); END OUTPUT -select locate(_utf8 0xD0B1, _utf8 0xD0B0D0B1D0B2) from dual +select locate(_utf8mb3 0xD0B1, _utf8mb3 0xD0B0D0B1D0B2) from dual END INPUT select * from t1 where b like 'foob%'; @@ -11186,7 +11186,7 @@ INPUT select (_utf8 X'616263FF'); END OUTPUT -select _utf8 X'616263FF' from dual +select _utf8mb3 X'616263FF' from dual END INPUT select f1, group_concat(f1+1) from t1 group by f1 with rollup; @@ -12368,7 +12368,7 @@ INPUT select concat(a, if(b>10, _utf8 0x78, _utf8 0x79)) from t1; END OUTPUT -select concat(a, if(b > 10, _utf8 0x78, _utf8 0x79)) from t1 +select concat(a, if(b > 10, _utf8mb3 0x78, _utf8mb3 0x79)) from t1 END INPUT select cast(19999999999999999999 as signed); @@ -12626,7 +12626,7 @@ INPUT select length(_utf8 0xD0B1), bit_length(_utf8 0xD0B1), char_length(_utf8 0xD0B1); END OUTPUT -select length(_utf8 0xD0B1), bit_length(_utf8 0xD0B1), char_length(_utf8 0xD0B1) from dual +select length(_utf8mb3 0xD0B1), bit_length(_utf8mb3 0xD0B1), char_length(_utf8mb3 0xD0B1) from dual END INPUT select @@keycache1.key_buffer_size; @@ -12794,7 +12794,7 @@ INPUT select version()>=_utf8"3.23.29"; END OUTPUT -select version() >= _utf8 '3.23.29' from dual +select version() >= _utf8mb3 '3.23.29' from dual END INPUT select table_name, column_name, privileges from information_schema.columns where table_schema = 'mysqltest' and table_name = 'v1' order by table_name, column_name; @@ -13538,7 +13538,7 @@ INPUT select left(_utf8 0xD0B0D0B1D0B2,1); END OUTPUT -select left(_utf8 0xD0B0D0B1D0B2, 1) from dual +select left(_utf8mb3 0xD0B0D0B1D0B2, 1) from dual END INPUT select * from information_schema.SCHEMA_PRIVILEGES where grantee like '%mysqltest_1%'; @@ -14426,7 +14426,7 @@ INPUT select i from t1 where a=repeat(_utf8 'a',200); END OUTPUT -select i from t1 where a = repeat(_utf8 'a', 200) +select i from t1 where a = repeat(_utf8mb3 'a', 200) END INPUT select time_format('100:00:00', '%H %k %h %I %l'); @@ -14684,7 +14684,7 @@ INPUT select collation(charset(_utf8'a')), collation(collation(_utf8'a')); END OUTPUT -select collation(charset(_utf8 'a')), collation(collation(_utf8 'a')) from dual +select collation(charset(_utf8mb3 'a')), collation(collation(_utf8mb3 'a')) from dual END INPUT select last_day('2000-02-05') as f1, last_day('2002-12-31') as f2, last_day('2003-03-32') as f3, last_day('2003-04-01') as f4, last_day('2001-01-01 01:01:01') as f5, last_day(NULL), last_day('2001-02-12'); @@ -15800,7 +15800,7 @@ INPUT select hex(soundex(_utf8 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB)); END OUTPUT -select hex(soundex(_utf8 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB)) from dual +select hex(soundex(_utf8mb3 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB)) from dual END INPUT select a2 from t3 join (t1 join t2 using (a1)) on b=c1 join t4 using (c2); @@ -16802,7 +16802,7 @@ INPUT select locate(_utf8 0xD0B1, _utf8 0xD0B0D091D0B2 collate utf8_bin); END OUTPUT -select locate(_utf8 0xD0B1, _utf8 0xD0B0D091D0B2 collate utf8_bin) from dual +select locate(_utf8mb3 0xD0B1, _utf8mb3 0xD0B0D091D0B2 collate utf8_bin) from dual END INPUT select insert('hello', 1, -4294967295, 'hi'); @@ -17906,7 +17906,7 @@ INPUT select export_set(3, _latin1'foo', _utf8'bar', ',', 4); END OUTPUT -select export_set(3, _latin1 'foo', _utf8 'bar', ',', 4) from dual +select export_set(3, _latin1 'foo', _utf8mb3 'bar', ',', 4) from dual END INPUT select a,hex(a) from t1; @@ -18116,7 +18116,7 @@ INPUT select database() = _utf8"test"; END OUTPUT -select database() = _utf8 'test' from dual +select database() = _utf8mb3 'test' from dual END INPUT select collation(char(123)), collation(char(123 using binary)); @@ -18746,7 +18746,7 @@ INPUT select charset(charset(_utf8'a')), charset(collation(_utf8'a')); END OUTPUT -select charset(charset(_utf8 'a')), charset(collation(_utf8 'a')) from dual +select charset(charset(_utf8mb3 'a')), charset(collation(_utf8mb3 'a')) from dual END INPUT select * from `information_schema`.`key_column_usage` where `TABLE_NAME` = NULL; @@ -18944,7 +18944,7 @@ INPUT select i from t1 where b=repeat(_utf8 'b',310); END OUTPUT -select i from t1 where b = repeat(_utf8 'b', 310) +select i from t1 where b = repeat(_utf8mb3 'b', 310) END INPUT select * from t1 where not(not(a)); @@ -18962,13 +18962,13 @@ INPUT select ifnull(NULL, _utf8'string'); END OUTPUT -select ifnull(null, _utf8 'string') from dual +select ifnull(null, _utf8mb3 'string') from dual END INPUT select hex(_utf8 B'001111111111'); END OUTPUT -select hex(_utf8 B'001111111111') from dual +select hex(_utf8mb3 B'001111111111') from dual END INPUT select right('hello', -18446744073709551615); @@ -19430,7 +19430,7 @@ INPUT select t1.*,t2.* from t1 left join t2 on (t1.b=t2.b) where collation(t2.a) = _utf8'binary' order by t1.a,t2.a; END OUTPUT -select t1.*, t2.* from t1 left join t2 on t1.b = t2.b where collation(t2.a) = _utf8 'binary' order by t1.a asc, t2.a asc +select t1.*, t2.* from t1 left join t2 on t1.b = t2.b where collation(t2.a) = _utf8mb3 'binary' order by t1.a asc, t2.a asc END INPUT select * from t1 where i = 2; @@ -19646,7 +19646,7 @@ INPUT select greatest(1,_utf16'.',_utf8''); END OUTPUT -select greatest(1, _utf16 '.', _utf8 '') from dual +select greatest(1, _utf16 '.', _utf8mb3 '') from dual END INPUT select round(1e1,308), truncate(1e1, 308); @@ -20552,7 +20552,7 @@ INPUT select soundex(_utf8 0xD091D092D093); END OUTPUT -select soundex(_utf8 0xD091D092D093) from dual +select soundex(_utf8mb3 0xD091D092D093) from dual END INPUT select sum(a) from t1 where a > 10; @@ -20840,7 +20840,7 @@ INPUT select repeat(_utf8'+',3) as h union select NULL; END OUTPUT -select repeat(_utf8 '+', 3) as h from dual union select null from dual +select repeat(_utf8mb3 '+', 3) as h from dual union select null from dual END INPUT select fld1,fld3 FROM t2 where fld1 like "25050%"; @@ -20978,7 +20978,7 @@ INPUT select hex(_utf8 0x616263FF); END OUTPUT -select hex(_utf8 0x616263FF) from dual +select hex(_utf8mb3 0x616263FF) from dual END INPUT select avg(a) as x from t1 having x=2; diff --git a/go/vt/sqlparser/testdata/union_cases.txt b/go/vt/sqlparser/testdata/union_cases.txt index 529ebdb5efd..8e2def0e04e 100644 --- a/go/vt/sqlparser/testdata/union_cases.txt +++ b/go/vt/sqlparser/testdata/union_cases.txt @@ -572,7 +572,7 @@ INPUT select repeat(_utf8'+',3) as h union select NULL; END OUTPUT -select repeat(_utf8 '+', 3) as h from dual union select null from dual +select repeat(_utf8mb3 '+', 3) as h from dual union select null from dual END INPUT SELECT * FROM t1 UNION SELECT /*+ MAX_EXECUTION_TIME(0) */ * FROM t1; diff --git a/go/vt/vtgate/evalengine/fn_string.go b/go/vt/vtgate/evalengine/fn_string.go index 7146ac03b68..211e0c46fd6 100644 --- a/go/vt/vtgate/evalengine/fn_string.go +++ b/go/vt/vtgate/evalengine/fn_string.go @@ -431,7 +431,7 @@ func (c *builtinCollation) eval(env *ExpressionEnv) (eval, error) { col := evalCollation(arg).Collation.Get() - // the collation of a `COLLATION` expr is hardcoded to `utf8_general_ci`, + // the collation of a `COLLATION` expr is hardcoded to `utf8mb3_general_ci`, // not to the default collation of our connection. this is probably a bug in MySQL, but we match it return newEvalText([]byte(col.Name()), collationUtf8mb3), nil } diff --git a/go/vt/vtgate/executor_test.go b/go/vt/vtgate/executor_test.go index 3a0ce80a2e9..bf7f1eac211 100644 --- a/go/vt/vtgate/executor_test.go +++ b/go/vt/vtgate/executor_test.go @@ -644,49 +644,50 @@ func TestExecutorShow(t *testing.T) { _, err = executor.Execute(ctx, nil, "TestExecute", session, fmt.Sprintf("show full columns from unknown from %v", KsTestUnsharded), nil) require.NoError(t, err) - for _, query := range []string{"show charset", "show character set"} { + for _, query := range []string{"show charset like 'utf8%'", "show character set like 'utf8%'"} { qr, err := executor.Execute(ctx, nil, "TestExecute", session, query, nil) require.NoError(t, err) wantqr := &sqltypes.Result{ - Fields: append(buildVarCharFields("Charset", "Description", "Default collation"), &querypb.Field{Name: "Maxlen", Type: sqltypes.Int32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_NOT_NULL_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG | querypb.MySqlFlag_NO_DEFAULT_VALUE_FLAG)}), + Fields: append(buildVarCharFields("Charset", "Description", "Default collation"), &querypb.Field{Name: "Maxlen", Type: sqltypes.Uint32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_NOT_NULL_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG | querypb.MySqlFlag_NO_DEFAULT_VALUE_FLAG)}), Rows: [][]sqltypes.Value{ append(buildVarCharRow( - "utf8", + "utf8mb3", "UTF-8 Unicode", - "utf8_general_ci"), sqltypes.NewInt32(3)), + "utf8mb3_general_ci"), + sqltypes.NewUint32(3)), append(buildVarCharRow( "utf8mb4", "UTF-8 Unicode", - "utf8mb4_general_ci"), - sqltypes.NewInt32(4)), + collations.Default().Get().Name()), + sqltypes.NewUint32(4)), }, } utils.MustMatch(t, wantqr, qr, query) } - for _, query := range []string{"show charset like '%foo'", "show character set like 'foo%'", "show charset like 'foo%'", "show character set where foo like 'utf8'", "show character set where charset like '%foo'", "show charset where charset = '%foo'"} { + for _, query := range []string{"show charset like '%foo'", "show character set like 'foo%'", "show charset like 'foo%'", "show character set where charset like '%foo'", "show charset where charset = '%foo'"} { qr, err := executor.Execute(ctx, nil, "TestExecute", session, query, nil) require.NoError(t, err) wantqr := &sqltypes.Result{ - Fields: append(buildVarCharFields("Charset", "Description", "Default collation"), &querypb.Field{Name: "Maxlen", Type: sqltypes.Int32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_NOT_NULL_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG | querypb.MySqlFlag_NO_DEFAULT_VALUE_FLAG)}), - Rows: [][]sqltypes.Value{}, + Fields: append(buildVarCharFields("Charset", "Description", "Default collation"), &querypb.Field{Name: "Maxlen", Type: sqltypes.Uint32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_NOT_NULL_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG | querypb.MySqlFlag_NO_DEFAULT_VALUE_FLAG)}), RowsAffected: 0, } utils.MustMatch(t, wantqr, qr, query) } - for _, query := range []string{"show charset like 'utf8'", "show character set like 'utf8'", "show charset where charset = 'utf8'", "show character set where charset = 'utf8'"} { + for _, query := range []string{"show charset like 'utf8mb3'", "show character set like 'utf8mb3'", "show charset where charset = 'utf8mb3'", "show character set where charset = 'utf8mb3'"} { qr, err := executor.Execute(ctx, nil, "TestExecute", session, query, nil) require.NoError(t, err) wantqr := &sqltypes.Result{ - Fields: append(buildVarCharFields("Charset", "Description", "Default collation"), &querypb.Field{Name: "Maxlen", Type: sqltypes.Int32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_NOT_NULL_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG | querypb.MySqlFlag_NO_DEFAULT_VALUE_FLAG)}), + Fields: append(buildVarCharFields("Charset", "Description", "Default collation"), &querypb.Field{Name: "Maxlen", Type: sqltypes.Uint32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_NOT_NULL_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG | querypb.MySqlFlag_NO_DEFAULT_VALUE_FLAG)}), Rows: [][]sqltypes.Value{ append(buildVarCharRow( - "utf8", + "utf8mb3", "UTF-8 Unicode", - "utf8_general_ci"), sqltypes.NewInt32(3)), + "utf8mb3_general_ci"), + sqltypes.NewUint32(3)), }, } @@ -697,18 +698,23 @@ func TestExecutorShow(t *testing.T) { qr, err := executor.Execute(ctx, nil, "TestExecute", session, query, nil) require.NoError(t, err) wantqr := &sqltypes.Result{ - Fields: append(buildVarCharFields("Charset", "Description", "Default collation"), &querypb.Field{Name: "Maxlen", Type: sqltypes.Int32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_NOT_NULL_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG | querypb.MySqlFlag_NO_DEFAULT_VALUE_FLAG)}), + Fields: append(buildVarCharFields("Charset", "Description", "Default collation"), &querypb.Field{Name: "Maxlen", Type: sqltypes.Uint32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_NOT_NULL_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG | querypb.MySqlFlag_NO_DEFAULT_VALUE_FLAG)}), Rows: [][]sqltypes.Value{ append(buildVarCharRow( "utf8mb4", "UTF-8 Unicode", - "utf8mb4_general_ci"), - sqltypes.NewInt32(4)), + collations.Default().Get().Name()), + sqltypes.NewUint32(4)), }, } utils.MustMatch(t, wantqr, qr, query) } + for _, query := range []string{"show character set where foo like '%foo'"} { + _, err := executor.Execute(ctx, nil, "TestExecute", session, query, nil) + require.Error(t, err) + } + query = "show engines" qr, err = executor.Execute(ctx, nil, "TestExecute", session, query, nil) require.NoError(t, err) diff --git a/go/vt/vtgate/planbuilder/set.go b/go/vt/vtgate/planbuilder/set.go index 8508a791d41..7b1e584132d 100644 --- a/go/vt/vtgate/planbuilder/set.go +++ b/go/vt/vtgate/planbuilder/set.go @@ -261,7 +261,7 @@ func extractValue(expr *sqlparser.SetExpr, boolean bool) (string, error) { } case *sqlparser.ColName: // this is a little of a hack. it's used when the setting is not a normal expression, but rather - // an enumeration, such as utf8, utf8mb4, etc + // an enumeration, such as utf8mb3, utf8mb4, etc switch node.Name.Lowered() { case "on": return "1", nil diff --git a/go/vt/vtgate/planbuilder/show.go b/go/vt/vtgate/planbuilder/show.go index 27b184b2bcc..a7cc0159aa9 100644 --- a/go/vt/vtgate/planbuilder/show.go +++ b/go/vt/vtgate/planbuilder/show.go @@ -21,6 +21,7 @@ import ( "regexp" "sort" "strings" + "sync" "vitess.io/vitess/go/mysql/collations" "vitess.io/vitess/go/sqltypes" @@ -29,6 +30,7 @@ import ( querypb "vitess.io/vitess/go/vt/proto/query" topodatapb "vitess.io/vitess/go/vt/proto/topodata" vschemapb "vitess.io/vitess/go/vt/proto/vschema" + vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" "vitess.io/vitess/go/vt/sidecardb" "vitess.io/vitess/go/vt/sqlparser" "vitess.io/vitess/go/vt/vterrors" @@ -39,9 +41,6 @@ import ( ) const ( - utf8 = "utf8" - utf8mb4 = "utf8mb4" - both = "both" charset = "charset" ) @@ -134,16 +133,13 @@ func buildShowTargetPlan(vschema plancontext.VSchema) (engine.Primitive, error) func buildCharsetPlan(show *sqlparser.ShowBasic) (engine.Primitive, error) { fields := buildVarCharFields("Charset", "Description", "Default collation") - maxLenField := &querypb.Field{Name: "Maxlen", Type: sqltypes.Int32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_NOT_NULL_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG | querypb.MySqlFlag_NO_DEFAULT_VALUE_FLAG)} + maxLenField := &querypb.Field{Name: "Maxlen", Type: sqltypes.Uint32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_NOT_NULL_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG | querypb.MySqlFlag_NO_DEFAULT_VALUE_FLAG)} fields = append(fields, maxLenField) - - charsets := []string{utf8, utf8mb4} - rows, err := generateCharsetRows(show.Filter, charsets) + cs, err := generateCharsetRows(show.Filter) if err != nil { return nil, err } - - return engine.NewRowsPrimitive(rows, fields), nil + return engine.NewRowsPrimitive(cs, fields), nil } func buildSendAnywherePlan(show *sqlparser.ShowBasic, vschema plancontext.VSchema) (engine.Primitive, error) { @@ -355,20 +351,13 @@ func buildVarCharRow(values ...string) []sqltypes.Value { return row } -func generateCharsetRows(showFilter *sqlparser.ShowFilter, colNames []string) ([][]sqltypes.Value, error) { +func generateCharsetRows(showFilter *sqlparser.ShowFilter) ([][]sqltypes.Value, error) { if showFilter == nil { - return buildCharsetRows(both), nil + return charsets(), nil } - var filteredColName string - var err error - if showFilter.Like != "" { - filteredColName, err = checkLikeOpt(showFilter.Like, colNames) - if err != nil { - return nil, err - } - + return filterLike(showFilter.Like, charsets()) } else { cmpExp, ok := showFilter.Filter.(*sqlparser.ComparisonExpr) if !ok { @@ -390,61 +379,84 @@ func generateCharsetRows(showFilter *sqlparser.ShowFilter, colNames []string) ([ switch cmpExp.Operator { case sqlparser.EqualOp: - for _, colName := range colNames { + for _, row := range charsets() { + colName := row[0].ToString() if rightString == colName { - filteredColName = colName + return [][]sqltypes.Value{row}, nil } } + return nil, nil case sqlparser.LikeOp: - filteredColName, err = checkLikeOpt(rightString, colNames) - if err != nil { - return nil, err - } + return filterLike(rightString, charsets()) } + } else { + return nil, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.BadFieldError, "Unknown column '%s' in 'where clause'", left.Name.String()) } - } - return buildCharsetRows(filteredColName), nil + return charsets(), nil } -func buildCharsetRows(colName string) [][]sqltypes.Value { - row0 := buildVarCharRow( - "utf8", - "UTF-8 Unicode", - "utf8_general_ci") - row0 = append(row0, sqltypes.NewInt32(3)) - row1 := buildVarCharRow( - "utf8mb4", - "UTF-8 Unicode", - "utf8mb4_general_ci") - row1 = append(row1, sqltypes.NewInt32(4)) - - switch colName { - case utf8: - return [][]sqltypes.Value{row0} - case utf8mb4: - return [][]sqltypes.Value{row1} - case both: - return [][]sqltypes.Value{row0, row1} - } - - return [][]sqltypes.Value{} +var once sync.Once +var charsetRows [][]sqltypes.Value + +func charsets() [][]sqltypes.Value { + once.Do(func() { + charsetRows = [][]sqltypes.Value{ + append(buildVarCharRow("armscii8", "ARMSCII-8 Armenian", "armscii8_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("ascii", "US ASCII", "ascii_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("binary", "Binary pseudo charset", "binary"), sqltypes.NewUint32(1)), + append(buildVarCharRow("cp1250", "Windows Central European", "cp1250_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("cp1251", "Windows Cyrillic", "cp1251_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("cp1256", "Windows Arabic", "cp1256_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("cp1257", "Windows Baltic", "cp1257_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("cp850", "DOS West European", "cp850_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("cp852", "DOS Central European", "cp852_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("cp866", "DOS Russian", "cp866_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("cp932", "SJIS for Windows Japanese", "cp932_japanese_ci"), sqltypes.NewUint32(2)), + append(buildVarCharRow("dec8", "DEC West European", "dec8_swedish_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("eucjpms", "UJIS for Windows Japanese", "eucjpms_japanese_ci"), sqltypes.NewUint32(3)), + append(buildVarCharRow("euckr", "EUC-KR Korean", "euckr_korean_ci"), sqltypes.NewUint32(2)), + append(buildVarCharRow("gb2312", "GB2312 Simplified Chinese", "gb2312_chinese_ci"), sqltypes.NewUint32(2)), + append(buildVarCharRow("geostd8", "GEOSTD8 Georgian", "geostd8_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("greek", "ISO 8859-7 Greek", "greek_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("hebrew", "ISO 8859-8 Hebrew", "hebrew_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("hp8", "HP West European", "hp8_english_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("keybcs2", "DOS Kamenicky Czech-Slovak", "keybcs2_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("koi8r", "KOI8-R Relcom Russian", "koi8r_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("koi8u", "KOI8-U Ukrainian", "koi8u_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("latin1", "cp1252 West European", "latin1_swedish_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("latin2", "ISO 8859-2 Central European", "latin2_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("latin5", "ISO 8859-9 Turkish", "latin5_turkish_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("latin7", "ISO 8859-13 Baltic", "latin7_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("macce", "Mac Central European", "macce_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("macroman", "Mac West European", "macroman_general_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("sjis", "Shift-JIS Japanese", "sjis_japanese_ci"), sqltypes.NewUint32(2)), + append(buildVarCharRow("swe7", "7bit Swedish", "swe7_swedish_ci"), sqltypes.NewUint32(1)), + append(buildVarCharRow("ucs2", "UCS-2 Unicode", "ucs2_general_ci"), sqltypes.NewUint32(2)), + append(buildVarCharRow("ujis", "EUC-JP Japanese", "ujis_japanese_ci"), sqltypes.NewUint32(3)), + append(buildVarCharRow("utf16", "UTF-16 Unicode", "utf16_general_ci"), sqltypes.NewUint32(4)), + append(buildVarCharRow("utf16le", "UTF-16LE Unicode", "utf16le_general_ci"), sqltypes.NewUint32(4)), + append(buildVarCharRow("utf32", "UTF-32 Unicode", "utf32_general_ci"), sqltypes.NewUint32(4)), + append(buildVarCharRow("utf8mb3", "UTF-8 Unicode", "utf8mb3_general_ci"), sqltypes.NewUint32(3)), + append(buildVarCharRow("utf8mb4", "UTF-8 Unicode", "utf8mb4_0900_ai_ci"), sqltypes.NewUint32(4)), + } + }) + + return charsetRows } -func checkLikeOpt(likeOpt string, colNames []string) (string, error) { - likeRegexp := strings.ReplaceAll(likeOpt, "%", ".*") - for _, v := range colNames { - match, err := regexp.MatchString(likeRegexp, v) - if err != nil { - return "", err - } - if match { - return v, nil +func filterLike(likeOpt string, charsets [][]sqltypes.Value) ([][]sqltypes.Value, error) { + likeRegexp := sqlparser.LikeToRegexp(likeOpt) + var results [][]sqltypes.Value + for _, row := range charsets { + colName := row[0].ToString() + if likeRegexp.MatchString(colName) { + results = append(results, row) } } - return "", nil + return results, nil } func buildShowCreatePlan(show *sqlparser.ShowCreate, vschema plancontext.VSchema) (engine.Primitive, error) { diff --git a/go/vt/vtgate/planbuilder/show_test.go b/go/vt/vtgate/planbuilder/show_test.go index 5d84a77c0a9..3caae74bf27 100644 --- a/go/vt/vtgate/planbuilder/show_test.go +++ b/go/vt/vtgate/planbuilder/show_test.go @@ -23,6 +23,8 @@ import ( "github.com/stretchr/testify/require" + "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/sqlparser" "vitess.io/vitess/go/vt/vtgate/vindexes" @@ -61,56 +63,56 @@ func TestBuildDBPlan(t *testing.T) { } func TestGenerateCharsetRows(t *testing.T) { - rows := make([][]sqltypes.Value, 0, 4) rows0 := [][]sqltypes.Value{ append(buildVarCharRow( - "utf8", + "utf8mb3", "UTF-8 Unicode", - "utf8_general_ci"), - sqltypes.NewInt32(3)), + "utf8mb3_general_ci"), + sqltypes.NewUint32(3)), } rows1 := [][]sqltypes.Value{ append(buildVarCharRow( "utf8mb4", "UTF-8 Unicode", - "utf8mb4_general_ci"), - sqltypes.NewInt32(4)), + collations.Default().Get().Name()), + sqltypes.NewUint32(4)), } rows2 := [][]sqltypes.Value{ append(buildVarCharRow( - "utf8", + "utf8mb3", "UTF-8 Unicode", - "utf8_general_ci"), - sqltypes.NewInt32(3)), + "utf8mb3_general_ci"), + sqltypes.NewUint32(3)), append(buildVarCharRow( "utf8mb4", "UTF-8 Unicode", - "utf8mb4_general_ci"), - sqltypes.NewInt32(4)), + collations.Default().Get().Name()), + sqltypes.NewUint32(4)), } testcases := []struct { input string expected [][]sqltypes.Value }{ - {input: "show charset", expected: rows2}, - {input: "show character set", expected: rows2}, - {input: "show charset where charset like 'foo%'", expected: rows}, - {input: "show charset where charset like 'utf8%'", expected: rows0}, - {input: "show charset where charset = 'utf8'", expected: rows0}, - {input: "show charset where charset = 'foo%'", expected: rows}, + {input: "show charset", expected: charsets()}, + {input: "show character set", expected: charsets()}, + {input: "show charset where charset like 'foo%'", expected: nil}, + {input: "show charset where charset like 'utf8%'", expected: rows2}, + {input: "show charset where charset like 'utf8mb3%'", expected: rows0}, + {input: "show charset where charset like 'foo%'", expected: nil}, + {input: "show character set where charset like '%foo'", expected: nil}, + {input: "show charset where charset = 'utf8mb3'", expected: rows0}, + {input: "show charset where charset = 'foo%'", expected: nil}, {input: "show charset where charset = 'utf8mb4'", expected: rows1}, } - charsets := []string{"utf8", "utf8mb4"} - for _, tc := range testcases { t.Run(tc.input, func(t *testing.T) { stmt, err := sqlparser.Parse(tc.input) require.NoError(t, err) match := stmt.(*sqlparser.Show).Internal.(*sqlparser.ShowBasic) filter := match.Filter - actual, err := generateCharsetRows(filter, charsets) + actual, err := generateCharsetRows(filter) require.NoError(t, err) require.Equal(t, tc.expected, actual) }) diff --git a/go/vt/vtgate/planbuilder/testdata/show_cases.json b/go/vt/vtgate/planbuilder/testdata/show_cases.json index 84bbf3eb3ea..bd685baf71b 100644 --- a/go/vt/vtgate/planbuilder/testdata/show_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/show_cases.json @@ -165,9 +165,9 @@ "Charset": "VARCHAR", "Default collation": "VARCHAR", "Description": "VARCHAR", - "Maxlen": "INT32" + "Maxlen": "UINT32" }, - "RowCount": 2 + "RowCount": 37 } } },