From f2152d964b379d0a8c54349cf8a6dd716d60eb4a Mon Sep 17 00:00:00 2001 From: Dirkjan Bussink Date: Thu, 20 Jul 2023 11:47:41 +0200 Subject: [PATCH 1/2] vreplication: Move to use collations package The logic here for transcoding values was still using the Go transcoding layer even though we have our own collations implementation which is much more complete. This moves logic over to our internal package and removes the Go transcoders. Saves also about a quarter of an MB in binary size since we don't need to include the Go character maps anymore. Signed-off-by: Dirkjan Bussink --- go/mysql/constants.go | 31 ------------------- go/vt/vttablet/onlineddl/vrepl.go | 24 +++++++++----- .../vreplication/replicator_plan.go | 20 +++++------- 3 files changed, 23 insertions(+), 52 deletions(-) diff --git a/go/mysql/constants.go b/go/mysql/constants.go index 17fc17057f0..50806147af4 100644 --- a/go/mysql/constants.go +++ b/go/mysql/constants.go @@ -20,10 +20,6 @@ import ( "strconv" "strings" - "golang.org/x/text/encoding" - "golang.org/x/text/encoding/charmap" - "golang.org/x/text/encoding/simplifiedchinese" - "vitess.io/vitess/go/mysql/binlog" ) @@ -660,33 +656,6 @@ const ( SSQueryInterrupted = "70100" ) -// CharacterSetEncoding maps a charset name to a golang encoder. -// golang does not support encoders for all MySQL charsets. -// A charset not in this map is unsupported. -// A trivial encoding (e.g. 
utf8) has a `nil` encoder -var CharacterSetEncoding = map[string]encoding.Encoding{ - "cp850": charmap.CodePage850, - "koi8r": charmap.KOI8R, - "latin1": charmap.Windows1252, - "latin2": charmap.ISO8859_2, - "ascii": nil, - "hebrew": charmap.ISO8859_8, - "greek": charmap.ISO8859_7, - "cp1250": charmap.Windows1250, - "gbk": simplifiedchinese.GBK, - "latin5": charmap.ISO8859_9, - "utf8": nil, - "utf8mb3": nil, - "cp866": charmap.CodePage866, - "cp852": charmap.CodePage852, - "latin7": charmap.ISO8859_13, - "utf8mb4": nil, - "cp1251": charmap.Windows1251, - "cp1256": charmap.Windows1256, - "cp1257": charmap.Windows1257, - "binary": nil, -} - // IsNum returns true if a MySQL type is a numeric value. // It is the same as IS_NUM defined in mysql.h. func IsNum(typ uint8) bool { diff --git a/go/vt/vttablet/onlineddl/vrepl.go b/go/vt/vttablet/onlineddl/vrepl.go index 5b31b7663cf..1264465d4c3 100644 --- a/go/vt/vttablet/onlineddl/vrepl.go +++ b/go/vt/vttablet/onlineddl/vrepl.go @@ -31,7 +31,8 @@ import ( "strconv" "strings" - "vitess.io/vitess/go/mysql" + "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/mysql/collations/charset" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/textutil" "vitess.io/vitess/go/vt/binlog/binlogplayer" @@ -497,20 +498,19 @@ func (v *VRepl) generateFilterQuery(ctx context.Context) error { case sourceCol.Type == vrepl.StringColumnType: // Check source and target charset/encoding. 
If needed, create // a binlogdatapb.CharsetConversion entry (later written to vreplication) - fromEncoding, ok := mysql.CharacterSetEncoding[sourceCol.Charset] - if !ok { + fromCollation := collations.Local().DefaultCollationForCharset(sourceCol.Charset) + if fromCollation == nil { return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "Character set %s not supported for column %s", sourceCol.Charset, sourceCol.Name) } - toEncoding, ok := mysql.CharacterSetEncoding[targetCol.Charset] + toCollation := collations.Local().DefaultCollationForCharset(targetCol.Charset) // Let's see if target col is at all textual - if targetCol.Type == vrepl.StringColumnType && !ok { + if targetCol.Type == vrepl.StringColumnType && toCollation == nil { return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "Character set %s not supported for column %s", targetCol.Charset, targetCol.Name) } - if fromEncoding == nil && toEncoding == nil && targetCol.Type != vrepl.JSONColumnType { - // Both source and target have trivial charsets + + if trivialCharset(fromCollation) && trivialCharset(toCollation) && targetCol.Type != vrepl.JSONColumnType { sb.WriteString(escapeName(name)) } else { - // encoding can be nil for trivial charsets, like utf8, ascii, binary, etc. 
v.convertCharset[targetName] = &binlogdatapb.CharsetConversion{ FromCharset: sourceCol.Charset, ToCharset: targetCol.Charset, @@ -533,6 +533,14 @@ func (v *VRepl) generateFilterQuery(ctx context.Context) error { return nil } +func trivialCharset(c collations.Collation) bool { + if c == nil { + return true + } + utf8mb4Charset := charset.Charset_utf8mb4{} + return utf8mb4Charset.IsSuperset(c.Charset()) || c.ID() == collations.CollationBinaryID +} + func (v *VRepl) analyzeBinlogSource(ctx context.Context) { bls := &binlogdatapb.BinlogSource{ Keyspace: v.keyspace, diff --git a/go/vt/vttablet/tabletmanager/vreplication/replicator_plan.go b/go/vt/vttablet/tabletmanager/vreplication/replicator_plan.go index b07933519a6..fc0f0149098 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/replicator_plan.go +++ b/go/vt/vttablet/tabletmanager/vreplication/replicator_plan.go @@ -22,12 +22,12 @@ import ( "sort" "strings" + "vitess.io/vitess/go/mysql/collations/charset" "vitess.io/vitess/go/vt/vttablet" "google.golang.org/protobuf/proto" "vitess.io/vitess/go/bytes2" - "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/mysql/collations" vjson "vitess.io/vitess/go/mysql/json" "vitess.io/vitess/go/sqltypes" @@ -317,21 +317,15 @@ func (tp *TablePlan) isOutsidePKRange(bindvars map[string]*querypb.BindVariable, func (tp *TablePlan) bindFieldVal(field *querypb.Field, val *sqltypes.Value) (*querypb.BindVariable, error) { if conversion, ok := tp.ConvertCharset[field.Name]; ok && !val.IsNull() { // Non-null string value, for which we have a charset conversion instruction - valString := val.ToString() - fromEncoding, encodingOK := mysql.CharacterSetEncoding[conversion.FromCharset] - if !encodingOK { + fromCollation := collations.Local().DefaultCollationForCharset(conversion.FromCharset) + if fromCollation == nil { return nil, vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "Character set %s not supported for column %s", conversion.FromCharset, field.Name) } - if fromEncoding != nil { - // As 
reminder, encoding can be nil for trivial charsets, like utf8 or ascii. - // encoding will be non-nil for charsets like latin1, gbk, etc. - var err error - valString, err = fromEncoding.NewDecoder().String(valString) - if err != nil { - return nil, err - } + out, err := charset.Convert(nil, charset.Charset_utf8mb4{}, val.Raw(), fromCollation.Charset()) + if err != nil { + return nil, err } - return sqltypes.StringBindVariable(valString), nil + return sqltypes.StringBindVariable(string(out)), nil } if tp.ConvertIntToEnum[field.Name] && !val.IsNull() { // An integer converted to an enum. We must write the textual value of the int. i.e. 0 turns to '0' From 5310f10e0acb0a6267430e648134f7fee19238b4 Mon Sep 17 00:00:00 2001 From: Dirkjan Bussink Date: Thu, 20 Jul 2023 13:56:02 +0200 Subject: [PATCH 2/2] Remove GBK test Vitess does not actually support GBK, see https://vitess.io/docs/17.0/user-guides/configuration-basic/collations/ and we now match the rest of Vitess here. Signed-off-by: Dirkjan Bussink --- .../testdata/gbk-charset/create.sql | 25 ------------------- .../testdata/gbk-charset/extra_args | 0 2 files changed, 25 deletions(-) delete mode 100644 go/test/endtoend/onlineddl/vrepl_suite/testdata/gbk-charset/create.sql delete mode 100644 go/test/endtoend/onlineddl/vrepl_suite/testdata/gbk-charset/extra_args diff --git a/go/test/endtoend/onlineddl/vrepl_suite/testdata/gbk-charset/create.sql b/go/test/endtoend/onlineddl/vrepl_suite/testdata/gbk-charset/create.sql deleted file mode 100644 index b9a14cdc156..00000000000 --- a/go/test/endtoend/onlineddl/vrepl_suite/testdata/gbk-charset/create.sql +++ /dev/null @@ -1,25 +0,0 @@ -drop table if exists onlineddl_test; -create table onlineddl_test ( - id int(11) NOT NULL AUTO_INCREMENT, - name varchar(512) DEFAULT NULL, - v varchar(255) DEFAULT NULL COMMENT '添加普通列测试', - PRIMARY KEY (id) -) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=gbk; - -insert into onlineddl_test values (null, 'gbk-test-initial', '添加普通列测试-添加普通列测试'); 
-insert into onlineddl_test values (null, 'gbk-test-initial', '添加普通列测试-添加普通列测试'); - -drop event if exists onlineddl_test; -delimiter ;; -create event onlineddl_test - on schedule every 1 second - starts current_timestamp - ends current_timestamp + interval 60 second - on completion not preserve - enable - do -begin - insert into onlineddl_test (name) values ('gbk-test-default'); - insert into onlineddl_test values (null, 'gbk-test', '添加普通列测试-添加普通列测试'); - update onlineddl_test set v='添加普通列测试' where v='添加普通列测试-添加普通列测试' order by id desc limit 1; -end ;; diff --git a/go/test/endtoend/onlineddl/vrepl_suite/testdata/gbk-charset/extra_args b/go/test/endtoend/onlineddl/vrepl_suite/testdata/gbk-charset/extra_args deleted file mode 100644 index e69de29bb2d..00000000000