Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

collation: add utf8mb4_zh_pinyin_tidb_as_cs collation interface #20504

Merged
merged 25 commits into from
Nov 3, 2020
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
ebcccb6
add utf8mb4_general_zh_ci interface
xiongjiwei Oct 16, 2020
b60a3fd
fmt
xiongjiwei Oct 19, 2020
d50b27b
use parser method instead access collation directly
xiongjiwei Oct 19, 2020
64059f8
update parser
xiongjiwei Oct 19, 2020
971101e
Merge remote-tracking branch 'upstream/master' into pinyin-order
xiongjiwei Oct 19, 2020
fbe58f6
update parser
xiongjiwei Oct 19, 2020
61b834b
use collate to get collation id
xiongjiwei Oct 19, 2020
cecafd0
change test
xiongjiwei Oct 19, 2020
bd4a76d
update test
xiongjiwei Oct 19, 2020
ac5b989
update go.mod
xiongjiwei Oct 22, 2020
2be2d9d
Merge remote-tracking branch 'upstream/master' into pinyin-order
xiongjiwei Oct 22, 2020
6e5972f
Merge remote-tracking branch 'upstream/master' into pinyin-order
xiongjiwei Oct 22, 2020
961a626
change code style
xiongjiwei Oct 22, 2020
09fe5af
rename file name
xiongjiwei Oct 23, 2020
4f634ea
move const number
xiongjiwei Oct 23, 2020
91db9f5
update license
xiongjiwei Oct 23, 2020
ebd5321
Merge branch 'master' into pinyin-order
xiongjiwei Oct 27, 2020
a705702
address comments
xiongjiwei Oct 28, 2020
31c2182
Merge branch 'master' into pinyin-order
xiongjiwei Oct 28, 2020
7cdd8ee
rename file
xiongjiwei Oct 28, 2020
7cb682c
remove dup log
xiongjiwei Oct 28, 2020
bc80c35
Merge branch 'master' into pinyin-order
xiongjiwei Oct 29, 2020
d32f607
rename collator
xiongjiwei Oct 29, 2020
46a7270
Merge branch 'master' into pinyin-order
ti-srebot Nov 3, 2020
dc9c281
Merge branch 'master' into pinyin-order
ti-srebot Nov 3, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ddl/serial_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1285,6 +1285,7 @@ func (s *testSerialSuite) TestModifyingColumn4NewCollations(c *C) {
tk.MustExec("alter table t collate utf8mb4_general_ci")
tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_bin")
tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_unicode_ci")
tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_zh_pinyin_tidb_as_cs")
// Change the default collation of database is allowed.
tk.MustExec("alter database dct charset utf8mb4 collate utf8mb4_general_ci")
}
Expand Down
1 change: 1 addition & 0 deletions executor/seqtest/seq_executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1240,6 +1240,7 @@ func (s *seqTestSuite) TestShowForNewCollations(c *C) {
"utf8mb4_bin utf8mb4 46 Yes Yes 1",
"utf8mb4_general_ci utf8mb4 45 Yes 1",
"utf8mb4_unicode_ci utf8mb4 224 Yes 1",
"utf8mb4_zh_pinyin_tidb_as_cs utf8mb4 2048 Yes 1",
)
tk.MustQuery("show collation").Check(expectRows)
tk.MustQuery("select * from information_schema.COLLATIONS").Check(expectRows)
Expand Down
1 change: 1 addition & 0 deletions expression/distsql_builtin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ func (s *testEvalSerialSuite) TestPBToExprWithNewCollation(c *C) {
{"some_error_collation", "utf8mb4_bin", 46, 46},
{"utf8_unicode_ci", "utf8_unicode_ci", 192, 192},
{"utf8mb4_unicode_ci", "utf8mb4_unicode_ci", 224, 224},
{"utf8mb4_zh_pinyin_tidb_as_cs", "utf8mb4_zh_pinyin_tidb_as_cs", 2048, 2048},
}

for _, cs := range cases {
Expand Down
11 changes: 6 additions & 5 deletions expression/expr_to_pb.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/gogo/protobuf/proto"
"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/parser/charset"
"github.com/pingcap/parser/mysql"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/sessionctx/stmtctx"
Expand Down Expand Up @@ -174,8 +175,8 @@ func FieldTypeFromPB(ft *tipb.FieldType) *types.FieldType {
}

func collationToProto(c string) int32 {
if v, ok := mysql.CollationNames[c]; ok {
return collate.RewriteNewCollationIDIfNeeded(int32(v))
if coll, err := charset.GetCollationByName(c); err == nil {
return collate.RewriteNewCollationIDIfNeeded(int32(coll.ID))
}
v := collate.RewriteNewCollationIDIfNeeded(int32(mysql.DefaultCollationID))
logutil.BgLogger().Warn(
Expand All @@ -188,9 +189,9 @@ func collationToProto(c string) int32 {
}

func protoToCollation(c int32) string {
v, ok := mysql.Collations[uint8(collate.RestoreCollationIDIfNeeded(c))]
if ok {
return v
coll, err := charset.GetCollationByID(int(collate.RestoreCollationIDIfNeeded(c)))
if err == nil {
return coll.Name
}
logutil.BgLogger().Warn(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

log the err

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should log it as a warning.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The err and the original log say the same thing, and the original log has more detail. It's better to keep the original code.

"Unable to get collation name from ID, use name of the default collation instead",
Expand Down
2 changes: 2 additions & 0 deletions expression/expr_to_pb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,7 @@ func (s *testEvaluatorSerialSuites) TestNewCollationsEnabled(c *C) {
colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeString, 4), "utf8mb4_0900_ai_ci"))
colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 5), "utf8_bin"))
colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 6), "utf8_unicode_ci"))
colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 7), "utf8mb4_zh_pinyin_tidb_as_cs"))
pushed, _ := PushDownExprs(sc, colExprs, client, kv.UnSpecified)
c.Assert(len(pushed), Equals, len(colExprs))
pbExprs, err := ExpressionsToPBList(sc, colExprs, client)
Expand All @@ -800,6 +801,7 @@ func (s *testEvaluatorSerialSuites) TestNewCollationsEnabled(c *C) {
"{\"tp\":201,\"val\":\"gAAAAAAAAAQ=\",\"sig\":0,\"field_type\":{\"tp\":254,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-255,\"charset\":\"\"}}",
"{\"tp\":201,\"val\":\"gAAAAAAAAAU=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-83,\"charset\":\"\"}}",
"{\"tp\":201,\"val\":\"gAAAAAAAAAY=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-192,\"charset\":\"\"}}",
"{\"tp\":201,\"val\":\"gAAAAAAAAAc=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-2048,\"charset\":\"\"}}",
}
for i, pbExpr := range pbExprs {
c.Assert(pbExprs, NotNil)
Expand Down
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
module github.com/pingcap/tidb

require (
cloud.google.com/go v0.51.0 // indirect
github.com/BurntSushi/toml v0.3.1
github.com/HdrHistogram/hdrhistogram-go v0.9.0 // indirect
github.com/Jeffail/gabs/v2 v2.5.1
Expand Down
3 changes: 0 additions & 3 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxK
cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc=
cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0=
cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To=
cloud.google.com/go v0.51.0 h1:PvKAVQWCtlGUSlZkGW3QLelKaWq7KYv/MW1EboG8bfM=
cloud.google.com/go v0.51.0/go.mod h1:hWtGJ6gnXH+KgDv+V0zFGDvpi07n3z8ZNj3T1RW0Gcw=
cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o=
cloud.google.com/go/bigquery v1.3.0 h1:sAbMqjY1PEQKZBWfbu6Y6bsupJ9c4QdHnzg/VvYTLcE=
cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE=
Expand Down Expand Up @@ -217,7 +215,6 @@ github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPg
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20200407044318-7d83b28da2e9 h1:K+lX49/3eURCE1IjlaZN//u6c+9nfDAMnyQ9E2dsJbY=
github.com/google/pprof v0.0.0-20200407044318-7d83b28da2e9/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
Expand Down
2 changes: 1 addition & 1 deletion store/mockstore/mocktikv/cop_handler_dag.go
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,6 @@ func fieldTypeFromPBColumn(col *tipb.ColumnInfo) *types.FieldType {
Flen: int(col.GetColumnLen()),
Decimal: int(col.GetDecimal()),
Elems: col.Elems,
Collate: mysql.Collations[uint8(collate.RestoreCollationIDIfNeeded(col.GetCollation()))],
Collate: collate.CollationID2Name(collate.RestoreCollationIDIfNeeded(col.GetCollation())),
}
}
2 changes: 1 addition & 1 deletion store/mockstore/unistore/cophandler/cop_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ func fieldTypeFromPBColumn(col *tipb.ColumnInfo) *types.FieldType {
Flen: int(col.GetColumnLen()),
Decimal: int(col.GetDecimal()),
Elems: col.Elems,
Collate: mysql.Collations[uint8(collate.RestoreCollationIDIfNeeded(col.GetCollation()))],
Collate: collate.CollationID2Name(collate.RestoreCollationIDIfNeeded(col.GetCollation())),
}
}

Expand Down
79 changes: 66 additions & 13 deletions util/collate/collate.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,17 @@ var (
ErrIllegalMix3Collation = dbterror.ClassExpression.NewStd(mysql.ErrCantAggregate3collations)
)

// DefaultLen is set for datum if the string datum don't know its length.
const (
// DefaultLen is the length set on a string datum that does not know its own length.
DefaultLen = 0
// b2Mask selects the data bits of the first byte of a 2-byte UTF-8 encoding;
// that byte starts with 110 and carries 5 bits of data.
b2Mask = 0x1F // 0001 1111
// b3Mask selects the data bits of the first byte of a 3-byte UTF-8 encoding;
// that byte starts with 1110 and carries 4 bits of data.
b3Mask = 0x0F // 0000 1111
// b4Mask selects the data bits of the first byte of a 4-byte UTF-8 encoding;
// that byte starts with 11110 and carries 3 bits of data.
b4Mask = 0x07 // 0000 0111
// mbMask selects the data bits of a non-first byte of a multi-byte encoding;
// such bytes start with 10 and carry 6 bits of data.
mbMask = 0x3F // 0011 1111
)

// Collator provides functionality for comparing strings for a given
Expand Down Expand Up @@ -164,16 +172,25 @@ func GetCollatorByID(id int) Collator {
// CollationID2Name returns the collation name for the given id.
// If the id is not found in the map, the default collation is returned.
func CollationID2Name(id int32) string {
name, ok := mysql.Collations[uint8(id)]
if !ok {
collation, err := charset.GetCollationByID(int(id))
if err != nil {
// TODO(bb7133): fix repeating logs when the following code is uncommented.
//logutil.BgLogger().Warn(
// "Unable to get collation name from ID, use default collation instead.",
// zap.Int32("ID", id),
// zap.Stack("stack"))
return mysql.DefaultCollationName
}
return name
return collation.Name
}

// CollationName2ID returns the ID of the collation with the given name.
// If charset.GetCollationByName cannot resolve the name, the default
// collation ID is returned instead.
func CollationName2ID(name string) int {
	coll, err := charset.GetCollationByName(name)
	if err != nil {
		// Unknown name: fall back to the default collation.
		return mysql.DefaultCollationID
	}
	return coll.ID
}

// GetCollationByName wraps charset.GetCollationByName, it checks the collation.
Expand Down Expand Up @@ -221,6 +238,40 @@ func truncateTailingSpace(str string) string {
return str
}

// sign reports the sign of i: -1 for a negative value, 1 for a positive
// value and 0 for zero.
func sign(i int) int {
	switch {
	case i > 0:
		return 1
	case i < 0:
		return -1
	default:
		return 0
	}
}

// decodeRune decodes by hand the UTF-8 encoded rune that starts at byte
// offset si of s. It returns the decoded rune and the offset of the byte
// that follows it. The length of the encoding is chosen from the first byte
// alone and no validation is performed, so a truncated sequence indexes past
// the end of s and panics.
func decodeRune(s string, si int) (r rune, newIndex int) {
	first := s[si]
	if first < 0x80 {
		// Single-byte encoding: the byte is the rune.
		return rune(first), si + 1
	}
	if first < 0xE0 {
		// 2-byte encoding: 5 data bits followed by 6.
		r = rune(first&b2Mask) << 6
		r |= rune(s[si+1] & mbMask)
		return r, si + 2
	}
	if first < 0xF0 {
		// 3-byte encoding: 4 data bits followed by 6 and 6.
		r = rune(first&b3Mask) << 12
		r |= rune(s[si+1]&mbMask) << 6
		r |= rune(s[si+2] & mbMask)
		return r, si + 3
	}
	// 4-byte encoding: 3 data bits followed by 6, 6 and 6.
	r = rune(first&b4Mask) << 18
	r |= rune(s[si+1]&mbMask) << 12
	r |= rune(s[si+2]&mbMask) << 6
	r |= rune(s[si+3] & mbMask)
	return r, si + 4
}

// IsCICollation returns whether the collation is case-insensitive
func IsCICollation(collate string) bool {
return collate == "utf8_general_ci" || collate == "utf8mb4_general_ci" ||
Expand All @@ -232,21 +283,23 @@ func init() {
newCollatorIDMap = make(map[int]Collator)

newCollatorMap["binary"] = &binCollator{}
newCollatorIDMap[int(mysql.CollationNames["binary"])] = &binCollator{}
newCollatorIDMap[CollationName2ID("binary")] = &binCollator{}
newCollatorMap["ascii_bin"] = &binPaddingCollator{}
newCollatorIDMap[int(mysql.CollationNames["ascii_bin"])] = &binPaddingCollator{}
newCollatorIDMap[CollationName2ID("ascii_bin")] = &binPaddingCollator{}
newCollatorMap["latin1_bin"] = &binPaddingCollator{}
newCollatorIDMap[int(mysql.CollationNames["latin1_bin"])] = &binPaddingCollator{}
newCollatorIDMap[CollationName2ID("latin1_bin")] = &binPaddingCollator{}
newCollatorMap["utf8mb4_bin"] = &binPaddingCollator{}
newCollatorIDMap[int(mysql.CollationNames["utf8mb4_bin"])] = &binPaddingCollator{}
newCollatorIDMap[CollationName2ID("utf8mb4_bin")] = &binPaddingCollator{}
newCollatorMap["utf8_bin"] = &binPaddingCollator{}
newCollatorIDMap[int(mysql.CollationNames["utf8_bin"])] = &binPaddingCollator{}
newCollatorIDMap[CollationName2ID("utf8_bin")] = &binPaddingCollator{}
newCollatorMap["utf8mb4_general_ci"] = &generalCICollator{}
newCollatorIDMap[int(mysql.CollationNames["utf8mb4_general_ci"])] = &generalCICollator{}
newCollatorIDMap[CollationName2ID("utf8mb4_general_ci")] = &generalCICollator{}
newCollatorMap["utf8_general_ci"] = &generalCICollator{}
newCollatorIDMap[int(mysql.CollationNames["utf8_general_ci"])] = &generalCICollator{}
newCollatorIDMap[CollationName2ID("utf8_general_ci")] = &generalCICollator{}
newCollatorMap["utf8mb4_unicode_ci"] = &unicodeCICollator{}
newCollatorIDMap[int(mysql.CollationNames["utf8mb4_unicode_ci"])] = &unicodeCICollator{}
newCollatorIDMap[CollationName2ID("utf8mb4_unicode_ci")] = &unicodeCICollator{}
newCollatorMap["utf8_unicode_ci"] = &unicodeCICollator{}
newCollatorIDMap[int(mysql.CollationNames["utf8_unicode_ci"])] = &unicodeCICollator{}
newCollatorIDMap[CollationName2ID("utf8_unicode_ci")] = &unicodeCICollator{}
newCollatorMap["utf8mb4_zh_pinyin_tidb_as_cs"] = &zhPinyinTiDBASCS{}
newCollatorIDMap[CollationName2ID("utf8mb4_zh_pinyin_tidb_as_cs")] = &zhPinyinTiDBASCS{}
}
4 changes: 4 additions & 0 deletions util/collate/collate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ func (s *testCollateSuite) TestGetCollator(c *C) {
c.Assert(GetCollator("utf8_general_ci"), FitsTypeOf, &generalCICollator{})
c.Assert(GetCollator("utf8mb4_unicode_ci"), FitsTypeOf, &unicodeCICollator{})
c.Assert(GetCollator("utf8_unicode_ci"), FitsTypeOf, &unicodeCICollator{})
c.Assert(GetCollator("utf8mb4_zh_pinyin_tidb_as_cs"), FitsTypeOf, &zhPinyinTiDBASCS{})
c.Assert(GetCollator("default_test"), FitsTypeOf, &binPaddingCollator{})
c.Assert(GetCollatorByID(63), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(46), FitsTypeOf, &binPaddingCollator{})
Expand All @@ -206,6 +207,7 @@ func (s *testCollateSuite) TestGetCollator(c *C) {
c.Assert(GetCollatorByID(33), FitsTypeOf, &generalCICollator{})
c.Assert(GetCollatorByID(224), FitsTypeOf, &unicodeCICollator{})
c.Assert(GetCollatorByID(192), FitsTypeOf, &unicodeCICollator{})
c.Assert(GetCollatorByID(2048), FitsTypeOf, &zhPinyinTiDBASCS{})
c.Assert(GetCollatorByID(9999), FitsTypeOf, &binPaddingCollator{})

SetNewCollationEnabledForTest(false)
Expand All @@ -216,6 +218,7 @@ func (s *testCollateSuite) TestGetCollator(c *C) {
c.Assert(GetCollator("utf8_general_ci"), FitsTypeOf, &binCollator{})
c.Assert(GetCollator("utf8mb4_unicode_ci"), FitsTypeOf, &binCollator{})
c.Assert(GetCollator("utf8_unicode_ci"), FitsTypeOf, &binCollator{})
c.Assert(GetCollator("utf8mb4_zh_pinyin_tidb_as_cs"), FitsTypeOf, &binCollator{})
c.Assert(GetCollator("default_test"), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(63), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(46), FitsTypeOf, &binCollator{})
Expand All @@ -224,5 +227,6 @@ func (s *testCollateSuite) TestGetCollator(c *C) {
c.Assert(GetCollatorByID(33), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(224), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(192), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(2048), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(9999), FitsTypeOf, &binCollator{})
}
9 changes: 0 additions & 9 deletions util/collate/general_ci.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,6 @@ import (
type generalCICollator struct {
}

// sign reports the sign of i: -1 for a negative value, 1 for a positive
// value and 0 for zero.
func sign(i int) int {
	switch {
	case i > 0:
		return 1
	case i < 0:
		return -1
	default:
		return 0
	}
}

// compilePatternGeneralCI handles escapes and wild cards, generate pattern weights and types.
// This function is modified from stringutil.CompilePattern.
func compilePatternGeneralCI(pattern string, escape byte) (patWeights []uint16, patTypes []byte) {
Expand Down
32 changes: 32 additions & 0 deletions util/collate/pinyin_tidb_as_cs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package collate

type zhPinyinTiDBASCS struct {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does 'ASCS' mean exactly?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I renamed it to zhPinyinTiDBASCSCollator and added comments. 'ASCS' stands for accent-sensitive and case-sensitive.

}

// Compare is part of the Collator interface.
// It is not implemented yet: calling it always panics.
func (g zhPinyinTiDBASCS) Compare(a, b string) int {
panic("implement me")
}

// Key is part of the Collator interface.
// It is not implemented yet: calling it always panics.
func (g zhPinyinTiDBASCS) Key(str string) []byte {
panic("implement me")
}

// Pattern is part of the Collator interface.
// It is not implemented yet: calling it always panics.
func (g zhPinyinTiDBASCS) Pattern() WildcardPattern {
panic("implement me")
}
36 changes: 0 additions & 36 deletions util/collate/unicode_ci.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,44 +20,8 @@ import (
const (
// longRune is a magic number indicating that the weight has 2 uint64 and
// should be fetched from `longRuneMap`.
longRune uint64 = 0xFFFD
// b2Mask selects the data bits of the first byte of a 2-byte UTF-8 encoding;
// that byte starts with 110 and carries 5 bits of data.
b2Mask = 0x1F // 0001 1111

// b3Mask selects the data bits of the first byte of a 3-byte UTF-8 encoding;
// that byte starts with 1110 and carries 4 bits of data.
b3Mask = 0x0F // 0000 1111

// b4Mask selects the data bits of the first byte of a 4-byte UTF-8 encoding;
// that byte starts with 11110 and carries 3 bits of data.
b4Mask = 0x07 // 0000 0111

// mbMask selects the data bits of a non-first byte of a multi-byte encoding;
// such bytes start with 10 and carry 6 bits of data.
mbMask = 0x3F // 0011 1111
)

// decodeRune decodes by hand the UTF-8 encoded rune that starts at byte
// offset si of s. It returns the decoded rune and the offset of the byte
// that follows it. The length of the encoding is chosen from the first byte
// alone and no validation is performed, so a truncated sequence indexes past
// the end of s and panics.
func decodeRune(s string, si int) (r rune, newIndex int) {
	first := s[si]
	if first < 0x80 {
		// Single-byte encoding: the byte is the rune.
		return rune(first), si + 1
	}
	if first < 0xE0 {
		// 2-byte encoding: 5 data bits followed by 6.
		r = rune(first&b2Mask) << 6
		r |= rune(s[si+1] & mbMask)
		return r, si + 2
	}
	if first < 0xF0 {
		// 3-byte encoding: 4 data bits followed by 6 and 6.
		r = rune(first&b3Mask) << 12
		r |= rune(s[si+1]&mbMask) << 6
		r |= rune(s[si+2] & mbMask)
		return r, si + 3
	}
	// 4-byte encoding: 3 data bits followed by 6, 6 and 6.
	r = rune(first&b4Mask) << 18
	r |= rune(s[si+1]&mbMask) << 12
	r |= rune(s[si+2]&mbMask) << 6
	r |= rune(s[si+3] & mbMask)
	return r, si + 4
}

// unicodeCICollator implements UCA. see http://unicode.org/reports/tr10/
type unicodeCICollator struct {
}
Expand Down