Skip to content

Commit

Permalink
expression, util: add KeyWithoutTrimRightSpace for collator (#35475) (
Browse files Browse the repository at this point in the history
#35518)

close #35451
  • Loading branch information
ti-srebot authored Aug 3, 2022
1 parent 450db87 commit f28371f
Show file tree
Hide file tree
Showing 9 changed files with 48 additions and 8 deletions.
7 changes: 7 additions & 0 deletions expression/integration_serial_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -843,9 +843,16 @@ func TestCollateStringFunction(t *testing.T) {
tk.MustQuery("select locate('S', 'a' collate utf8mb4_general_ci);").Check(testkit.Rows("0"))
// MySQL returns 0 here. I believe this is a bug in MySQL, since 'ß' == 's' under the utf8mb4_general_ci collation.
tk.MustQuery("select locate('ß', 's' collate utf8mb4_general_ci);").Check(testkit.Rows("1"))
tk.MustQuery("select locate('world', 'hello world' collate utf8mb4_general_ci);").Check(testkit.Rows("7"))
tk.MustQuery("select locate(' ', 'hello world' collate utf8mb4_general_ci);").Check(testkit.Rows("6"))
tk.MustQuery("select locate(' ', 'hello world' collate utf8mb4_general_ci);").Check(testkit.Rows("0"))

tk.MustQuery("select locate('S', 's' collate utf8mb4_unicode_ci);").Check(testkit.Rows("1"))
tk.MustQuery("select locate('S', 'a' collate utf8mb4_unicode_ci);").Check(testkit.Rows("0"))
tk.MustQuery("select locate('ß', 'ss' collate utf8mb4_unicode_ci);").Check(testkit.Rows("1"))
tk.MustQuery("select locate('world', 'hello world' collate utf8mb4_unicode_ci);").Check(testkit.Rows("7"))
tk.MustQuery("select locate(' ', 'hello world' collate utf8mb4_unicode_ci);").Check(testkit.Rows("6"))
tk.MustQuery("select locate(' ', 'hello world' collate utf8mb4_unicode_ci);").Check(testkit.Rows("0"))

tk.MustExec("truncate table t1;")
tk.MustExec("insert into t1 (a) values (1);")
Expand Down
8 changes: 4 additions & 4 deletions expression/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,8 +392,8 @@ func SubstituteCorCol2Constant(expr Expression) (Expression, error) {

func locateStringWithCollation(str, substr, coll string) int64 {
collator := collate.GetCollator(coll)
strKey := collator.Key(str)
subStrKey := collator.Key(substr)
strKey := collator.KeyWithoutTrimRightSpace(str)
subStrKey := collator.KeyWithoutTrimRightSpace(substr)

index := bytes.Index(strKey, subStrKey)
if index == -1 || index == 0 {
Expand All @@ -405,8 +405,8 @@ func locateStringWithCollation(str, substr, coll string) int64 {
for {
r, size := utf8.DecodeRuneInString(str)
count += 1
index -= len(collator.Key(string(r)))
if index == 0 {
index -= len(collator.KeyWithoutTrimRightSpace(string(r)))
if index <= 0 {
return count + 1
}
str = str[size:]
Expand Down
10 changes: 10 additions & 0 deletions util/collate/bin.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ func (bc *binCollator) Key(str string) []byte {
return []byte(str)
}

// KeyWithoutTrimRightSpace implements Collator interface.
// For the binary collation the key is simply the bytes of str, left untouched.
func (bc *binCollator) KeyWithoutTrimRightSpace(str string) []byte {
	key := []byte(str)
	return key
}

// Pattern implements Collator interface.
func (bc *binCollator) Pattern() WildcardPattern {
return &binPattern{}
Expand All @@ -49,6 +54,11 @@ func (bpc *binPaddingCollator) Key(str string) []byte {
return []byte(truncateTailingSpace(str))
}

// KeyWithoutTrimRightSpace implements Collator interface.
// Unlike Key, str is not passed through truncateTailingSpace: its bytes are
// returned exactly as given.
func (bpc *binPaddingCollator) KeyWithoutTrimRightSpace(str string) []byte {
	raw := []byte(str)
	return raw
}

// Pattern implements Collator interface.
// Notice that trailing spaces are significant.
func (bpc *binPaddingCollator) Pattern() WildcardPattern {
Expand Down
2 changes: 2 additions & 0 deletions util/collate/collate.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ type Collator interface {
Compare(a, b string) int
// Key returns the collate key for str. If the collation is padding, make sure the PadLen >= len(rune[]str) in opt.
Key(str string) []byte
// KeyWithoutTrimRightSpace returns the collate key for str. Unlike Key, str is not trimmed before the key is built.
KeyWithoutTrimRightSpace(str string) []byte
// Pattern get a collation-aware WildcardPattern.
Pattern() WildcardPattern
}
Expand Down
6 changes: 5 additions & 1 deletion util/collate/gbk_bin.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ func (g *gbkBinCollator) Compare(a, b string) int {

// Key implements Collator interface.
// It builds the key from str after truncateTailingSpace has been applied.
func (g *gbkBinCollator) Key(str string) []byte {
// NOTE(review): this assignment looks like the removed side of the diff hunk,
// superseded by the return below — confirm before treating both as live code.
str = truncateTailingSpace(str)
return g.KeyWithoutTrimRightSpace(truncateTailingSpace(str))
}

// KeyWithoutTrimRightSpace implements Collator interface.
func (g *gbkBinCollator) KeyWithoutTrimRightSpace(str string) []byte {
buf := make([]byte, 0, len(str))
for len(str) > 0 {
l := runeLen(str[0])
Expand Down
6 changes: 5 additions & 1 deletion util/collate/gbk_chinese_ci.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,11 @@ func (g *gbkChineseCICollator) Compare(a, b string) int {

// Key implements Collator interface.
// It builds the key from str after truncateTailingSpace has been applied.
func (g *gbkChineseCICollator) Key(str string) []byte {
// NOTE(review): this assignment looks like the removed side of the diff hunk,
// superseded by the return below — confirm before treating both as live code.
str = truncateTailingSpace(str)
return g.KeyWithoutTrimRightSpace(truncateTailingSpace(str))
}

// KeyWithoutTrimRightSpace implements Collator interface.
func (g *gbkChineseCICollator) KeyWithoutTrimRightSpace(str string) []byte {
buf := make([]byte, 0, len(str)*2)
i := 0
r := rune(0)
Expand Down
6 changes: 5 additions & 1 deletion util/collate/general_ci.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,11 @@ func (gc *generalCICollator) Compare(a, b string) int {

// Key implements Collator interface.
// It builds the key from str after truncateTailingSpace has been applied.
func (gc *generalCICollator) Key(str string) []byte {
// NOTE(review): this assignment looks like the removed side of the diff hunk,
// superseded by the return below — confirm before treating both as live code.
str = truncateTailingSpace(str)
return gc.KeyWithoutTrimRightSpace(truncateTailingSpace(str))
}

// KeyWithoutTrimRightSpace implements Collator interface.
func (gc *generalCICollator) KeyWithoutTrimRightSpace(str string) []byte {
buf := make([]byte, 0, len(str))
i := 0
r := rune(0)
Expand Down
5 changes: 5 additions & 0 deletions util/collate/pinyin_tidb_as_cs.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ func (py *zhPinyinTiDBASCSCollator) Key(str string) []byte {
panic("implement me")
}

// KeyWithoutTrimRightSpace is part of the Collator interface. It is not
// implemented for this collator yet and panics when called.
func (py *zhPinyinTiDBASCSCollator) KeyWithoutTrimRightSpace(str string) []byte {
panic("implement me")
}

// Pattern is part of the Collator interface; it is not implemented yet.
func (py *zhPinyinTiDBASCSCollator) Pattern() WildcardPattern {
panic("implement me")
Expand Down
6 changes: 5 additions & 1 deletion util/collate/unicode_ci.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,11 @@ func (uc *unicodeCICollator) Compare(a, b string) int {

// Key implements Collator interface.
// It builds the key from str after truncateTailingSpace has been applied.
func (uc *unicodeCICollator) Key(str string) []byte {
// NOTE(review): this assignment looks like the removed side of the diff hunk,
// superseded by the return below — confirm before treating both as live code.
str = truncateTailingSpace(str)
return uc.KeyWithoutTrimRightSpace(truncateTailingSpace(str))
}

// KeyWithoutTrimRightSpace implements Collator interface.
func (uc *unicodeCICollator) KeyWithoutTrimRightSpace(str string) []byte {
buf := make([]byte, 0, len(str)*2)
r := rune(0)
si := 0 // decode index of s
Expand Down

0 comments on commit f28371f

Please sign in to comment.