diff --git a/go/mysql/icuregex/icu_test.go b/go/mysql/icuregex/icu_test.go index 42c98dde5db..9e9be505df7 100644 --- a/go/mysql/icuregex/icu_test.go +++ b/go/mysql/icuregex/icu_test.go @@ -181,9 +181,7 @@ func (tp *TestPattern) parseMatch(orig string) error { func ParseTestFile(t testing.TB, filename string) []TestPattern { f, err := os.Open(filename) - if err != nil { - t.Fatalf("failed to open test data: %v", err) - } + require.NoError(t, err) defer f.Close() scanner := bufio.NewScanner(f) @@ -229,9 +227,8 @@ func ParseTestFile(t testing.TB, filename string) []TestPattern { patterns = append(patterns, tp) } - if err := scanner.Err(); err != nil { - t.Fatal(err) - } + err = scanner.Err() + require.NoError(t, err) return patterns } @@ -394,9 +391,7 @@ func TestCornerCases(t *testing.T) { for _, tc := range cases { t.Run(tc.Pattern, func(t *testing.T) { _, err := icuregex.CompileString(tc.Pattern, tc.Flags) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) }) } } @@ -407,9 +402,7 @@ func TestOne(t *testing.T) { const Flags = 0 re, err := icuregex.CompileString(Pattern, Flags) - if err != nil { - t.Fatalf("compilation failed: %v", err) - } + require.NoError(t, err) re.Dump(os.Stderr) diff --git a/go/mysql/icuregex/internal/ucase/loader.go b/go/mysql/icuregex/internal/ucase/loader.go index 83a6b6c59a7..2ac25cc0f6f 100644 --- a/go/mysql/icuregex/internal/ucase/loader.go +++ b/go/mysql/icuregex/internal/ucase/loader.go @@ -34,7 +34,6 @@ var ucaseOnce sync.Once var ucase struct { trie *utrie.UTrie2 exceptions []uint16 - unfold []uint16 } func trie() *utrie.UTrie2 { @@ -47,11 +46,6 @@ func exceptions() []uint16 { return ucase.exceptions } -func unfold() []uint16 { - loadUCase() - return ucase.unfold -} - func loadUCase() { ucaseOnce.Do(func() { b := udata.NewBytes(icudata.UCase) @@ -102,9 +96,6 @@ func readData(bytes *udata.Bytes) error { if n := indexes[ixExcLength]; n > 0 { ucase.exceptions = bytes.Uint16Slice(n) } - if n := indexes[ixUnfoldLength]; n > 0 { - ucase.unfold = bytes.Uint16Slice(n) - } return nil } diff --git a/go/mysql/icuregex/internal/uemoji/loader.go b/go/mysql/icuregex/internal/uemoji/loader.go index 392dabc5cb8..7015491d069 100644 --- a/go/mysql/icuregex/internal/uemoji/loader.go +++ b/go/mysql/icuregex/internal/uemoji/loader.go @@ -1,7 +1,27 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package uemoji import ( - "errors" "sync" "vitess.io/vitess/go/mysql/icuregex/internal/icudata" @@ -11,8 +31,7 @@ import ( var uemojiOnce sync.Once var uemoji struct { - trie *utrie.UcpTrie - stringTries []string + trie *utrie.UcpTrie } func loadUEmoji() { @@ -24,26 +43,11 @@ func loadUEmoji() { }) } -func stringTries() []string { - loadUEmoji() - return uemoji.stringTries -} - func trie() *utrie.UcpTrie { loadUEmoji() return uemoji.trie } -const ( - ixCpTrieOffset = 0 - ixBasicEmojiTrieOffset = 4 - ixRgiEmojiZwjSequenceTrieOffset = 9 -) - -func getStringTrieIndex(i int) int { - return i - ixBasicEmojiTrieOffset -} - func readData(bytes *udata.Bytes) error { err := bytes.ReadHeader(func(info *udata.DataInfo) bool { return info.DataFormat[0] == 0x45 && @@ -56,36 +60,10 @@ func readData(bytes *udata.Bytes) error { return err } - startPos := bytes.Position() - cpTrieOffset := bytes.Int32() - indexesLength := cpTrieOffset / 4 - if indexesLength <= ixRgiEmojiZwjSequenceTrieOffset { - return errors.New("not enough indexes") - } - inIndexes := make([]int32, indexesLength) - inIndexes[0] = cpTrieOffset - for i := 1; i < int(indexesLength); i++ { - inIndexes[i] = bytes.Int32() - } - - i := ixCpTrieOffset + 1 - nextOffset := inIndexes[i] + bytes.Skip(bytes.Int32() - 4) uemoji.trie, err = utrie.UcpTrieFromBytes(bytes) if err != nil { return err } - pos := bytes.Position() - startPos - bytes.Skip(nextOffset - pos) - offset := nextOffset - nextOffset = inIndexes[ixBasicEmojiTrieOffset] - bytes.Skip(nextOffset - offset) - uemoji.stringTries = make([]string, getStringTrieIndex(ixRgiEmojiZwjSequenceTrieOffset)+1) - for i = ixBasicEmojiTrieOffset; i <= ixRgiEmojiZwjSequenceTrieOffset; i++ { - offset = inIndexes[i] - nextOffset = inIndexes[i+1] - if nextOffset > offset { - uemoji.stringTries[getStringTrieIndex(i)] = string(bytes.Uint8Slice((nextOffset - offset) / 2)) - } - } return nil } diff --git a/go/mysql/icuregex/internal/uemoji/uemoji.go b/go/mysql/icuregex/internal/uemoji/uemoji.go index 2c1dbafed49..5cc89acd69a 100644 --- a/go/mysql/icuregex/internal/uemoji/uemoji.go +++ b/go/mysql/icuregex/internal/uemoji/uemoji.go @@ -1,3 +1,24 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package uemoji import ( diff --git a/go/mysql/icuregex/pattern.go b/go/mysql/icuregex/pattern.go index f0823a213d4..90e69b3f55d 100644 --- a/go/mysql/icuregex/pattern.go +++ b/go/mysql/icuregex/pattern.go @@ -59,14 +59,6 @@ func NewPattern(flags RegexpFlag) *Pattern { } } -func MustCompileString(in string, flags RegexpFlag) *Pattern { - pat, err := CompileString(in, flags) - if err != nil { - panic(err) - } - return pat -} - func Compile(in []rune, flags RegexpFlag) (*Pattern, error) { pat := NewPattern(flags) cmp := newCompiler(pat) @@ -77,12 +69,7 @@ func Compile(in []rune, flags RegexpFlag) (*Pattern, error) { } func CompileString(in string, flags RegexpFlag) (*Pattern, error) { - pat := NewPattern(flags) - cmp := newCompiler(pat) - if err := cmp.compile([]rune(in)); err != nil { - return nil, err - } - return pat, nil + return Compile([]rune(in), flags) } func (p *Pattern) Match(input string) *Matcher { diff --git a/go/mysql/icuregex/perl_test.go b/go/mysql/icuregex/perl_test.go index 0e7beda9fbd..e8dfc95d6b0 100644 --- a/go/mysql/icuregex/perl_test.go +++ b/go/mysql/icuregex/perl_test.go @@ -27,22 +27,25 @@ import ( "strconv" "strings" "testing" + + "github.com/stretchr/testify/require" ) func TestPerl(t *testing.T) { f, err := os.Open("testdata/re_tests.txt") - if err != nil { - t.Fatalf("failed to open test data: %v", err) - } + require.NoError(t, err) defer f.Close() - flagPat := MustCompileString(`('?)(.*)\1(.*)`, 0) + flagPat, err := CompileString(`('?)(.*)\1(.*)`, 0) + require.NoError(t, err) flagMat := NewMatcher(flagPat) - groupsPat := MustCompileString(`\$([+\-])\[(\d+)\]`, 0) + groupsPat, err := CompileString(`\$([+\-])\[(\d+)\]`, 0) + require.NoError(t, err) groupsMat := NewMatcher(groupsPat) - cgPat := MustCompileString(`\$(\d+)`, 0) + cgPat, err := CompileString(`\$(\d+)`, 0) + require.NoError(t, err) cgMat := NewMatcher(cgPat) group := func(m *Matcher, idx int) string { @@ -52,9 +55,7 @@ func TestPerl(t *testing.T) { lookingAt := func(m *Matcher) bool { ok, err := m.LookingAt() - if err != nil { - t.Fatalf("failed to match with LookingAt(): %v", err) - } + require.NoError(t, err) return ok } @@ -73,9 +74,7 @@ func TestPerl(t *testing.T) { flagMat.ResetString(fields[0]) ok, _ := flagMat.Matches() - if !ok { - t.Fatalf("could not match pattern+flags (line %d)", lineno) - } + require.Truef(t, ok, "could not match pattern+flags (line %d)", lineno) pattern, _ := flagMat.Group(2) pattern = replacer.Replace(pattern) @@ -142,9 +141,7 @@ func TestPerl(t *testing.T) { case lookingAt(groupsMat): groupNum, err := strconv.ParseInt(group(groupsMat, 2), 10, 32) - if err != nil { - t.Fatalf("failed to parse Perl pattern: %v", err) - } + require.NoError(t, err) var matchPosition int if group(groupsMat, 1) == "+" { @@ -160,9 +157,7 @@ func TestPerl(t *testing.T) { case lookingAt(cgMat): groupNum, err := strconv.ParseInt(group(cgMat, 1), 10, 32) - if err != nil { - t.Fatalf("failed to parse Perl pattern: %v", err) - } + require.NoError(t, err) result = append(result, group(testMat, int(groupNum))...) perlExpr = perlExpr[cgMat.EndForGroup(0):] diff --git a/go/mysql/icuregex/sets_test.go b/go/mysql/icuregex/sets_test.go index ffa6911341f..58da9882701 100644 --- a/go/mysql/icuregex/sets_test.go +++ b/go/mysql/icuregex/sets_test.go @@ -23,6 +23,8 @@ package icuregex import ( "testing" + + "github.com/stretchr/testify/assert" ) func TestStaticSetContents(t *testing.T) { @@ -43,9 +45,7 @@ func TestStaticSetContents(t *testing.T) { } for setid, expected := range ExpectedSetSizes { - if got := staticPropertySets[setid].Len(); got != expected { - t.Fatalf("static set [%d] has wrong size: got %d, expected %d", setid, got, expected) - } + assert.Equalf(t, expected, staticPropertySets[setid].Len(), "static set [%d] has wrong size", setid) } }