Skip to content

Commit

Permalink
icuregex: Cleanup unused code and tests
Browse files Browse the repository at this point in the history
Signed-off-by: Dirkjan Bussink <d.bussink@gmail.com>
  • Loading branch information
dbussink committed Sep 4, 2023
1 parent 99a1cf4 commit 53938e3
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 101 deletions.
17 changes: 5 additions & 12 deletions go/mysql/icuregex/icu_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,7 @@ func (tp *TestPattern) parseMatch(orig string) error {

func ParseTestFile(t testing.TB, filename string) []TestPattern {
f, err := os.Open(filename)
if err != nil {
t.Fatalf("failed to open test data: %v", err)
}
require.NoError(t, err)

defer f.Close()
scanner := bufio.NewScanner(f)
Expand Down Expand Up @@ -229,9 +227,8 @@ func ParseTestFile(t testing.TB, filename string) []TestPattern {
patterns = append(patterns, tp)
}

if err := scanner.Err(); err != nil {
t.Fatal(err)
}
err = scanner.Err()
require.NoError(t, err)
return patterns
}

Expand Down Expand Up @@ -394,9 +391,7 @@ func TestCornerCases(t *testing.T) {
for _, tc := range cases {
t.Run(tc.Pattern, func(t *testing.T) {
_, err := icuregex.CompileString(tc.Pattern, tc.Flags)
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)
})
}
}
Expand All @@ -407,9 +402,7 @@ func TestOne(t *testing.T) {
const Flags = 0

re, err := icuregex.CompileString(Pattern, Flags)
if err != nil {
t.Fatalf("compilation failed: %v", err)
}
require.NoError(t, err)

re.Dump(os.Stderr)

Expand Down
9 changes: 0 additions & 9 deletions go/mysql/icuregex/internal/ucase/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ var ucaseOnce sync.Once
// ucase groups the tables decoded from the ICU case data. loadUCase reads
// icudata.UCase under ucaseOnce, and readData fills in the slices below.
var ucase struct {
// trie is the UTrie2 returned by the trie() accessor.
trie *utrie.UTrie2
// exceptions is assigned by readData only when indexes[ixExcLength] > 0.
exceptions []uint16
// unfold is assigned by readData only when indexes[ixUnfoldLength] > 0.
unfold []uint16
}

func trie() *utrie.UTrie2 {
Expand All @@ -47,11 +46,6 @@ func exceptions() []uint16 {
return ucase.exceptions
}

// unfold returns ucase.unfold. It calls loadUCase first, which runs its
// loading closure through ucaseOnce.
func unfold() []uint16 {
loadUCase()
return ucase.unfold
}

func loadUCase() {
ucaseOnce.Do(func() {
b := udata.NewBytes(icudata.UCase)
Expand Down Expand Up @@ -102,9 +96,6 @@ func readData(bytes *udata.Bytes) error {
if n := indexes[ixExcLength]; n > 0 {
ucase.exceptions = bytes.Uint16Slice(n)
}
if n := indexes[ixUnfoldLength]; n > 0 {
ucase.unfold = bytes.Uint16Slice(n)
}

return nil
}
68 changes: 23 additions & 45 deletions go/mysql/icuregex/internal/uemoji/loader.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,27 @@
/*
© 2016 and later: Unicode, Inc. and others.
Copyright (C) 2004-2015, International Business Machines Corporation and others.
Copyright 2023 The Vitess Authors.
This file contains code derived from the Unicode Project's ICU library.
License & terms of use for the original code: http://www.unicode.org/copyright.html
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package uemoji

import (
"errors"
"sync"

"vitess.io/vitess/go/mysql/icuregex/internal/icudata"
Expand All @@ -11,8 +31,7 @@ import (

var uemojiOnce sync.Once
// uemoji holds the data that readData in this file decodes: the code point
// trie built by utrie.UcpTrieFromBytes and the per-property string tries.
// NOTE(review): the trie field is listed twice below; this looks like the old
// and new sides of a diff rendered together — confirm against the real file.
var uemoji struct {
trie *utrie.UcpTrie
stringTries []string
trie *utrie.UcpTrie
}

func loadUEmoji() {
Expand All @@ -24,26 +43,11 @@ func loadUEmoji() {
})
}

// stringTries returns uemoji.stringTries after calling loadUEmoji, so the
// data has been loaded before the slice is read.
func stringTries() []string {
loadUEmoji()
return uemoji.stringTries
}

// trie returns uemoji.trie after calling loadUEmoji, so the data has been
// loaded before the pointer is read.
func trie() *utrie.UcpTrie {
loadUEmoji()
return uemoji.trie
}

// Slots in the int32 index array (inIndexes) that readData reads from the
// start of the data.
const (
// ixCpTrieOffset is slot 0; readData stores the first int32 it reads there.
ixCpTrieOffset = 0
// ixBasicEmojiTrieOffset is the first slot readData visits when it fills
// uemoji.stringTries.
ixBasicEmojiTrieOffset = 4
// ixRgiEmojiZwjSequenceTrieOffset is the last slot readData visits in that
// loop; readData rejects data whose index count is not greater than it.
ixRgiEmojiZwjSequenceTrieOffset = 9
)

// getStringTrieIndex converts an index-array slot i into a position within
// uemoji.stringTries by subtracting ixBasicEmojiTrieOffset, so the slot
// ixBasicEmojiTrieOffset maps to position 0.
func getStringTrieIndex(i int) int {
return i - ixBasicEmojiTrieOffset
}

func readData(bytes *udata.Bytes) error {
err := bytes.ReadHeader(func(info *udata.DataInfo) bool {
return info.DataFormat[0] == 0x45 &&
Expand All @@ -56,36 +60,10 @@ func readData(bytes *udata.Bytes) error {
return err
}

startPos := bytes.Position()
cpTrieOffset := bytes.Int32()
indexesLength := cpTrieOffset / 4
if indexesLength <= ixRgiEmojiZwjSequenceTrieOffset {
return errors.New("not enough indexes")
}
inIndexes := make([]int32, indexesLength)
inIndexes[0] = cpTrieOffset
for i := 1; i < int(indexesLength); i++ {
inIndexes[i] = bytes.Int32()
}

i := ixCpTrieOffset + 1
nextOffset := inIndexes[i]
bytes.Skip(bytes.Int32() - 4)
uemoji.trie, err = utrie.UcpTrieFromBytes(bytes)
if err != nil {
return err
}
pos := bytes.Position() - startPos
bytes.Skip(nextOffset - pos)
offset := nextOffset
nextOffset = inIndexes[ixBasicEmojiTrieOffset]
bytes.Skip(nextOffset - offset)
uemoji.stringTries = make([]string, getStringTrieIndex(ixRgiEmojiZwjSequenceTrieOffset)+1)
for i = ixBasicEmojiTrieOffset; i <= ixRgiEmojiZwjSequenceTrieOffset; i++ {
offset = inIndexes[i]
nextOffset = inIndexes[i+1]
if nextOffset > offset {
uemoji.stringTries[getStringTrieIndex(i)] = string(bytes.Uint8Slice((nextOffset - offset) / 2))
}
}
return nil
}
21 changes: 21 additions & 0 deletions go/mysql/icuregex/internal/uemoji/uemoji.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,24 @@
/*
© 2016 and later: Unicode, Inc. and others.
Copyright (C) 2004-2015, International Business Machines Corporation and others.
Copyright 2023 The Vitess Authors.
This file contains code derived from the Unicode Project's ICU library.
License & terms of use for the original code: http://www.unicode.org/copyright.html
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package uemoji

import (
Expand Down
15 changes: 1 addition & 14 deletions go/mysql/icuregex/pattern.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,6 @@ func NewPattern(flags RegexpFlag) *Pattern {
}
}

// MustCompileString compiles the pattern in with the given flags by calling
// CompileString. It returns the compiled *Pattern on success and panics with
// the error reported by CompileString otherwise.
func MustCompileString(in string, flags RegexpFlag) *Pattern {
	compiled, compileErr := CompileString(in, flags)
	if compileErr == nil {
		return compiled
	}
	panic(compileErr)
}

func Compile(in []rune, flags RegexpFlag) (*Pattern, error) {
pat := NewPattern(flags)
cmp := newCompiler(pat)
Expand All @@ -77,12 +69,7 @@ func Compile(in []rune, flags RegexpFlag) (*Pattern, error) {
}

// CompileString compiles the pattern given as a string. It converts in to a
// rune slice and delegates to Compile, so both entry points share a single
// compilation path instead of repeating the NewPattern/newCompiler sequence.
//
// It returns the compiled *Pattern, or a nil pattern and the error reported
// by Compile.
func CompileString(in string, flags RegexpFlag) (*Pattern, error) {
	return Compile([]rune(in), flags)
}

func (p *Pattern) Match(input string) *Matcher {
Expand Down
31 changes: 13 additions & 18 deletions go/mysql/icuregex/perl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,25 @@ import (
"strconv"
"strings"
"testing"

"github.com/stretchr/testify/require"
)

func TestPerl(t *testing.T) {
f, err := os.Open("testdata/re_tests.txt")
if err != nil {
t.Fatalf("failed to open test data: %v", err)
}
require.NoError(t, err)
defer f.Close()

flagPat := MustCompileString(`('?)(.*)\1(.*)`, 0)
flagPat, err := CompileString(`('?)(.*)\1(.*)`, 0)
require.NoError(t, err)
flagMat := NewMatcher(flagPat)

groupsPat := MustCompileString(`\$([+\-])\[(\d+)\]`, 0)
groupsPat, err := CompileString(`\$([+\-])\[(\d+)\]`, 0)
require.NoError(t, err)
groupsMat := NewMatcher(groupsPat)

cgPat := MustCompileString(`\$(\d+)`, 0)
cgPat, err := CompileString(`\$(\d+)`, 0)
require.NoError(t, err)
cgMat := NewMatcher(cgPat)

group := func(m *Matcher, idx int) string {
Expand All @@ -52,9 +55,7 @@ func TestPerl(t *testing.T) {

lookingAt := func(m *Matcher) bool {
ok, err := m.LookingAt()
if err != nil {
t.Fatalf("failed to match with LookingAt(): %v", err)
}
require.NoError(t, err)
return ok
}

Expand All @@ -73,9 +74,7 @@ func TestPerl(t *testing.T) {

flagMat.ResetString(fields[0])
ok, _ := flagMat.Matches()
if !ok {
t.Fatalf("could not match pattern+flags (line %d)", lineno)
}
require.Truef(t, ok, "could not match pattern+flags (line %d)", lineno)

pattern, _ := flagMat.Group(2)
pattern = replacer.Replace(pattern)
Expand Down Expand Up @@ -142,9 +141,7 @@ func TestPerl(t *testing.T) {

case lookingAt(groupsMat):
groupNum, err := strconv.ParseInt(group(groupsMat, 2), 10, 32)
if err != nil {
t.Fatalf("failed to parse Perl pattern: %v", err)
}
require.NoError(t, err)

var matchPosition int
if group(groupsMat, 1) == "+" {
Expand All @@ -160,9 +157,7 @@ func TestPerl(t *testing.T) {

case lookingAt(cgMat):
groupNum, err := strconv.ParseInt(group(cgMat, 1), 10, 32)
if err != nil {
t.Fatalf("failed to parse Perl pattern: %v", err)
}
require.NoError(t, err)
result = append(result, group(testMat, int(groupNum))...)
perlExpr = perlExpr[cgMat.EndForGroup(0):]

Expand Down
6 changes: 3 additions & 3 deletions go/mysql/icuregex/sets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ package icuregex

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestStaticSetContents(t *testing.T) {
Expand All @@ -43,9 +45,7 @@ func TestStaticSetContents(t *testing.T) {
}

for setid, expected := range ExpectedSetSizes {
if got := staticPropertySets[setid].Len(); got != expected {
t.Fatalf("static set [%d] has wrong size: got %d, expected %d", setid, got, expected)
}
assert.Equalf(t, expected, staticPropertySets[setid].Len(), "static set [%d] has wrong size", setid)
}
}

Expand Down

0 comments on commit 53938e3

Please sign in to comment.