Skip to content

Commit

Permalink
unicode: correctly handle negative runes
Browse files Browse the repository at this point in the history
Is and isExcludingLatin did not handle negative runes when dispatching
to is16. TestNegativeRune covers this along with the existing uint32
casts in IsGraphic, etc. (For tests, I picked the smallest non-Latin-1
code point in each range.)

Updates #43254

Change-Id: I17261b91f0d2b5b5125d19219411b45c480df74f
Reviewed-on: https://go-review.googlesource.com/c/go/+/280493
Run-TryBot: Rob Pike <r@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Rob Pike <r@golang.org>
Trust: Emmanuel Odeke <emmanuel@orijtech.com>
  • Loading branch information
davidben authored and odeke-em committed Feb 24, 2021
1 parent 0694fb3 commit 3780529
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 2 deletions.
6 changes: 4 additions & 2 deletions src/unicode/letter.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ func is32(ranges []Range32, r uint32) bool {
// Is reports whether the rune is in the specified table of ranges.
func Is(rangeTab *RangeTable, r rune) bool {
r16 := rangeTab.R16
if len(r16) > 0 && r <= rune(r16[len(r16)-1].Hi) {
// Compare as uint32 to correctly handle negative runes.
if len(r16) > 0 && uint32(r) <= uint32(r16[len(r16)-1].Hi) {
return is16(r16, uint16(r))
}
r32 := rangeTab.R32
Expand All @@ -166,7 +167,8 @@ func Is(rangeTab *RangeTable, r rune) bool {

func isExcludingLatin(rangeTab *RangeTable, r rune) bool {
r16 := rangeTab.R16
if off := rangeTab.LatinOffset; len(r16) > off && r <= rune(r16[len(r16)-1].Hi) {
// Compare as uint32 to correctly handle negative runes.
if off := rangeTab.LatinOffset; len(r16) > off && uint32(r) <= uint32(r16[len(r16)-1].Hi) {
return is16(r16[off:], uint16(r))
}
r32 := rangeTab.R32
Expand Down
79 changes: 79 additions & 0 deletions src/unicode/letter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -563,3 +563,82 @@ func TestSpecialCaseNoMapping(t *testing.T) {
t.Errorf("got %q; want %q", got, want)
}
}

func TestNegativeRune(t *testing.T) {
// Issue 43254
// These tests cover negative rune handling by testing values which,
// when cast to uint8 or uint16, look like a particular valid rune.
// This package has Latin-1-specific optimizations, so we test all of
// Latin-1 and representative non-Latin-1 values in the character
// categories covered by IsGraphic, etc.
nonLatin1 := []uint32{
// Lu: LATIN CAPITAL LETTER A WITH MACRON
0x0100,
// Ll: LATIN SMALL LETTER A WITH MACRON
0x0101,
// Lt: LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
0x01C5,
// M: COMBINING GRAVE ACCENT
0x0300,
// Nd: ARABIC-INDIC DIGIT ZERO
0x0660,
// P: GREEK QUESTION MARK
0x037E,
// S: MODIFIER LETTER LEFT ARROWHEAD
0x02C2,
// Z: OGHAM SPACE MARK
0x1680,
}
for i := 0; i < MaxLatin1+len(nonLatin1); i++ {
base := uint32(i)
if i >= MaxLatin1 {
base = nonLatin1[i-MaxLatin1]
}

// Note r is negative, but uint8(r) == uint8(base) and
// uint16(r) == uint16(base).
r := rune(base - 1<<31)
if Is(Letter, r) {
t.Errorf("Is(Letter, 0x%x - 1<<31) = true, want false", base)
}
if IsControl(r) {
t.Errorf("IsControl(0x%x - 1<<31) = true, want false", base)
}
if IsDigit(r) {
t.Errorf("IsDigit(0x%x - 1<<31) = true, want false", base)
}
if IsGraphic(r) {
t.Errorf("IsGraphic(0x%x - 1<<31) = true, want false", base)
}
if IsLetter(r) {
t.Errorf("IsLetter(0x%x - 1<<31) = true, want false", base)
}
if IsLower(r) {
t.Errorf("IsLower(0x%x - 1<<31) = true, want false", base)
}
if IsMark(r) {
t.Errorf("IsMark(0x%x - 1<<31) = true, want false", base)
}
if IsNumber(r) {
t.Errorf("IsNumber(0x%x - 1<<31) = true, want false", base)
}
if IsPrint(r) {
t.Errorf("IsPrint(0x%x - 1<<31) = true, want false", base)
}
if IsPunct(r) {
t.Errorf("IsPunct(0x%x - 1<<31) = true, want false", base)
}
if IsSpace(r) {
t.Errorf("IsSpace(0x%x - 1<<31) = true, want false", base)
}
if IsSymbol(r) {
t.Errorf("IsSymbol(0x%x - 1<<31) = true, want false", base)
}
if IsTitle(r) {
t.Errorf("IsTitle(0x%x - 1<<31) = true, want false", base)
}
if IsUpper(r) {
t.Errorf("IsUpper(0x%x - 1<<31) = true, want false", base)
}
}
}

0 comments on commit 3780529

Please sign in to comment.