Skip to content

Commit

Permalink
text/scanner: don't liberally consume (invalid) floats or underbars
Browse files Browse the repository at this point in the history
This is a follow-up on https://golang.org/cl/161199 which introduced
the new Go 2 number literals to text/scanner.

That change introduced a bug by allowing decimal and hexadecimal floats
to be consumed even if the scanner was not configured to accept floats.

This CL changes the code to not consume a radix dot '.' or exponent
unless the scanner is configured to accept floats.

This CL also introduces a new mode "AllowNumberbars" which controls
whether underbars '_' are permitted as digit separators in numbers
or not.

There is a possibility that we may need to refine text/scanner
further (e.g., the Float mode now includes hexadecimal floats
which it didn't recognize before). We're very early in the cycle,
so let's see how it goes.

RELNOTE=yes

Updates #12711.
Updates #19308.
Updates #28493.
Updates #29008.

Fixes #30320.

Change-Id: I6481d314f0384e09ef6803ffad38dc529b1e89a3
Reviewed-on: https://go-review.googlesource.com/c/163079
Reviewed-by: Ian Lance Taylor <iant@golang.org>
  • Loading branch information
griesemer committed Feb 20, 2019
1 parent 153c0da commit 34fb585
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 21 deletions.
1 change: 1 addition & 0 deletions api/except.txt
Original file line number Diff line number Diff line change
Expand Up @@ -457,3 +457,4 @@ pkg syscall (freebsd-arm-cgo), type Stat_t struct, Nlink uint16
pkg syscall (freebsd-arm-cgo), type Stat_t struct, Rdev uint32
pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntfromname [88]int8
pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntonname [88]int8
pkg text/scanner, const GoTokens = 1012
3 changes: 3 additions & 0 deletions api/next.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pkg text/scanner, const AllowNumberbars = 1024
pkg text/scanner, const AllowNumberbars ideal-int
pkg text/scanner, const GoTokens = 2036
48 changes: 27 additions & 21 deletions src/text/scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,16 @@ func (pos Position) String() string {
// "foo" is scanned as the token sequence '"' Ident '"'.
//
const (
ScanIdents = 1 << -Ident
ScanInts = 1 << -Int
ScanFloats = 1 << -Float // includes Ints
ScanChars = 1 << -Char
ScanStrings = 1 << -String
ScanRawStrings = 1 << -RawString
ScanComments = 1 << -Comment
SkipComments = 1 << -skipComment // if set with ScanComments, comments become white space
GoTokens = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments
ScanIdents = 1 << -Ident
ScanInts = 1 << -Int
ScanFloats = 1 << -Float // includes Ints and hexadecimal floats
ScanChars = 1 << -Char
ScanStrings = 1 << -String
ScanRawStrings = 1 << -RawString
ScanComments = 1 << -Comment
SkipComments = 1 << -skipComment // if set with ScanComments, comments become white space
AllowNumberbars = 1 << -allowNumberbars // if set, number literals may contain underbars as digit separators
GoTokens = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments | AllowNumberbars
)

// The result of Scan is one of these tokens or a Unicode character.
Expand All @@ -80,7 +81,10 @@ const (
String
RawString
Comment

// internal use only
skipComment
allowNumberbars
)

var tokenString = map[rune]string{
Expand Down Expand Up @@ -359,7 +363,8 @@ func lower(ch rune) rune { return ('a' - 'A') | ch } // returns lower-case c
func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }
func isHex(ch rune) bool { return '0' <= ch && ch <= '9' || 'a' <= lower(ch) && lower(ch) <= 'f' }

// digits accepts the sequence { digit | '_' } starting with ch0.
// digits accepts the sequence { digit } (if AllowNumberbars is not set)
// or { digit | '_' } (if AllowNumberbars is set), starting with ch0.
// If base <= 10, digits accepts any decimal digit but records
// the first invalid digit >= base in *invalid if *invalid == 0.
// digits returns the first rune that is not part of the sequence
Expand All @@ -369,7 +374,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int
ch = ch0
if base <= 10 {
max := rune('0' + base)
for isDecimal(ch) || ch == '_' {
for isDecimal(ch) || ch == '_' && s.Mode&AllowNumberbars != 0 {
ds := 1
if ch == '_' {
ds = 2
Expand All @@ -380,7 +385,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int
ch = s.next()
}
} else {
for isHex(ch) || ch == '_' {
for isHex(ch) || ch == '_' && s.Mode&AllowNumberbars != 0 {
ds := 1
if ch == '_' {
ds = 2
Expand All @@ -392,7 +397,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int
return
}

func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
func (s *Scanner) scanNumber(ch rune, seenDot bool) (rune, rune) {
base := 10 // number base
prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
digsep := 0 // bit 0: digit present, bit 1: '_' present
Expand All @@ -401,7 +406,7 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
// integer part
var tok rune
var ds int
if integerPart {
if !seenDot {
tok = Int
if ch == '0' {
ch = s.next()
Expand All @@ -422,17 +427,18 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
}
ch, ds = s.digits(ch, base, &invalid)
digsep |= ds
if ch == '.' && s.Mode&ScanFloats != 0 {
ch = s.next()
seenDot = true
}
}

// fractional part
if !integerPart || ch == '.' {
if seenDot {
tok = Float
if prefix == 'o' || prefix == 'b' {
s.error("invalid radix point in " + litname(prefix))
}
if ch == '.' {
ch = s.next()
}
ch, ds = s.digits(ch, base, &invalid)
digsep |= ds
}
Expand All @@ -442,7 +448,7 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
}

// exponent
if e := lower(ch); e == 'e' || e == 'p' {
if e := lower(ch); (e == 'e' || e == 'p') && s.Mode&ScanFloats != 0 {
switch {
case e == 'e' && prefix != 0 && prefix != '0':
s.errorf("%q exponent requires decimal mantissa", ch)
Expand Down Expand Up @@ -682,7 +688,7 @@ redo:
}
case isDecimal(ch):
if s.Mode&(ScanInts|ScanFloats) != 0 {
tok, ch = s.scanNumber(ch, true)
tok, ch = s.scanNumber(ch, false)
} else {
ch = s.next()
}
Expand All @@ -705,7 +711,7 @@ redo:
case '.':
ch = s.next()
if isDecimal(ch) && s.Mode&ScanFloats != 0 {
tok, ch = s.scanNumber(ch, false)
tok, ch = s.scanNumber(ch, true)
}
case '/':
ch = s.next()
Expand Down
37 changes: 37 additions & 0 deletions src/text/scanner/scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -877,3 +877,40 @@ func TestNumbers(t *testing.T) {
}
}
}

// TestIssue30320 checks which number tokens the scanner produces for
// inputs that mix digits with dots, exponents and underbars, under
// different Mode settings. Each case feeds its input through extractInts
// and compares the space-separated token texts with the expected string.
func TestIssue30320(t *testing.T) {
	type testCase struct {
		in, want string
		mode     uint
	}
	cases := []testCase{
		{"foo01.bar31.xx-0-1-1-0", "01 31 0 1 1 0", ScanInts},
		{"foo0/12/0/5.67", "0 12 0 5 67", ScanInts},
		{"xxx1e0yyy", "1 0", ScanInts},
		{"1_2", "1 2", ScanInts}, // don't consume _ as part of a number if not explicitly enabled
		{"1_2", "1_2", ScanInts | AllowNumberbars},
		{"xxx1.0yyy2e3ee", "1 0 2 3", ScanInts},
		{"xxx1.0yyy2e3ee", "1.0 2e3", ScanFloats},
	}

	for _, tc := range cases {
		if got := extractInts(tc.in, tc.mode); got != tc.want {
			t.Errorf("%q: got %q; want %q", tc.in, got, tc.want)
		}
	}
}

// extractInts scans t with a Scanner whose Mode is set to mode and returns
// the text of every Int and Float token, in the order scanned, separated
// by single spaces. Tokens of any other kind are ignored. The result is
// the empty string if no number token is found.
func extractInts(t string, mode uint) (res string) {
	var sc Scanner
	sc.Init(strings.NewReader(t))
	sc.Mode = mode // apply the caller's mode after Init

	var numbers []string
	for tok := sc.Scan(); tok != EOF; tok = sc.Scan() {
		if tok == Int || tok == Float {
			numbers = append(numbers, sc.TokenText())
		}
	}
	return strings.Join(numbers, " ")
}

0 comments on commit 34fb585

Please sign in to comment.