Skip to content

Commit

Permalink
JS: return Integer token in lexer instead of Decimal/BigInt tokens when an integer; remove lexer error for identifier after numeric (will be caught in parser)
Browse files Browse the repository at this point in the history
  • Loading branch information
tdewolff committed Jan 11, 2024
1 parent 3ced090 commit cd8aa3e
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 36 deletions.
26 changes: 11 additions & 15 deletions js/lex.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ func (l *Lexer) Next() (TokenType, []byte) {
prevLineTerminator := l.prevLineTerminator
l.prevLineTerminator = false

prevNumericLiteral := l.prevNumericLiteral
l.prevNumericLiteral = false

// study on 50x jQuery shows:
// spaces: 20k
// alpha: 16k
Expand Down Expand Up @@ -193,10 +190,7 @@ func (l *Lexer) Next() (TokenType, []byte) {
}
default:
if l.consumeIdentifierToken() {
if prevNumericLiteral {
l.err = parse.NewErrorLexer(l.r, "unexpected identifier after number")
return ErrorToken, nil
} else if keyword, ok := Keywords[string(l.r.Lexeme())]; ok {
if keyword, ok := Keywords[string(l.r.Lexeme())]; ok {
return keyword, l.r.Shift()
}
return IdentifierToken, l.r.Shift()
Expand Down Expand Up @@ -543,8 +537,8 @@ func (l *Lexer) consumeNumericToken() TokenType {
}
return HexadecimalToken
}
l.err = parse.NewErrorLexer(l.r, "invalid hexadecimal number")
return ErrorToken
l.r.Move(-1)
return IntegerToken
} else if l.r.Peek(0) == 'b' || l.r.Peek(0) == 'B' {
l.r.Move(1)
if l.consumeBinaryDigit() {
Expand All @@ -555,8 +549,8 @@ func (l *Lexer) consumeNumericToken() TokenType {
}
return BinaryToken
}
l.err = parse.NewErrorLexer(l.r, "invalid binary number")
return ErrorToken
l.r.Move(-1)
return IntegerToken
} else if l.r.Peek(0) == 'o' || l.r.Peek(0) == 'O' {
l.r.Move(1)
if l.consumeOctalDigit() {
Expand All @@ -567,11 +561,11 @@ func (l *Lexer) consumeNumericToken() TokenType {
}
return OctalToken
}
l.err = parse.NewErrorLexer(l.r, "invalid octal number")
return ErrorToken
l.r.Move(-1)
return IntegerToken
} else if l.r.Peek(0) == 'n' {
l.r.Move(1)
return BigIntToken
return IntegerToken
} else if '0' <= l.r.Peek(0) && l.r.Peek(0) <= '9' {
l.err = parse.NewErrorLexer(l.r, "legacy octal numbers are not supported")
return ErrorToken
Expand All @@ -597,7 +591,9 @@ func (l *Lexer) consumeNumericToken() TokenType {
}
} else if c == 'n' {
l.r.Move(1)
return BigIntToken
return IntegerToken
} else if c != 'e' && c != 'E' {
return IntegerToken
}
if c == 'e' || c == 'E' {
l.r.Move(1)
Expand Down
35 changes: 17 additions & 18 deletions js/lex_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ func TestTokens(t *testing.T) {
{" \t\v\f\u00A0\uFEFF\u2000", TTs{}}, // WhitespaceToken
{"\n\r\r\n\u2028\u2029", TTs{LineTerminatorToken}},
{"5.2 .04 1. 2.e3 0x0F 5e99", TTs{DecimalToken, DecimalToken, DecimalToken, DecimalToken, HexadecimalToken, DecimalToken}},
{"2_3 5_4.1_2 1_1n 0o2_3 0b1_1 0xF_F", TTs{DecimalToken, DecimalToken, BigIntToken, OctalToken, BinaryToken, HexadecimalToken}},
{"2_3 5_4.1_2 1_1n 0o2_3 0b1_1 0xF_F", TTs{IntegerToken, DecimalToken, IntegerToken, OctalToken, BinaryToken, HexadecimalToken}},
{"0o22 0b11", TTs{OctalToken, BinaryToken}},
{"0n 2345n 0o5n 0b1n 0x5n 435.333n", TTs{IntegerToken, IntegerToken, OctalToken, BinaryToken, HexadecimalToken, DecimalToken, ErrorToken}},
{"0n 2345n 0o5n 0b1n 0x5n 435.333n", TTs{IntegerToken, IntegerToken, OctalToken, BinaryToken, HexadecimalToken, DecimalToken, IdentifierToken}},
{"a = 'string'", TTs{IdentifierToken, EqToken, StringToken}},
{"/*comment*/ //comment", TTs{CommentToken, CommentToken}},
{"{ } ( ) [ ]", TTs{OpenBraceToken, CloseBraceToken, OpenParenToken, CloseParenToken, OpenBracketToken, CloseBracketToken}},
Expand All @@ -46,25 +46,25 @@ func TestTokens(t *testing.T) {

{"/*co\nm\u2028m/*ent*/ //co//mment\u2029//comment", TTs{CommentLineTerminatorToken, CommentToken, LineTerminatorToken, CommentToken}},
{"<!-", TTs{LtToken, NotToken, SubToken}},
{"1<!--2\n", TTs{DecimalToken, CommentToken, LineTerminatorToken}},
{"x=y-->10\n", TTs{IdentifierToken, EqToken, IdentifierToken, DecrToken, GtToken, DecimalToken, LineTerminatorToken}},
{"1<!--2\n", TTs{IntegerToken, CommentToken, LineTerminatorToken}},
{"x=y-->10\n", TTs{IdentifierToken, EqToken, IdentifierToken, DecrToken, GtToken, IntegerToken, LineTerminatorToken}},
{" /*comment*/ -->nothing\n", TTs{CommentToken, DecrToken, GtToken, IdentifierToken, LineTerminatorToken}},
{"1 /*comment\nmultiline*/ -->nothing\n", TTs{DecimalToken, CommentLineTerminatorToken, CommentToken, LineTerminatorToken}},
{"1 /*comment\nmultiline*/ -->nothing\n", TTs{IntegerToken, CommentLineTerminatorToken, CommentToken, LineTerminatorToken}},
{"$ _\u200C \\u2000 _\\u200C \u200C", TTs{IdentifierToken, IdentifierToken, IdentifierToken, IdentifierToken, ErrorToken}},
{">>>=>>>>=", TTs{GtGtGtEqToken, GtGtGtToken, GtEqToken}},
{"1/", TTs{DecimalToken, DivToken}},
{"1/=", TTs{DecimalToken, DivEqToken}},
{"1/", TTs{IntegerToken, DivToken}},
{"1/=", TTs{IntegerToken, DivEqToken}},
{"'str\\i\\'ng'", TTs{StringToken}},
{"'str\\\\'abc", TTs{StringToken, IdentifierToken}},
{"'str\\\ni\\\\u00A0ng'", TTs{StringToken}},
{"'str\u2028\u2029ing'", TTs{StringToken}},

{"0b0101 0o0707 0b17", TTs{BinaryToken, OctalToken, BinaryToken, DecimalToken}},
{"0b0101 0o0707 0b17", TTs{BinaryToken, OctalToken, BinaryToken, IntegerToken}},
{"`template`", TTs{TemplateToken}},
{"`a${x+y}b`", TTs{TemplateStartToken, IdentifierToken, AddToken, IdentifierToken, TemplateEndToken}},
{"`tmpl${x}tmpl${x}`", TTs{TemplateStartToken, IdentifierToken, TemplateMiddleToken, IdentifierToken, TemplateEndToken}},
{"`temp\nlate`", TTs{TemplateToken}},
{"`outer${{x: 10}}bar${ raw`nested${2}endnest` }end`", TTs{TemplateStartToken, OpenBraceToken, IdentifierToken, ColonToken, DecimalToken, CloseBraceToken, TemplateMiddleToken, IdentifierToken, TemplateStartToken, DecimalToken, TemplateEndToken, TemplateEndToken}},
{"`outer${{x: 10}}bar${ raw`nested${2}endnest` }end`", TTs{TemplateStartToken, OpenBraceToken, IdentifierToken, ColonToken, IntegerToken, CloseBraceToken, TemplateMiddleToken, IdentifierToken, TemplateStartToken, IntegerToken, TemplateEndToken, TemplateEndToken}},
{"`tmpl ${ a ? '' : `tmpl2 ${b ? 'b' : 'c'}` }`", TTs{TemplateStartToken, IdentifierToken, QuestionToken, StringToken, ColonToken, TemplateStartToken, IdentifierToken, QuestionToken, StringToken, ColonToken, StringToken, TemplateEndToken, TemplateEndToken}},

// early endings
Expand All @@ -78,9 +78,9 @@ func TestTokens(t *testing.T) {
{"\\u002", TTs{ErrorToken}},
{"`template", TTs{ErrorToken}},
{"`template${x}template", TTs{TemplateStartToken, IdentifierToken, ErrorToken}},
{"a++=1", TTs{IdentifierToken, IncrToken, EqToken, DecimalToken}},
{"a++==1", TTs{IdentifierToken, IncrToken, EqEqToken, DecimalToken}},
{"a++===1", TTs{IdentifierToken, IncrToken, EqEqEqToken, DecimalToken}},
{"a++=1", TTs{IdentifierToken, IncrToken, EqToken, IntegerToken}},
{"a++==1", TTs{IdentifierToken, IncrToken, EqEqToken, IntegerToken}},
{"a++===1", TTs{IdentifierToken, IncrToken, EqEqEqToken, IntegerToken}},

// null characters
{"'string\x00'return", TTs{StringToken, ReturnToken}},
Expand All @@ -90,13 +90,13 @@ func TestTokens(t *testing.T) {
{"`template\\\x00`return", TTs{TemplateToken, ReturnToken}},

// numbers
{"0xg", TTs{ErrorToken}},
{"0.f", TTs{DecimalToken, ErrorToken}},
{"0bg", TTs{ErrorToken}},
{"0og", TTs{ErrorToken}},
{"0xg", TTs{IntegerToken, IdentifierToken}},
{"0.f", TTs{DecimalToken, IdentifierToken}},
{"0bg", TTs{IntegerToken, IdentifierToken}},
{"0og", TTs{IntegerToken, IdentifierToken}},
{"010", TTs{ErrorToken}}, // Decimal(0) Decimal(10) Identifier(xF)
{"50e+-0", TTs{ErrorToken}},
{"5.a", TTs{DecimalToken, ErrorToken}},
{"5.a", TTs{DecimalToken, IdentifierToken}},
{"5..a", TTs{DecimalToken, DotToken, IdentifierToken}},

// coverage
Expand Down Expand Up @@ -252,7 +252,6 @@ func TestLexerErrors(t *testing.T) {
{"\x7f", "unexpected 0x7F"},
{"\u200F", "unexpected U+200F"},
{"\u2010", "unexpected \u2010"},
{"5a", "unexpected identifier after number"},
{".0E", "invalid number"},
{`"a`, "unterminated string literal"},
{"'a\nb'", "unterminated string literal"},
Expand Down
1 change: 1 addition & 0 deletions js/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ func TestParseError(t *testing.T) {
js string
err string
}{
{"5a", "unexpected a in expression"},
{"{a", "unexpected EOF"},
{"if", "expected ( instead of EOF in if statement"},
{"if(a", "expected ) instead of EOF in if statement"},
Expand Down
6 changes: 3 additions & 3 deletions js/tokentype.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ const (
BinaryToken
OctalToken
HexadecimalToken
BigIntToken
IntegerToken
)

// Punctuator token values.
Expand Down Expand Up @@ -369,8 +369,8 @@ func (tt TokenType) Bytes() []byte {
return []byte("Octal")
case HexadecimalToken:
return []byte("Hexadecimal")
case BigIntToken:
return []byte("BigInt")
case IntegerToken:
return []byte("Integer")
case PunctuatorToken:
return []byte("Punctuator")
case OpenBraceToken:
Expand Down

0 comments on commit cd8aa3e

Please sign in to comment.