Skip to content

Commit

Permalink
Nitcc occasional tokens #2845
Browse files Browse the repository at this point in the history
Introduce the keyword `Occasional`, which makes unexpected occurrences of the listed tokens be ignored instead of producing syntax errors.
This makes it easier to parse some languages where spaces and/or new lines are sometimes meaningful but not always.
  • Loading branch information
privat authored Jul 25, 2024
2 parents 8fae9fc + d5a0469 commit e8e9cca
Show file tree
Hide file tree
Showing 38 changed files with 515 additions and 111 deletions.
3 changes: 3 additions & 0 deletions contrib/nitcc/src/grammar.nit
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,9 @@ class Token
do
return to_s
end

# Unexpected occurrences of this token do not cause a syntax error but are ignored.
var occasional = false is writable
end

# An alternative with a cursor (dot) before an element
Expand Down
18 changes: 11 additions & 7 deletions contrib/nitcc/src/lrautomaton.nit
Original file line number Diff line number Diff line change
Expand Up @@ -365,17 +365,17 @@ redef class Generator
end

add "redef class NToken"
add "\t# Default action on any state"
add "\tprivate fun action_default(parser: Parser) do"
add "\t\tparser.parse_error"
add "\tend"

for s in states do
if not s.need_guard then continue
add "\t# guarded action for state {s}"
add "\t# {s.shifts.length} shift(s) and {s.reduces.length} reduce(s)"
add "\tprivate fun action_s{s.number}(parser: Parser) do"
if s.reduces.length != 1 then
add "\t\tparser.parse_error"
else
add "\t\treduce_{s.reduces.first.cname}(parser)"
#gen_reduce_to_nit(s.reduces.first)
end
add "\t\taction_default(parser)"
add "\tend"
end
add "end"
Expand All @@ -390,12 +390,16 @@ redef class Generator
end
for s in t.reduces do
if not s.need_guard then continue
if s.reduces.length <= 1 then continue
add "\tredef fun action_s{s.number}(parser) do"
add "\t\treduce_{s.guarded_reduce[t].first.alt.cname}(parser)"
#gen_reduce_to_nit(s.guarded_reduce[t].first.alt)
add "\tend"
end
if t.occasional then
add "\tredef fun action_default(parser) do"
add "\t\tparser.get_token"
add "\tend"
end
add "end"
end

Expand Down
4 changes: 2 additions & 2 deletions contrib/nitcc/src/nitcc.sablecc
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,10 @@ text {-> re} =
{ch_dec:} ch_dec |
{ch_hex:} ch_hex ;

parser_part = 'Parser' ign? rej? prod*;
parser_part = 'Parser' ign? occasional? rej? prod*;

ign = 'Ignored' elem_list ';' ;

occasional = 'Occasional' elem_list ';' ;
rej = 'Rejected' elem_list ';' ;

prod = id ptrans? '=' alts priority* ';';
Expand Down
22 changes: 22 additions & 0 deletions contrib/nitcc/src/nitcc_semantic.nit
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@ private class CheckNameVisitor
# Known ignored tokens
var ignoreds = new Array[Element]

# Known occasional tokens
var occasionals = new Array[Element]

# Known rejected tokens
var rejecteds = new Array[Element]

Expand Down Expand Up @@ -244,6 +247,25 @@ redef class Nign
end
end

redef class Noccasional
	# Flag every token listed in an `Occasional` declaration as occasional.
	#
	# Only tokens can be occasional: if a production is listed, an error is
	# printed and the program exits with status 1.
	redef fun accept_check_name_visitor(v) do
		# Add elements to the occasional list
		# (the `super` visit is presumably what fills `v.elems`; confirm in the visitor)
		v.elems = v.occasionals
		super
		for e in v.elems do
			if e isa Production then
				print "Error: cannot make {e} occasional, it is a production"
				exit(1)
				abort
			else if e isa Token then
				# Unexpected occurrences of this token will be ignored
				# instead of causing a syntax error.
				e.occasional = true
			else
				abort
			end
		end
	end
end

redef class Nrej
redef fun accept_check_name_visitor(v) do
# Add elements to the rejected list
Expand Down
1 change: 1 addition & 0 deletions contrib/nitcc/tests/occasional.input
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
abbabbaa
5 changes: 5 additions & 0 deletions contrib/nitcc/tests/occasional.sablecc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Parser
Ignored #10, #32;
Occasional 'b';
s = s e | e;
e = 'a' 'a' | 'b' ;
1 change: 1 addition & 0 deletions contrib/nitcc/tests/occasional2.input
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
abbabbaa
6 changes: 6 additions & 0 deletions contrib/nitcc/tests/occasional2.sablecc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Parser
Ignored #10, #32;
Occasional 'b';
s = s e | e;
e = 'a' 'a' | b ;
b = 'b' ;
27 changes: 27 additions & 0 deletions contrib/nitcc/tests/oneliner3.input
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@


if id then id

if id then
id
if id then id
id
end

if
id
then
id
if id then id
id
end

if id then if id then id

if id then if id then
id
if id then id
id
end


21 changes: 21 additions & 0 deletions contrib/nitcc/tests/oneliner3.sablecc
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Grammar oneliner;
Lexer
n = #10;
Parser
Ignored #32;
Occasional n;

prog
= stmts
;
expr = 'if' expr 'then' expr
| 'if' expr 'then' eol stmts 'end'
| 'id'
;

stmts
= stmts expr eol
| expr eol
;

eol = n | ';';
27 changes: 27 additions & 0 deletions contrib/nitcc/tests/oneliner4.input
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@


if id then id

if id then
id
if id then id
id
end

if
id
then
id
if id then id
id
end

if id then if id then id

if id then if id then
id
if id then id
id
end


21 changes: 21 additions & 0 deletions contrib/nitcc/tests/oneliner4.sablecc
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Grammar oneliner;
Lexer
n = #10;
Parser
Ignored #32;
Occasional n;

prog
= stmts eol?
;
expr = 'if' expr 'then' expr
| 'if' expr 'then' eol stmts eol 'end'
| 'id'
;

stmts
= stmts eol expr
| expr
;

eol = n | ';';
16 changes: 7 additions & 9 deletions contrib/nitcc/tests/sav/conflict-dangling.1alt1.alt1.input3.res
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
NParserError@(1:5-1:6)='3'
Nodes[Node]
f_0
e_1
'1'@(1:1-1:2)
e_2
'1'@(1:2-1:3)
e_0
'0'@(1:3-1:4)
x
'2'@(1:4-1:5)
'1'@(1:1-1:2)
e_2
'1'@(1:2-1:3)
e_0
'0'@(1:3-1:4)
x
'2'@(1:4-1:5)
'3'@(1:5-1:6)
19 changes: 8 additions & 11 deletions contrib/nitcc/tests/sav/conflict-dangling.1alt1.alt2.input3.res
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
NParserError@(1:5-1:6)='3'
Nodes[Node]
f_0
e_1
'1'@(1:1-1:2)
e_1
'1'@(1:2-1:3)
e_0
'0'@(1:3-1:4)
y_0
x
'2'@(1:4-1:5)
y_1
'1'@(1:1-1:2)
e_1
'1'@(1:2-1:3)
e_0
'0'@(1:3-1:4)
y_0
x
'2'@(1:4-1:5)
'3'@(1:5-1:6)
14 changes: 6 additions & 8 deletions contrib/nitcc/tests/sav/conflict-dangling.1alt1.input3.res
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
NParserError@(1:5-1:6)='3'
Nodes[Node]
f_0
e_1
'1'@(1:1-1:2)
e_2
'1'@(1:2-1:3)
e_0
'0'@(1:3-1:4)
'2'@(1:4-1:5)
'1'@(1:1-1:2)
e_2
'1'@(1:2-1:3)
e_0
'0'@(1:3-1:4)
'2'@(1:4-1:5)
'3'@(1:5-1:6)
16 changes: 7 additions & 9 deletions contrib/nitcc/tests/sav/conflict-dangling.alt1.input3.res
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
NLexerError@(1:5-1:5)='3'
Nodes[Node]
f
e_1
'1'@(1:1-1:2)
e_2
'1'@(1:2-1:3)
e_0
'0'@(1:3-1:4)
x
'2'@(1:4-1:5)
'1'@(1:1-1:2)
e_2
'1'@(1:2-1:3)
e_0
'0'@(1:3-1:4)
x
'2'@(1:4-1:5)
NLexerError@(1:5-1:5)='3'
19 changes: 8 additions & 11 deletions contrib/nitcc/tests/sav/conflict-dangling.alt2.input3.res
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
NLexerError@(1:5-1:5)='3'
Nodes[Node]
f
e_1
'1'@(1:1-1:2)
e_1
'1'@(1:2-1:3)
e_0
'0'@(1:3-1:4)
y_0
x
'2'@(1:4-1:5)
y_1
'1'@(1:1-1:2)
e_1
'1'@(1:2-1:3)
e_0
'0'@(1:3-1:4)
y_0
x
'2'@(1:4-1:5)
NLexerError@(1:5-1:5)='3'
14 changes: 6 additions & 8 deletions contrib/nitcc/tests/sav/conflict-dangling.input3.res
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
NLexerError@(1:5-1:5)='3'
Nodes[Node]
f
e_1
'1'@(1:1-1:2)
e_2
'1'@(1:2-1:3)
e_0
'0'@(1:3-1:4)
'2'@(1:4-1:5)
'1'@(1:1-1:2)
e_2
'1'@(1:2-1:3)
e_0
'0'@(1:3-1:4)
'2'@(1:4-1:5)
NLexerError@(1:5-1:5)='3'
1 change: 0 additions & 1 deletion contrib/nitcc/tests/sav/inf5000-04-lexer-demo.input2.res
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
NLexerError@(1:1-1:1)='5'
Nodes[Node]
p
NLexerError@(1:1-1:1)='5'
Original file line number Diff line number Diff line change
@@ -1 +1 @@
16:1-16:11 Syntax Error: Unexpected unknown_keyword 'Precedence'; is acceptable instead: Eof
16:1-16:11 Syntax Error: Unexpected unknown_keyword 'Precedence'; is acceptable instead: prod
Original file line number Diff line number Diff line change
@@ -1 +1 @@
15:7-15:17 Syntax Error: Unexpected unknown_keyword 'Precedence'; is acceptable instead: Eof
15:7-15:17 Syntax Error: Unexpected unknown_keyword 'Precedence'; is acceptable instead: prod
24 changes: 11 additions & 13 deletions contrib/nitcc/tests/sav/inf5000-06-grammaire2-grammaire.input.res
Original file line number Diff line number Diff line change
Expand Up @@ -92,19 +92,17 @@ Nodes[Node]
';'@(4:27-4:28)
id@(5:1-5:6)='atoms'
'='@(5:7-5:8)
alts_many
alts_one
alt_0
altid@(5:9-5:16)='{many:}'
alts_one
alt_0
altid@(5:9-5:16)='{many:}'
atoms_many
atoms_many
atoms_many
atoms_none
atom_id
id@(5:17-5:22)='atoms'
atoms_none
atom_id
id@(5:23-5:27)='atom'
'|'@(5:28-5:29)
alt_0
altid@(5:30-5:37)='{none:}'
atoms_none
id@(5:17-5:22)='atoms'
atom_id
id@(5:23-5:27)='atom'
'|'@(5:28-5:29)
altid@(5:30-5:37)='{none:}'
atoms_none
NLexerError@(5:38-5:38)='E'
Original file line number Diff line number Diff line change
@@ -1 +1 @@
11:20-11:29 Syntax Error: Unexpected unknown_keyword 'Separator'; is acceptable instead: elem, text
11:20-11:29 Syntax Error: Unexpected unknown_keyword 'Separator'; is acceptable instead: '*', '?', '+'
Original file line number Diff line number Diff line change
@@ -1 +1 @@
20:14-20:23 Syntax Error: Unexpected unknown_keyword 'Separator'; is acceptable instead: elem, text
20:14-20:23 Syntax Error: Unexpected unknown_keyword 'Separator'; is acceptable instead: '*', '?', '+'
2 changes: 1 addition & 1 deletion contrib/nitcc/tests/sav/inf5000-06-grammaire2-polygone.res
Original file line number Diff line number Diff line change
@@ -1 +1 @@
8:25-8:34 Syntax Error: Unexpected unknown_keyword 'Separator'; is acceptable instead: elem, text
8:25-8:34 Syntax Error: Unexpected unknown_keyword 'Separator'; is acceptable instead: '*', '?', '+'
2 changes: 1 addition & 1 deletion contrib/nitcc/tests/sav/lexer-c-comment.alt3.res
Original file line number Diff line number Diff line change
@@ -1 +1 @@
11:26-11:35 Syntax Error: Unexpected unknown_keyword 'Lookahead'; is acceptable instead: ')', '|'
11:26-11:35 Syntax Error: Unexpected unknown_keyword 'Lookahead'; is acceptable instead: '.'
2 changes: 1 addition & 1 deletion contrib/nitcc/tests/sav/lexer-errors.alt7.res
Original file line number Diff line number Diff line change
@@ -1 +1 @@
10:16-10:25 Syntax Error: Unexpected unknown_keyword 'Lookahead'; is acceptable instead: ';', '|'
10:16-10:25 Syntax Error: Unexpected unknown_keyword 'Lookahead'; is acceptable instead: '.'
Original file line number Diff line number Diff line change
@@ -1 +1 @@
4:9-4:18 Syntax Error: Unexpected unknown_keyword 'Lookahead'; is acceptable instead: ';', '|'
4:9-4:18 Syntax Error: Unexpected unknown_keyword 'Lookahead'; is acceptable instead: '.'
Loading

0 comments on commit e8e9cca

Please sign in to comment.