From 7e74841e3d80e759bb9f95f22e245a378942d4f1 Mon Sep 17 00:00:00 2001 From: Rangi Date: Sun, 18 Apr 2021 20:25:09 -0400 Subject: [PATCH] Return a marker token at the end of any buffer Removes the lexer hack mentioned in #778 --- include/asm/lexer.h | 1 + src/asm/lexer.c | 34 +++++++------- src/asm/parser.y | 47 +++++++++++--------- test/asm/block-comment-termination-error.err | 2 +- test/asm/code-after-endm-endr-endc.err | 8 ++-- test/asm/nested-macrodef.err | 2 +- 6 files changed, 52 insertions(+), 42 deletions(-) diff --git a/include/asm/lexer.h b/include/asm/lexer.h index 73057d8871..84b8e259c6 100644 --- a/include/asm/lexer.h +++ b/include/asm/lexer.h @@ -81,6 +81,7 @@ struct CaptureBody { uint32_t lineNo; char *body; size_t size; + bool unterminated; }; char const *lexer_GetFileName(void); diff --git a/src/asm/lexer.c b/src/asm/lexer.c index ecfadc42c2..9b0f5143c5 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -354,6 +354,7 @@ struct LexerState { uint32_t colNo; int lastToken; int nextToken; + bool isAtEOF; struct IfStack *ifStack; @@ -378,6 +379,7 @@ static void initState(struct LexerState *state) state->atLineStart = true; /* yylex() will init colNo due to this */ state->lastToken = T_EOF; state->nextToken = 0; + state->isAtEOF = false; state->ifStack = NULL; @@ -2267,11 +2269,13 @@ static int yylex_SKIP_TO_ENDR(void) int yylex(void) { -restart: - if (lexerState->atLineStart && lexerStateEOL) { + if (lexerStateEOL) { lexer_SetState(lexerStateEOL); lexerStateEOL = NULL; } + /* `lexer_SetState` updates `lexerState`, so check for EOF after it */ + if (lexerState->isAtEOF) + return T_EOF; if (lexerState->atLineStart) { /* Newlines read within an expansion should not increase the line count */ if (!lexerState->expansions) @@ -2288,23 +2292,19 @@ int yylex(void) int token = lexerModeFuncs[lexerState->mode](); if (token == T_EOF) { - if (lexerState->lastToken != T_NEWLINE) { - dbgPrint("Forcing EOL at EOF\n"); - token = T_NEWLINE; - } else { - /* Try to switch to new buffer; if it succeeds, scan again */ - dbgPrint("Reached EOF!\n"); - /* Captures end at their buffer's boundary no matter what */ - if (!lexerState->capturing) { - if (!yywrap()) - goto restart; + /* Try to switch to new buffer; if it succeeds, scan again */ + dbgPrint("Reached EOB!\n"); + /* Captures end at their buffer's boundary no matter what */ + if (!lexerState->capturing) { + if (yywrap()) { dbgPrint("Reached end of input.\n"); - return T_EOF; + lexerState->isAtEOF = true; } + token = T_EOB; } } lexerState->lastToken = token; - lexerState->atLineStart = token == T_NEWLINE; + lexerState->atLineStart = token == T_NEWLINE || token == T_EOB; return token; } @@ -2327,6 +2327,7 @@ static char *startCapture(void) void lexer_CaptureRept(struct CaptureBody *capture) { + capture->unterminated = false; capture->lineNo = lexer_GetLineNo(); char *captureStart = startCapture(); @@ -2361,7 +2362,6 @@ void lexer_CaptureRept(struct CaptureBody *capture) * We know we have read exactly "ENDR", not e.g. an EQUS */ lexerState->captureSize -= strlen("ENDR"); - lexerState->lastToken = T_POP_ENDR; // Force EOL at EOF goto finish; } level--; @@ -2372,6 +2372,7 @@ void lexer_CaptureRept(struct CaptureBody *capture) for (;;) { if (c == EOF) { error("Unterminated REPT/FOR block\n"); + capture->unterminated = true; goto finish; } else if (c == '\n' || c == '\r') { handleCRLF(c); @@ -2393,6 +2394,7 @@ void lexer_CaptureRept(struct CaptureBody *capture) void lexer_CaptureMacroBody(struct CaptureBody *capture) { + capture->unterminated = false; capture->lineNo = lexer_GetLineNo(); char *captureStart = startCapture(); @@ -2423,7 +2425,6 @@ void lexer_CaptureMacroBody(struct CaptureBody *capture) * We know we have read exactly "ENDM", not e.g. an EQUS */ lexerState->captureSize -= strlen("ENDM"); - lexerState->lastToken = T_POP_ENDM; // Force EOL at EOF goto finish; } } @@ -2432,6 +2433,7 @@ void lexer_CaptureMacroBody(struct CaptureBody *capture) for (;;) { if (c == EOF) { error("Unterminated macro definition\n"); + capture->unterminated = true; goto finish; } else if (c == '\n' || c == '\r') { handleCRLF(c); diff --git a/src/asm/parser.y b/src/asm/parser.y index b4ecdf095e..5c51e6a86e 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -638,6 +638,7 @@ enum { %type op_mem_ind %type assert_type +%token T_EOB "end of buffer" %token T_EOF 0 "end of file" %start asmfile @@ -646,14 +647,13 @@ enum { asmfile : lines ; -/* - * The lexer adds T_NEWLINE at the end of the file if one was not - * already present, so we can rely on it to end a line. - */ lines : %empty | lines line ; +endofline : T_NEWLINE | T_EOB +; + plain_directive : label | label cpu_command | label macro @@ -661,9 +661,9 @@ plain_directive : label | assignment_directive ; -line : plain_directive T_NEWLINE +line : plain_directive endofline | line_directive /* Directives that manage newlines themselves */ - | error T_NEWLINE { /* Continue parsing the next line on a syntax error */ + | error endofline { /* Continue parsing the next line on a syntax error */ fstk_StopRept(); } ; @@ -678,6 +678,7 @@ line_directive : macrodef | rept | for | break + | include | if /* It's important that all of these require being at line start for `skipIfBlock` */ | elif @@ -799,8 +800,7 @@ assignment_directive : equ | equs ; -directive : include - | endc +directive : endc | print | println | printf @@ -980,8 +980,10 @@ load : T_POP_LOAD sectmod string T_COMMA sectiontype sectorg sectattrs { rept : T_POP_REPT uconst T_NEWLINE { lexer_CaptureRept(&captureBody); - } T_NEWLINE { - fstk_RunRept($2, captureBody.lineNo, captureBody.body, captureBody.size); + } endofline { + if (!captureBody.unterminated) + fstk_RunRept($2, captureBody.lineNo, captureBody.body, + captureBody.size); } ; @@ -991,9 +993,10 @@ for : T_POP_FOR { lexer_ToggleStringExpansion(true); } T_COMMA for_args T_NEWLINE { lexer_CaptureRept(&captureBody); - } T_NEWLINE { - fstk_RunFor($3, $6.start, $6.stop, $6.step, captureBody.lineNo, - captureBody.body, captureBody.size); + } endofline { + if (!captureBody.unterminated) + fstk_RunFor($3, $6.start, $6.stop, $6.step, captureBody.lineNo, + captureBody.body, captureBody.size); } for_args : const { @@ -1013,7 +1016,7 @@ for_args : const { } ; -break : T_POP_BREAK T_NEWLINE { +break : label T_POP_BREAK endofline { if (fstk_Break()) lexer_SetMode(LEXER_SKIP_TO_ENDR); } @@ -1025,13 +1028,17 @@ macrodef : T_POP_MACRO { lexer_ToggleStringExpansion(true); } T_NEWLINE { lexer_CaptureMacroBody(&captureBody); - } T_NEWLINE { - sym_AddMacro($3, captureBody.lineNo, captureBody.body, captureBody.size); + } endofline { + if (!captureBody.unterminated) + sym_AddMacro($3, captureBody.lineNo, captureBody.body, + captureBody.size); } | T_LABEL T_COLON T_POP_MACRO T_NEWLINE { lexer_CaptureMacroBody(&captureBody); - } T_NEWLINE { - sym_AddMacro($1, captureBody.lineNo, captureBody.body, captureBody.size); + } endofline { + if (!captureBody.unterminated) + sym_AddMacro($1, captureBody.lineNo, captureBody.body, + captureBody.size); } ; @@ -1154,8 +1161,8 @@ export_list : export_list_entry export_list_entry : scoped_id { sym_Export($1); } ; -include : T_POP_INCLUDE string { - fstk_RunInclude($2); +include : label T_POP_INCLUDE string endofline { + fstk_RunInclude($3); if (oFailedOnMissingInclude) YYACCEPT; } diff --git a/test/asm/block-comment-termination-error.err b/test/asm/block-comment-termination-error.err index f5e82102af..dcad56d78e 100644 --- a/test/asm/block-comment-termination-error.err +++ b/test/asm/block-comment-termination-error.err @@ -1,5 +1,5 @@ ERROR: block-comment-termination-error.asm(1): Unterminated block comment ERROR: block-comment-termination-error.asm(1): - syntax error, unexpected newline + syntax error, unexpected end of buffer error: Assembly aborted (2 errors)! diff --git a/test/asm/code-after-endm-endr-endc.err b/test/asm/code-after-endm-endr-endc.err index 65a7188f28..d06c5d727a 100644 --- a/test/asm/code-after-endm-endr-endc.err +++ b/test/asm/code-after-endm-endr-endc.err @@ -1,15 +1,15 @@ ERROR: code-after-endm-endr-endc.asm(6): - syntax error, unexpected PRINTLN, expecting newline + syntax error, unexpected PRINTLN, expecting newline or end of buffer ERROR: code-after-endm-endr-endc.asm(7): Macro "mac" not defined ERROR: code-after-endm-endr-endc.asm(12): - syntax error, unexpected PRINTLN, expecting newline + syntax error, unexpected PRINTLN, expecting newline or end of buffer ERROR: code-after-endm-endr-endc.asm(17): syntax error, unexpected PRINTLN, expecting newline ERROR: code-after-endm-endr-endc.asm(19): - syntax error, unexpected PRINTLN, expecting newline + syntax error, unexpected PRINTLN, expecting newline or end of buffer ERROR: code-after-endm-endr-endc.asm(23): syntax error, unexpected PRINTLN, expecting newline ERROR: code-after-endm-endr-endc.asm(25): - syntax error, unexpected PRINTLN, expecting newline + syntax error, unexpected PRINTLN, expecting newline or end of buffer error: Assembly aborted (7 errors)! diff --git a/test/asm/nested-macrodef.err b/test/asm/nested-macrodef.err index e0e0473b00..885287a9c2 100644 --- a/test/asm/nested-macrodef.err +++ b/test/asm/nested-macrodef.err @@ -3,5 +3,5 @@ warning: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(22): [-Wuser] ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24): Unterminated macro definition ERROR: nested-macrodef.asm(27): - syntax error, unexpected identifier, expecting newline + Macro "inner" not defined error: Assembly aborted (2 errors)!