diff --git a/src/asm/lexer.c b/src/asm/lexer.c index e07553d3a..493b2fea1 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -845,7 +845,6 @@ static int peek(uint8_t distance) } } else if (c == '{' && !lexerState->disableInterpolation) { /* If character is an open brace, do symbol interpolation */ - lexerState->macroArgScanDistance++; shiftChars(1); char const *ptr = readInterpolation(); @@ -1247,7 +1246,7 @@ static void readGfxConstant(void) yylval.nConstValue = bp1 << 8 | bp0; } -/* Function to read identifiers & keywords */ +/* Functions to read identifiers & keywords */ static bool startsIdentifier(int c) { @@ -1373,51 +1372,39 @@ static char const *readInterpolation(void) return NULL; } -static int appendMacroArg(char const *str, int i) +#define append_yylval_tzString(c) do { \ + if (i < sizeof(yylval.tzString)) \ + yylval.tzString[i++] = (c); \ +} while (0) + +static size_t appendEscapedSubstring(char const *str, size_t i) { - while (*str && i < sizeof(yylval.tzString)) { + /* Copy one extra to flag overflow */ + while (*str) { int c = *str++; - if (c != '\\') { - yylval.tzString[i++] = c; - continue; - } - - c = *str++; - + /* Escape characters that need escaping */ switch (c) { - case '\\': /* Return that character unchanged */ + case '\\': case '"': case '{': - case '}': - break; - case 'n': - c = '\n'; - break; - case 'r': - c = '\r'; + append_yylval_tzString('\\'); break; - case 't': - c = '\t'; + case '\n': + append_yylval_tzString('\\'); + c = 'n'; break; - - case '\0': /* Can't really print that one */ - error("Illegal character escape at end of macro arg\n"); - yylval.tzString[i++] = '\\'; + case '\r': + append_yylval_tzString('\\'); + c = 'r'; break; - - /* - * Line continuations and macro args were already - * handled while reading the macro args, so '\@', - * '\#', and '\0'-'\9' should not occur here. - */ - - default: - error("Illegal character escape '%s'\n", print(c)); - c = '\\'; + case '\t': + append_yylval_tzString('\\'); + c = 't'; break; } - yylval.tzString[i++] = c; + + append_yylval_tzString(c); } return i; @@ -1479,10 +1466,11 @@ static void readString(void) case '\\': // Character escape or macro arg c = peek(0); switch (c) { - case '\\': // Return that character unchanged + case '\\': case '"': case '{': case '}': + // Return that character unchanged shiftChars(1); break; case 'n': @@ -1521,17 +1509,167 @@ static void readString(void) shiftChars(1); char const *str = readMacroArg(c); - i = appendMacroArg(str, i); + while (*str) + append_yylval_tzString(*str++); continue; // Do not copy an additional character case EOF: // Can't really print that one error("Illegal character escape at end of input\n"); c = '\\'; break; + default: error("Illegal character escape '%s'\n", print(c)); + shiftChars(1); + break; + } + break; + + case '{': // Symbol interpolation + // We'll be exiting the string scope, so re-enable expansions + // (Not interpolations, since they're handled by the function itself...) + lexerState->disableMacroArgs = false; + char const *ptr = readInterpolation(); + + if (ptr) + while (*ptr) + append_yylval_tzString(*ptr++); + lexerState->disableMacroArgs = true; + continue; // Do not copy an additional character + + // Regular characters will just get copied + } + + append_yylval_tzString(c); + } + +finish: + if (i == sizeof(yylval.tzString)) { + i--; + warning(WARNING_LONG_STR, "String constant too long\n"); + } + yylval.tzString[i] = '\0'; + + dbgPrint("Read string \"%s\"\n", yylval.tzString); + lexerState->disableMacroArgs = false; + lexerState->disableInterpolation = false; +} + +static size_t appendStringLiteral(size_t i) +{ + dbgPrint("Reading string\n"); + lexerState->disableMacroArgs = true; + lexerState->disableInterpolation = true; + + bool multiline = false; + + // We reach this function after reading a single quote, but we also support triple quotes + append_yylval_tzString('"'); + if (peek(0) == '"') { + append_yylval_tzString('"'); + shiftChars(1); + if (peek(0) == '"') { + // """ begins a multi-line string + append_yylval_tzString('"'); + shiftChars(1); + multiline = true; + } else { + // "" is an empty string, skip the loop + goto finish; + } + } + + for (;;) { + int c = peek(0); + + // '\r', '\n' or EOF ends a single-line string early + if (c == EOF || (!multiline && (c == '\r' || c == '\n'))) { + error("Unterminated string\n"); + break; + } + + // We'll be staying in the string, so we can safely consume the char + shiftChars(1); + + // Handle '\r' or '\n' (in multiline strings only, already handled above otherwise) + if (c == '\r' || c == '\n') { + /* Handle CRLF before nextLine() since shiftChars updates colNo */ + if (c == '\r' && peek(0) == '\n') + shiftChars(1); + nextLine(); + c = '\n'; + } + + switch (c) { + case '"': + if (multiline) { + // Only """ ends a multi-line string + if (peek(0) != '"' || peek(1) != '"') + break; + append_yylval_tzString('"'); + append_yylval_tzString('"'); + shiftChars(2); + } + append_yylval_tzString('"'); + goto finish; + + case '\\': // Character escape or macro arg + c = peek(0); + switch (c) { + // Character escape + case '\\': + case '"': + case '{': + case '}': + case 'n': + case 'r': + case 't': + // Return that character unchanged + append_yylval_tzString('\\'); + shiftChars(1); + break; + + // Line continuation + case ' ': + case '\r': + case '\n': + readLineContinuation(); + continue; + + // Macro arg + case '@': + case '#': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + shiftChars(1); + char const *str = readMacroArg(c); + + i = appendEscapedSubstring(str, i); + continue; // Do not copy an additional character + + case EOF: // Can't really print that one + error("Illegal character escape at end of input\n"); c = '\\'; break; + + case ',': /* `\,` inside a macro arg string literal */ + warning(WARNING_OBSOLETE, + "`\\,` is deprecated inside strings\n"); + shiftChars(1); + break; + + default: + error("Illegal character escape '%s'\n", print(c)); + shiftChars(1); + break; } break; @@ -1542,16 +1680,14 @@ static void readString(void) char const *ptr = readInterpolation(); if (ptr) - while (*ptr && i < sizeof(yylval.tzString)) - yylval.tzString[i++] = *ptr++; + i = appendEscapedSubstring(ptr, i); lexerState->disableMacroArgs = true; continue; // Do not copy an additional character // Regular characters will just get copied } - if (i < sizeof(yylval.tzString)) // Copy one extra to flag overflow - yylval.tzString[i++] = c; + append_yylval_tzString(c); } finish: @@ -1564,6 +1700,8 @@ static void readString(void) dbgPrint("Read string \"%s\"\n", yylval.tzString); lexerState->disableMacroArgs = false; lexerState->disableInterpolation = false; + + return i; } /* Function to report one character's worth of garbage bytes */ @@ -1835,6 +1973,7 @@ static int yylex_NORMAL(void) case EOF: error("Illegal character escape at end of input\n"); break; + default: shiftChars(1); error("Illegal character escape '%s'\n", print(c)); @@ -1886,9 +2025,8 @@ static int yylex_RAW(void) dbgPrint("Lexing in raw mode, line=%" PRIu32 ", col=%" PRIu32 "\n", lexer_GetLineNo(), lexer_GetColNo()); - /* This is essentially a modified `readString` */ + /* This is essentially a modified `appendStringLiteral` */ size_t i = 0; - bool insideString = false; /* Trim left of string... */ while (isWhitespace(peek(0))) @@ -1898,18 +2036,16 @@ static int yylex_RAW(void) int c = peek(0); switch (c) { - case '"': - insideString = !insideString; - /* Other than that, just process quotes normally */ + case '"': /* String literals inside macro args */ + shiftChars(1); + i = appendStringLiteral(i); break; case ';': /* Comments inside macro args */ - if (insideString) - break; discardComment(); c = peek(0); /* fallthrough */ - case ',': + case ',': /* End of macro arg */ case '\r': case '\n': case EOF: @@ -1939,16 +2075,30 @@ static int yylex_RAW(void) return T_STRING; case '\\': /* Character escape */ - c = peek(1); + shiftChars(1); /* Shift the backslash */ + c = peek(0); + switch (c) { - case ',': - shiftChars(1); + case ',': /* Escape `\,` only inside a macro arg */ + case '\\': /* Escapes shared with string literals */ + case '"': + case '{': + case '}': + break; + + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; break; case ' ': case '\r': case '\n': - shiftChars(1); /* Shift the backslash */ readLineContinuation(); continue; @@ -1956,20 +2106,28 @@ static int yylex_RAW(void) error("Illegal character escape at end of input\n"); c = '\\'; break; - default: /* Pass the rest as-is */ - c = '\\'; + + /* + * Macro args were already handled by peek, so '\@', + * '\#', and '\0'-'\9' should not occur here. + */ + + default: + error("Illegal character escape '%s'\n", print(c)); break; } - break; + /* fallthrough */ - /* Regular characters will just get copied */ + default: /* Regular characters will just get copied */ + append_yylval_tzString(c); + shiftChars(1); + break; } - if (i < sizeof(yylval.tzString)) /* Copy one extra to flag overflow */ - yylval.tzString[i++] = c; - shiftChars(1); } } +#undef append_yylval_tzString + /* * This function uses the fact that `if`, etc. constructs are only valid when * there's nothing before them on their lines. This enables filtering diff --git a/src/asm/rgbasm.5 b/src/asm/rgbasm.5 index 5718a13f6..e0956f2aa 100644 --- a/src/asm/rgbasm.5 +++ b/src/asm/rgbasm.5 @@ -235,7 +235,6 @@ There are a number of escape sequences you can use within a string: .It Sy String Ta Sy Meaning .It Ql \[rs]\[rs] Ta Produces a backslash .It Ql \[rs]" Ta Produces a double quote without terminating -.It Ql \[rs], Ta Comma .It Ql \[rs]{ Ta Curly bracket left .It Ql \[rs]} Ta Curly bracket right .It Ql \[rs]n Ta Newline ($0A) @@ -1088,6 +1087,10 @@ definition ENDM .Ed .El +.Pp +Macro arguments support all the escape sequences of strings, as well as +.Ql \[rs], +to escape commas, since those otherwise separate arguments. .Ss Exporting and importing symbols Importing and exporting of symbols is a feature that is very useful when your project spans many source files and, for example, you need to jump to a routine defined in another file. .Pp @@ -1462,16 +1465,13 @@ PrintMacro: MACRO ENDM PrintMacro STRCAT("Hello "\[rs], \[rs] - "world\[rs]\[rs]n") + "world\[rs]n") .Ed .Pp The comma needs to be escaped to avoid it being treated as separating the macro's arguments. -The backslash -.Sq \[rs] -.Pq from Sq \[rs]n -also needs to be escaped because of the way -.Nm -processes macro arguments. +The backslash in +.Ql \[rs]n +does not need to be escaped because string literals also work as usual inside macro arguments. .Pp In reality, up to 256 arguments can be passed to a macro, but you can only use the first 9 like this. If you want to use the rest, you need to use the diff --git a/test/asm/macro-arg-in-string.asm b/test/asm/macro-arg-in-string.asm index a1d93fa22..4982fb2f0 100644 --- a/test/asm/macro-arg-in-string.asm +++ b/test/asm/macro-arg-in-string.asm @@ -1,9 +1,12 @@ print1: MACRO + if _NARG == 2 + assert !STRCMP("\1", \2) + endc PRINTLN "\1" ENDM print1 John "Danger" Smith - print1 \\A\nB + print1 \\\\A\\nB\n, "\\\\A\\nB\n" print1 C\ D print1 E\!F ; illegal character escape @@ -15,3 +18,10 @@ ENDM s EQUS "hello" iprint s + +symprint: MACRO + PRINTLN {\1} +ENDM + +hello EQUS "\"goodbye\"" + symprint s diff --git a/test/asm/macro-arg-in-string.err b/test/asm/macro-arg-in-string.err index 058cc43df..1b16204ec 100644 --- a/test/asm/macro-arg-in-string.err +++ b/test/asm/macro-arg-in-string.err @@ -1,3 +1,3 @@ -ERROR: macro-arg-in-string.asm(9) -> macro-arg-in-string.asm::print1(2): +ERROR: macro-arg-in-string.asm(12): Illegal character escape '!' error: Assembly aborted (1 errors)! diff --git a/test/asm/macro-arg-in-string.out b/test/asm/macro-arg-in-string.out index 5fd0e1d51..3e47777f3 100644 --- a/test/asm/macro-arg-in-string.out +++ b/test/asm/macro-arg-in-string.out @@ -1,6 +1,7 @@ John "Danger" Smith -\A -B +\\A\nB + CD -E\F +E!F hello +goodbye diff --git a/test/asm/multi-line-strings.asm b/test/asm/multi-line-strings.asm index e39635dbb..fd6b9fe4c 100644 --- a/test/asm/multi-line-strings.asm +++ b/test/asm/multi-line-strings.asm @@ -21,7 +21,8 @@ printarg: MACRO ENDM printarg " - printarg """ + printarg """multi-line +string argument""" EMPTY1 EQUS "" EMPTY2 EQUS "\ ; comment diff --git a/test/asm/multi-line-strings.err b/test/asm/multi-line-strings.err index 9f139531f..47a604f4e 100644 --- a/test/asm/multi-line-strings.err +++ b/test/asm/multi-line-strings.err @@ -1,2 +1,5 @@ -warning: multi-line-strings.asm(34): [-Wuser] +ERROR: multi-line-strings.asm(23): + Unterminated string +warning: multi-line-strings.asm(35): [-Wuser] check the line number +error: Assembly aborted (1 errors)! diff --git a/test/asm/multi-line-strings.out b/test/asm/multi-line-strings.out index ef7d11806..c88ad8dca 100644 --- a/test/asm/multi-line-strings.out +++ b/test/asm/multi-line-strings.out @@ -8,6 +8,8 @@ The multi-line string can contain: ! arg <"> arg (") -arg <"""> -arg (""") +arg <"""multi-line +string argument"""> +arg ("""multi-line +string argument""") () diff --git a/test/asm/quine.asm b/test/asm/quine.asm new file mode 100644 index 000000000..f70a31af7 --- /dev/null +++ b/test/asm/quine.asm @@ -0,0 +1,14 @@ +R:MACRO +REPT _NARG +PRINT STRSUB("\n\"\\ ENRST1ABCDFGHIMOPU_n#()+,:>",\1+1,1) +SHIFT +ENDR +ENDM +N:MACRO + R \# +REPT _NARG +PRINT"\1",STRSUB("\n,",(_NARG>1)+1,1) +SHIFT +ENDR +ENDM + N 6,29,18,10,12,6,19,0,6,4,20,8,3,22,5,10,6,15,0,20,6,17,5,8,3,7,8,6,7,21,11,25,1,2,23,2,1,2,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,1,28,2,9,27,9,28,9,26,0,7,16,17,14,8,0,4,5,13,6,0,4,5,13,18,0,5,29,18,10,12,6,19,0,3,6,3,2,24,0,6,4,20,8,3,22,5,10,6,15,0,20,6,17,5,8,1,2,9,1,28,7,8,6,7,21,11,25,1,2,23,28,1,28,25,22,5,10,6,15,30,9,26,27,9,28,9,26,0,7,16,17,14,8,0,4,5,13,6,0,4,5,13,18,0,3,5,3 diff --git a/test/asm/quine.err b/test/asm/quine.err new file mode 100644 index 000000000..e69de29bb diff --git a/test/asm/quine.out b/test/asm/quine.out new file mode 100644 index 000000000..f70a31af7 --- /dev/null +++ b/test/asm/quine.out @@ -0,0 +1,14 @@ +R:MACRO +REPT _NARG +PRINT STRSUB("\n\"\\ ENRST1ABCDFGHIMOPU_n#()+,:>",\1+1,1) +SHIFT +ENDR +ENDM +N:MACRO + R \# +REPT _NARG +PRINT"\1",STRSUB("\n,",(_NARG>1)+1,1) +SHIFT +ENDR +ENDM + N 6,29,18,10,12,6,19,0,6,4,20,8,3,22,5,10,6,15,0,20,6,17,5,8,3,7,8,6,7,21,11,25,1,2,23,2,1,2,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,1,28,2,9,27,9,28,9,26,0,7,16,17,14,8,0,4,5,13,6,0,4,5,13,18,0,5,29,18,10,12,6,19,0,3,6,3,2,24,0,6,4,20,8,3,22,5,10,6,15,0,20,6,17,5,8,1,2,9,1,28,7,8,6,7,21,11,25,1,2,23,28,1,28,25,22,5,10,6,15,30,9,26,27,9,28,9,26,0,7,16,17,14,8,0,4,5,13,6,0,4,5,13,18,0,3,5,3 diff --git a/test/asm/quine2.asm b/test/asm/quine2.asm new file mode 100644 index 000000000..89d5f1dd6 --- /dev/null +++ b/test/asm/quine2.asm @@ -0,0 +1,4 @@ +q: macro + println \1,"\1" +endm + q "q: macro\n\tprintln \\1,\"\\1\"\nendm\n\tq " diff --git a/test/asm/quine2.err b/test/asm/quine2.err new file mode 100644 index 000000000..e69de29bb diff --git a/test/asm/quine2.out b/test/asm/quine2.out new file mode 100644 index 000000000..89d5f1dd6 --- /dev/null +++ b/test/asm/quine2.out @@ -0,0 +1,4 @@ +q: macro + println \1,"\1" +endm + q "q: macro\n\tprintln \\1,\"\\1\"\nendm\n\tq " diff --git a/test/asm/raw-macro-args.asm b/test/asm/raw-macro-args.asm new file mode 100644 index 000000000..a7ca23a30 --- /dev/null +++ b/test/asm/raw-macro-args.asm @@ -0,0 +1,38 @@ +printargs: MACRO + rept _NARG + println \1 + shift + endr +ENDM + +printlit: MACRO + rept _NARG + println "\1" + shift + endr +ENDM + +NUM EQU 42 +STR EQUS "str\"ing" + + printargs NUM + printargs "{d:NUM}" + printargs "{STR}", 16 ; comment 1 + printargs "\"literal \\\"\\\\\\\"\"" + printargs "literal \"\\\"", \ ; comment 2 +"""multi-"line" + ""string"" arg""" + printargs MUL(2.0\, 3.0) + printargs "unclosed + + printlit NUM + printlit "{d:NUM}" + printlit "{STR}", 16 ; comment 3 + printlit "\"literal \\\"\\\\\\\"\"" + printlit "literal \"\\\"", \ ; comment 4 +"""multi-"line" + ""string"" arg""" + printlit MUL(2.0\, 3.0) + printlit this\n is\, \{not\} a\\n syntax\" error + printlit "unclosed + printlit """EOF \ No newline at end of file diff --git a/test/asm/raw-macro-args.err b/test/asm/raw-macro-args.err new file mode 100644 index 000000000..3a68773ed --- /dev/null +++ b/test/asm/raw-macro-args.err @@ -0,0 +1,9 @@ +ERROR: raw-macro-args.asm(26): + Unterminated string +ERROR: raw-macro-args.asm(26) -> raw-macro-args.asm::printargs(2) -> raw-macro-args.asm::printargs::REPT~1(3): + Unterminated string +ERROR: raw-macro-args.asm(37): + Unterminated string +ERROR: raw-macro-args.asm(38): + Unterminated string +error: Assembly aborted (4 errors)! diff --git a/test/asm/raw-macro-args.out b/test/asm/raw-macro-args.out new file mode 100644 index 000000000..277a7ddd1 --- /dev/null +++ b/test/asm/raw-macro-args.out @@ -0,0 +1,23 @@ +$2A +42 +str"ing +$10 +"literal \"\\\"" +literal "\" +multi-"line" + ""string"" arg +$60000 +unclosed +NUM +"42" +"str\"ing" +16 +"\"literal \\\"\\\\\\\"\"" +"literal \"\\\"" +"""multi-"line" + ""string"" arg""" +MUL(2.0, 3.0) +this + is, {not} a\n syntax" error +"unclosed +"""EOF