From 5024066c00bb71bb78e3ca9fe3e081898dae7fab Mon Sep 17 00:00:00 2001 From: Rangi Date: Wed, 28 Apr 2021 20:09:38 -0400 Subject: [PATCH] [WIP] Allow longer strings, not '\0'-terminated Fixes #650 Fixes #505 --- Makefile | 1 + include/asm/format.h | 8 +- include/asm/fstack.h | 7 +- include/asm/lexer.h | 2 - include/asm/section.h | 3 +- include/asm/string.h | 53 +++++++++ include/asm/symbol.h | 51 ++++---- include/asm/warning.h | 1 - src/CMakeLists.txt | 1 + src/asm/format.c | 72 +++++------- src/asm/fstack.c | 33 +++--- src/asm/lexer.c | 267 +++++++++++++++++++++++------------------- src/asm/parser.y | 75 ++++++------ src/asm/section.c | 7 ++ src/asm/string.c | 154 ++++++++++++++++++++++++ src/asm/symbol.c | 101 +++++++++------- src/asm/warning.c | 2 - 17 files changed, 541 insertions(+), 297 deletions(-) create mode 100644 include/asm/string.h create mode 100644 src/asm/string.c diff --git a/Makefile b/Makefile index 5c53b4b0e6..e38c68686b 100644 --- a/Makefile +++ b/Makefile @@ -66,6 +66,7 @@ rgbasm_obj := \ src/asm/parser.o \ src/asm/rpn.o \ src/asm/section.o \ + src/asm/string.o \ src/asm/symbol.o \ src/asm/util.o \ src/asm/warning.o \ diff --git a/include/asm/format.h b/include/asm/format.h index 7a144e8058..a17ce418dc 100644 --- a/include/asm/format.h +++ b/include/asm/format.h @@ -35,10 +35,12 @@ struct FormatSpec { bool valid; }; +struct String; + struct StrFmtArg { union { uint32_t number; - char *string; + struct String *string; }; bool isNumeric; }; @@ -57,7 +59,7 @@ bool fmt_IsValid(struct FormatSpec const *fmt); bool fmt_IsFinished(struct FormatSpec const *fmt); void fmt_UseCharacter(struct FormatSpec *fmt, int c); void fmt_FinishCharacters(struct FormatSpec *fmt); -void fmt_PrintString(char *buf, size_t bufLen, struct FormatSpec const *fmt, char const *value); -void fmt_PrintNumber(char *buf, size_t bufLen, struct FormatSpec const *fmt, uint32_t value); +struct String *fmt_PrintString(struct FormatSpec const *fmt, struct String const *value); +struct 
String *fmt_PrintNumber(struct FormatSpec const *fmt, uint32_t value); #endif /* RGBDS_FORMAT_SPEC_H */ diff --git a/include/asm/fstack.h b/include/asm/fstack.h index 15af4d427c..41b95f6b9d 100644 --- a/include/asm/fstack.h +++ b/include/asm/fstack.h @@ -52,6 +52,7 @@ struct FileStackNamedNode { /* NODE_FILE, NODE_MACRO */ extern size_t maxRecursionDepth; struct MacroArgs; +struct String; void fstk_Dump(struct FileStackNode const *node, uint32_t lineNo); void fstk_DumpCurrent(void); @@ -71,10 +72,10 @@ bool fstk_FindFile(char const *path, char **fullPath, size_t *size); bool yywrap(void); void fstk_RunInclude(char const *path); -void fstk_RunMacro(char const *macroName, struct MacroArgs *args); +void fstk_RunMacro(struct String const *macroName, struct MacroArgs *args); void fstk_RunRept(uint32_t count, int32_t reptLineNo, char *body, size_t size); -void fstk_RunFor(char const *symName, int32_t start, int32_t stop, int32_t step, - int32_t reptLineNo, char *body, size_t size); +void fstk_RunFor(struct String const *symName, int32_t start, int32_t stop, int32_t step, + int32_t reptLineNo, char *body, size_t size); void fstk_StopRept(void); bool fstk_Break(void); diff --git a/include/asm/lexer.h b/include/asm/lexer.h index 0ad6de4851..0b55572815 100644 --- a/include/asm/lexer.h +++ b/include/asm/lexer.h @@ -11,8 +11,6 @@ #include -#define MAXSTRLEN 255 - struct LexerState; extern struct LexerState *lexerState; extern struct LexerState *lexerStateEOL; diff --git a/include/asm/section.h b/include/asm/section.h index eb24413963..8192d9de90 100644 --- a/include/asm/section.h +++ b/include/asm/section.h @@ -17,9 +17,10 @@ extern uint8_t fillByte; struct Expression; +struct String; struct Section { - char *name; + struct String *name; enum SectionType type; enum SectionModifier modifier; struct FileStackNode *src; /* Where the section was defined */ diff --git a/include/asm/string.h b/include/asm/string.h new file mode 100644 index 0000000000..f3c28080d7 --- /dev/null +++ 
b/include/asm/string.h @@ -0,0 +1,53 @@ +/* + * This file is part of RGBDS. + * + * Copyright (c) 2021, Eldred Habert and RGBDS contributors. + * + * SPDX-License-Identifier: MIT + */ + +#ifndef RGBDS_STRING_H +#define RGBDS_STRING_H + +#include +#include +#include + +#include "helpers.h" + +struct String; + +#define PRI_STR ".*s" +// WARNING: **DO NOT** pass any side-effecting parameters to the macros below! +#define STR_FMT(str) (int)str_Len(str), str_Chars(str) + +#define MUTATE_STR(str, ...) do { \ + struct String *___orig_str = str; \ + str = __VA_ARGS__; \ + assert(___orig_str == str); /* This shouldn't have been reallocated */ \ +} while (0) + +static inline bool str_IsWhitespace(int c) +{ + return c == ' ' || c == '\t'; +} + +size_t str_Len(struct String const *str) attr_(pure); +void str_Trunc(struct String *str, size_t len); +char str_Index(struct String const *str, size_t i) attr_(pure); +bool str_Find(struct String const *str, char c) attr_(pure); +char const *str_Chars(struct String const *str) attr_(pure); + +/** + * @param capacity The capacity to use, or 0 if unknown + */ +struct String *str_New(size_t capacity) attr_(malloc); +void str_Ref(struct String *str); +void str_Unref(struct String *str); + +struct String *str_Push(struct String *str, char c) attr_(warn_unused_result); +struct String *str_Append(struct String *lhs, struct String const *rhs) attr_(warn_unused_result); +struct String *str_AppendSlice(struct String *lhs, char const *rhs, size_t len) + attr_(warn_unused_result); + +#endif diff --git a/include/asm/symbol.h b/include/asm/symbol.h index 729ec28f39..65c99a1404 100644 --- a/include/asm/symbol.h +++ b/include/asm/symbol.h @@ -15,12 +15,12 @@ #include #include "asm/section.h" +#include "asm/string.h" #include "platform.h" // MIN_NB_ELMS #include "types.h" #define HASHSIZE (1 << 16) -#define MAXSYMLEN 256 enum SymbolType { SYM_LABEL, @@ -32,7 +32,7 @@ enum SymbolType { }; struct Symbol { - char name[MAXSYMLEN + 1]; + struct String 
*name; enum SymbolType type; bool isExported; /* Whether the symbol is to be exported */ bool isBuiltin; /* Whether the symbol is a built-in */ @@ -45,13 +45,14 @@ struct Symbol { /* If sym_IsNumeric */ int32_t value; int32_t (*numCallback)(void); - /* For SYM_MACRO and SYM_EQUS; TODO: have separate fields */ + /* For SYM_MACRO */ struct { size_t macroSize; char *macro; }; /* For SYM_EQUS */ - char const *(*strCallback)(void); + struct String *str; + struct String *(*strCallback)(void); }; uint32_t ID; /* ID of the symbol in the object file (-1 if none) */ @@ -92,7 +93,7 @@ static inline bool sym_IsLabel(struct Symbol const *sym) static inline bool sym_IsLocal(struct Symbol const *sym) { - return sym_IsLabel(sym) && strchr(sym->name, '.'); + return sym_IsLabel(sym) && str_Find(sym->name, '.'); } static inline bool sym_IsExported(struct Symbol const *sym) @@ -103,50 +104,50 @@ static inline bool sym_IsExported(struct Symbol const *sym) /* * Get a string equate's value */ -static inline char const *sym_GetStringValue(struct Symbol const *sym) +static inline struct String *sym_GetStringValue(struct Symbol const *sym) { if (sym->hasCallback) return sym->strCallback(); - return sym->macro; + return sym->str; } void sym_ForEach(void (*func)(struct Symbol *, void *), void *arg); int32_t sym_GetValue(struct Symbol const *sym); void sym_SetExportAll(bool set); -struct Symbol *sym_AddLocalLabel(char const *symName); -struct Symbol *sym_AddLabel(char const *symName); +struct Symbol *sym_AddLocalLabel(struct String *symName); +struct Symbol *sym_AddLabel(struct String *symName); struct Symbol *sym_AddAnonLabel(void); -void sym_WriteAnonLabelName(char buf[MIN_NB_ELMS(MAXSYMLEN + 1)], uint32_t ofs, bool neg); -void sym_Export(char const *symName); -struct Symbol *sym_AddEqu(char const *symName, int32_t value); -struct Symbol *sym_RedefEqu(char const *symName, int32_t value); -struct Symbol *sym_AddSet(char const *symName, int32_t value); +struct String 
*sym_WriteAnonLabelName(uint32_t ofs, bool neg); +void sym_Export(struct String const *symName); +struct Symbol *sym_AddEqu(struct String const *symName, int32_t value); +struct Symbol *sym_RedefEqu(struct String *symName, int32_t value); +struct Symbol *sym_AddSet(struct String const *symName, int32_t value); uint32_t sym_GetPCValue(void); uint32_t sym_GetConstantSymValue(struct Symbol const *sym); -uint32_t sym_GetConstantValue(char const *symName); +uint32_t sym_GetConstantValue(struct String const *symName); /* * Find a symbol by exact name, bypassing expansion checks */ -struct Symbol *sym_FindExactSymbol(char const *symName); +struct Symbol *sym_FindExactSymbol(struct String const *symName); /* * Find a symbol by exact name; may not be scoped, produces an error if it is */ -struct Symbol *sym_FindUnscopedSymbol(char const *symName); +struct Symbol *sym_FindUnscopedSymbol(struct String const *symName); /* * Find a symbol, possibly scoped, by name */ -struct Symbol *sym_FindScopedSymbol(char const *symName); +struct Symbol *sym_FindScopedSymbol(struct String const *symName); struct Symbol const *sym_GetPC(void); -struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size); -struct Symbol *sym_Ref(char const *symName); -struct Symbol *sym_AddString(char const *symName, char const *value); -struct Symbol *sym_RedefString(char const *symName, char const *value); -void sym_Purge(char const *symName); +struct Symbol *sym_AddMacro(struct String *symName, int32_t defLineNo, char *body, size_t size); +struct Symbol *sym_Ref(struct String *symName); +struct Symbol *sym_AddString(struct String *symName, struct String *value); +struct Symbol *sym_RedefString(struct String *symName, struct String *value); +void sym_Purge(struct String const *symName); void sym_Init(time_t now); /* Functions to save and restore the current symbol scope. 
*/ -char const *sym_GetCurrentSymbolScope(void); -void sym_SetCurrentSymbolScope(char const *newScope); +struct String const *sym_GetCurrentSymbolScope(void); +void sym_SetCurrentSymbolScope(struct String const *newScope); #endif /* RGBDS_SYMBOL_H */ diff --git a/include/asm/warning.h b/include/asm/warning.h index 1bd9f53daf..dd508fc07e 100644 --- a/include/asm/warning.h +++ b/include/asm/warning.h @@ -30,7 +30,6 @@ enum WarningID { WARNING_EMPTY_MACRO_ARG, /* Empty macro argument */ WARNING_EMPTY_STRRPL, /* Empty second argument in `STRRPL` */ WARNING_LARGE_CONSTANT, /* Constants too large */ - WARNING_LONG_STR, /* String too long for internal buffers */ WARNING_MACRO_SHIFT, /* Shift past available arguments in macro */ WARNING_NESTED_COMMENT, /* Comment-start delimiter in a block comment */ WARNING_OBSOLETE, /* Obsolete things */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b72eb89096..f2052dbf71 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -54,6 +54,7 @@ set(rgbasm_src "asm/output.c" "asm/rpn.c" "asm/section.c" + "asm/string.c" "asm/symbol.c" "asm/util.c" "asm/warning.c" diff --git a/src/asm/format.c b/src/asm/format.c index 39b6a5b010..afe4c5b85f 100644 --- a/src/asm/format.c +++ b/src/asm/format.c @@ -16,6 +16,7 @@ #include #include "asm/format.h" +#include "asm/string.h" #include "asm/warning.h" struct FormatSpec fmt_NewSpec(void) @@ -133,7 +134,7 @@ void fmt_FinishCharacters(struct FormatSpec *fmt) fmt->state = FORMAT_INVALID; } -void fmt_PrintString(char *buf, size_t bufLen, struct FormatSpec const *fmt, char const *value) +struct String *fmt_PrintString(struct FormatSpec const *fmt, struct String *value) { if (fmt->sign) error("Formatting string with sign flag '%c'\n", fmt->sign); @@ -146,33 +147,28 @@ void fmt_PrintString(char *buf, size_t bufLen, struct FormatSpec const *fmt, cha if (fmt->type != 's') error("Formatting string as type '%c'\n", fmt->type); - size_t len = strlen(value); + size_t len = str_Len(value); size_t 
totalLen = fmt->width > len ? fmt->width : len; - - if (totalLen > bufLen - 1) { /* bufLen includes terminator */ - error("Formatted string value too long\n"); - totalLen = bufLen - 1; - if (len > totalLen) - len = totalLen; - } - assert(len < bufLen && totalLen < bufLen && len <= totalLen); - size_t padLen = totalLen - len; + struct String *str = str_New(totalLen); + + if (!str) + return NULL; if (fmt->alignLeft) { - memcpy(buf, value, len); - for (size_t i = len; i < totalLen; i++) - buf[i] = ' '; + MUTATE_STR(str, str_Append(str, value)); + for (size_t i = 0; i < padLen; i++) + MUTATE_STR(str, str_Push(str, ' ')); } else { for (size_t i = 0; i < padLen; i++) - buf[i] = ' '; - memcpy(buf + padLen, value, len); + MUTATE_STR(str, str_Push(str, ' ')); + MUTATE_STR(str, str_Append(str, value)); } - buf[totalLen] = '\0'; + return str; } -void fmt_PrintNumber(char *buf, size_t bufLen, struct FormatSpec const *fmt, uint32_t value) +struct String *fmt_PrintNumber(struct FormatSpec const *fmt, uint32_t value) { if (fmt->type != 'X' && fmt->type != 'x' && fmt->type != 'b' && fmt->type != 'o' && fmt->prefix) @@ -257,48 +253,42 @@ void fmt_PrintNumber(char *buf, size_t bufLen, struct FormatSpec const *fmt, uin size_t len = strlen(valueBuf); size_t numLen = !!sign + !!prefix + len; size_t totalLen = fmt->width > numLen ? 
fmt->width : numLen; + size_t padLen = totalLen - numLen; + struct String *str = str_New(totalLen); - if (totalLen > bufLen - 1) { /* bufLen includes terminator */ - error("Formatted numeric value too long\n"); - totalLen = bufLen - 1; - if (numLen > totalLen) { - len -= numLen - totalLen; - numLen = totalLen; - } - } - assert(numLen < bufLen && totalLen < bufLen && numLen <= totalLen && len <= numLen); + if (!str) + return NULL; - size_t padLen = totalLen - numLen; size_t pos = 0; if (fmt->alignLeft) { if (sign) - buf[pos++] = sign; + MUTATE_STR(str, str_Push(str, sign)); if (prefix) - buf[pos++] = prefix; - memcpy(buf + pos, valueBuf, len); - for (size_t i = pos + len; i < totalLen; i++) - buf[i] = ' '; + MUTATE_STR(str, str_Push(str, prefix)); + MUTATE_STR(str, str_AppendSlice(str, valueBuf, len)); + for (size_t i = 0; i < padLen; i++) + MUTATE_STR(str, str_Push(str, ' ')); } else { if (fmt->padZero) { /* sign, then prefix, then zero padding */ if (sign) - buf[pos++] = sign; + MUTATE_STR(str, str_Push(str, sign)); if (prefix) - buf[pos++] = prefix; + MUTATE_STR(str, str_Push(str, prefix)); for (size_t i = 0; i < padLen; i++) - buf[pos++] = '0'; + MUTATE_STR(str, str_Push(str, '0')); } else { /* space padding, then sign, then prefix */ for (size_t i = 0; i < padLen; i++) - buf[pos++] = ' '; + MUTATE_STR(str, str_Push(str, ' ')); if (sign) - buf[pos++] = sign; + MUTATE_STR(str, str_Push(str, sign)); if (prefix) - buf[pos++] = prefix; + MUTATE_STR(str, str_Push(str, prefix)); } - memcpy(buf + pos, valueBuf, len); + MUTATE_STR(str, str_AppendSlice(str, valueBuf, len)); } - buf[totalLen] = '\0'; + return str; } diff --git a/src/asm/fstack.c b/src/asm/fstack.c index 75b2f2f3c5..5ccb1a0c9d 100644 --- a/src/asm/fstack.c +++ b/src/asm/fstack.c @@ -17,6 +17,7 @@ #include "asm/fstack.h" #include "asm/macro.h" #include "asm/main.h" +#include "asm/string.h" #include "asm/symbol.h" #include "asm/warning.h" #include "platform.h" /* S_ISDIR (stat macro) */ @@ -38,7 +39,7 @@ 
struct Context { uint32_t nbReptIters; int32_t forValue; int32_t forStep; - char *forName; + struct String *forName; }; static struct Context *contextStack; @@ -354,18 +355,18 @@ void fstk_RunInclude(char const *path) macro_SetUniqueID(0); } -void fstk_RunMacro(char const *macroName, struct MacroArgs *args) +void fstk_RunMacro(struct String *macroName, struct MacroArgs *args) { - dbgPrint("Running macro \"%s\"\n", macroName); + dbgPrint("Running macro \"%" PRI_STR "\"\n", STR_FMT(macroName)); struct Symbol *macro = sym_FindExactSymbol(macroName); if (!macro) { - error("Macro \"%s\" not defined\n", macroName); + error("Macro \"%" PRI_STR "\" not defined\n", STR_FMT(macroName)); return; } if (macro->type != SYM_MACRO) { - error("\"%s\" is not a macro\n", macroName); + error("\"%" PRI_STR "\" is not a macro\n", STR_FMT(macroName)); return; } contextStack->macroArgs = macro_GetCurrentArgs(); @@ -386,12 +387,13 @@ void fstk_RunMacro(char const *macroName, struct MacroArgs *args) } struct FileStackNamedNode const *baseNode = (struct FileStackNamedNode const *)node; size_t baseLen = strlen(baseNode->name); - size_t macroNameLen = strlen(macro->name); + size_t macroNameLen = str_Len(macro->name); struct FileStackNamedNode *fileInfo = malloc(sizeof(*fileInfo) + baseLen + reptNameLen + 2 + macroNameLen + 1); if (!fileInfo) { - error("Failed to alloc file info for \"%s\": %s\n", macro->name, strerror(errno)); + error("Failed to alloc file info for \"%" PRI_STR "\": %s\n", STR_FMT(macro->name), + strerror(errno)); return; } fileInfo->node.type = NODE_MACRO; @@ -414,7 +416,7 @@ void fstk_RunMacro(char const *macroName, struct MacroArgs *args) } *dest++ = ':'; *dest++ = ':'; - memcpy(dest, macro->name, macroNameLen + 1); + memcpy(dest, str_Chars(macro->name), macroNameLen + 1); newContext((struct FileStackNode *)fileInfo); contextStack->lexerState = lexer_OpenFileView("MACRO", macro->macro, macro->macroSize, @@ -472,11 +474,14 @@ void fstk_RunRept(uint32_t count, int32_t 
reptLineNo, char *body, size_t size) contextStack->forName = NULL; } -void fstk_RunFor(char const *symName, int32_t start, int32_t stop, int32_t step, - int32_t reptLineNo, char *body, size_t size) +void fstk_RunFor(struct String *symName, int32_t start, int32_t stop, int32_t step, + int32_t reptLineNo, char *body, size_t size) { - dbgPrint("Running FOR(\"%s\", %" PRId32 ", %" PRId32 ", %" PRId32 ")\n", - symName, start, stop, step); + dbgPrint("Running FOR(\"%" PRI_STR "\", %" PRId32 ", %" PRId32 ", %" PRId32 ")\n", + STR_FMT(symName), start, stop, step); + + assert(0 || "Check ownership of FOR sym name on exit, and also here when erroring out"); + str_Ref(symName); struct Symbol *sym = sym_AddSet(symName, start); @@ -504,9 +509,7 @@ void fstk_RunFor(char const *symName, int32_t start, int32_t stop, int32_t step, contextStack->nbReptIters = count; contextStack->forValue = start; contextStack->forStep = step; - contextStack->forName = strdup(symName); - if (!contextStack->forName) - fatalerror("Not enough memory for FOR symbol name: %s\n", strerror(errno)); + contextStack->forName = symName; } void fstk_StopRept(void) diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 2cd350073b..97470ec631 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -33,6 +33,7 @@ #include "asm/macro.h" #include "asm/main.h" #include "asm/rpn.h" +#include "asm/string.h" #include "asm/symbol.h" #include "asm/util.h" #include "asm/warning.h" @@ -298,11 +299,6 @@ static struct KeywordMapping { {".", T_PERIOD}, }; -static bool isWhitespace(int c) -{ - return c == ' ' || c == '\t'; -} - #define LEXER_BUF_SIZE 42 /* TODO: determine a sane value for this */ /* The buffer needs to be large enough for the maximum `peekInternal` lookahead distance */ static_assert(LEXER_BUF_SIZE > 1, "Lexer buffer size is too small"); @@ -1110,7 +1106,7 @@ static void readLineContinuation(void) for (;;) { int c = peek(); - if (isWhitespace(c)) { + if (str_IsWhitespace(c)) { shiftChar(); } else if (c == '\r' || c 
== '\n') { shiftChar(); @@ -1141,7 +1137,7 @@ static void readAnonLabelRef(char c) n++; } while (peek() == c); - sym_WriteAnonLabelName(yylval.symName, n, c == '-'); + yylval.symName = sym_WriteAnonLabelName(n, c == '-'); } /* Functions to lex numbers of various radixes */ @@ -1320,18 +1316,24 @@ static int readIdentifier(char firstChar) { dbgPrint("Reading identifier or keyword\n"); /* Lex while checking for a keyword */ - yylval.symName[0] = firstChar; + yylval.symName = str_New(0); + if (!yylval.symName) + goto fail; + yylval.symName = str_Push(yylval.symName, firstChar); + if (!yylval.symName) + goto fail; uint16_t nodeID = keywordDict[0].children[dictIndex(firstChar)]; int tokenType = firstChar == '.' ? T_LOCAL_ID : T_ID; - size_t i = 1; /* Continue reading while the char is in the symbol charset */ - for (int c = peek(); continuesIdentifier(c); i++, c = peek()) { + for (int c = peek(); continuesIdentifier(c); c = peek()) { shiftChar(); if (i < sizeof(yylval.symName) - 1) { /* Write the char to the identifier's name */ - yylval.symName[i] = c; + yylval.symName = str_Push(yylval.symName, c); + if (!yylval.symName) + goto fail; /* If the char was a dot, mark the identifier as local */ if (c == '.') @@ -1343,30 +1345,31 @@ static int readIdentifier(char firstChar) } } - if (i > sizeof(yylval.symName) - 1) { - warning(WARNING_LONG_STR, "Symbol name too long, got truncated\n"); - i = sizeof(yylval.symName) - 1; - } - yylval.symName[i] = '\0'; /* Terminate the string */ - dbgPrint("Ident/keyword = \"%s\"\n", yylval.symName); + dbgPrint("Ident/keyword = \"%" PRI_STR "\"\n", STR_FMT(yylval.symName)); + + if (!keywordDict[nodeID].keyword) + return tokenType; - if (keywordDict[nodeID].keyword) - return keywordDict[nodeID].keyword->token; + str_Unref(yylval.symName); + return keywordDict[nodeID].keyword->token; - return tokenType; +fail: + fatalerror("Failed to read identifier or keyword: %s\n", strerror(errno)); } /* Functions to read strings */ -static char const 
*readInterpolation(size_t depth) +static struct String *readInterpolation(size_t depth) { if (depth >= maxRecursionDepth) fatalerror("Recursion limit (%zu) exceeded\n", maxRecursionDepth); - char symName[MAXSYMLEN + 1]; - size_t i = 0; struct FormatSpec fmt = fmt_NewSpec(); bool disableInterpolation = lexerState->disableInterpolation; + struct String *symName = str_New(0); + + if (!symName) + goto finish; /* * In a context where `lexerState->disableInterpolation` is true, `peek` will expand @@ -1380,10 +1383,10 @@ static char const *readInterpolation(size_t depth) if (c == '{') { /* Nested interpolation */ shiftChar(); - char const *str = readInterpolation(depth + 1); + struct String *sub = readInterpolation(depth + 1); - if (str && str[0]) - beginExpansion(str, false, str); + if (sub && str_Len(sub)) + beginExpansion(sub, false, sub); continue; /* Restart, reading from the new buffer */ } else if (c == EOF || c == '\r' || c == '\n' || c == '"') { error("Missing }\n"); @@ -1393,62 +1396,56 @@ static char const *readInterpolation(size_t depth) break; } else if (c == ':' && !fmt_IsFinished(&fmt)) { /* Format spec, only once */ shiftChar(); - for (size_t j = 0; j < i; j++) - fmt_UseCharacter(&fmt, symName[j]); + for (size_t i = 0; i < str_Len(symName); i++) + fmt_UseCharacter(&fmt, str_Index(symName, i)); fmt_FinishCharacters(&fmt); - symName[i] = '\0'; if (!fmt_IsValid(&fmt)) - error("Invalid format spec '%s'\n", symName); - i = 0; /* Now that format has been set, restart at beginning of string */ + error("Invalid format spec '%" PRI_STR "'\n", STR_FMT(symName)); + /* Now that format has been set, restart at beginning of string */ + str_Trunc(symName, 0); } else { shiftChar(); - if (i < sizeof(symName)) /* Allow writing an extra char to flag overflow */ - symName[i++] = c; + symName = str_Push(symName, c); + if (!symName) + goto finish; } } - if (i == sizeof(symName)) { - warning(WARNING_LONG_STR, "Interpolated symbol name too long\n"); - i--; - } - symName[i] = 
'\0'; - +finish: /* Don't return before `lexerState->disableInterpolation` is reset! */ lexerState->disableInterpolation = disableInterpolation; - static char buf[MAXSTRLEN + 1]; + if (!symName) { + fatalerror("Failed to read interpolation: %s\n", strerror(errno)); + return NULL; + } struct Symbol const *sym = sym_FindScopedSymbol(symName); + struct String *str = NULL; if (!sym) { - error("Interpolated symbol \"%s\" does not exist\n", symName); + error("Interpolated symbol \"%" PRI_STR "\" does not exist\n", STR_FMT(symName)); } else if (sym->type == SYM_EQUS) { if (fmt_IsEmpty(&fmt)) /* No format was specified */ fmt.type = 's'; - fmt_PrintString(buf, sizeof(buf), &fmt, sym_GetStringValue(sym)); - return buf; + str = fmt_PrintString(&fmt, sym_GetStringValue(sym)); } else if (sym_IsNumeric(sym)) { if (fmt_IsEmpty(&fmt)) { /* No format was specified; default to uppercase $hex */ fmt.type = 'X'; fmt.prefix = true; } - fmt_PrintNumber(buf, sizeof(buf), &fmt, sym_GetConstantSymValue(sym)); - return buf; + str = fmt_PrintNumber(&fmt, sym_GetConstantSymValue(sym)); } else { error("Only numerical and string symbols can be interpolated\n"); } - return NULL; -} -#define append_yylval_string(c) do { \ - char v = (c); /* Evaluate c exactly once in case it has side effects. 
*/ \ - if (i < sizeof(yylval.string)) \ - yylval.string[i++] = v; \ -} while (0) + str_Unref(symName); + return str; +} -static size_t appendEscapedSubstring(char const *str, size_t i) +static void appendEscapedSubstring(struct String *str) { /* Copy one extra to flag overflow */ while (*str) { @@ -1459,26 +1456,28 @@ static size_t appendEscapedSubstring(char const *str, size_t i) case '\\': case '"': case '{': - append_yylval_string('\\'); + yylval.string = str_Push(yylval.string, '\\'); break; case '\n': - append_yylval_string('\\'); + yylval.string = str_Push(yylval.string, '\\'); c = 'n'; break; case '\r': - append_yylval_string('\\'); + yylval.string = str_Push(yylval.string, '\\'); c = 'r'; break; case '\t': - append_yylval_string('\\'); + yylval.string = str_Push(yylval.string, '\\'); c = 't'; break; } + if (!yylval.string) + return; - append_yylval_string(c); + yylval.string = str_Push(yylval.string, c); + if (!yylval.string) + return; } - - return i; } static void readString(void) @@ -1487,9 +1486,13 @@ static void readString(void) lexerState->disableMacroArgs = true; lexerState->disableInterpolation = true; + yylval.string = str_New(0); + if (!yylval.string) + return; + size_t i = 0; bool multiline = false; - char const *str; + struct String *sub; // We reach this function after reading a single quote, but we also support triple quotes if (peek() == '"') { @@ -1532,7 +1535,9 @@ static void readString(void) break; shiftChar(); if (peek() != '"') { - append_yylval_string('"'); + yylval.string = str_Push(yylval.string, '"'); + if (!yylval.string) + return; break; } shiftChar(); @@ -1584,10 +1589,12 @@ static void readString(void) case '9': case '<': shiftChar(); - str = readMacroArg(c); - if (str) { - while (*str) - append_yylval_string(*str++); + sub = readMacroArg(c); + if (sub) { + yylval.string = str_Append(yylval.string, sub); + str_Unref(sub); + if (!yylval.string) + return; } continue; // Do not copy an additional character @@ -1607,10 +1614,12 @@ 
static void readString(void) // We'll be exiting the string scope, so re-enable expansions // (Not interpolations, since they're handled by the function itself...) lexerState->disableMacroArgs = false; - str = readInterpolation(0); - if (str) { - while (*str) - append_yylval_string(*str++); + sub = readInterpolation(0); + if (sub) { + yylval.string = str_Append(yylval.string, sub); + str_Unref(sub); + if (!yylval.string) + return; } lexerState->disableMacroArgs = true; continue; // Do not copy an additional character @@ -1618,38 +1627,40 @@ static void readString(void) // Regular characters will just get copied } - append_yylval_string(c); + yylval.string = str_Push(yylval.string, c); + if (!yylval.string) + return; } finish: - if (i == sizeof(yylval.string)) { - i--; - warning(WARNING_LONG_STR, "String constant too long\n"); - } - yylval.string[i] = '\0'; - - dbgPrint("Read string \"%s\"\n", yylval.string); + dbgPrint("Read string \"%" PRI_STR "\"\n", STR_FMT(yylval.string)); lexerState->disableMacroArgs = false; lexerState->disableInterpolation = false; } -static size_t appendStringLiteral(size_t i) +static void appendStringLiteral(void) { dbgPrint("Reading string\n"); lexerState->disableMacroArgs = true; lexerState->disableInterpolation = true; bool multiline = false; - char const *str; + struct String *sub; // We reach this function after reading a single quote, but we also support triple quotes - append_yylval_string('"'); + yyval.string = str_Push(yyval.string, '"'); + if (!yyval.string) + return; if (peek() == '"') { - append_yylval_string('"'); + yyval.string = str_Push(yyval.string, '"'); + if (!yyval.string) + return; shiftChar(); if (peek() == '"') { // """ begins a multi-line string - append_yylval_string('"'); + yyval.string = str_Push(yyval.string, '"'); + if (!yyval.string) + return; shiftChar(); multiline = true; } else { @@ -1684,14 +1695,20 @@ static size_t appendStringLiteral(size_t i) // Only """ ends a multi-line string if (peek() != '"') 
break; - append_yylval_string('"'); + yyval.string = str_Push(yyval.string, '"'); + if (!yyval.string) + return; shiftChar(); if (peek() != '"') break; - append_yylval_string('"'); + yyval.string = str_Push(yyval.string, '"'); + if (!yyval.string) + return; shiftChar(); } - append_yylval_string('"'); + yyval.string = str_Push(yyval.string, '"'); + if (!yyval.string) + return; goto finish; case '\\': // Character escape or macro arg @@ -1706,7 +1723,9 @@ static size_t appendStringLiteral(size_t i) case 'r': case 't': // Return that character unchanged - append_yylval_string('\\'); + yyval.string = str_Push(yyval.string, '\\'); + if (!yyval.string) + return; shiftChar(); break; @@ -1732,9 +1751,13 @@ static size_t appendStringLiteral(size_t i) case '9': case '<': shiftChar(); - str = readMacroArg(c); - if (str && str[0]) - i = appendEscapedSubstring(str, i); + sub = readMacroArg(c); + if (sub) { + appendEscapedSubstring(sub); + str_Unref(sub); + if (!yylval.string) + return; + } continue; // Do not copy an additional character case EOF: // Can't really print that one @@ -1759,30 +1782,28 @@ static size_t appendStringLiteral(size_t i) // We'll be exiting the string scope, so re-enable expansions // (Not interpolations, since they're handled by the function itself...) 
lexerState->disableMacroArgs = false; - str = readInterpolation(0); - if (str && str[0]) - i = appendEscapedSubstring(str, i); + sub = readInterpolation(0); + if (sub) { + appendEscapedSubstring(sub); + str_Unref(sub); + if (!yylval.string) + return; + } lexerState->disableMacroArgs = true; continue; // Do not copy an additional character // Regular characters will just get copied } - append_yylval_string(c); + yyval.string = str_Push(yyval.string, c); + if (!yyval.string) + return; } finish: - if (i == sizeof(yylval.string)) { - i--; - warning(WARNING_LONG_STR, "String constant too long\n"); - } - yylval.string[i] = '\0'; - - dbgPrint("Read string \"%s\"\n", yylval.string); + dbgPrint("Read string \"%" PRI_STR "\"\n", STR_FMT(yylval.string)); lexerState->disableMacroArgs = false; lexerState->disableInterpolation = false; - - return i; } /* Lexer core */ @@ -1905,6 +1926,9 @@ static int yylex_NORMAL(void) return T_COLON; readAnonLabelRef(c); + if (!yylval.symName) + fatalerror("Failed to read anonymous label: %s\n", + strerror(errno)); return T_ANON; /* Handle numbers */ @@ -1957,6 +1981,8 @@ static int yylex_NORMAL(void) case '"': readString(); + if (!yylval.string) + fatalerror("Failed to read string: %s\n", strerror(errno)); return T_STRING; /* Handle newlines and EOF */ @@ -2030,13 +2056,15 @@ static int yylex_RAW(void) dbgPrint("Lexing in raw mode, line=%" PRIu32 ", col=%" PRIu32 "\n", lexer_GetLineNo(), lexer_GetColNo()); - /* This is essentially a modified `appendStringLiteral` */ + yylval.string = str_New(0); + if (!yylval.string) + fatalerror("Failed to alloc raw mode string: %s\n", strerror(errno)); + size_t parenDepth = 0; - size_t i = 0; int c; /* Trim left whitespace (stops at a block comment or line continuation) */ - while (isWhitespace(peek())) + while (str_IsWhitespace(peek())) shiftChar(); for (;;) { @@ -2045,7 +2073,7 @@ static int yylex_RAW(void) switch (c) { case '"': /* String literals inside macro args */ shiftChar(); - i = 
appendStringLiteral(i); + appendStringLiteral(); break; case ';': /* Comments inside macro args */ @@ -2064,7 +2092,7 @@ static int yylex_RAW(void) discardBlockComment(); continue; } - append_yylval_string(c); /* Append the slash */ + yylval.string = str_Push(yylval.string, c); break; case ',': /* End of macro arg */ @@ -2130,23 +2158,18 @@ static int yylex_RAW(void) default: /* Regular characters will just get copied */ append: - append_yylval_string(c); + yylval.string = str_Push(yylval.string, c); shiftChar(); break; } } finish: - if (i == sizeof(yylval.string)) { - i--; - warning(WARNING_LONG_STR, "Macro argument too long\n"); - } /* Trim right whitespace */ - while (i && isWhitespace(yylval.string[i - 1])) - i--; - yylval.string[i] = '\0'; + for (size_t n = str_Len(yylval.string); n && str_IsWhitespace(str_Index(yylval.string, n - 1));) + str_Trunc(yylval.string, --n); - dbgPrint("Read raw string \"%s\"\n", yylval.string); + dbgPrint("Read raw string \"%" PRI_STR "\"\n", STR_FMT(yylval.string)); // Returning T_COMMAs to the parser would mean that two consecutive commas // (i.e. an empty argument) need to return two different tokens (T_STRING @@ -2176,8 +2199,6 @@ static int yylex_RAW(void) return T_EOF; } -#undef append_yylval_string - /* * This function uses the fact that `if`, etc. constructs are only valid when * there's nothing before them on their lines. 
This enables filtering @@ -2203,7 +2224,7 @@ static int skipIfBlock(bool toEndc) for (;;) { c = peek(); - if (!isWhitespace(c)) + if (!str_IsWhitespace(c)) break; shiftChar(); } @@ -2298,7 +2319,7 @@ static int yylex_SKIP_TO_ENDR(void) for (;;) { c = peek(); - if (!isWhitespace(c)) + if (!str_IsWhitespace(c)) break; shiftChar(); } @@ -2436,7 +2457,7 @@ bool lexer_CaptureRept(struct CaptureBody *capture) /* We're at line start, so attempt to match a `REPT` or `ENDR` token */ do { /* Discard initial whitespace */ c = nextChar(); - } while (isWhitespace(c)); + } while (str_IsWhitespace(c)); /* Now, try to match `REPT`, `FOR` or `ENDR` as a **whole** identifier */ if (startsIdentifier(c)) { switch (readIdentifier(c)) { @@ -2510,7 +2531,7 @@ bool lexer_CaptureMacroBody(struct CaptureBody *capture) /* We're at line start, so attempt to match an `ENDM` token */ do { /* Discard initial whitespace */ c = nextChar(); - } while (isWhitespace(c)); + } while (str_IsWhitespace(c)); /* Now, try to match `ENDM` as a **whole** identifier */ if (startsIdentifier(c)) { switch (readIdentifier(c)) { diff --git a/src/asm/parser.y b/src/asm/parser.y index fe69ab4bfc..730a901f92 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -282,69 +282,76 @@ static size_t nextStrFmtArgListIndex(struct StrFmtArgList *args) static void freeStrFmtArgList(struct StrFmtArgList *args) { - free(args->format); + str_Unref(args->format); for (size_t i = 0; i < args->nbArgs; i++) if (!args->args[i].isNumeric) - free(args->args[i].string); + str_Unref(args->args[i].string); free(args->args); } -static void strfmt(char *dest, size_t destLen, char const *fmt, size_t nbArgs, struct StrFmtArg *args) +static struct String *strfmt(struct String const *fmt, size_t nbArgs, struct StrFmtArg *args) { size_t a = 0; - size_t i = 0; + size_t n = str_Len(fmt); + struct String *str = str_New(0); + + if (!str) + return NULL; - while (i < destLen) { - int c = *fmt++; + for (size_t i = 0; i < n;) { + char c = str_Index(fmt, i++); - 
if (c == '\0') { - break; - } else if (c != '%') { - dest[i++] = c; + if (c != '%') { + str = str_Push(str, c); continue; } - c = *fmt++; + if (i == n) { + error("STRFMT: Illegal '%%' at end of format string\n"); + str = str_Push(str, '%'); + break; + } + + c = str_Index(fmt, i++); if (c == '%') { - dest[i++] = c; + str = str_Push(str, '%'); continue; } struct FormatSpec spec = fmt_NewSpec(); - while (c != '\0') { + for (;;) { /* `c` is already read: use it even when it is the format's last character */ fmt_UseCharacter(&spec, c); - if (fmt_IsFinished(&spec)) + if (fmt_IsFinished(&spec) || i == n) /* Stop at the spec's end, or when the format runs out */ break; - c = *fmt++; + c = str_Index(fmt, i++); } - if (fmt_IsEmpty(&spec)) { - error("STRFMT: Illegal '%%' at end of format string\n"); - dest[i++] = '%'; - break; - } else if (!fmt_IsValid(&spec)) { + if (!fmt_IsValid(&spec)) { error("STRFMT: Invalid format spec for argument %zu\n", a + 1); - dest[i++] = '%'; + str = str_Push(str, '%'); a++; continue; } else if (a >= nbArgs) { // Will warn after formatting is done. 
- dest[i++] = '%'; + str = str_Push(str, '%'); a++; continue; } struct StrFmtArg *arg = &args[a++]; - static char buf[MAXSTRLEN + 1]; + struct String *argStr; - if (arg->isNumeric) - fmt_PrintNumber(buf, sizeof(buf), &spec, arg->number); - else - fmt_PrintString(buf, sizeof(buf), &spec, arg->string); + if (arg->isNumeric) { + argStr = fmt_PrintNumber(&spec, arg->number); + } else { + argStr = fmt_PrintString(&spec, arg->string); + str_Unref(arg->string); + } - i += snprintf(&dest[i], destLen - i, "%s", buf); + str = str_Append(str, argStr); + str_Unref(argStr); } if (a < nbArgs) @@ -352,11 +359,7 @@ static void strfmt(char *dest, size_t destLen, char const *fmt, size_t nbArgs, s else if (a > nbArgs) error("STRFMT: Not enough arguments for format spec, got: %zu, need: %zu\n", nbArgs, a); - if (i > destLen - 1) { - warning(WARNING_LONG_STR, "STRFMT: String too long, got truncated\n"); - i = destLen - 1; - } - dest[i] = '\0'; + return str; } static void initDsArgList(struct DsArgList *args) @@ -459,8 +462,8 @@ enum { %union { - char symName[MAXSYMLEN + 1]; - char 
string[MAXSTRLEN + 1]; + struct String *symName; + struct String *string; struct Expression expr; int32_t constValue; enum SectionModifier sectMod; @@ -1614,7 +1617,7 @@ strcat_args : string ; strfmt_args : string strfmt_va_args { - $$.format = strdup($1); + $$.format = $1; // Take ownership of the string $$.capacity = $2.capacity; $$.nbArgs = $2.nbArgs; $$.args = $2.args; @@ -1634,7 +1637,7 @@ strfmt_va_args : %empty { | strfmt_va_args T_COMMA string { size_t i = nextStrFmtArgListIndex(&$1); - $1.args[i].string = strdup($3); + $1.args[i].string = $3; // Take ownership of the string $1.args[i].isNumeric = false; $$ = $1; } diff --git a/src/asm/section.c b/src/asm/section.c index c9cf015f2c..b38b935c2b 100644 --- a/src/asm/section.c +++ b/src/asm/section.c @@ -1,3 +1,10 @@ +/* + * This file is part of RGBDS. + * + * Copyright (c) 1997-2021, Carsten Sorensen and RGBDS contributors. + * + * SPDX-License-Identifier: MIT + */ #include #include diff --git a/src/asm/string.c b/src/asm/string.c new file mode 100644 index 0000000000..86d99bd3ec --- /dev/null +++ b/src/asm/string.c @@ -0,0 +1,154 @@ +/* + * This file is part of RGBDS. + * + * Copyright (c) 2021, Eldred Habert and RGBDS contributors. 
+ * + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include + +#include "asm/string.h" + +#include "helpers.h" + +// A ref-counted string +struct String { + size_t refs; + size_t capacity; + size_t len; + char chars[]; +}; + +size_t str_Len(struct String const *str) attr_(pure) +{ + return str->len; +} + +void str_Trunc(struct String *str, size_t len) +{ + assert(len <= str->len); + + str->len = len; +} + +char str_Index(struct String const *str, size_t i) attr_(pure) +{ + return str->chars[i]; +} + +bool str_Find(struct String const *str, char c) attr_(pure) +{ + for (size_t i = 0; i < str->len; i++) { + if (str->chars[i] == c) + return true; + } + + return false; +} + +char const *str_Chars(struct String const *str) attr_(pure) +{ + return str->chars; +} + +struct String *str_New(size_t capacity) attr_(malloc) +{ + if (capacity == 0) + capacity = 32; + + struct String *str = malloc(sizeof(*str) + capacity); + + if (!str) + return NULL; + + str->refs = 1; + str->capacity = capacity; + str->len = 0; + return str; +} + +void str_Ref(struct String *str) +{ + assert(str->refs < SIZE_MAX); + + str->refs++; +} + +void str_Unref(struct String *str) +{ + assert(str->refs > 0); + + str->refs--; + + if (!str->refs) + free(str); +} + +static bool doubleCapacity(struct String *str) attr_(warn_unused_result) +{ + assert(str->capacity > 0); + + if (str->capacity >= SIZE_MAX - sizeof(*str)) { // Cap so sizeof(*str) + capacity cannot wrap + errno = ERANGE; + return false; + } else if (str->capacity > (SIZE_MAX - sizeof(*str)) / 2) { + str->capacity = SIZE_MAX - sizeof(*str); + } else { + str->capacity *= 2; + } + + return true; +} + +struct String *str_Push(struct String *str, char c) attr_(warn_unused_result) +{ + assert(str->len <= str->capacity); + + if (str->len == str->capacity) { + if (!doubleCapacity(str)) + return NULL; + str = realloc(str, sizeof(*str) + str->capacity); // Header + chars[], as in str_New + if (!str) + return NULL; + } + + 
str->chars[str->len++] = c; + return str; +} + +struct String *str_AppendSlice(struct String *lhs, char const *rhs, size_t len) + attr_(warn_unused_result) +{ + 
assert(lhs->len <= lhs->capacity); + + // Avoid overflow of the combined length and of the allocation size + if (lhs->len > SIZE_MAX - sizeof(*lhs) || len > SIZE_MAX - sizeof(*lhs) - lhs->len) { + errno = ERANGE; + return NULL; + } + + // If the combined len is larger than the capacity, grow lhs + if (lhs->len + len > lhs->capacity) { + if (!doubleCapacity(lhs)) + return NULL; + if (lhs->capacity < lhs->len + len) // Doubling once may not be enough + lhs->capacity = lhs->len + len; + lhs = realloc(lhs, sizeof(*lhs) + lhs->capacity); // Header + chars[], as in str_New + if (!lhs) + return NULL; + } + + // Copy rhs + memcpy(&lhs->chars[lhs->len], rhs, len); + lhs->len += len; + + return lhs; +} + +struct String *str_Append(struct String *lhs, struct String const *rhs) attr_(warn_unused_result) +{ + return str_AppendSlice(lhs, rhs->chars, rhs->len); +} diff --git a/src/asm/symbol.c b/src/asm/symbol.c index 2d6005b1be..f723c0e194 100644 --- a/src/asm/symbol.c +++ b/src/asm/symbol.c @@ -25,6 +25,7 @@ #include "asm/main.h" #include "asm/output.h" #include "asm/section.h" +#include "asm/string.h" #include "asm/symbol.h" #include "asm/util.h" #include "asm/warning.h" @@ -84,42 +85,34 @@ static int32_t Callback__LINE__(void) return lexer_GetLineNo(); } -static char const *Callback__FILE__(void) +static struct String *Callback__FILE__(void) { - /* - * FIXME: this is dangerous, and here's why this is CURRENTLY okay. It's still bad, fix it. - * There are only two call sites for this; one copies the contents directly, the other is - * EQUS expansions, which cannot straddle file boundaries. So this should be fine. 
- */ - static char *buf = NULL; - static size_t bufsize = 0; char const *fileName = fstk_GetFileName(); - size_t j = 1; + struct String *str = str_New(0); + + if (!str) + return str; + + str = str_Push(str, '"'); + if (!str) + return str; assert(fileName[0]); /* The assertion above ensures the loop runs at least once */ - for (size_t i = 0; fileName[i]; i++, j++) { - /* Account for the extra backslash inserted below */ - if (fileName[i] == '"') - j++; - /* Ensure there will be enough room; DO NOT PRINT ANYTHING ABOVE THIS!! 
*/ - if (j + 2 >= bufsize) { /* Always keep room for 2 tail chars */ - bufsize = bufsize ? bufsize * 2 : 64; - buf = realloc(buf, bufsize); - if (!buf) - fatalerror("Failed to grow buffer for file name: %s\n", - strerror(errno)); - } + for (size_t i = 0; fileName[i]; i++) { /* Escape quotes, since we're returning a string */ - if (fileName[i] == '"') - buf[j - 1] = '\\'; - buf[j] = fileName[i]; + if (fileName[i] == '"') { + str = str_Push(str, '\\'); + if (!str) + return NULL; + } + str = str_Push(str, fileName[i]); + if (!str) + return str; } - /* Write everything after the loop, to ensure the buffer has been allocated */ - buf[0] = '"'; - buf[j++] = '"'; - buf[j] = '\0'; - return buf; + + str = str_Push(str, '"'); + return str; } static int32_t CallbackPC(void) @@ -179,17 +172,17 @@ static void updateSymbolFilename(struct Symbol *sym) /* * Create a new symbol by name + * @param symName The symbol's name; ownership taken by the returned symbol */ -static struct Symbol *createsymbol(char const *symName) +static struct Symbol *createsymbol(struct String *symName) { struct Symbol *sym = malloc(sizeof(*sym)); if (!sym) - fatalerror("Failed to create symbol '%s': %s\n", symName, strerror(errno)); - - if (snprintf(sym->name, MAXSYMLEN + 1, "%s", symName) > MAXSYMLEN) - warning(WARNING_LONG_STR, "Symbol name is too long: '%s'\n", symName); + fatalerror("Failed to create symbol '%" PRI_STR "': %s\n", + STR_FMT(symName), strerror(errno)); + sym->name = symName; sym->isExported = false; sym->isBuiltin = false; sym->hasCallback = false; @@ -360,20 +353,20 @@ void sym_SetCurrentSymbolScope(char const *newScope) * @param symName The name of the symbol to create * @param numeric If false, the symbol may not have been referenced earlier */ -static struct Symbol *createNonrelocSymbol(char const *symName, bool numeric) +static struct Symbol *createNonrelocSymbol(struct String *symName, bool numeric) { struct Symbol *sym = sym_FindExactSymbol(symName); if (!sym) { sym = 
createsymbol(symName); } else if (sym_IsDefined(sym)) { - error("'%s' already defined at ", symName); + error("'%" PRI_STR "' already defined at ", STR_FMT(symName)); dumpFilename(sym); putc('\n', stderr); return NULL; // Don't allow overriding the symbol, that'd be bad! } else if (!numeric) { // The symbol has already been referenced, but it's not allowed - error("'%s' already referenced at ", symName); + error("'%" PRI_STR "' already referenced at ", STR_FMT(symName)); dumpFilename(sym); putc('\n', stderr); return NULL; // Don't allow overriding the symbol, that'd be bad! @@ -478,15 +471,15 @@ struct Symbol *sym_RedefString(char const *symName, char const *value) /* * Alter a SET symbol's value */ -struct Symbol *sym_AddSet(char const *symName, int32_t value) +struct Symbol *sym_AddSet(struct String *symName, int32_t value) { struct Symbol *sym = sym_FindExactSymbol(symName); if (!sym) { sym = createsymbol(symName); } else if (sym_IsDefined(sym) && sym->type != SYM_SET) { - error("'%s' already defined as %s at ", - symName, sym->type == SYM_LABEL ? "label" : "constant"); + error("'%" PRI_STR "' already defined as %s at ", + STR_FMT(symName), sym->type == SYM_LABEL ? "label" : "constant"); dumpFilename(sym); putc('\n', stderr); return sym; @@ -577,7 +570,7 @@ struct Symbol *sym_AddLocalLabel(char const *symName) /* * Add a relocatable symbol */ -struct Symbol *sym_AddLabel(char const *symName) +struct Symbol *sym_AddLabel(struct String *symName) { struct Symbol *sym = addLabel(symName); @@ -598,9 +591,12 @@ struct Symbol *sym_AddAnonLabel(void) error("Only %" PRIu32 " anonymous labels can be created!", anonLabelID); return NULL; } - char name[MAXSYMLEN + 1]; - sym_WriteAnonLabelName(name, 0, true); // The direction is important!! + struct String *name = sym_WriteAnonLabelName(0, true); // The direction is important! 
+ + if (!name) + fatalerror("Failed to write anonymous label name: %s\n", strerror(errno)); + anonLabelID++; return addLabel(name); } @@ -608,7 +604,7 @@ struct Symbol *sym_AddAnonLabel(void) /* * Write an anonymous label's name to a buffer */ -void sym_WriteAnonLabelName(char buf[MIN_NB_ELMS(MAXSYMLEN + 1)], uint32_t ofs, bool neg) +struct String *sym_WriteAnonLabelName(uint32_t ofs, bool neg) { uint32_t id = 0; @@ -628,7 +624,22 @@ void sym_WriteAnonLabelName(char buf[MIN_NB_ELMS(MAXSYMLEN + 1)], uint32_t ofs, id = anonLabelID + ofs; } - sprintf(buf, "!%u", id); + struct String *name = str_New(9); + + if (name) { + // Begin the name with a character normally illegal in symbol names + // That way, anonymous labels *cannot* be referenced directly + MUTATE_STR(name, str_Push(name, '!')); + for (uint8_t i = 0; i < 8; i++) { + uint32_t shift = 32 - (i + 1) * 4; + uint32_t nybble = (id >> shift) & 0xF; + char c = nybble + (nybble > 9 ? 'a' - 10 : '0'); + + MUTATE_STR(name, str_Push(name, c)); + } + } + + return name; } /* diff --git a/src/asm/warning.c b/src/asm/warning.c index dc2338662e..419e84fbb0 100644 --- a/src/asm/warning.c +++ b/src/asm/warning.c @@ -31,7 +31,6 @@ static enum WarningState const defaultWarnings[NB_WARNINGS] = { [WARNING_EMPTY_MACRO_ARG] = WARNING_DISABLED, [WARNING_EMPTY_STRRPL] = WARNING_DISABLED, [WARNING_LARGE_CONSTANT] = WARNING_DISABLED, - [WARNING_LONG_STR] = WARNING_DISABLED, [WARNING_MACRO_SHIFT] = WARNING_DISABLED, [WARNING_NESTED_COMMENT] = WARNING_ENABLED, [WARNING_OBSOLETE] = WARNING_ENABLED, @@ -74,7 +73,6 @@ static char const *warningFlags[NB_WARNINGS_ALL] = { "empty-macro-arg", "empty-strrpl", "large-constant", - "long-string", "macro-shift", "nested-comment", "obsolete",