From 793d3b8de7e5f7a2a4cdafee3aafbb4dbca49bc2 Mon Sep 17 00:00:00 2001 From: shenao78 Date: Fri, 6 Aug 2021 16:24:44 +0800 Subject: [PATCH 1/4] support string literal --- Makefile | 4 +++- compiler/create.c | 7 +++++++ compiler/grammar.y | 8 +++++--- compiler/lex.l | 49 +++++++++++++++++++++++++++++++++++++++----- compiler/string.c | 44 +++++++++++++++++++++++++++++++++++++++ compiler/summoner.h | 8 ++++++++ interpreter/eval.cpp | 7 +++++++ interpreter/eval.h | 2 ++ 8 files changed, 120 insertions(+), 9 deletions(-) create mode 100644 compiler/string.c diff --git a/Makefile b/Makefile index 44a2bd0..16e960f 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -output: main.o lex.o grammar.o create.o compiler.o codegen.o opcode.o +output: main.o lex.o grammar.o create.o string.o compiler.o codegen.o opcode.o $(CC) -o bin/main $^ main.o : main/main.c $(CC) -c main/main.c @@ -16,6 +16,8 @@ grammar.o : compiler/grammar.y lex.o : compiler/lex.l grammar.o lex $< $(CC) -c lex.yy.c -o lex.o +string.o : compiler/string.c + $(CC) -c $< -o string.o clean: rm y.tab.* diff --git a/compiler/create.c b/compiler/create.c index 28688fa..44b503d 100644 --- a/compiler/create.c +++ b/compiler/create.c @@ -37,6 +37,13 @@ Expression *alloc_bool_expression(bool value) return expr; } +Expression *alloc_string_expression(char *value) +{ + Expression *expr = alloc_expression(STRING_EXPRESSION); + expr->u.str_value = value; + return expr; +} + Expression *alloc_unary_expression(ExpressionKind kind, Expression *unaryExpr) { Expression *expr = alloc_expression(kind); diff --git a/compiler/grammar.y b/compiler/grammar.y index 8bc4326..785e399 100644 --- a/compiler/grammar.y +++ b/compiler/grammar.y @@ -9,6 +9,7 @@ int yyerror(const char *s); %union { char *identifier; + char *str_value; double double_value; int int_value; struct Expression* expression; @@ -24,10 +25,10 @@ int yyerror(const char *s); struct ArgumentList *argument_list; } -%token BOOL_LITERAL %token DOUBLE_LITERAL -%token INT_LITERAL -%token IDENTIFIER; +%token INT_LITERAL BOOL_LITERAL +%token STRING_LITERAL +%token IDENTIFIER %token VAR CONST FUNCTION IF ELSE FOR RETURN BREAK CONTINUE NIL %token BOOL_T INT_T DOUBLE_T STRING_T @@ -183,6 +184,7 @@ literal: INT_LITERAL { $$ = alloc_int_expression($1); } | DOUBLE_LITERAL { $$ = alloc_double_expression($1); } | BOOL_LITERAL { $$ = alloc_bool_expression($1); } + | STRING_LITERAL { $$ = alloc_string_expression($1); } ; bool_expr: diff --git a/compiler/lex.l b/compiler/lex.l index 83b86d1..d25190a 100644 --- a/compiler/lex.l +++ b/compiler/lex.l @@ -2,15 +2,57 @@ #include #include #include "y.tab.h" +#include "compiler/summoner.h" + +void lex_err(char *str) { + fprintf(stderr, "lexical error:%s\n", str); + exit(1); +} %} -%x COMMENT +%x COMMENT STRING_STATE STRING_ESCAPE RAW_STRING_STATE %% "//" BEGIN(COMMENT); \n { BEGIN(0); return '\n'; } . ; // eat anything in comment +` { + open_string_literal(); + BEGIN(RAW_STRING_STATE); +} +` { + yylval.str_value = close_string_literal(); + BEGIN(0); + return STRING_LITERAL; +} +\n add_string_literal('\n'); +. add_string_literal(yytext[0]); + +\" { + open_string_literal(); + BEGIN(STRING_STATE); +} + +\" { + yylval.str_value = close_string_literal(); + BEGIN(0); + return STRING_LITERAL; +} +\\ BEGIN(STRING_ESCAPE); +\n lex_err("new line exist in interpreted string literals"); +. add_string_literal(yytext[0]); + +\" { add_string_literal('"'); BEGIN(STRING_STATE); } +b { add_string_literal('\b'); BEGIN(STRING_STATE); } +f { add_string_literal('\f'); BEGIN(STRING_STATE); } +n { add_string_literal('\n'); BEGIN(STRING_STATE); } +r { add_string_literal('\r'); BEGIN(STRING_STATE); } +t { add_string_literal('\t'); BEGIN(STRING_STATE); } +v { add_string_literal('\v'); BEGIN(STRING_STATE); } +\\ { add_string_literal('\\'); BEGIN(STRING_STATE); } +. lex_err("unknow escape"); + [+\-*/\(\)<>!{}\n=,] { return *yytext; } @@ -65,10 +107,7 @@ [ \t] ; -. { - fprintf(stderr, "lexical error:%s\n", yytext); - exit(1); -} +. { lex_err(yytext); } %% int yywrap(void) { diff --git a/compiler/string.c b/compiler/string.c new file mode 100644 index 0000000..843c345 --- /dev/null +++ b/compiler/string.c @@ -0,0 +1,44 @@ + +#include +#include +#include + +const int STRING_ALLOC_SIZE = 128; + +static char *st_string_literal_buffer = NULL; +static int st_string_literal_buffer_size = 0; +static int st_string_literal_buffer_alloc_size = 0; + +void open_string_literal(void) +{ + st_string_literal_buffer_size = 0; +} + +void add_string_literal(int letter) +{ + if (st_string_literal_buffer_size == st_string_literal_buffer_alloc_size) + { + st_string_literal_buffer_alloc_size += STRING_ALLOC_SIZE; + st_string_literal_buffer = (char *) realloc(st_string_literal_buffer, + st_string_literal_buffer_alloc_size); + } + st_string_literal_buffer[st_string_literal_buffer_size] = letter; + st_string_literal_buffer_size++; +} + +void reset_string_literal_buffer() +{ + free(st_string_literal_buffer); + st_string_literal_buffer = NULL; + st_string_literal_buffer_size = 0; + st_string_literal_buffer_alloc_size = 0; +} + +char *close_string_literal(void) +{ + char *p = (char *) malloc(st_string_literal_buffer_size + 1); + strcpy(p, st_string_literal_buffer); + p[st_string_literal_buffer_size] = '\0'; + reset_string_literal_buffer(); + return p; +} diff --git a/compiler/summoner.h b/compiler/summoner.h index 652a479..2160d00 100644 --- a/compiler/summoner.h +++ b/compiler/summoner.h @@ -9,6 +9,7 @@ typedef enum BOOL_EXPRESSION = 1, INT_EXPRESSION, DOUBLE_EXPRESSION, + STRING_EXPRESSION, IDENTIFIER_EXPRESSION, FUNC_CALL_EXPRESSION, ADD_EXPRESSION, @@ -53,6 +54,7 @@ typedef struct Expression bool boolean_value; int int_value; double double_value; + char *str_value; char *identifier; struct BinaryExpression *binary_expression; struct Expression *unary_expression; @@ -83,6 +85,7 @@ Expression *alloc_expression(ExpressionKind kind); Expression *alloc_int_expression(int value); Expression *alloc_double_expression(double value); Expression *alloc_bool_expression(bool value); +Expression *alloc_string_expression(char *value); Expression *alloc_identifier_expression(char *identifier); Expression *alloc_unary_expression(ExpressionKind kind, Expression *unaryExpr); Expression *alloc_binary_expression(ExpressionKind kind, Expression *left, Expression *right); @@ -309,4 +312,9 @@ typedef struct SVM_Executable SVM_CodeBlock top_level; } SVM_Executable; +/** string.c */ +void open_string_literal(void); +void add_string_literal(int letter); +char *close_string_literal(void); + #endif diff --git a/interpreter/eval.cpp b/interpreter/eval.cpp index 93d8ed7..8c46e4d 100644 --- a/interpreter/eval.cpp +++ b/interpreter/eval.cpp @@ -174,6 +174,10 @@ ExprValue Interpreter::eval_expression(Expression *expr) v.type = EXPR_DOUBLE_VALUE; v.u.double_value = expr->u.double_value; return v; + case STRING_EXPRESSION: + v.type = EXPR_STRING_VALUE; + v.u.str_value = expr->u.str_value; + return v; case MINUS_EXPRESSION: v = this->eval_expression(expr->u.unary_expression); if (v.type == EXPR_INT_VALUE) @@ -409,6 +413,9 @@ void print_expr_value(ExprValue val) case EXPR_BOOL_VALUE: printf(">>>%s\n", val.u.boolean_value ? "true" : "false"); break; + case EXPR_STRING_VALUE: + printf(">>>%s\n", val.u.str_value); + break; default: printf("invalid expression type when print expr value:%d", val.type); exit(1); diff --git a/interpreter/eval.h b/interpreter/eval.h index 6a1d24d..bf65a1d 100644 --- a/interpreter/eval.h +++ b/interpreter/eval.h @@ -13,6 +13,7 @@ typedef enum EXPR_BOOL_VALUE = 1, EXPR_INT_VALUE, EXPR_DOUBLE_VALUE, + EXPR_STRING_VALUE, } ExprValueType; typedef struct ExprValue @@ -23,6 +24,7 @@ typedef struct ExprValue bool boolean_value; int int_value; double double_value; + char *str_value; } u; } ExprValue; From 54e0bd9995b0faa2ae0ae71f57c0c20c47a01bb8 Mon Sep 17 00:00:00 2001 From: shenao78 Date: Fri, 6 Aug 2021 16:34:53 +0800 Subject: [PATCH 2/4] opt string error --- compiler/lex.l | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/lex.l b/compiler/lex.l index d25190a..bedf6a5 100644 --- a/compiler/lex.l +++ b/compiler/lex.l @@ -40,7 +40,7 @@ void lex_err(char *str) { return STRING_LITERAL; } \\ BEGIN(STRING_ESCAPE); -\n lex_err("new line exist in interpreted string literals"); +\n lex_err("new line string"); . add_string_literal(yytext[0]); \" { add_string_literal('"'); BEGIN(STRING_STATE); } From c466afc000012d5b862c447611b6265dc2a88d7a Mon Sep 17 00:00:00 2001 From: shenao78 Date: Fri, 6 Aug 2021 16:35:10 +0800 Subject: [PATCH 3/4] opt string error --- compiler/lex.l | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/lex.l b/compiler/lex.l index bedf6a5..b9999b8 100644 --- a/compiler/lex.l +++ b/compiler/lex.l @@ -40,7 +40,7 @@ void lex_err(char *str) { return STRING_LITERAL; } \\ BEGIN(STRING_ESCAPE); -\n lex_err("new line string"); +\n lex_err("new line in string"); . add_string_literal(yytext[0]); \" { add_string_literal('"'); BEGIN(STRING_STATE); } From 0c43ce426a1901ffec352b91add02bd892e1b675 Mon Sep 17 00:00:00 2001 From: shenao78 Date: Fri, 6 Aug 2021 16:37:34 +0800 Subject: [PATCH 4/4] fix make clean --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 16e960f..738306c 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ string.o : compiler/string.c $(CC) -c $< -o string.o clean: - rm y.tab.* - rm lex.yy.c - rm *.o - rm bin/main + rm -f y.tab.* + rm -f lex.yy.c + rm -f *.o + rm -f bin/main