-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.c
188 lines (167 loc) · 3.88 KB
/
parser.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "wannabe-lisp.h"
/*
* Table of abbreviations, as shown in
* r6rs.pdf, section 4.3.5, page 17
*/
struct {
char *shorthand;
char *internal;
} abbrev[] = {
{"'", "quote"},
{"`", "quasiquote"},
{",@", "unquote-splicing"},
{",", "unquote"},
{"#'", "syntax"},
{"#`", "quasisyntax"},
{"#,@", "unsyntax-splicing"},
{"#,", "unsyntax"},
{NULL, NULL}};
/*
* This checks if a character can be reasonably
* part of a symbol-name
*/
int symbol_name_char(char c)
{
return
c != '('
&& c != ')'
&& c != ' '
&& c != '\t'
&& c != '\n';
}
/*
* Recursive parsing function.
* Returns pointer to last-parsed character
* in `expr'. Builds parsed LISP structures
* into the `l' list pointer, which has to
* be allocated prior to calling build().
*/
char* build(list_t* l, char *expr)
{
char *p, *q;
char *old;
char* tok;
int sgn;
list_t* child;
int len;
int i;
tok = malloc(SYMBOL_NAME_MAXLEN);
if (!tok) {
error_msg("malloc failed");
code_error();
}
p = expr;
/*
* Deal with abbreviations
* e.g. 'x => (quote x)
* using the `abbrev' entry {"'", "quote"}
*/
for (i = 0; abbrev[i].shorthand; ++i) {
if (!strncmp(abbrev[i].shorthand, p, strlen(abbrev[i].shorthand))) {
child = new_list();
p = build(child, p + strlen(abbrev[i].shorthand));
add_child(l, mksym(abbrev[i].internal));
add_child(l, child);
l->type = LIST;
goto final;
}
}
/* Parse a list: (... */
if (*p == '(') {
++p; /* eat ( */
l->type = LIST; /* setup list-object */
l->cc = 0; /* initialize child count */
/* ======================================= */
while (*p != ')' && *p) {
/* Consume whitespace */
while (*p && (*p == ' ' || *p == '\t'))
++p;
/* Stop if there's nothing left now */
if (!*p)
break;
/* Try to parse a list-child */
child = new_list();
old = p;
p = build(child, p);
/* Check if list-child-parsing failed */
if (p == old)
break; /* yes; stop */
/* no; add child to the list, and continue */
add_child(l, child);
}
/* ======================================= */
if (*p++ != ')') {
error_msg("Error: ) expected");
code_error();
}
}
/* Parse a number: 123, -123 */
else if (isnum(*p) || (*p == '-' && isalnum(*(p+1)))) {
/*
* Eat up the sign if there is one, and set
* `sgn' accordingly'
*/
if (*p == '-') {
sgn = -1;
++p;
} else {
sgn = 1;
}
/* ======================================= */
q = tok; /* use `tok' heapbuffer as copy dest */
len = 0; /* used to check if symbolname is too long */
while (*p && isnum(*p)) {
*q++ = *p++;
if (++len >= SYMBOL_NAME_MAXLEN) {
error_msg("numeral too long");
code_error();
}
}
*q = 0; /* null-terminate the string */
/* ======================================= */
/* Make the number object */
l->type = NUMBER;
/*
* Don't worry about the sscanf, the code above
* already ensures `tok' does not overflow
* `SYMBOL_NAME_MAXLEN' characters
*/
sscanf(tok, "%d", &(l->val));
l->val *= sgn;
/* Anything else is probably a symbol */
} else {
/*
* Well, maybe there's some whitespace first,
* eat that up first
*/
while (*p == ' ' || *p == '\t')
++p;
/* ======================================= */
q = tok; /* use `tok' heapbuffer as copy dest */
len = 0; /* used to check if symbolname is too long */
while (*p && symbol_name_char(*p)) {
*q++ = *p++;
if (++len >= SYMBOL_NAME_MAXLEN) {
error_msg("symbol name too long");
code_error();
}
}
*q = 0; /* null-terminate the string */
/* ======================================= */
/* Make the symbol object */
l->type = SYMBOL;
strcpy(l->head, tok);
}
/*
* "tok" is a heap buffer, so it must be freed.
* i'm not using a stack one because emscripten
* behaves erratically when these are used.
*/
final:
free(tok);
return p;
}