-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathm100-jumps.lex
258 lines (207 loc) · 6.22 KB
/
m100-jumps.lex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
/* m100-jumps.lex
*
* given a M100 BASIC file, output all the line numbers that consist
* of only a comment (for example, "10 REM FOO") and are referenced by
* any part of the program (for example, "20 GOTO 10").
*
* All comments can be removed from a program _except_ those lines.
* Compile with: flex m100-jumps.lex && gcc lex.yy.c
*
* Side note: Technically, "jumps" is misnamed. Although the vast
* majority of references will be via GOTO and GOSUB, it is possible
* to refer to a line number using RESTORE, ERL, LIST, and EDIT.
* For best results, sanitize the source code first using m100-sanity
* which prevents an obscure and highly improbable condition where the
* BASIC program source code might have two lines with the same line
* number like so:
222 PRINT "The next line 222 replaces this one.": GOTO 222
222 REM A decommenter should keep neither line 222, despite the GOTO.
*/
/* Scanner options: have flex emit warnings, and match keywords without
   regard to letter case (so "goto" and "GOTO" are treated alike). */
%option warn
%option case-insensitive
/* Exclusive start conditions used by the rules below:
     string      entered on a double quote outside DATA
     remark      entered on REM or an apostrophe
     data        entered on the DATA keyword
     datastring  entered on a double quote inside DATA */
%x string
%x remark
%x data
%x datastring
/* Named patterns.  LINENUM is a run of decimal digits.  LINELIST is one or
   more line numbers, each optionally preceded by blanks, tabs or commas.
   LINERANGE is an optional number, a dash, and an optional number, with
   optional blanks or tabs around the dash.
   NOTE(review): LINELIST is not referenced by any rule visible in this
   file; the GOTO/GOSUB rule spells the same pattern out inline. */
LINENUM [0-9]+
LINELIST ([ \t,]*{LINENUM})+
LINERANGE {LINENUM}?[ \t]*-[ \t]*{LINENUM}?
#include <string.h>
#include <ctype.h>
/* Helpers called from the rule actions.  Each receives the matched text
   (yytext) and records the line numbers it finds in the jumps set. */
int parse_linenumber(char *);
int parse_linelist(char *);
int parse_linerange(char *);
int parse_erl(char *);
/* Set maintenance and output routines. */
void insert(int set[], int n);
void print_set(int set[]);
void print_intersection(int seta[], int setb[]);
/* Every line number referenced anywhere in the program, kept as a sorted,
   duplicate-free set by insert(). */
/* Slot 0, jumps[0], holds the number of elements currently stored; the
   elements themselves occupy jumps[1] .. jumps[jumps[0]].
   NOTE(review): 65537 is presumably one count slot plus 65536 possible
   line numbers -- confirm; insert() itself does no capacity check. */
int jumps[65537] = {0,};
/* Line numbers of lines that consist of nothing but a REM (or ') statement.
   Same layout as jumps: remset[0] is the element count. */
int remset[65537] = {0, };
/* Insert n into a sorted, duplicate-free set of ints.
 *
 * Layout: set[0] is the number of stored elements and the elements are
 * kept in ascending order in set[1] .. set[set[0]].  If n is already
 * present the set is left unchanged.
 *
 * The caller must guarantee room for one more element; no capacity check
 * is possible here because the array size is not passed in.
 *
 * Minor optimization: the search runs from the end of the array, since
 * line numbers usually arrive in roughly ascending order.
 */
void insert(int set[], int n) {
    int i, len = set[0];

    for (i = len; i > 0; i--) {
        if (set[i] == n) return;    /* already a member */
        if (set[i] < n) break;      /* n belongs right after set[i] */
    }
    i++;                            /* i is now the slot n will occupy */

    /* Open a gap at slot i by shifting only the tail set[i] .. set[len].
       Shifting `len` elements (as was done previously) touches memory
       beyond the used part of the set and can run off the end of the
       array once it is more than half full. */
    memmove(set + i + 1, set + i, (size_t)(len - i + 1) * sizeof(set[0]));
    set[i] = n;
    set[0]++;
}
%%
/* A line that begins with a line number followed directly by REM or '
   (only blanks, tabs or colons in between) is a comment-only line:
   remember its number in remset and skip the rest as a remark. */
^{LINENUM}[ \t:]*([']|REM) {
/* atoi() stops at the first non-digit, so it reads just the line number */
insert(remset, atoi(yytext));
BEGIN(remark);
}
/* Skip over remarks, strings, and data statements so that digits inside
   them are never mistaken for line number references. */
(REM|\') BEGIN(remark);
\" BEGIN(string);
<string>\" BEGIN(INITIAL);
DATA BEGIN(data);
<data>\" BEGIN(datastring);
<datastring>\" BEGIN(data);
/* An unquoted colon ends the DATA statement. */
<data>: BEGIN(INITIAL);
/* Newline returns to INITIAL from every start condition, so it ends
   <string>, <remark>, <data> and <datastring> alike. */
<*>\r?\n BEGIN(INITIAL);
/* GOTO & GOSUB take a line number list:
Need to handle commas, eg
ON var GOTO 10, 20, 30.
Q: Can list be empty? E.g., ON KEY GOSUB ,,,, ?
*/
(GO[ \t]*TO|GOSUB)([ \t,]*[0-9]+)+ parse_linelist(yytext);
/* Keywords that take a single line number. */
RESTORE[ \t]*{LINENUM} parse_linenumber(yytext);
RESUME[ \t]*{LINENUM} parse_linenumber(yytext);
RUN[ \t]*{LINENUM} parse_linenumber(yytext);
THEN[ \t]*{LINENUM} parse_linenumber(yytext);
ELSE[ \t]*{LINENUM} parse_linenumber(yytext);
/* LIST and EDIT take a line number range:
Must handle dash. The numbers do not need to refer to actual lines.
LIST -300
EDIT 99-201
LLIST 9000-
*/
LIST[ \t]*{LINERANGE} parse_linerange(yytext);
LLIST[ \t]*{LINERANGE} parse_linerange(yytext);
EDIT[ \t]*{LINERANGE} parse_linerange(yytext);
/* ERL is a variable used to compare against a line number.  Both operand
   orders are matched: "ERL <op> n" and "n <op> ERL". */
ERL[ \t]*[\<=\>]+[ \t]*{LINENUM} parse_erl(yytext);
{LINENUM}[ \t]*[\<=\>]+[ \t]*ERL parse_erl(yytext);
/* Delete all else: any other single character, in any start condition,
   is consumed and produces no output. */
<*>.|\r|\n ;
%%
/* Record the single line number that follows a BASIC keyword.
 *
 * p: the matched text, e.g. "RESTORE 1000" or "THEN10".
 *
 * The first run of digits in p is converted and inserted into the jumps
 * set.  Nothing is inserted if p contains no digit.  Always returns 0.
 */
int parse_linenumber(char *p) {
    /* Skip over the BASIC keyword and any blanks after it.  The cast keeps
       isdigit() well defined if plain char is signed and the byte is
       negative. */
    while (*p && !isdigit((unsigned char)*p))
        p++;

    if (*p) {
        insert(jumps, atoi(p));
    }
    return 0;
}
/* Record every line number in a GOTO / GOSUB target list.
 *
 * linelist: the matched text, e.g. "GOTO 10, 20, 30" or "GOSUB100".
 *
 * Each run of digits is converted and inserted into the jumps set;
 * everything between the runs (keyword, blanks, tabs, commas) is skipped.
 * The text is only read, never modified, and no strtok() state is used,
 * so the scanner's buffer is left intact.  Always returns 0.
 */
int parse_linelist(char *linelist) {
    const char *p = linelist;

    for (;;) {
        /* Advance to the next number, skipping "GO TO"/"GOSUB" and
           separators.  Cast keeps isdigit() defined for negative chars. */
        while (*p && !isdigit((unsigned char)*p))
            p++;
        if (*p == '\0')
            break;

        insert(jumps, atoi(p));     /* atoi stops at the first non-digit */

        /* Step past the digits just consumed. */
        while (isdigit((unsigned char)*p))
            p++;
    }
    return 0;
}
/* Record the endpoints of a LIST / LLIST / EDIT line range.
 *
 * linerange: the matched text, e.g. "LIST -300", "EDIT 99-201",
 *            "LLIST 9000-" or "EDIT 99 - 201".
 *
 * Either endpoint may be absent; each one that is present is inserted
 * into the jumps set.  Returns 0 on success, -1 if the text has no dash.
 * The text is only read, never modified.
 */
int parse_linerange(char *linerange) {
    /* Skip over "LIST", "LLIST" or "EDIT" up to the first number or the
       dash, whichever comes first. */
    while (*linerange && !isdigit((unsigned char)*linerange)
           && *linerange != '-')
        linerange++;

    char *dash = strchr(linerange, '-');
    if (dash == NULL) return -1;

    /* Start of range: present only if we stopped on a digit, not the dash.
       atoi() stops at the first non-digit, so no terminator is needed. */
    if (isdigit((unsigned char)*linerange)) {
        insert(jumps, atoi(linerange));
    }

    /* End of range: the pattern allows blanks after the dash even when no
       number follows ("LIST 100- "), so require an actual digit instead
       of merely a non-empty tail; otherwise atoi() would yield a bogus
       reference to line 0. */
    char *p = dash + 1;
    while (*p == ' ' || *p == '\t')
        p++;
    if (isdigit((unsigned char)*p)) {
        insert(jumps, atoi(p));
    }
    return 0;
}
/* Return a pointer to the first occurrence of "ERL" in s, compared without
   regard to letter case, or NULL if there is none. */
static char *find_erl(char *s) {
    for (; *s; s++) {
        /* && short-circuits, so s[1] and s[2] are only read while the
           preceding character was a non-NUL match. */
        if (toupper((unsigned char)s[0]) == 'E' &&
            toupper((unsigned char)s[1]) == 'R' &&
            toupper((unsigned char)s[2]) == 'L')
            return s;
    }
    return NULL;
}

/* Record the line number that ERL is being compared against.
 *
 * comparison: the matched text, in either operand order, e.g.
 *             "ERL = 100", "erl<>100" or "100 >= ERL".
 *
 * Returns 0 if a number was found and inserted into the jumps set,
 * -1 otherwise.
 *
 * This is not actually necessary for removing comments, as there is no
 * case where a line referred to by ERL might be removed because it is
 * only a comment (REM statements cannot cause errors).
 */
int parse_erl(char *comparison) {
    /* The scanner matches case-insensitively, so the keyword may arrive
       as "erl"; a case-sensitive strstr() would miss it. */
    char *erl = find_erl(comparison);
    if (erl == NULL) return -1;

    /* ERL <=> n : look for the first digit after the keyword. */
    char *p = erl + 3;
    while (*p && !isdigit((unsigned char)*p)) {
        p++;
    }
    if (*p) {
        insert( jumps, atoi(p) );
        return 0;
    }

    /* n <=> ERL : walk backwards from just before the keyword, over the
       operator and blanks, to the last digit of n.  The walk examines the
       character before p, so it does not stall on the 'E' of ERL. */
    p = erl;
    while ( p != comparison
            && !isdigit((unsigned char)p[-1])
            && !isalpha((unsigned char)p[-1]) )
        p--;
    if ( p == comparison || !isdigit((unsigned char)p[-1]) )
        return -1;

    /* Back up to the first digit of n. */
    while ( p != comparison && isdigit((unsigned char)p[-1]) )
        p--;
    insert( jumps, atoi(p) );
    return 0;
}
/* Print every element of a set to standard output, one per line,
 * right-aligned in a six-character field.
 *
 * set: set[0] is the element count; elements are set[1] .. set[set[0]].
 */
void print_set(int set[]) {
    int idx = 0;

    while (idx < set[0]) {
        idx++;
        printf("%6d\n", set[idx]);
    }
}
/* Print, on one line of standard output, every value that occurs in both
 * sorted sets; each value is preceded by a single space.
 *
 * a, b: sets whose slot 0 holds the element count and whose elements are
 *       stored in ascending order from slot 1.
 *
 * A terminating newline is written whenever the merge advanced in either
 * set, i.e. unless at least one of the sets is empty.
 */
void print_intersection(int a[], int b[]) {
    const int na = a[0];
    const int nb = b[0];
    int ia = 1;
    int ib = 1;

    for (;;) {
        if (ia > na || ib > nb)
            break;

        const int x = a[ia];
        const int y = b[ib];

        if (x < y) {            /* a is behind: advance it */
            ia++;
            continue;
        }
        if (x > y) {            /* b is behind: advance it */
            ib++;
            continue;
        }
        printf(" %d", x);       /* common element */
        ia++;
        ib++;
    }

    if (ia != 1 || ib != 1)
        printf("\n");
}
/* Usage: PROGRAM [-j] [infile [outfile]]
 *
 *   -j       print every referenced line number, not just those that are
 *            comment-only lines
 *   infile   BASIC source to scan (default: standard input)
 *   outfile  where to write the result (default: standard output)
 *
 * Exits with status 1 if a named file cannot be opened.
 */
int main(int argc, char *argv[]) {
    ++argv, --argc;             /* skip over program name */

    /* -j flags shows all jumps, not just purely commented out lines */
    int printalljumps = 0;
    if (argc>0 && argv[0][0]=='-' && argv[0][1]=='j') {
        printalljumps = 1;
        ++argv, --argc;
    }

    /* First arg (if any) is input file name */
    yyin = (argc>0) ? fopen( argv[0], "r" ) : stdin;
    if (yyin == NULL) { perror(argv[0]); exit(1); }

    /* Second arg (if any) is output file name.  Only step past the first
       argument if there actually was one. */
    if (argc > 0) {
        ++argv, --argc;
    }

    /* The results are written with printf(), i.e. to stdout, and no rule
       echoes anything to yyout.  Merely opening the file as yyout would
       therefore leave it empty, so point stdout itself at the file. */
    if (argc > 0 && freopen( argv[0], "w", stdout ) == NULL) {
        perror(argv[0]);
        exit(1);
    }
    yyout = stdout;

    while (yylex())
        ;

    if (printalljumps)
        print_set(jumps);                       /* -j flag */
    else
        print_intersection(jumps, remset);      /* Default */

    return 0;
}
/* End-of-input hook called by the scanner.  A nonzero result tells it
   there is no further input, so exactly one file is ever read. */
int yywrap() {
    const int no_more_input = 1;

    return no_more_input;
}