-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathScanner.java
248 lines (210 loc) · 7.71 KB
/
Scanner.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
package edu.ntu.compilers.lab4.scanner;
import edu.ntu.compilers.lab4.scanner.fa.DFABuilder;
import edu.ntu.compilers.lab4.scanner.fa.DFA;
import edu.ntu.compilers.lab4.scanner.fa.DFAState;
import edu.ntu.compilers.lab4.tokens.TokenName;
import edu.ntu.compilers.lab4.tokens.Token;
import edu.ntu.compilers.lab4.tokens.TokenDescriptor;
import edu.ntu.compilers.lab4.util.FormatHelper;
import java.io.*;
import java.util.*;
/**
 * DFA-driven lexical scanner.
 * <p>
 * Reads a source file one character at a time, feeds every character to a token automaton that
 * is built from the token names handed to the constructor, and emits a {@link Token} as soon as
 * the automaton sits in a final state and cannot consume the next character.
 * <p>
 * The underlying file is closed automatically once the end of the file has been reached; callers
 * that stop scanning early should call {@link #close()} themselves.
 * <p>
 * Created by IntelliJ IDEA.
 * User: Domi
 *
 * @param <TN> the concrete token-name type this scanner recognizes
 */
public class Scanner<TN extends TokenName> implements Closeable /* , Iterable<Token> */ {
    /** Path of the file being scanned, exactly as passed to the constructor. */
    public final String FileName;

    Reader reader;
    /** The character most recently returned by the underlying reader (0 before the first read). */
    int lastChr;
    /** 1-based read position, i.e. the position of the next character that will be read. */
    int currentLine = 1, currentCol = 1;
    /** Characters consumed by the automaton for the token that is currently being recognized. */
    StringBuilder partialTokenContent = new StringBuilder(256);

    DFA automaton;
    DFAState currentState;

    /** The most recent token for which {@code isSyntaxElement()} was true; used in error messages. */
    Token lastToken;
    Token currentToken;

    /** Set once the reader has reported end of input; the reader is closed at that point. */
    private boolean reachedEof;

    /**
     * Creates a scanner for the given file.
     *
     * @param fileName      path of the file to scan
     * @param allTokenNames every token name the automaton should recognize
     * @throws FileNotFoundException if {@code fileName} does not exist
     * @throws Exception             if the file cannot be opened or the automaton cannot be built
     */
    public Scanner(String fileName, TN[] allTokenNames) throws Exception {
        FileName = fileName;

        File file = new File(FileName);
        if (!file.exists()) {
            throw new FileNotFoundException("Invalid FileName: " + FileName);
        }

        lastToken = currentToken = Token.StartToken;

        // Build the automaton BEFORE opening the file: if this fails, no open reader is leaked.
        automaton = DFABuilder.buildTokenDFA(Arrays.asList(allTokenNames));
        currentState = automaton.StartState;

        // The scanner reads single characters, so buffer the reader.
        reader = new BufferedReader(new FileReader(file));
    }

    /**
     * @return the 1-based line of the read position. Because a token is only emitted after a
     *         lookahead character has been read, this is at or behind the end of the current token.
     */
    public int getLineNo() {
        return currentLine;
    }

    /**
     * @return the 1-based column of the read position (see {@link #getLineNo()} for the lookahead caveat)
     */
    public int getColumnNo() {
        return currentCol;
    }

    /**
     * @return the token produced by the most recent call to {@link #next()}
     *         (initially {@code Token.StartToken})
     */
    public Token current() {
        return currentToken;
    }

    /**
     * Closes the underlying file. Safe to call more than once and after the end of the file
     * has been reached.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }

    /**
     * Reads one character and keeps the line/column counters up to date.
     * All three line-ending conventions (CR, LF, CRLF) count as exactly one line break.
     *
     * @return the character read, or -1 at the end of the file
     * @throws ScannerException if the underlying reader fails
     */
    private int read() {
        if (reachedEof) {
            // The reader has already been closed; keep answering "end of file".
            return -1;
        }
        try {
            int chr = reader.read();
            if (chr == -1) {
                // End of input is not a character: leave the position untouched and release the file.
                reachedEof = true;
                closeQuietly();
                return chr;
            }

            // The LF of a CRLF pair was already accounted for when the CR was read.
            boolean secondHalfOfCrLf = lastChr == '\r' && chr == '\n';
            if (!secondHalfOfCrLf) {
                if (chr == '\n' || chr == '\r') {
                    // new line
                    currentLine++;
                    currentCol = 1;
                }
                else {
                    // old line
                    ++currentCol;
                }
            }

            // Remember the character here (and not in processChr) so that characters consumed by
            // a sub-scanner take part in the CRLF detection as well.
            lastChr = chr;
            return chr;
        }
        catch (IOException e) {
            // NOTE(review): the IO message ends up inside what looks like a format string, and
            // lastToken has no placeholder - confirm against the ScannerException constructors.
            throw new ScannerException("Unable to read from file: " + e.getMessage(), lastToken);
        }
    }

    /** Closes the reader after the end of the file was reached, ignoring close failures. */
    private void closeQuietly() {
        try {
            reader.close();
        }
        catch (IOException ignored) {
            // Everything has been read already; a failure to close a read-only stream must not
            // turn a successful scan into an error.
        }
    }

    /**
     * @return The next Token that is not a whitespace or comment
     */
    public Token nextSyntaxElement() {
        next();
        skipUntilNextSyntaxElement();
        return current();
    }

    /**
     * Scans the next token, whatever its kind.
     *
     * @return The next Token, where whitespace and comment tokens are also emitted;
     *         {@code Token.FinalToken} once the end of the file has been reached
     * @throws ScannerException if the input cannot be tokenized or read
     */
    public Token next() {
        if (currentToken != null && currentToken.isSyntaxElement()) {
            lastToken = currentToken;
        }
        currentToken = null;

        try {
            int chr;
            do {
                chr = read();
                processChr(chr);
            }
            while (currentToken == null && chr != -1);

            if (currentToken == null) {
                // read till EOF
                currentToken = Token.FinalToken;
            }
        }
        catch (Exception e) {
            // TODO: Error recovery
            // The original message is passed as an ARGUMENT: concatenating it into the format
            // string would break formatting whenever it contains a '%'.
            ScannerException failure = new ScannerException(
                    "Error while scanning (at %d,%d): %s", currentLine, currentCol, e.getMessage());
            attachCause(failure, e);
            throw failure;
        }
        return currentToken;
    }

    /** Records {@code cause} on {@code wrapper} unless the wrapper already carries a cause. */
    private static void attachCause(Throwable wrapper, Throwable cause) {
        try {
            wrapper.initCause(cause);
        }
        catch (IllegalStateException ignored) {
            // The wrapper's constructor already initialized its cause; keep that one.
        }
    }

    /**
     * Feeds one character (or -1 for end of file) to the automaton.
     * Either the automaton advances, or - if it is in a final state - the recognized token is
     * emitted, or the character is rejected.
     *
     * @param chr the character just read, or -1 at the end of the file
     * @throws ScannerException if the character is not valid at this point, or if the file ends
     *                          in the middle of a token
     */
    private void processChr(int chr) {
        DFAState newState = currentState.getNeighbor(chr);
        if (newState != null) {
            // transition exists from currentState
            currentState = newState;
            partialTokenContent.append((char) chr);      // append all characters - TokenCategory decides how to evaluate it
        }
        else if (currentState.isFinalState()) {
            // transition does not exist but we are in final state -> emit the read token
            emitToken(chr);
        }
        else if (chr != -1) {
            // no transition and not in final state -> character is invalid in context
            if (currentState.isStartState()) {
                // outside of token
                throw new ScannerException("Invalid character \"%s\", right after \"%s\"",
                        FormatHelper.toDebuggableString((char) chr), lastToken);
            }
            else {
                // inside of token
                throw new ScannerException("Invalid character \"%s\" in token, after \"%s\"",
                        FormatHelper.toDebuggableString((char) chr), lastToken);
            }
        }
        else if (!currentState.isStartState()) {
            // End of file in the middle of a token: report it instead of silently dropping
            // the characters collected so far.
            throw new ScannerException("Unexpected end of file in token, after \"%s\"", lastToken);
        }
    }

    /**
     * Turns the characters collected so far into the current token and starts over with
     * {@code currentChr} as the first character of the following token.
     * <p>
     * If the token's descriptor supplies a sub-scanner, the sub-scanner consumes input until it
     * reports completion, and its result replaces the content collected by the automaton.
     *
     * @param currentChr the lookahead character that ended the token, or -1 at the end of the file
     * @throws ScannerException if the file ends while a sub-scanner is still consuming input
     */
    private void emitToken(int currentChr) {
        // Unchecked: presumably the automaton only hands back names that were passed to the
        // constructor, which are all of type TN.
        @SuppressWarnings("unchecked")
        TN name = (TN) currentState.getHighestPrecedenceToken();

        String content = partialTokenContent.toString();
        partialTokenContent.setLength(0);      // clear buffer

        TokenDescriptor.SubScanner subScanner = name.descriptor().createSubScanner(content);
        if (subScanner != null) {
            boolean wantsMore;
            int chr;

            subScanner.scan(currentChr);
            do {
                chr = read();
            }
            while ((wantsMore = subScanner.scan(chr)) && chr != -1);

            if (wantsMore) {
                // file ended before scanning finished
                throw new ScannerException("Unterminated \"%s\"-token at end of file", name.name());
            }

            currentChr = chr;
            content = subScanner.getCurrentString();
        }

        // go back to StartState
        currentState = automaton.StartState;

        // process the first character of the new token and return
        // keep in mind that this will not emit another token,
        // since one needs to look at, at least, two characters to decide whether a token should be emitted or not
        processChr(currentChr);

        currentToken = name.emitter().emitToken(content);     // create token
    }


    // Utility methods

    /**
     * @return whether the current token has the given name
     */
    public boolean isName(TN name) {
        return current().Name == name;
    }

    /**
     * Ignores all current and following white space tokens
     */
    public void skipUntilNextSyntaxElement() {
        while (!current().isSyntaxElement()) {
            next();
        }
    }

    /**
     * Calls {@link #next()} repeatedly for as long as the token it returns has the given name.
     * The first token with a different name is left as the current token.
     *
     * @return whether at least one token with the given name was read
     */
    public boolean skipAny(TN name) {
        boolean skipped = false;
        while (next().Name == name) {
            skipped = true;
        }
        return skipped;
    }

    /**
     * Moves on to the next syntax element if the current token has the given name.
     *
     * @throws ScannerException if the current token has a different name
     */
    public void skipCurrent(TN name) {
        if (current().Name == name) {
            nextSyntaxElement();
        }
        else {
            expectationNotMet(name);
        }
    }

    /**
     * Returns whether the token was skipped
     */
    public boolean skipCurrentOptional(TN name) {
        if (current().Name == name) {
            nextSyntaxElement();
            return true;
        }
        return false;
    }

    /**
     * Moves on to the next syntax element.
     *
     * @return the token that was current before moving on
     */
    public Token getCurrentAndMoveOn() {
        Token current = current();
        nextSyntaxElement();
        return current;
    }

    /**
     * Returns the next token if it has the given name or, else, throws an Exception
     */
    public Token getAndMoveOn(TN name) {
        skipUntilNextSyntaxElement();
        if (current().Name == name) {
            return getCurrentAndMoveOn();
        }
        expectationNotMet(name);
        return null;    // not reached: expectationNotMet always throws
    }

    /**
     * Reports that the current token is none of the expected ones.
     *
     * @param name the token names that would have been acceptable
     * @throws ScannerException always
     */
    protected void expectationNotMet(TN... name) {
        throw new ScannerException("Expected: %s - Found: %s", Arrays.asList(name), current().Name);
    }
}