-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.cpp
78 lines (65 loc) · 1.78 KB
/
lexer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#include "lexer.h"
#include "rexparser.h"
#include "dfa.h"
/// Constructs a lexer whose transition table `ttab` is initialised from `mac`.
/// Position tracking starts at line 1, column 0.
lexer::lexer(machine &mac)
    : ttab(mac)
{
    col = 0;   // no character of the first line has been consumed yet
    line = 1;  // lines are counted from 1
}
/**
 * Scans the next token from `ifs`.
 *
 * Leading whitespace is skipped while `line` / `col` are updated. Characters
 * are then fed one at a time to the transition table `ttab` until it reports
 * a dead-end state (-1), the stream is exhausted, or a NUL byte is seen.
 * Whenever `ttab` is in an accepting state, the characters gathered so far
 * are committed to the token text and the token class is refreshed.
 *
 * If no class was recognised (the class is still ERROR_CLASS), one further
 * character is consumed when available — presumably so that scanning can
 * advance past the offending character.
 *
 * `ttab` is reset before returning a scanned token.
 *
 * @param ifs input stream to read from; characters are consumed from it.
 * @return the scanned token with its class and the current line/col, or a
 *         token with empty text and class EOF_MARK when the stream is in a
 *         failed state or has no more input.
 *
 * NOTE(review): characters consumed after the last accepting state stay in
 * the pending buffer and are not pushed back onto the stream — confirm that
 * this is intended.
 */
lexer::token lexer::next_token(std::istream &ifs) {
    using traits = std::istream::traits_type;
    const traits::int_type end_of_input = traits::eof();

    std::ostringstream accum_ss;   // consumed but not yet accepted characters
    std::ostringstream token_ss;   // characters committed to the token text
    std::string token_class = ERROR_CLASS;

    if (!ifs || ifs.eof())
        return lexer::token("", EOF_MARK, line, col);

    char c = '\0';

    // Skip leading whitespace. peek() is compared against EOF before being
    // classified, and the value is passed to isspace as an unsigned char
    // because a negative char argument is undefined behaviour.
    for (traits::int_type next = ifs.peek();
         next != end_of_input && isspace(static_cast<unsigned char>(next));
         next = ifs.peek()) {
        if (next == '\n') {
            line++;
            col = 0;
        } else {
            col++;
        }
        ifs.get(c);
    }

    if (ifs.eof())
        return lexer::token("", EOF_MARK, line, col);

    // Recognize when starting state is also a final state
    if (ttab.is_accepting())
        token_class = ttab.get_token_class();

    while (true) {
        const traits::int_type next = ifs.peek();
        // Stop at end of input instead of handing EOF (narrowed to char) to
        // the transition table. A NUL byte also ends the scan.
        if (next == end_of_input || next == 0)
            break;

        c = traits::to_char_type(next);
        sid_t st = ttab.move(c);
        if (st == -1) // reached a deadend state
            break;

        ifs.get(c);
        col++;
        accum_ss << c;

        if (ttab.is_accepting()) {
            // Commit everything gathered so far and start a fresh pending
            // buffer for characters beyond this accepting state.
            token_ss << accum_ss.str();
            accum_ss.str("");
            accum_ss.clear();
            token_class = ttab.get_token_class();
        }
    }

    if (token_class == ERROR_CLASS) {
        // Only record the character if one was actually read; at end of
        // input get() fails and leaves `c` unchanged.
        if (ifs.get(c)) {
            token_ss << c;
            col++;
        }
    }

    ttab.reset();
    return lexer::token(token_class.empty() ? accum_ss.str() : token_ss.str(),
                        token_class, line, col);
}
/// Writes `tok` to `os` left-aligned as "<line>:<col>:\t<text>\t<class>"
/// and returns the stream so that inserts can be chained.
std::ostream &operator <<(std::ostream &os, lexer::token &tok) {
    os << std::left;

    // Position prefix: line and column, each followed by a colon.
    os << tok.get_line() << ":";
    os << tok.get_col() << ":";

    // Tab-separated token text and token class.
    os << "\t" << tok.get_str();
    os << "\t" << tok.get_class();

    return os;
}