-
Notifications
You must be signed in to change notification settings - Fork 53
/
lex.um
111 lines (89 loc) · 2.11 KB
/
lex.um
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import "std.um"
// Kinds of tokens the lexer can produce
type TokenKind* = enum {
    // No token: the state next() starts from, left in place at end of input
    Null
    // Symbol or number
    Atom
    // "("
    LPar
    // ")"
    RPar
}

// A single scanned token
type Token* = struct {
    // Token category
    kind: TokenKind
    // Symbol text, "<number>" for numeric atoms, or the parenthesis character
    name: str
    // Value of a numeric atom; 0 for every other token
    val: int
}

// Lexer state: the input buffer plus one character and one token of lookahead
type Lexer* = struct {
    // Text being scanned
    buf: str
    // Index of the next character to fetch from buf
    pos: int
    // Length of buf, cached by open()
    size: int
    // Most recently fetched character that has not yet been consumed by a token
    ch: char
    // Most recently scanned token
    tok: Token
}
// Human-readable token spellings used in error messages, indexed by int(TokenKind).
// The entries follow the declaration order of TokenKind: Null, Atom, LPar, RPar.
spelling := [4]str{
"nothing",
"atom",
"(",
")"
}
// Attaches the lexer to a new input string and rewinds it to the beginning.
// The lookahead character is primed with a space, so the first call to next()
// skips it as whitespace and starts fetching characters from the buffer.
fn (l: ^Lexer) open*(buf: str) {
    l.ch = ' '
    l.pos = 0
    l.buf = buf
    l.size = len(buf)
}
// Fetches the character at the current position and advances past it.
// Once the whole buffer has been consumed, returns '\0' without advancing.
fn (l: ^Lexer) getch(): char {
    if l.pos < l.size {
        fetched := l.buf[l.pos]
        l.pos++
        return fetched
    }
    return '\0'
}
// Scans the next token from the input into l.tok.
//
// Recognized forms:
//   - symbol atom: a letter followed by letters and digits; the text is stored in tok.name
//   - number atom: an optional '+' or '-' followed by decimal digits; tok.name is
//     "<number>" and tok.val is the result of std::atoi on the scanned text
//   - "(" and ")": kind is .LPar or .RPar, tok.name holds the character
//   - end of input ('\0' from getch): tok is left as the .Null token
// Any other character terminates the program through exit(2, ...).
// On return, l.ch holds the first character that has not been consumed yet.
fn (l: ^Lexer) next*() {
    const letter = fn(c: char): bool {return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z'}
    const digit = fn(c: char): bool {return c >= '0' && c <= '9'}
    // '\r' counts as whitespace so that input with CRLF line endings is accepted
    const space = fn(c: char): bool {return c == ' ' || c == '\t' || c == '\n' || c == '\r'}

    ch := l.ch
    // Start from the Null token with an empty name and a zero value
    l.tok = Token{.Null, "", 0}

    // Skip spaces
    for space(ch) {
        ch = l.getch()
    }

    // Read string atom
    if letter(ch) {
        l.tok.kind = .Atom
        for letter(ch) || digit(ch) {
            l.tok.name += ch
            ch = l.getch()
        }

    // Read number
    } else if ch == '+' || ch == '-' || digit(ch) {
        l.tok.kind = .Atom
        l.tok.name = "<number>"

        s := ""
        if ch == '+' || ch == '-' {
            s = ch
            ch = l.getch()
        }
        for digit(ch) {
            s += ch
            ch = l.getch()
        }
        // NOTE(review): a sign with no digits after it reaches std::atoi as just
        // "+" or "-" and is still reported as a number atom - confirm this is intended
        l.tok.val = std::atoi(s)

    // Read parentheses
    } else if ch == '(' || ch == ')' {
        if ch == '(' {l.tok.kind = .LPar} else {l.tok.kind = .RPar}
        l.tok.name = ch
        ch = l.getch()

    // Anything else except end of input is an error
    } else if ch != '\0' {
        exit(2, "Illegal character " + ch + " (" + std::itoa(int(ch)) + ")")
    }

    // Keep the lookahead character for the next call
    l.ch = ch
}
// Verifies that the current token is of the given kind.
// On a mismatch, terminates the program with exit code 2 and a message that
// names both the expected and the found token spelling.
fn (l: ^Lexer) check*(kind: TokenKind) {
    if l.tok.kind == kind {
        return
    }
    expected := spelling[int(kind)]
    found := spelling[int(l.tok.kind)]
    exit(2, expected + " expected but " + found + " found")
}
// Requires the current token to be of the given kind (check exits the program
// on a mismatch), then advances the lexer to the next token.
fn (l: ^Lexer) eat*(kind: TokenKind) {
l.check(kind)
l.next()
}