scanner.go
// Copyright (c) Liam Stanley <me@liamstanley.io>. All rights reserved. Use
// of this source code is governed by the MIT license that can be found in
// the LICENSE file.

package queryparser

import (
	"fmt"
	"unicode"
	"unicode/utf8"
)

// stateFn represents the state of the scanner as a function that returns the
// next state.
type stateFn func(*scanner) stateFn
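
// token identifies the kind of lexical item produced by the scanner.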
type token int
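
// tokenRef is a single scanned item: its token type, its starting position in
// the input, and its literal text.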
type tokenRef struct {
	tok token  // Token that represents the literal.
	pos int    // Position in the input.
	lit string // Literal/value of item.
}

const (
	tokenEOF token = iota // EOF.
	tokenDELIM            // :
	tokenFIELD            // Quoted (with spaces/words) or unquoted (single word).
	tokenIDENT            // Raw text, an IDENT can also be a WORD.
	tokenWS               // Whitespace.

	// eof isn't a token, but rather the literal reference to the EOF token.
	eof = 1
)
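
// String returns a printable representation of the tokenRef, truncating long
// literals.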
func (i tokenRef) String() string {
	if i.tok == tokenEOF {
		return "EOF"
	}

	if len(i.lit) > 10 {
		return fmt.Sprintf("%.10q...", i.lit)
	}

	return fmt.Sprintf("%q", i.lit)
}

// scanner represents a lexical scanner.
type scanner struct {
	items   chan tokenRef // The channel of scanned items.
	input   string        // The string being scanned.
	pos     int           // Current position in the input.
	start   int           // Start position of the active item.
	width   int           // Width of last rune read from input.
	lastPos int           // Position of most recent item returned by nextToken.
}

// newScanner returns a new scanner instance. This starts a goroutine; make sure
// to call drain() on it to ensure it doesn't leak goroutines.
func newScanner(input string) *scanner {
	s := &scanner{
		input: input,
		items: make(chan tokenRef),
	}

	go s.run()
	return s
}
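
// A minimal usage sketch (an assumption about how a caller, such as the parser
// in this package, drives the scanner; the example query string is illustrative
// only): read tokens until tokenEOF, then call drain so the lexing goroutine
// exits.
//
//	s := newScanner(`name:"Liam Stanley"`)
//	for {
//		item := s.nextToken()
//		if item.tok == tokenEOF {
//			break
//		}
//		// Handle item.tok / item.lit here.
//	}
//	s.drain()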

// emit passes a tokenRef back to the client.
func (s *scanner) emit(t token) {
	s.items <- tokenRef{t, s.start, s.input[s.start:s.pos]}
	s.start = s.pos
}

// nextToken returns the next tokenRef from the input. Called by the parser, not
// in the lexing goroutine.
func (s *scanner) nextToken() tokenRef {
	item := <-s.items
	s.lastPos = item.pos

	return item
}

// run runs the state machine for the lexer.
func (s *scanner) run() {
	for state := scanMain; state != nil; {
		state = state(s)
	}

	close(s.items)
}

// drain drains the output so the lexing goroutine will exit. Called by the
// parser, not in the lexing goroutine.
func (s *scanner) drain() {
	for range s.items {
	}
}

// next reads the next rune from the input string. Returns eof once the entire
// input has been consumed.
func (s *scanner) next() rune {
	if s.pos >= len(s.input) {
		s.width = 0
		return eof
	}

	r, w := utf8.DecodeRuneInString(s.input[s.pos:])
	s.width = w
	s.pos += s.width

	return r
}

// backup steps back one rune. Can only be called once per call of next.
func (s *scanner) backup() {
	s.pos -= s.width
}

// peek steps forward one rune, reads it, and backs up again.
func (s *scanner) peek() rune {
	r := s.next()
	s.backup()

	return r
}
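
// scanMain is the top-level scan state: it reads a single rune and dispatches
// to the state function that handles it, emitting tokenEOF and stopping once
// the input is exhausted.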
func scanMain(s *scanner) stateFn {
	switch r := s.next(); {
	case r == eof:
		s.emit(tokenEOF)
		return nil
	case isWhitespace(r):
		return scanWhitespace
	case r == ':':
		s.emit(tokenDELIM)
		return scanMain
	case r == '"':
		return scanDoubleQuote
	case r == '\'':
		return scanSingleQuote
	case isWord(r):
		return scanWord
	}

	return nil
}

// scanWhitespace scans a run of space characters. One space has already been
// seen.
func scanWhitespace(s *scanner) stateFn {
	for isWhitespace(s.peek()) {
		s.next()
	}

	s.emit(tokenWS)
	return scanMain
}

// scanWord scans a run of word characters. One word character has already been
// seen.
func scanWord(s *scanner) stateFn {
	for isWord(s.peek()) {
		s.next()
	}

	s.emit(tokenIDENT)
	return scanMain
}

// scanSingleQuote scans a single-quoted string.
func scanSingleQuote(s *scanner) stateFn {
Loop:
	for {
		switch s.next() {
		case '\\':
			if r := s.next(); r != eof {
				break
			}
			fallthrough
		case eof:
			// Should this be a req? Unterminated quoted string.
			break Loop
		case '\'':
			break Loop
		}
	}

	s.emit(tokenFIELD)
	return scanMain
}

// scanDoubleQuote scans a double-quoted string.
func scanDoubleQuote(s *scanner) stateFn {
Loop:
	for {
		switch s.next() {
		case '\\':
			if r := s.next(); r != eof {
				break
			}
			fallthrough
		case eof:
			// Should this be a req? Unterminated quoted string.
			break Loop
		case '"':
			break Loop
		}
	}

	s.emit(tokenFIELD)
	return scanMain
}

// isWhitespace returns true if ch is a space or a tab.
func isWhitespace(ch rune) bool {
	return ch == ' ' || ch == '\t'
}

// isWord returns true if ch is a character allowed in raw text.
func isWord(ch rune) bool {
	return ch >= '!' && ch <= '~' && ch != ':'
}
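
// isIdent reports whether input consists only of letters, digits, underscores,
// and hyphens.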
func isIdent(input string) bool {
	for _, r := range input {
		if r != '_' && r != '-' && !unicode.IsLetter(r) && !unicode.IsDigit(r) {
			return false
		}
	}

	return true
}