-
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathparser.go
189 lines (159 loc) · 4.1 KB
/
parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
// Copyright (c) Liam Stanley <me@liamstanley.io>. All rights reserved. Use
// of this source code is governed by the MIT license that can be found in
// the LICENSE file.
package queryparser
import (
"strings"
"unicode"
)
// Options controls which filter names the parser accepts and which
// characters are stripped from the parsed text.
type Options struct {
// CutFn reports whether a rune should be removed (true means strip it).
// When non-nil it is applied to the field text of every parsed filter and,
// once the end of the query is reached, to the raw (non-filter) text as
// well. A nil CutFn disables stripping entirely.
CutFn func(rune) bool
// Allowed is the list of filter names that may be parsed as filters. Names
// are compared case-insensitively. If it is nil or empty, every filter name
// is considered allowed.
Allowed []string
}
// Parser turns a query string into a Query by consuming tokens from a
// scanner.
type Parser struct {
// s is the scanner that supplies the tokens of the query being parsed.
s *scanner
// opt holds the options the parser was created with.
opt *Options
// buf holds tokens that were pushed back with unscan. scan returns these,
// oldest first, before asking the scanner for a new token.
buf []tokenRef
}
// New builds a Parser for query, configured with opt.
//
// Parser.Parse must be called on the returned value, otherwise goroutines
// are leaked; Parse drains the underlying scanner when it returns.
func New(query string, opt Options) *Parser {
	p := &Parser{opt: &opt}
	p.s = newScanner(query)
	return p
}
// Parse is a convenience wrapper that parses query with the default options:
// DefaultCut as the cut function and no restriction on filter names.
func Parse(query string) *Query {
	defaults := Options{CutFn: DefaultCut}
	return New(query, defaults).Parse()
}
// scan yields the next token to process. Tokens that were previously pushed
// back with unscan are returned first, oldest first; only when none are
// pending is a fresh token read from the scanner.
func (p *Parser) scan() (tr tokenRef) {
	// Nothing pending: go straight to the scanner.
	if len(p.buf) == 0 {
		return p.s.nextToken()
	}

	// Take the head of the buffer and shift the remainder down in place.
	tr = p.buf[0]
	p.buf = append(p.buf[:0], p.buf[1:]...)
	return tr
}
// unscan pushes tr back so that a later scan returns it before reading from
// the scanner. Tokens are appended to the end of the buffer while scan pops
// from the front, so when several tokens are pushed back they are replayed
// in the order they were unscanned; push them back in their original stream
// order.
func (p *Parser) unscan(tr tokenRef) {
p.buf = append(p.buf, tr)
}
// accept reads the next token and reports whether its type equals tok. A
// matching token is consumed; a non-matching token is pushed back with unscan
// so the next scan sees it again.
func (p *Parser) accept(tok token) bool {
	next := p.scan()
	if next.tok != tok {
		p.unscan(next)
		return false
	}
	return true
}
// Parse consumes the whole query and returns the resulting Query.
//
// Identifier tokens are handed to scanField, which either records a filter or
// appends the identifier to Raw. The literal of every other token is appended
// to Raw. When the end of input is reached and a CutFn is configured, Raw is
// filtered through it and then passed through stripDuplicateWS. The scanner
// is drained when Parse returns.
func (p *Parser) Parse() *Query {
	defer p.s.drain()

	result := &Query{Filters: map[string][]string{}}
	for {
		next := p.scan()

		if next.tok == tokenIDENT {
			p.scanField(next, result)
			continue
		}
		if next.tok != tokenEOF {
			result.Raw += next.lit
			continue
		}

		// End of input: clean up the raw text and finish.
		if cut := p.opt.CutFn; cut != nil {
			result.Raw = stripDuplicateWS(cutsetFunc(result.Raw, cut))
		}
		return result
	}
}
// scanField tries to parse a "name<delim>value" filter that starts at the
// identifier token ident, recording it on qp via qp.Add.
//
// The identifier is appended to qp.Raw instead (and any tokens read ahead are
// pushed back) when it is not a valid identifier, when an allow-list is
// configured and does not contain it (case-insensitive), when it is not
// followed by a delimiter token, or when the delimiter is not followed by at
// least one field/identifier token.
func (p *Parser) scanField(ident tokenRef, qp *Query) {
	name := ident.lit

	if !isIdent(name) {
		qp.Raw += name
		return
	}

	// Enforce the allow-list, if one was configured.
	if len(p.opt.Allowed) > 0 {
		permitted := false
		for _, candidate := range p.opt.Allowed {
			if strings.EqualFold(candidate, name) {
				permitted = true
				break
			}
		}
		if !permitted {
			qp.Raw += name
			return
		}
	}

	// The identifier must be followed directly by a delimiter.
	delim := p.scan()
	if delim.tok != tokenDELIM {
		qp.Raw += name
		p.unscan(delim)
		return
	}

	// Gather every consecutive field/identifier token as the filter value.
	var value strings.Builder
	matched := 0
	next := p.scan()
	for next.tok == tokenFIELD || next.tok == tokenIDENT {
		value.WriteString(next.lit)
		matched++
		next = p.scan()
	}

	// No value followed the delimiter: treat the identifier as raw text and
	// replay the delimiter and the look-ahead token in stream order.
	if matched == 0 {
		qp.Raw += name
		p.unscan(delim)
		p.unscan(next)
		return
	}
	p.unscan(next)

	// Swallow a single whitespace token after the value, if present.
	_ = p.accept(tokenWS)

	text := value.String()
	if cut := p.opt.CutFn; cut != nil {
		text = cutsetFunc(text, cut)
	}
	qp.Add(name, text)
}
// cutsetFunc returns input with every rune for which cutFn reports true
// removed. Runes are visited in order; bytes that are not valid UTF-8 are
// presented to cutFn as utf8.RuneError and, if kept, written out as U+FFFD.
//
// cutFn must be non-nil unless input is empty.
func cutsetFunc(input string, cutFn func(rune) bool) (out string) {
	// Build the result in a single buffer rather than with repeated string
	// concatenation, which is quadratic in the input length.
	var b strings.Builder
	b.Grow(len(input))
	for _, r := range input {
		if !cutFn(r) {
			b.WriteRune(r)
		}
	}
	return b.String()
}
// DefaultCut is the default cut function. It reports true (strip) for every
// rune that is potentially unwanted in filter fields and raw text. The runes
// that are kept are Unicode letters, Unicode numbers, space, tab, and the
// punctuation characters '_', ',', '-', '.' and ':'.
func DefaultCut(r rune) (strip bool) {
	switch r {
	case ' ', '\t', '_', ',', '-', '.', ':':
		return false
	}
	return !(unicode.IsLetter(r) || unicode.IsNumber(r))
}
// stripDuplicateWS collapses every run of consecutive space characters in
// val into a single space. Only the space character (U+0020) is affected;
// tabs and other whitespace are left as they are, and leading or trailing
// spaces are reduced to one space rather than trimmed.
func stripDuplicateWS(val string) string {
	var b strings.Builder
	b.Grow(len(val))

	// Byte-wise iteration is safe here: 0x20 never occurs inside a multi-byte
	// UTF-8 sequence, so all other bytes can be copied through untouched.
	prevSpace := false
	for i := 0; i < len(val); i++ {
		c := val[i]
		isSpace := c == ' '
		if isSpace && prevSpace {
			continue
		}
		prevSpace = isSpace
		b.WriteByte(c)
	}
	return b.String()
}