-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
sanitize.go
294 lines (251 loc) · 11.4 KB
/
sanitize.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
/*
Package sanitize (go-sanitize) implements a simple library of various sanitization methods for data transformation.
If you have any suggestions or comments, please feel free to open an issue on this project's GitHub page.
*/
package sanitize
import (
"net"
"net/url"
"regexp"
"strings"
"unicode"
)
// Pre-compiled regular expressions shared by the sanitizers in this package.
// Unless noted otherwise each pattern matches the characters that must be
// REMOVED (a negated character class), so replacing every match with the empty
// string leaves only the accepted characters.
var (
	// Alphanumeric characters only
	alphaNumericRegExp = regexp.MustCompile(`[^a-zA-Z0-9]`)
	// Alphanumeric characters and whitespace
	alphaNumericWithSpacesRegExp = regexp.MustCompile(`[^a-zA-Z0-9\s]`)
	// Alpha characters only
	alphaRegExp = regexp.MustCompile(`[^a-zA-Z]`)
	// Alpha characters and whitespace
	alphaWithSpacesRegExp = regexp.MustCompile(`[^a-zA-Z\s]`)
	// Bitcoin `cashaddr` address accepted characters
	bitcoinCashAddrRegExp = regexp.MustCompile(`[^ac-hj-np-zAC-HJ-NP-Z02-9]`)
	// Bitcoin address accepted characters
	bitcoinRegExp = regexp.MustCompile(`[^a-km-zA-HJ-NP-Z1-9]`)
	// Decimals (positive and negative)
	decimalRegExp = regexp.MustCompile(`[^0-9.-]`)
	// Domain accepted characters
	domainRegExp = regexp.MustCompile(`[^a-zA-Z0-9-.]`)
	// Email address characters
	emailRegExp = regexp.MustCompile(`[^a-zA-Z0-9-_.@+]`)
	// Characters recognized in surnames and proper names
	formalNameRegExp = regexp.MustCompile(`[^a-zA-Z0-9-',.\s]`)
	// HTML/XML tags: anything between an opening and a closing angle bracket (matches the tag itself)
	htmlRegExp = regexp.MustCompile(`(?i)<[^>]*>`)
	// IPv4 and IPv6 characters only
	ipAddressRegExp = regexp.MustCompile(`[^a-zA-Z0-9:.]`)
	// Numbers only
	numericRegExp = regexp.MustCompile(`[^0-9]`)
	// Path name (file name, seo)
	pathNameRegExp = regexp.MustCompile(`[^a-zA-Z0-9-_]`)
	// Standard accepted punctuation characters
	punctuationRegExp = regexp.MustCompile(`[^a-zA-Z0-9-'"#&!?,.\s]+`)
	// Scientific notation (float) (positive and negative)
	scientificNotationRegExp = regexp.MustCompile(`[^0-9.eE+-]`)
	// Script, iframe, embed and object elements (matches the element itself).
	// The `s` flag lets the body span several lines and the lazy `.*?` stops at
	// the first closing tag so content between two separate elements is kept.
	scriptRegExp = regexp.MustCompile(`(?is)<(script|iframe|embed|object)[^>]*>.*?</(script|iframe|embed|object)>`)
	// Carriage returns, line feeds, tabs, vertical tabs and form feeds (matches the control character itself)
	singleLineRegExp = regexp.MustCompile(`(\r)|(\n)|(\t)|(\v)|(\f)`)
	// Time allowed characters
	timeRegExp = regexp.MustCompile(`[^0-9:]`)
	// URI allowed characters
	uriRegExp = regexp.MustCompile(`[^a-zA-Z0-9-_/?&=#%]`)
	// URL allowed characters
	urlRegExp = regexp.MustCompile(`[^a-zA-Z0-9-_/:.,?&@=#%]`)
	// A leading "www." label (matches the label itself). Anchored and with an
	// escaped dot so that hosts merely containing "www" are left intact.
	wwwRegExp = regexp.MustCompile(`(?i)^www\.`)
)
// emptySpace is the zero-length replacement handed to Regexp.ReplaceAll when
// matched characters should simply be dropped.
var emptySpace = []byte{}
// Alpha returns only alpha characters. Set the parameter spaces to true if you
// want to allow space characters. Valid characters are a-z and A-Z.
//
// View examples: sanitize_test.go
func Alpha(original string, spaces bool) string {
// Leave white spaces?
if spaces {
return string(alphaWithSpacesRegExp.ReplaceAll([]byte(original), emptySpace))
}
// No spaces
return string(alphaRegExp.ReplaceAll([]byte(original), emptySpace))
}
// AlphaNumeric returns only alphanumeric characters. Set the parameter spaces to true
// if you want to allow space characters. Valid characters are a-z, A-Z and 0-9.
//
// View examples: sanitize_test.go
func AlphaNumeric(original string, spaces bool) string {
// Leave white spaces?
if spaces {
return string(alphaNumericWithSpacesRegExp.ReplaceAll([]byte(original), emptySpace))
}
// No spaces
return string(alphaNumericRegExp.ReplaceAll([]byte(original), emptySpace))
}
// BitcoinAddress removes every character that bitcoinRegExp rejects, leaving
// only characters accepted in a bitcoin address. It filters characters only;
// it does not validate the address.
//
// View examples: sanitize_test.go
func BitcoinAddress(original string) string {
	return bitcoinRegExp.ReplaceAllString(original, "")
}
// BitcoinCashAddress removes every character that bitcoinCashAddrRegExp
// rejects, leaving only characters accepted in a bitcoin `cashaddr` address
// (https://www.bitcoinabc.org/2018-01-14-CashAddr/). It filters characters
// only; it does not validate the address.
//
// View examples: sanitize_test.go
func BitcoinCashAddress(original string) string {
	return bitcoinCashAddrRegExp.ReplaceAllString(original, "")
}
// Custom removes every match of the caller-supplied regular expression regExp
// from original and returns the remainder. Use it for patterns this package
// does not provide.
//
// The expression is compiled with regexp.MustCompile on each call, so an
// invalid pattern panics.
//
// View examples: sanitize_test.go
func Custom(original string, regExp string) string {
	compiled := regexp.MustCompile(regExp) // panics on an invalid expression
	stripped := compiled.ReplaceAll([]byte(original), emptySpace)
	return string(stripped)
}
// Decimal keeps only digits, '.' and '-' from original, which covers positive
// and negative decimal/float notation. The result is not checked for being a
// well-formed number.
//
// View examples: sanitize_test.go
func Decimal(original string) string {
	return decimalRegExp.ReplaceAllString(original, "")
}
// Domain extracts a hostname / domain name from original, which may be a bare
// host ("example.com/path") or a full http(s) URL.
//
// Set preserveCase to keep the host's original case; otherwise it is forced to
// lowercase. Set removeWww to apply the package's www pattern (wwwRegExp) to
// the host. Any port is dropped, and characters rejected by domainRegExp are
// removed from the result.
//
// An empty input is returned unchanged with a nil error. If the URL cannot be
// parsed, the string that was handed to the parser and the parse error are
// returned.
//
// View examples: sanitize_test.go
func Domain(original string, preserveCase bool, removeWww bool) (string, error) {
	// Nothing to do for an empty input
	if len(original) == 0 {
		return original, nil
	}

	// url.Parse only fills in Host when a scheme is present, so add one when
	// missing. The check is an anchored, case-insensitive prefix test: hosts
	// that merely contain "http" (e.g. "httpbin.org") still need the scheme,
	// and "HTTP://" is a valid scheme spelling.
	original = strings.TrimSpace(original)
	if !hasHTTPScheme(original) {
		original = "http://" + original
	}

	// Try to parse the url
	u, err := url.Parse(original)
	if err != nil {
		return original, err
	}

	// Hostname (not Host) so a ":port" suffix is not fused into the domain
	// once the ':' is stripped below.
	host := u.Hostname()

	// Remove www.
	if removeWww {
		host = wwwRegExp.ReplaceAllString(host, "")
	}

	// Generally all domains should be uniform and lowercase
	if !preserveCase {
		host = strings.ToLower(host)
	}

	return domainRegExp.ReplaceAllString(host, ""), nil
}

// hasHTTPScheme reports whether s starts with "http://" or "https://", ignoring case.
func hasHTTPScheme(s string) bool {
	for _, scheme := range [...]string{"http://", "https://"} {
		if len(s) >= len(scheme) && strings.EqualFold(s[:len(scheme)], scheme) {
			return true
		}
	}
	return false
}
// Email returns a sanitized email address string. Every "mailto:" prefix is
// removed (matched case-insensitively, since URI schemes are not case
// sensitive) and all characters rejected by emailRegExp are dropped.
//
// The address is forced to lowercase unless preserveCase is true.
//
// View examples: sanitize_test.go
func Email(original string, preserveCase bool) string {
	address := removeMailto(original)

	// Standard is forced to lowercase
	if !preserveCase {
		address = strings.ToLower(address)
	}

	return emailRegExp.ReplaceAllString(address, "")
}

// removeMailto deletes every occurrence of "mailto:" from s, ignoring case.
func removeMailto(s string) string {
	const scheme = "mailto:"

	var b strings.Builder
	b.Grow(len(s))
	for i := 0; i < len(s); {
		// Skip over a scheme occurrence; everything else is copied byte for
		// byte so multi-byte characters pass through untouched.
		if end := i + len(scheme); end <= len(s) && strings.EqualFold(s[i:end], scheme) {
			i = end
			continue
		}
		b.WriteByte(s[i])
		i++
	}
	return b.String()
}
// FirstToUpper upper-cases the first character of original and leaves the
// remaining characters as they are.
//
// View examples: sanitize_test.go
func FirstToUpper(original string) string {
	// Empty and single-byte strings can be upper-cased as a whole
	if len(original) <= 1 {
		return strings.ToUpper(original)
	}

	// Work on runes so a multi-byte first character is handled as one unit
	chars := []rune(original)
	first := unicode.ToUpper(chars[0])
	return string(append([]rune{first}, chars[1:]...))
}
// FormalName sanitizes a formal name or surname (first, middle or last name).
// Only letters, digits, hyphens, apostrophes, commas, periods and whitespace
// are kept.
//
// View examples: sanitize_test.go
func FormalName(original string) string {
	return formalNameRegExp.ReplaceAllString(original, "")
}
// HTML removes every <...> tag matched by htmlRegExp from original. Text
// between tags is kept.
//
// View examples: sanitize_test.go
func HTML(original string) string {
	return htmlRegExp.ReplaceAllString(original, "")
}
// IPAddress returns the canonical text form of the IPv4 or IPv6 address found
// in original, or an empty string when no valid address remains after invalid
// characters have been removed.
//
// View examples: sanitize_test.go
func IPAddress(original string) string {
	// Drop any character that cannot appear in an IP address before parsing
	candidate := ipAddressRegExp.ReplaceAllString(original, "")

	if parsed := net.ParseIP(candidate); parsed != nil {
		return parsed.String()
	}
	return ""
}
// Numeric keeps only the digits 0-9 from original.
//
// View examples: sanitize_test.go
func Numeric(original string) string {
	return numericRegExp.ReplaceAllString(original, "")
}
// PathName returns a path compliant name (file name, SEO slug). Only letters,
// digits, hyphens and underscores are kept.
//
// View examples: sanitize_test.go
func PathName(original string) string {
	return pathNameRegExp.ReplaceAllString(original, "")
}
// Punctuation keeps letters, digits, whitespace and the basic punctuation
// accepted by punctuationRegExp (- ' " # & ! ? , .); everything else is
// removed.
//
// View examples: sanitize_test.go
func Punctuation(original string) string {
	return punctuationRegExp.ReplaceAllString(original, "")
}
// ScientificNotation keeps only the characters used in scientific notation
// floats: digits, '.', 'e', 'E', '+' and '-'. The result is not checked for
// being a well-formed number.
//
// View examples: sanitize_test.go
func ScientificNotation(original string) string {
	return scientificNotationRegExp.ReplaceAllString(original, "")
}
// Scripts removes script, iframe, embed and object elements - opening tag,
// content and closing tag - as matched by scriptRegExp.
//
// View examples: sanitize_test.go
func Scripts(original string) string {
	return scriptRegExp.ReplaceAllString(original, "")
}
// SingleLine collapses original onto one line by replacing each carriage
// return, line feed, tab, vertical tab and form feed with a single space.
//
// View examples: sanitize_test.go
func SingleLine(original string) string {
	const replacement = " "
	flattened := singleLineRegExp.ReplaceAllString(original, replacement)
	return flattened
}
// Time keeps only digits and colons from original, i.e. the characters that
// make up a time such as 12:30:45. The result is not validated as a time.
//
// View examples: sanitize_test.go
func Time(original string) string {
	return timeRegExp.ReplaceAllString(original, "")
}
// URI removes every character not allowed by uriRegExp, keeping letters,
// digits and - _ / ? & = # %.
//
// View examples: sanitize_test.go
func URI(original string) string {
	return uriRegExp.ReplaceAllString(original, "")
}
// URL removes every character not allowed by urlRegExp, keeping letters,
// digits and - _ / : . , ? & @ = # %.
//
// View examples: sanitize_test.go
func URL(original string) string {
	return urlRegExp.ReplaceAllString(original, "")
}
// XML removes every <...> tag from original. It is an alias of HTML and
// delegates to it.
//
// View examples: sanitize_test.go
func XML(original string) string {
	withoutTags := HTML(original)
	return withoutTags
}
// XSS removes known XSS attack strings or script strings from original.
//
// The tokens are removed in a fixed order, and the whole sequence is repeated
// until the string stops changing. Repeating matters because deleting one
// token can splice the surrounding text into another one (for example
// "ev<al(" becomes "eval(" once the "<" is removed). Every pass that changes
// the string shortens it, so the loop always terminates.
//
// Matching is case-sensitive.
//
// View examples: sanitize_test.go
func XSS(original string) string {
	// NOTE(review): "&rt;" is not a standard HTML entity; "&gt;" / "&lt;" may
	// have been intended - confirm before changing the token list.
	tokens := [...]string{
		"<script",
		"script>",
		"eval(",
		"javascript:",
		"fromCharCode",
		">",
		"<",
		"&rt;",
	}

	for {
		cleaned := original
		for _, token := range tokens {
			cleaned = strings.Replace(cleaned, token, "", -1)
		}

		// Stable: no token is left and none was re-formed by a removal
		if cleaned == original {
			return cleaned
		}
		original = cleaned
	}
}