forked from toml-lang/toml
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtoml.abnf
258 lines (177 loc) · 8.21 KB
/
toml.abnf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
;; This document describes TOML's syntax, using the ABNF format (defined in
;; RFC 5234 -- https://www.ietf.org/rfc/rfc5234.txt).
;;
;; Although a TOML document must be valid UTF-8, this grammar refers to the
;; Unicode Code Points you get after you decode the UTF-8 input.
;;
;; All valid TOML documents will match this description; however, certain
;; invalid documents would need to be rejected as per the semantics described
;; in the supporting text description.
;; It is possible to try this grammar interactively, using instaparse.
;; http://instaparse.mojombo.com/
;;
;; To do so, in the lower right, click on Options and change `:input-format` to
;; `:abnf`. Then paste this entire ABNF document into the grammar entry box
;; (above the options). Then you can type or paste a sample TOML document into
;; the beige box on the left. Tada!
;; Overall Structure
;;
;; A document is a sequence of one or more expressions separated by newlines.
;; An expression is either an empty/comment-only line, a key/value pair, or a
;; table header; each form may carry surrounding whitespace and a trailing
;; comment.
toml = expression *( newline expression )
expression = ws [ comment ]
           / ws keyval ws [ comment ]
           / ws table ws [ comment ]
;; Whitespace
;;
;; Only space and horizontal tab are whitespace characters; `ws` matches any
;; run of them, including the empty run.
wschar = %x20  ; Space
       / %x09  ; Horizontal tab
ws = *wschar
;; Newline
;;
;; A line ends with LF or with the two-code-point sequence CR LF. A lone CR
;; (%x0D) does not match this rule.
newline = %x0A     ; LF
        / %x0D.0A  ; CRLF
;; Comment
;;
;; A comment starts with "#" and extends over every following allowed
;; character. The allowed set leaves out %x00 and %x0A-0D, so a comment can
;; never contain LF or CR and therefore stops before the line ending.
comment-start-symbol = %x23  ; #
non-ascii = %x80-D7FF / %xE000-10FFFF  ; everything above ASCII except D800-DFFF
allowed-comment-char = %x01-09 / %x0E-7F / non-ascii
comment = comment-start-symbol *allowed-comment-char
;; Key-Value pairs
;;
;; A pair is a key, the "=" separator (with optional whitespace around it),
;; and a value. A key is a single simple key or a dotted chain of them.
keyval = key keyval-sep val
key = simple-key / dotted-key
simple-key = quoted-key / unquoted-key
val = string
    / boolean
    / array
    / inline-table
    / date-time
    / float
    / integer
;; Unquoted key
;;
;; A bare key is one or more characters from the set below. The set is written
;; as a single rule; each continuation line adds one group of code point
;; ranges.
unquoted-key = 1*unquoted-key-char
unquoted-key-char = ALPHA / DIGIT / %x2D / %x5F     ; a-z A-Z 0-9 - _
                  / %xB2 / %xB3 / %xB9 / %xBC-BE    ; superscript digits, fractions
                  / %xC0-D6 / %xD8-F6 / %xF8-37D    ; non-symbol chars in Latin block (skips %xD7 and %xF7)
                  / %x37F-1FFF                      ; resumes after %x37E, GREEK QUESTION MARK, which is basically a semi-colon
                  / %x200C-200D / %x203F-2040       ; General Punctuation block: only ZWNJ, ZWJ and the two tie symbols
                  / %x2070-218F / %x2460-24FF       ; super-/subscripts, letterlike/numberlike forms, enclosed alphanumerics
                  / %x2C00-2FEF / %x3001-D7FF       ; skips arrows, math, box drawing etc., and 2FF0-3000 ideographic up/down markers and spaces
                  / %xF900-FDCF / %xFDF0-FFFD       ; skips D800-DFFF surrogates, E000-F8FF Private Use area, FDD0-FDEF (process-internal use)
                  / %x10000-EFFFF                   ; everything outside the BMP except the Private Use planes (F0000-10FFFF)
;; Quoted and dotted key
;;
;; A quoted key reuses the single-line string forms. A dotted key is two or
;; more simple keys joined by "."; whitespace is permitted on either side of
;; the dot and of the "=" that separates a key from its value.
dot-sep = ws %x2E ws     ; . Period
keyval-sep = ws %x3D ws  ; = Equals sign
quoted-key = basic-string / literal-string
dotted-key = simple-key 1*( dot-sep simple-key )
;; String
;;
;; Four string forms: basic ("...") and literal ('...'), each with a
;; single-line and a multi-line (triple-delimiter) variant.
string = ml-basic-string
       / basic-string
       / ml-literal-string
       / literal-string
;; Basic String
;;
;; A single-line string delimited by double quotes. Each character is either
;; an unescaped code point or a backslash escape. Unescaped ASCII is limited
;; to tab, space and %x21-7E minus the quotation mark (%x22) and the backslash
;; (%x5C).
quotation-mark = %x22  ; "
escape = %x5C          ; \
basic-string = quotation-mark *basic-char quotation-mark
basic-char = basic-unescaped / escaped
basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
escaped = escape escape-seq-char
escape-seq-char = %x22          ; "          quotation mark   U+0022
                / %x5C          ; \          reverse solidus  U+005C
                / %x62          ; b          backspace        U+0008
                / %x65          ; e          escape           U+001B
                / %x66          ; f          form feed        U+000C
                / %x6E          ; n          line feed        U+000A
                / %x72          ; r          carriage return  U+000D
                / %x74          ; t          tab              U+0009
                / %x78 2HEXDIG  ; xHH                         U+00HH
                / %x75 4HEXDIG  ; uHHHH                       U+HHHH
                / %x55 8HEXDIG  ; UHHHHHHHH                   U+HHHHHHHH
;; Multiline Basic String
;;
;; Delimited by three quotation marks on each side. A newline directly after
;; the opening delimiter is matched separately from the body. Inside the body,
;; runs of one or two quotation marks are allowed as long as they are followed
;; by other content or sit immediately before the closing delimiter.
;;
;; NOTE: the closing delimiter is a continuation of the `ml-basic-string`
;; rule and must stay indented; RFC 5234 treats an unindented line as the
;; start of a new rule.
ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
                  ml-basic-string-delim
ml-basic-string-delim = 3quotation-mark
ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
mlb-content = basic-char / newline / mlb-escaped-nl
mlb-quotes = 1*2quotation-mark  ; one or two quotation marks, never three
mlb-escaped-nl = escape ws newline *( wschar / newline )  ; backslash at end of line, plus all following whitespace and newlines
;; Literal String
;;
;; A single-line string delimited by apostrophes. There is no escape
;; mechanism: every character between the delimiters is taken as-is, so the
;; apostrophe itself (%x27) cannot appear inside.
apostrophe = %x27  ; ' apostrophe
literal-char = %x09 / %x20-26 / %x28-7E / non-ascii  ; tab, or %x20-7E without %x27, or non-ASCII
literal-string = apostrophe *literal-char apostrophe
;; Multiline Literal String
;;
;; Delimited by three apostrophes on each side. A newline directly after the
;; opening delimiter is matched separately from the body. Inside the body,
;; runs of one or two apostrophes are allowed as long as they are followed by
;; other content or sit immediately before the closing delimiter.
;;
;; NOTE: the closing delimiter is a continuation of the `ml-literal-string`
;; rule and must stay indented; RFC 5234 treats an unindented line as the
;; start of a new rule.
ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
                    ml-literal-string-delim
ml-literal-string-delim = 3apostrophe
ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
mll-content = literal-char / newline
mll-quotes = 1*2apostrophe  ; one or two apostrophes, never three
;; Integer
;;
;; Decimal integers may carry a sign and must not have a leading zero unless
;; the value is the single digit 0. Hexadecimal, octal and binary integers
;; take a lowercase prefix and no sign. In every base an underscore may appear
;; only between two digits.
integer = dec-int / hex-int / oct-int / bin-int

minus      = %x2D  ; -
plus       = %x2B  ; +
underscore = %x5F  ; _

digit1-9 = %x31-39  ; 1-9
digit0-7 = %x30-37  ; 0-7
digit0-1 = %x30-31  ; 0-1

hex-prefix = %x30.78  ; 0x
oct-prefix = %x30.6F  ; 0o
bin-prefix = %x30.62  ; 0b

dec-int          = [ minus / plus ] unsigned-dec-int
unsigned-dec-int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT )
hex-int          = hex-prefix HEXDIG *( HEXDIG / underscore HEXDIG )
oct-int          = oct-prefix digit0-7 *( digit0-7 / underscore digit0-7 )
bin-int          = bin-prefix digit0-1 *( digit0-1 / underscore digit0-1 )
;; Float
;;
;; A float is a decimal integer part followed by an exponent, or by a
;; fractional part with an optional exponent; alternatively it is one of the
;; special values inf / nan with an optional sign. Digits after the decimal
;; point and in the exponent may start with zero.
float = float-int-part ( exp / frac [ exp ] )
      / special-float

float-int-part = dec-int
decimal-point = %x2E  ; .
zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT )
frac = decimal-point zero-prefixable-int

float-exp-part = [ minus / plus ] zero-prefixable-int
exp = "e" float-exp-part  ; quoted ABNF strings are case-insensitive, so "E" matches too

inf = %x69.6E.66  ; inf
nan = %x6E.61.6E  ; nan
special-float = [ minus / plus ] ( inf / nan )
;; Boolean
;;
;; The two literals are spelled out as exact code point sequences, so only
;; the all-lowercase forms match.
true  = %x74.72.75.65     ; true
false = %x66.61.6C.73.65  ; false
boolean = true / false
;; Date and Time (as defined in RFC 3339)
;;
;; The rules below fix only the lexical shape of dates and times. The value
;; ranges given in trailing comments are not enforced by the grammar itself.
;; Note that the seconds field of `partial-time` is optional in this grammar.
date-time = offset-date-time / local-date-time / local-date / local-time

;; Date fields
date-fullyear = 4DIGIT
date-month    = 2DIGIT  ; 01-12
date-mday     = 2DIGIT  ; 01-28, 01-29, 01-30, 01-31 based on month/year
full-date     = date-fullyear "-" date-month "-" date-mday

;; Separator between the date and the time
time-delim = "T" / %x20  ; T, t, or space

;; Time fields
time-hour      = 2DIGIT  ; 00-23
time-minute    = 2DIGIT  ; 00-59
time-second    = 2DIGIT  ; 00-58, 00-59, 00-60 based on leap second rules
time-secfrac   = "." 1*DIGIT
time-numoffset = ( "+" / "-" ) time-hour ":" time-minute
time-offset    = "Z" / time-numoffset
partial-time   = time-hour ":" time-minute [ ":" time-second [ time-secfrac ] ]
full-time      = partial-time time-offset

;; Offset Date-Time
offset-date-time = full-date time-delim full-time

;; Local Date-Time
local-date-time = full-date time-delim partial-time

;; Local Date
local-date = full-date

;; Local Time
local-time = partial-time
;; Array
;;
;; Square brackets around zero or more comma-separated values. Whitespace,
;; newlines and comments may appear around every value, and a trailing comma
;; after the last value is permitted.
array-open  = %x5B  ; [
array-close = %x5D  ; ]
array-sep   = %x2C  ; , Comma

ws-comment-newline = *( wschar / [ comment ] newline )

array = array-open [ array-values ] ws-comment-newline array-close
array-values = ws-comment-newline val ws-comment-newline array-sep array-values
             / ws-comment-newline val ws-comment-newline [ array-sep ]
;; Table
;;
;; A table header is either a standard table ([key]) or an array table
;; ([[key]]).
table = std-table / array-table

;; Standard Table
;;
;; A key in single square brackets; whitespace is allowed between the
;; brackets and the key.
std-table-open  = %x5B ws  ; [ Left square bracket
std-table-close = ws %x5D  ; ] Right square bracket
std-table = std-table-open key std-table-close
;; Inline Table
;;
;; Curly braces around zero or more comma-separated key/value pairs.
;; Whitespace, newlines and comments may appear around every pair, and a
;; trailing comma after the last pair is permitted.
inline-table-open  = %x7B  ; {
inline-table-close = %x7D  ; }
inline-table-sep   = %x2C  ; , Comma

inline-table = inline-table-open [ inline-table-keyvals ] ws-comment-newline inline-table-close
inline-table-keyvals = ws-comment-newline keyval ws-comment-newline inline-table-sep inline-table-keyvals
                     / ws-comment-newline keyval ws-comment-newline [ inline-table-sep ]
;; Array Table
;;
;; A key in double square brackets; whitespace is allowed between the
;; brackets and the key, but not between the two brackets of each pair.
array-table-open  = %x5B.5B ws  ; [[ Double left square bracket
array-table-close = ws %x5D.5D  ; ]] Double right square bracket
array-table = array-table-open key array-table-close
;; Built-in ABNF terms, reproduced here for clarity
;;
;; HEXDIG lists only uppercase letters, but quoted strings in ABNF match
;; case-insensitively, so "a" through "f" are accepted as well.
DIGIT  = %x30-39            ; 0-9
ALPHA  = %x41-5A / %x61-7A  ; A-Z / a-z
HEXDIG = DIGIT
       / "A" / "B" / "C"
       / "D" / "E" / "F"