diff --git a/CHANGELOG.md b/CHANGELOG.md index 14d3cebc..0650637a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,6 @@ - Add new `\e` shorthand for the escape character. - Add \x00 notation to basic strings. - Seconds in Date-Time and Time values are now optional. -- Allow non-English scripts in unquoted (bare) keys - Clarify newline normalization in multi-line literal strings. ## 1.0.0 / 2021-01-11 diff --git a/toml.abnf b/toml.abnf index 0446f8b6..d788fb17 100644 --- a/toml.abnf +++ b/toml.abnf @@ -49,19 +49,7 @@ key = simple-key / dotted-key val = string / boolean / array / inline-table / date-time / float / integer simple-key = quoted-key / unquoted-key - -;; Unquoted key - -unquoted-key = 1*unquoted-key-char -unquoted-key-char = ALPHA / DIGIT / %x2D / %x5F ; a-z A-Z 0-9 - _ -unquoted-key-char =/ %xB2 / %xB3 / %xB9 / %xBC-BE ; superscript digits, fractions -unquoted-key-char =/ %xC0-D6 / %xD8-F6 / %xF8-37D ; non-symbol chars in Latin block -unquoted-key-char =/ %x37F-1FFF ; exclude GREEK QUESTION MARK, which is basically a semi-colon -unquoted-key-char =/ %x200C-200D / %x203F-2040 ; from General Punctuation Block, include the two tie symbols and ZWNJ, ZWJ -unquoted-key-char =/ %x2070-218F / %x2460-24FF ; include super-/subscripts, letterlike/numberlike forms, enclosed alphanumerics -unquoted-key-char =/ %x2C00-2FEF / %x3001-D7FF ; skip arrows, math, box drawing etc, skip 2FF0-3000 ideographic up/down markers and spaces -unquoted-key-char =/ %xF900-FDCF / %xFDF0-FFFD ; skip D800-DFFF surrogate block, E000-F8FF Private Use area, FDD0-FDEF intended for process-internal use (unicode) -unquoted-key-char =/ %x10000-EFFFF ; all chars outside BMP range, excluding Private Use planes (F0000-10FFFF) +unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ ;; Quoted and dotted key diff --git a/toml.md b/toml.md index 08cc9f67..320040c4 100644 --- a/toml.md +++ b/toml.md @@ -103,11 +103,9 @@ first = "Tom" last = "Preston-Werner" # INVALID A key may be either bare, quoted, or dotted. -**Bare keys** may contain any letter-like or number-like Unicode character from -any Unicode script, as well as ASCII digits, dashes and underscores. -Punctuation, spaces, arrows, box drawing and private use characters are not -allowed. Note that bare keys are allowed to be composed of only ASCII digits, -e.g. 1234, but are always interpreted as strings. +**Bare keys** may only contain ASCII letters, ASCII digits, underscores, and +dashes (`A-Za-z0-9_-`). Note that bare keys are allowed to be composed of only +ASCII digits, e.g. `1234`, but are always interpreted as strings. ℹ️ The exact ranges of allowed code points can be found in the [ABNF grammar file][abnf]. @@ -117,23 +115,18 @@ key = "value" bare_key = "value" bare-key = "value" 1234 = "value" -Fuß = "value" -😂 = "value" -汉语大字典 = "value" -辭源 = "value" -பெண்டிரேம் = "value" ``` **Quoted keys** follow the exact same rules as either basic strings or literal -strings and allow you to use any Unicode character in a key name, including -spaces. Best practice is to use bare keys except when absolutely necessary. +strings and allow you to use a much broader set of key names. Best practice is +to use bare keys except when absolutely necessary. ```toml "127.0.0.1" = "value" "character encoding" = "value" +"ʎǝʞ" = "value" +'key2' = "value" 'quoted "value"' = "value" -"╠═╣" = "value" -"⋰∫∬∭⋱" = "value" ``` A bare key must be non-empty, but an empty quoted key is allowed (though @@ -154,7 +147,6 @@ name = "Orange" physical.color = "orange" physical.shape = "round" site."google.com" = true -பெண்.டிரேம் = "we are women" ``` In JSON land, that would give you the following structure: @@ -168,9 +160,6 @@ In JSON land, that would give you the following structure: }, "site": { "google.com": true - }, - "பெண்": { - "டிரேம்": "we are women" } } ```