From 3c182ffaccb615a787e89eeb2847013dd62e36f5 Mon Sep 17 00:00:00 2001
From: KristofferC <kcarlsson89@gmail.com>
Date: Sun, 24 May 2020 22:44:41 +0200
Subject: [PATCH] add a TOML parser to Base

---
 base/Base.jl        |    5 +
 base/toml_parser.jl | 1098 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1103 insertions(+)
 create mode 100644 base/toml_parser.jl

diff --git a/base/Base.jl b/base/Base.jl
index 9c1cbe735e4fd..3de3d88d40582 100644
--- a/base/Base.jl
+++ b/base/Base.jl
@@ -331,6 +331,11 @@ include("initdefs.jl")
 # worker threads
 include("threadcall.jl")
 
+module TOML
+const HAVE_DATES = false  # makes toml_parser.jl use its own Date/Time/DateTime structs
+include("toml_parser.jl")
+end # module TOML
+
 # code loading
 include("uuid.jl")
 include("loading.jl")
diff --git a/base/toml_parser.jl b/base/toml_parser.jl
new file mode 100644
index 0000000000000..f881aa0b2f3a0
--- /dev/null
+++ b/base/toml_parser.jl
@@ -0,0 +1,1098 @@
+using Base: IdSet
+
+# `HAVE_DATES` has to be defined by the including module before this file is
+# loaded; it selects between the Dates stdlib and the fallback structs below.
+@isdefined(HAVE_DATES) || error("need to set the variable `HAVE_DATES` to determine if the Dates stdlib is available")
+
+if HAVE_DATES
+    using Dates
+else
+    # Fallback used when the Dates stdlib is not available: date and time
+    # values are parsed into these minimal internal structs instead.
+    # Note that these do not do any argument checking.
+    struct Date
+        year::Int
+        month::Int
+        day::Int
+    end
+    struct Time
+        hour::Int
+        minute::Int
+        second::Int
+        ms::Int
+    end
+    struct DateTime
+        date::Date
+        time::Time
+    end
+    DateTime(y, m, d, h, mi, s, ms) =
+        DateTime(Date(y,m,d), Time(h, mi, s, ms))
+end
+
+const EOF_CHAR = typemax(Char)  # look-ahead value that signals the end of the input
+# Concrete container types used for the parsed tables and arrays
+const TOMLDict  = Dict{String, Any}
+const TOMLArray = Vector{Any}
+
+
+##########
+# Parser #
+##########
+
+mutable struct Parser
+    str::String
+    # 1 character look ahead
+    current_char::Char
+    pos::Int
+    # prevpos equals the startbyte of the look ahead character
+    # prevpos-1 is therefore the end byte of the character we last ate
+    prevpos::Int
+
+    # File info
+    column::Int
+    line::Int
+
+    # The function `take_substring` takes the substring from `marker` up
+    # to `prevpos-1`.
+    marker::Int
+
+    # The current table that `key = value` entries are inserted into
+    active_table::TOMLDict
+
+    # As we parse dotted keys we store each part of the key in this cache
+    # A future improvement would be to also store the spans of the keys
+    # so that in error messages we could also show the previous key
+    # definition in case of duplicated keys
+    dotted_keys::Vector{String}
+
+    # Strings in TOML can have line continuations ('\' as the last character
+    # on a line). We store the byte ranges for each of these "chunks" in here
+    chunks::Vector{UnitRange{Int}}
+
+    # We need to keep track of those tables / arrays that are defined
+    # inline since we are not allowed to add keys to those
+    inline_tables::IdSet{TOMLDict}
+    static_arrays::IdSet{TOMLArray}
+
+    # [a.b.c.d] doesn't "define" the table [a]
+    # so keys can later be added to [a], therefore
+    # we need to keep track of what tables are
+    # actually "defined"
+    defined_tables::IdSet{TOMLDict}
+
+    # The table we will finally return to the user
+    root::TOMLDict
+
+    # Filled in in case we are parsing a file to improve error messages
+    filepath::Union{String, Nothing}
+end
+
+function Parser(str::String; filepath=nothing)
+    root = TOMLDict()
+    # Positional construction; the trailing comments name the field being set
+    l = Parser(
+            str,                  # str
+            EOF_CHAR,             # current_char
+            firstindex(str),      # pos
+            0,                    # prevpos
+            0,                    # column
+            1,                    # line
+            0,                    # marker
+            root,                 # active_table
+            String[],             # dotted_keys
+            UnitRange{Int}[],     # chunks
+            IdSet{TOMLDict}(),    # inline_tables
+            IdSet{TOMLArray}(),   # static_arrays
+            IdSet{TOMLDict}(),    # defined_tables
+            root,                 # root (same object as active_table initially)
+            filepath,             # filepath
+           )
+    startup(l)
+    return l
+end
+function startup(l::Parser)
+    eat_char(l)  # populate the look-ahead; this only returns the EOF_CHAR placeholder
+    # Skip a BOM, which (if present) is now the look-ahead character
+    if peek(l) === '\ufeff'
+        l.column -= 1
+        eat_char(l)
+    end
+    return nothing
+end
+
+Parser() = Parser("")                      # parser over an empty input
+Parser(io::IO) = Parser(read(io, String))  # reads all of `io` into a String first
+
+function reinit!(p::Parser, str::String; filepath::Union{Nothing, String}=nothing)
+    # Reset `p` in place so that it parses `str` from the beginning, reusing
+    # the already allocated key/chunk buffers and bookkeeping sets.
+    p.str, p.filepath = str, filepath
+    # Look-ahead and position state, same initial values as in the constructor
+    p.current_char = EOF_CHAR
+    p.pos, p.prevpos, p.marker = firstindex(str), 0, 0
+    p.line, p.column = 1, 0
+    # Start from a fresh result table; entries are inserted at the root first
+    p.root = TOMLDict()
+    p.active_table = p.root
+    # Forget everything recorded while parsing the previous input
+    for state in (p.dotted_keys, p.chunks, p.inline_tables, p.static_arrays, p.defined_tables)
+        empty!(state)
+    end
+    # Prime the one-character look-ahead
+    startup(p)
+end
+
+##########
+# Errors #
+##########
+
+throw_internal_error(msg) = error("internal TOML parser error: $msg")
+
+# Many functions return a `ParserError` instead of throwing. `@try expr`
+# evaluates `expr` and, if the result is a `ParserError`, immediately returns
+# it from the enclosing function so that it bubbles up to `parse`;
+# otherwise the macro evaluates to the result of `expr`.
+@eval macro $(Symbol("try"))(expr)
+    :(
+        v = $(esc(expr));
+        v isa $ParserError && return v;
+        v;
+    )
+end
+
+# TODO: Check all of these are used
+@enum ErrorType begin
+
+    # Toplevel #
+    ############
+    ErrRedefineTableArray
+    ErrExpectedNewLineKeyValue
+    ErrAddKeyToInlineTable
+    ErrAddArrayToStaticArray
+    ErrArrayTreatedAsDictionary
+    ErrExpectedEndOfTable
+    ErrExpectedEndArrayOfTable
+
+    # Keys #
+    ########
+    ErrExpectedEqualAfterKey
+    # Check, are these the same?
+    ErrDuplicatedKey
+    ErrKeyAlreadyHasValue
+    ErrInvalidBareKeyCharacter
+    ErrEmptyBareKey
+
+    # Values #
+    ##########
+    ErrUnexpectedStartOfValue
+    ErrGenericValueError
+
+    # Arrays
+    ErrExpectedCommaBetweenItemsArray
+
+    # Inline tables
+    ErrExpectedCommaBetweenItemsInlineTable
+    ErrTrailingCommaInlineTable
+
+    # Numbers
+    ErrUnderscoreNotSurroundedByDigits
+    ErrLeadingZeroNotAllowedInteger
+    ErrOverflowError
+    ErrIntegerParsingError
+    ErrFloatParsingError
+    ErrLeadingDot
+    ErrNoTrailingDigitAfterDot
+    ErrTrailingUnderscoreNumber
+
+    # DateTime
+    ErrParsingDateTime
+    ErrOffsetDateNotSupported
+
+    # Strings
+    ErrNewLineInString
+    ErrUnexpectedEndString
+    ErrInvalidEscapeCharacter
+    ErrInvalidUnicodeScalar
+
+end
+
+
+const err_message = Dict(
+    ErrTrailingCommaInlineTable             => "trailing comma not allowed in inline table",
+    ErrExpectedCommaBetweenItemsArray       => "expected comma between items in array",
+    ErrExpectedCommaBetweenItemsInlineTable => "expected comma between items in inline table",
+    ErrExpectedEndArrayOfTable              => "expected array of table to end with ']]'",
+    ErrInvalidBareKeyCharacter              => "invalid bare key character",
+    ErrRedefineTableArray                   => "tried to redefine an existing table as an array",
+    ErrDuplicatedKey                        => "key already defined",
+    ErrKeyAlreadyHasValue                   => "key already has a value",
+    ErrEmptyBareKey                         => "bare key cannot be empty",
+    ErrExpectedNewLineKeyValue              => "expected newline after key value pair",
+    ErrNewLineInString                      => "newline character in single quoted string",
+    ErrUnexpectedEndString                  => "string literal ended unexpectedly",
+    ErrExpectedEndOfTable                   => "expected end of table ']'",
+    ErrAddKeyToInlineTable                  => "tried to add a new key to an inline table",
+    ErrArrayTreatedAsDictionary             => "tried to add a key to an array",
+    ErrAddArrayToStaticArray                => "tried to append to a statically defined array",
+    ErrGenericValueError                    => "failed to parse value",
+    ErrLeadingZeroNotAllowedInteger         => "leading zero in integer not allowed",
+    ErrUnderscoreNotSurroundedByDigits      => "underscore is not surrounded by digits",
+    ErrUnexpectedStartOfValue               => "unexpected start of value",
+    ErrOffsetDateNotSupported               => "offset date-time is not supported",
+    ErrParsingDateTime                      => "parsing date/time value failed",
+    ErrTrailingUnderscoreNumber             => "trailing underscore in number",
+    ErrLeadingDot                           => "floats require a leading zero",
+    ErrExpectedEqualAfterKey                => "expected equal sign after key",
+    ErrNoTrailingDigitAfterDot              => "expected digit after dot",
+    ErrOverflowError                        => "overflowed when parsing integer",
+)
+
+mutable struct ParserError <: Exception
+    type::ErrorType
+
+    # Arbitrary data to store at the
+    # call site to be used when formatting
+    # the error
+    data
+
+    # These are filled in before returning from parse function
+    str       ::Union{String,   Nothing}
+    filepath  ::Union{String,   Nothing}
+    line      ::Union{Int,      Nothing}
+    column    ::Union{Int,      Nothing}
+    pos       ::Union{Int,      Nothing} # position of parser when the error occurred
+    table     ::Union{TOMLDict, Nothing} # result parsed until error
+end
+ParserError(type, data) = ParserError(type, data, nothing, nothing, nothing, nothing, nothing, nothing)
+ParserError(type) = ParserError(type, nothing)
+# Defining these below can be useful when debugging code that erroneously returns a
+# ParserError because you get a stacktrace to where the ParserError was created
+#ParserError(type) = error(type)
+#ParserError(type, data) = error(type,data)
+
+# Many functions return either a T or a ParserError
+const Err{T} = Union{T, ParserError}
+
+function format_error_message_for_err_type(error::ParserError)
+    # Some `ErrorType`s lack an `err_message` entry; use the enum name for those
+    msg = get(err_message, error.type, string(error.type))
+    if error.type == ErrInvalidBareKeyCharacter
+        msg *= ": '$(escape_string(string(error.data)))'"
+    end
+    return msg
+end
+
+# This is used in error formatting, for example,
+# point_to_line("aa\nfoobar\nbb", 4, 6, stdout) would return the strings:
+# str1 = "foobar"
+# str2 = "^^^    "  (markers under bytes 4:6, padded with spaces)
+# used to show the interval where an error happened (`str2` is styled per `context`)
+function point_to_line(str::AbstractString, a::Int, b::Int, context)
+    @assert b >= a
+    a = thisind(str, a)
+    b = thisind(str, b)
+    pos = something(findprev('\n', str, prevind(str, a)), 0) + 1
+    io1 = IOContext(IOBuffer(), context)
+    io2 = IOContext(IOBuffer(), context)
+    while true
+        if a <= pos <= b
+            printstyled(io2, "^"; color=:light_green)
+        else
+            print(io2, " ")
+        end
+        it = iterate(str, pos)
+        it === nothing && break
+        c, pos = it
+        c == '\n' && break
+        print(io1, c)
+    end
+    return String(take!(io1.io)), String(take!(io2.io))
+end
+
+function Base.showerror(io::IO, err::ParserError)
+    printstyled(io, "TOML Parser error:\n"; color=Base.error_color())
+    # Location line: "<file>:<line>:<column> error: <message>"
+    printstyled(io, something(err.filepath, "none"), ':', err.line, ':', err.column; bold=true)
+    printstyled(io, " error: "; color=Base.error_color())
+    println(io, format_error_message_for_err_type(err))
+    # Offending source line followed by a marker line pointing at the error
+    src_line, marker_line = point_to_line(err.str, err.pos, err.pos, io)
+    # See https://github.com/JuliaLang/julia/issues/36015
+    format_fixer = get(io, :color, false) == true ? "\e[0m" : ""
+    println(io, "$format_fixer  ", src_line)
+    print(io, "$format_fixer  ", marker_line)
+end
+
+
+################
+# Parser utils #
+################
+
+@inline function next_char(l::Parser)::Char
+    # Decode the character starting at byte `l.pos`. Its start byte is
+    # remembered in `prevpos` even at end of input, where `EOF_CHAR` is
+    # returned and nothing else changes.
+    l.prevpos = l.pos
+    state = iterate(l.str, l.pos)
+    state === nothing && return EOF_CHAR
+    c, l.pos = state
+    # A newline bumps the line counter and resets the column to zero;
+    # any other character advances the column by one
+    c == '\n' ? (l.line += 1; l.column = 0) : (l.column += 1)
+    return c
+end
+
+@inline function eat_char(l::Parser)::Char
+    # Hand out the current look-ahead character and refill the look-ahead
+    eaten, l.current_char = l.current_char, next_char(l)
+    return eaten
+end
+
+@inline peek(l::Parser) = l.current_char  # the look-ahead character, not consumed
+
+# Return true if the character was accepted. When a character
+# is accepted it gets eaten and we move to the next character.
+# `f` is either a literal character to match or a predicate on characters.
+@inline function accept(l::Parser, f::Union{Function, Char})::Bool
+    c = peek(l)
+    # The end-of-input sentinel is never accepted, whatever `f` is
+    c == EOF_CHAR && return false
+    # Compare against a literal character, or ask the predicate
+    ok = f isa Char ? c === f : f(c)
+    if ok
+        eat_char(l)
+    end
+    return ok
+end
+
+# Return true if any character was accepted
+function accept_batch(l::Parser, f::F)::Bool where {F}
+    # Greedily consume the longest run of characters accepted by `f`
+    accept(l, f) || return false
+    while accept(l, f)
+    end
+    return true
+end
+
+# Return true if `f` was accepted `n` times
+@inline function accept_n(l::Parser, n, f::F)::Bool where {F}
+    # Stops at the first rejected character; characters accepted before
+    # that point stay consumed
+    for _ in 1:n
+        accept(l, f) || return false
+    end
+    return true
+end
+
+@inline iswhitespace(c::Char) = c == ' ' || c == '\t'
+@inline isnewline(c::Char) = c == '\n' || c == '\r'
+# Skip a run of spaces/tabs; returns true if anything was skipped
+skip_ws(l::Parser) = accept_batch(l, iswhitespace)
+# Same, but newline characters are skipped as well (comments are not)
+skip_ws_nl_no_comment(l::Parser)::Bool = accept_batch(l, x -> iswhitespace(x) || isnewline(x))
+
+function skip_ws_nl(l::Parser)::Bool
+    # Alternately skip runs of whitespace/newlines and comments until
+    # neither makes progress. Returns true if anything was skipped.
+    skipped = false
+    while true
+        progressed = accept_batch(l, x -> iswhitespace(x) || isnewline(x))
+        progressed = progressed | skip_comment(l)
+        progressed || break
+        skipped = true
+    end
+    return skipped
+end
+
+# Returns true if a comment was skipped
+function skip_comment(l::Parser)::Bool
+    # A comment starts at '#' and runs up to, but not including, the
+    # newline character that ends the line (or to the end of input)
+    accept(l, '#') || return false
+    accept_batch(l, !isnewline)
+    return true
+end
+
+skip_ws_comment(l::Parser) = (skip_ws(l); skip_comment(l))  # '#' needs no preceding whitespace
+
+@inline set_marker!(l::Parser) = l.marker = l.prevpos  # mark the start byte of the look-ahead
+take_substring(l::Parser) = SubString(l.str, l.marker:(l.prevpos-1))  # marker up to the last eaten byte
+
+############
+# Toplevel #
+############
+
+# Driver, keeps parsing toplevel until we either get
+# a `ParserError` or eof.
+function parse(l::Parser; raise=false)::Err{TOMLDict}
+    while true
+        skip_ws_nl(l)
+        peek(l) == EOF_CHAR && return l.root
+        v = parse_toplevel(l)
+        v isa ParserError || continue
+        # Record where the parser stopped and what had been parsed so far,
+        # so that the error can be displayed with its source context
+        v.str, v.filepath, v.table = l.str, l.filepath, l.root
+        v.pos      = l.prevpos-1
+        v.line     = l.line
+        v.column   = l.column-1
+        # With `raise=true` the error is thrown, otherwise it is returned
+        raise && throw(v)
+        return v
+    end
+end
+
+# Top level can be either a table key, an array of table statement
+# or a key/value entry.
+function parse_toplevel(l::Parser)::Err{Nothing}
+    if accept(l, '[')
+        # A table header always starts over from the root table
+        l.active_table = l.root
+        @try parse_table(l)
+    else
+        @try parse_entry(l, l.active_table)
+    end
+    skip_ws_comment(l)
+    # SPEC: "There must be a newline (or EOF) after a key/value pair."
+    # The same check (and the same error type) is used after a table header.
+    # TODO: Not really KeyValue error in the table header case
+    c = peek(l)
+    if !(c == '\n' || c == '\r' || c == EOF_CHAR)
+        return ParserError(ErrExpectedNewLineKeyValue)
+    end
+    return nothing
+end
+
+function recurse_dict!(l::Parser, d::Dict, dotted_keys::AbstractVector{String}, check=true)::Err{TOMLDict}
+    nkeys = length(dotted_keys)
+    for (i, key) in enumerate(dotted_keys)
+        # Descend one level, creating the intermediate table if it is missing
+        d = get!(() -> TOMLDict(), d, key)
+        # For an array the key refers to its most recently added element
+        d isa TOMLArray && (d = d[end])
+        check && @try check_allowed_add_key(l, d, i == nkeys)
+    end
+    return d
+end
+
+function check_allowed_add_key(l::Parser, d, check_defined=true)::Err{Nothing}
+    # Anything that is not a table already holds a plain value
+    d isa Dict || return ParserError(ErrKeyAlreadyHasValue)
+    d in l.inline_tables && return ParserError(ErrAddKeyToInlineTable)
+    # Optionally also reject tables that were already explicitly defined
+    if check_defined && d in l.defined_tables
+        return ParserError(ErrDuplicatedKey)
+    end
+    return nothing
+end
+
+# Can only enter here from toplevel
+function parse_table(l)
+    # A second '[' means this is an array of tables header: `[[key]]`
+    accept(l, '[') && return parse_array_table(l)
+    table_key = @try parse_key(l)
+    skip_ws(l)
+    accept(l, ']') || return ParserError(ErrExpectedEndOfTable)
+    # Make the (possibly newly created) table the active one and record
+    # that it has now been explicitly defined
+    tbl = @try recurse_dict!(l, l.root, table_key)
+    l.active_table = tbl
+    push!(l.defined_tables, tbl)
+    return
+end
+
+function parse_array_table(l)::Union{Nothing, ParserError}
+    table_key = @try parse_key(l)
+    skip_ws(l)
+    closed = accept(l, ']') && accept(l, ']')
+    closed || return ParserError(ErrExpectedEndArrayOfTable)
+    # Walk to the table that holds the array, without the "allowed to add
+    # key" checks, then fetch the array itself (creating it when missing)
+    parent = @try recurse_dict!(l, l.root, @view(table_key[1:end-1]), false)
+    tables = get!(() -> [], parent, table_key[end])
+    if !(tables isa Vector)
+        return ParserError(ErrArrayTreatedAsDictionary)
+    elseif tables in l.static_arrays
+        # Arrays written as `key = [...]` cannot be extended with `[[key]]`
+        return ParserError(ErrAddArrayToStaticArray)
+    end
+
+    # Every `[[key]]` header appends a fresh table, which becomes active
+    new_table = TOMLDict()
+    push!(tables, new_table)
+    push!(l.defined_tables, new_table)
+    l.active_table = new_table
+    return
+end
+
+function parse_entry(l::Parser, d)::Union{Nothing, ParserError}
+    key = @try parse_key(l)
+    skip_ws(l)
+    accept(l, '=') || return ParserError(ErrExpectedEqualAfterKey)
+    # For a dotted key, descend into (and create if needed) the tables named
+    # by all parts except the last one
+    if length(key) > 1
+        d = @try recurse_dict!(l, d, @view(key[1:end-1]))
+    end
+    # Grab the final key part now: parsing the value may parse further keys
+    # (inline tables), which reuses `l.dotted_keys`
+    last_key_part = l.dotted_keys[end]
+
+    # An existing entry under this key must pass the redefinition checks
+    existing = get(d, last_key_part, nothing)
+    existing === nothing || @try check_allowed_add_key(l, existing)
+    skip_ws(l)
+    value = @try parse_value(l)
+    # TODO: Performance, hashing `last_key_part` again here
+    d[last_key_part] = value
+    return
+end
+
+
+########
+# Keys #
+########
+
+# SPEC: "Bare keys may only contain ASCII letters, ASCII digits, underscores,
+# and dashes (A-Za-z0-9_-).
+# Note that bare keys are allowed to be composed of only ASCII digits, e.g. 1234,
+# but are always interpreted as strings."
+@inline isvalid_barekey_char(c::Char) =
+    c == '_' || c == '-' ||
+    isdigit(c) ||
+    ('A' <= c <= 'Z') ||
+    ('a' <= c <= 'z')
+
+# Parse a (possibly dotted) key from scratch; the parts end up in `l.dotted_keys`
+function parse_key(l::Parser)
+    empty!(l.dotted_keys)
+    return _parse_key(l)
+end
+
+# Recursively add dotted keys to `l.dotted_keys`
+function _parse_key(l::Parser)
+    skip_ws(l)
+    # SPEC: "A bare key must be non-empty,"
+    if isempty(l.dotted_keys) && accept(l, '=')
+        return ParserError(ErrEmptyBareKey)
+    end
+    keyval = if accept(l, '"')
+        @try parse_string_start(l, false)
+    elseif accept(l, '\'')
+        @try parse_string_start(l, true)
+    else
+        set_marker!(l)
+        if accept_batch(l, isvalid_barekey_char)
+            # A bare key ends at a dot, whitespace (space or tab), ']' or '='
+            if !(peek(l) == '.' || iswhitespace(peek(l)) || peek(l) == ']' || peek(l) == '=')
+                c = eat_char(l)
+                return ParserError(ErrInvalidBareKeyCharacter, c)
+            end
+            String(take_substring(l))
+        else
+            c = eat_char(l)
+            return ParserError(ErrInvalidBareKeyCharacter, c)
+        end
+    end
+    push!(l.dotted_keys, keyval)
+    # SPEC: "Whitespace around dot-separated parts is ignored."
+    skip_ws(l)
+    if accept(l, '.')
+        skip_ws(l)
+        @try _parse_key(l)
+    end
+    return l.dotted_keys
+end
+
+
+##########
+# Values #
+##########
+
+function parse_value(l::Parser)
+    # Dispatch on the first character, which `accept` eats for all but numbers/dates
+    val = if accept(l, '[')
+        parse_array(l)
+    elseif accept(l, '{')
+        parse_inline_table(l)
+    elseif accept(l, '"')
+        parse_string_start(l, false)
+    elseif accept(l, '\'')
+        parse_string_start(l, true)
+    elseif accept(l, 't')
+        parse_bool(l, true)
+    elseif accept(l, 'f')
+        parse_bool(l, false)
+    else
+        parse_number_or_date_start(l)
+    end
+    # `nothing` comes from the keyword parsers (true/false/inf/nan) failing
+    val === nothing && return ParserError(ErrGenericValueError)
+    return val
+end
+
+
+#########
+# Array #
+#########
+
+function parse_array(l::Parser)::Err{TOMLArray}
+    array = Any[]
+    # Remember that this array was written inline as a value
+    push!(l.static_arrays, array)
+    skip_ws_nl(l)
+    accept(l, ']') && return array
+    while true
+        item = @try parse_value(l)
+        push!(array, item)
+        # There can be an arbitrary number of newlines and comments before a value and before the closing bracket.
+        skip_ws_nl(l)
+        saw_comma = accept(l, ',')
+        skip_ws_nl(l)
+        accept(l, ']') && return array
+        # Without a closing bracket another item follows, which needs a comma
+        saw_comma || return ParserError(ErrExpectedCommaBetweenItemsArray)
+    end
+end
+
+
+################
+# Inline table #
+################
+
+function parse_inline_table(l::Parser)::Err{TOMLDict}
+    dict = TOMLDict()
+    # Remember that this table was written inline; see `check_allowed_add_key`
+    push!(l.inline_tables, dict)
+    skip_ws(l)
+    accept(l, '}') && return dict
+    while true
+        @try parse_entry(l, dict)
+        # SPEC: No newlines are allowed between the curly braces unless they are valid within a value.
+        skip_ws(l)
+        accept(l, '}') && return dict
+        # Anything other than the closing brace has to be a separating comma ...
+        if !accept(l, ',')
+            return ParserError(ErrExpectedCommaBetweenItemsInlineTable)
+        end
+        skip_ws(l)
+        # ... and that comma has to be followed by another entry
+        accept(l, '}') && return ParserError(ErrTrailingCommaInlineTable)
+    end
+end
+
+
+###########
+# Numbers #
+###########
+
+parse_inf(l::Parser, sgn::Int) = (accept(l, 'n') && accept(l, 'f')) ? sgn * Inf : nothing  # 'i' already eaten
+parse_nan(l::Parser) = (accept(l, 'a') && accept(l, 'n')) ? NaN : nothing  # 'n' already eaten
+
+function parse_bool(l::Parser, v::Bool)::Union{Bool, Nothing}
+    # Have eaten a 't' if `v` is true, otherwise have eaten a `f`.
+    # Match the remaining letters in order, stopping at the first mismatch.
+    remaining = v ? "rue" : "alse"
+    return all(c -> accept(l, c), remaining) ? v : nothing
+end
+
+isvalid_hex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F')
+isvalid_oct(c::Char) = '0' <= c <= '7'
+isvalid_binary(c::Char) = c == '0' || c == '1'  # only 0 and 1 are binary digits
+
+const ValidSigs = Union{typeof.([isvalid_hex, isvalid_oct, isvalid_binary, isdigit])...}
+# This function eats things accepted by `f` but also allows eating `_` in between
+# digits. Returns if it ate at least one character and if it ate an underscore
+function accept_batch_underscore(l::Parser, f::ValidSigs, fail_if_underscore=true)::Err{Tuple{Bool, Bool}}
+    contains_underscore = false
+    at_least_one = false
+    last_underscore = false
+    while true
+        c = peek(l)
+        if c == '_'
+            contains_underscore = true
+            if fail_if_underscore
+                return ParserError(ErrUnderscoreNotSurroundedByDigits)
+            end
+            eat_char(l)
+            fail_if_underscore = true
+            last_underscore = true
+        else
+            # SPEC:  "Each underscore must be surrounded by at least one digit on each side."
+            fail_if_underscore = false
+            if f(c)
+                at_least_one = true
+                eat_char(l)
+            else
+                if last_underscore
+                    return ParserError(ErrTrailingUnderscoreNumber)
+                end
+                return at_least_one, contains_underscore
+            end
+            last_underscore = false
+        end
+    end
+end
+
+function parse_number_or_date_start(l::Parser)::Err{Union{Int, Float64, Date, Time, DateTime}}
+    # Entry point for every value that is not an array, table, string or
+    # boolean: integers (any base), floats, inf/nan, dates and times.
+
+    set_marker!(l)
+    sgn = 1
+    if accept(l, '+')
+        # do nothing
+    elseif accept(l, '-')
+        sgn = -1
+    end
+    if accept(l, 'i')
+        return parse_inf(l, sgn)
+    elseif accept(l, 'n')
+        return parse_nan(l)
+    end
+
+    if accept(l, '.')
+        return ParserError(ErrLeadingDot)
+    end
+
+    # Zero is allowed to follow by a end value char, a base x, o, b or a dot
+    readed_zero = false
+    if accept(l, '0')
+        readed_zero = true # Intentional bad grammar to remove the ambiguity in "read"...
+        if ok_end_value(peek(l))
+            return 0
+        elseif accept(l, 'x')
+            ate, contains_underscore = @try accept_batch_underscore(l, isvalid_hex)
+            ate && return parse_int(l, contains_underscore)
+        elseif accept(l, 'o')
+            ate, contains_underscore = @try accept_batch_underscore(l, isvalid_oct)
+            ate && return parse_int(l, contains_underscore)
+        elseif accept(l, 'b')
+            ate, contains_underscore = @try accept_batch_underscore(l, isvalid_binary)
+            ate && return parse_int(l, contains_underscore)
+        elseif accept(l, isdigit)
+            return parse_local_time(l)
+        elseif peek(l) !== '.'
+            return ParserError(ErrLeadingZeroNotAllowedInteger)
+        end
+    end
+
+    read_underscore = false
+    read_digit = accept(l, isdigit)
+    if !readed_zero && !read_digit
+        return ParserError(ErrUnexpectedStartOfValue)
+    end
+    ate, contains_underscore = @try accept_batch_underscore(l, isdigit, readed_zero)
+    read_underscore |= contains_underscore
+    if (read_digit || ate) && ok_end_value(peek(l))
+        return parse_int(l, contains_underscore)
+    end
+    # Done with integers here
+
+    if !read_underscore
+        # No underscores in date / times
+        if peek(l) == '-'
+            return parse_datetime(l)
+        elseif peek(l) == ':'
+            return parse_local_time(l)
+        end
+    end
+    # Done with datetime / localtime here
+
+    # can optionally read a . + digits and then exponent
+    ate_dot = accept(l, '.')
+    ate, contains_underscore = @try accept_batch_underscore(l, isdigit, true)
+    if ate_dot && !ate
+        return ParserError(ErrNoTrailingDigitAfterDot)
+    end
+    read_underscore |= contains_underscore
+    if accept(l, x -> x == 'e' || x == 'E')
+        accept(l, x-> x == '+' || x == '-')
+        # SPEC: (which follows the same rules as decimal integer values but may include leading zeros)
+        read_digit = accept_batch(l, isdigit)
+        ate, exp_underscore = @try accept_batch_underscore(l, isdigit, !read_digit)
+        read_underscore |= exp_underscore  # keep underscores seen in the mantissa
+    end
+    if !ok_end_value(peek(l))
+        eat_char(l)  # so the error points at the unexpected character
+        return ParserError(ErrGenericValueError)
+    end
+    return parse_float(l, read_underscore)
+end
+
+
+function take_string_or_substring(l, contains_underscore)::Union{String, SubString}
+    subs = take_substring(l)
+    # `parse` is handed the digits as an AbstractString, so only when the text
+    # contains underscores do we materialize a copy with them filtered out;
+    # otherwise the `SubString` is returned as is.
+    return contains_underscore ? filter(!=('_'), subs) : subs
+end
+
+function parse_float(l::Parser, contains_underscore)::Err{Float64}
+    # `tryparse` yields `nothing` when the text is not a valid Float64
+    str = take_string_or_substring(l, contains_underscore)
+    parsed = tryparse(Float64, str)
+    return parsed === nothing ? ParserError(ErrGenericValueError) : parsed
+end
+
+function parse_int(l::Parser, contains_underscore, base=nothing)::Err{Int}
+    s = take_string_or_substring(l, contains_underscore)
+    try
+        return Base.parse(Int, s; base=base)
+    catch e  # overflow is reported as a ParserError, anything else is a parser bug
+        e isa Base.OverflowError && return ParserError(ErrOverflowError)
+        error("internal parser error: did not correctly discredit $(repr(s)) as an int")
+    end
+end
+
+
+##########################
+# Date / Time / DateTime #
+##########################
+
+ok_end_value(c::Char) = iswhitespace(c) || isnewline(c) || c == EOF_CHAR ||
+                        c == '#' || c == ']' || c == '}' || c == ','
+
+#=
+# https://tools.ietf.org/html/rfc3339
+
+# Internet Protocols MUST generate four digit years in dates.
+
+   date-fullyear   = 4DIGIT
+   date-month      = 2DIGIT  ; 01-12
+   date-mday       = 2DIGIT  ; 01-28, 01-29, 01-30, 01-31 based on
+                             ; month/year
+   time-hour       = 2DIGIT  ; 00-23
+   time-minute     = 2DIGIT  ; 00-59
+   time-second     = 2DIGIT  ; 00-58, 00-59, 00-60 based on leap second
+                             ; rules
+   time-secfrac    = "." 1*DIGIT
+   time-numoffset  = ("+" / "-") time-hour ":" time-minute
+   time-offset     = "Z" / time-numoffset
+
+   partial-time    = time-hour ":" time-minute ":" time-second
+                     [time-secfrac]
+   full-date       = date-fullyear "-" date-month "-" date-mday
+   full-time       = partial-time time-offset
+
+   date-time       = full-date "T" full-time
+=#
+
+accept_two(l, f::F) where {F} = accept_n(l, 2, f) ? true : ParserError(ErrParsingDateTime)
+function parse_datetime(l)::Err{Union{DateTime, Date}}
+    # Year has already been eaten when we reach here
+    year = parse_int(l, false)
+    year in 0:9999 || return ParserError(ErrParsingDateTime)
+
+    # Month
+    accept(l, '-') || return ParserError(ErrParsingDateTime)
+    set_marker!(l)
+    @try accept_two(l, isdigit)
+    month = parse_int(l, false)
+    month in 1:12 || return ParserError(ErrParsingDateTime)
+    accept(l, '-') || return ParserError(ErrParsingDateTime)
+
+    # Day
+    set_marker!(l)
+    @try accept_two(l, isdigit)
+    day = parse_int(l, false)
+    # Verify the real range in the constructor below
+    day in 1:31 || return ParserError(ErrParsingDateTime)
+
+    # We might have a local date now. A single space may also separate the
+    # date from a time, so only continue if a digit follows that space.
+    read_space = false
+    if ok_end_value(peek(l))
+        read_space = accept(l, ' ')
+        if !(read_space && isdigit(peek(l)))
+            # The Dates constructor throws for impossible dates (e.g. Feb 30);
+            # report those as a parse error like the DateTime case below
+            return try Date(year, month, day) catch; ParserError(ErrParsingDateTime) end
+        end
+    end
+    if !read_space
+        accept(l, 'T') || accept(l, 't') || return ParserError(ErrParsingDateTime)
+    end
+
+    h, m, s, ms = @try _parse_local_time(l)
+
+    # Julia doesn't support offset times
+    if !accept(l, 'Z')
+        if accept(l, '+') || accept(l, '-')
+            return ParserError(ErrOffsetDateNotSupported)
+        end
+    end
+
+    if !ok_end_value(peek(l))
+        return ParserError(ErrParsingDateTime)
+    end
+
+    # The DateTime parser verifies things like leap year for us
+    try
+        DateTime(year, month, day,
+                 h, m, s, ms)
+    catch e
+         ParserError(ErrParsingDateTime)
+    end
+end
+
+function parse_local_time(l::Parser)::Err{Time}
+    # The hour digits have already been eaten, so they are read off the marker
+    hour = parse_int(l, false)
+    hour in 0:23 || return ParserError(ErrParsingDateTime)
+    _, minute, second, ms = @try _parse_local_time(l, true)
+    # TODO: Could potentially parse greater accuracy for the fractional seconds here.
+    return Time(hour, minute, second, ms)
+end
+
+function _parse_local_time(l::Parser, skip_hour=false)::Err{NTuple{4, Int}}
+    # Hour has potentially been already parsed in
+    # `parse_number_or_date_start` already
+    if skip_hour
+        hour = 0
+    else
+        set_marker!(l)
+        @try accept_two(l, isdigit)
+        hour = parse_int(l, false)
+        hour in 0:23 || return ParserError(ErrParsingDateTime)
+    end
+
+    accept(l, ':') || return ParserError(ErrParsingDateTime)
+
+    # minute
+    set_marker!(l)
+    @try accept_two(l, isdigit)
+    minute = parse_int(l, false)
+    minute in 0:59 || return ParserError(ErrParsingDateTime)
+
+    accept(l, ':') || return ParserError(ErrParsingDateTime)
+
+    # second
+    set_marker!(l)
+    @try accept_two(l, isdigit)
+    second = parse_int(l, false)
+    second in 0:59 || return ParserError(ErrParsingDateTime)
+
+    # optional fractional second
+    fractional_second = 0
+    if accept(l, '.')
+        set_marker!(l)
+        ndigits_read = 0
+        for i in 1:3
+            ndigits_read += accept(l, isdigit)
+        end
+        if ndigits_read == 0
+            return ParserError(ErrParsingDateTime)
+        end
+        # DateTime in base only manages 3 significant digits in fractional
+        # second; scale shorter fractions to milliseconds (".5" is 500 ms)
+        fractional_second = parse_int(l, false) * 10^(3 - ndigits_read)
+        # Truncate off the rest eventual digits
+        accept_batch(l, isdigit)
+    end
+    return hour, minute, second, fractional_second
+end
+
+
+##########
+# String #
+##########
+
+# Entry point after the opening quote (`'` when `quoted`, else `"`) was eaten.
+# Distinguishes the empty string, single-line and multiline (triple quote) forms.
+function parse_string_start(l::Parser, quoted::Bool)::Err{String}
+    delim = quoted ? '\'' : '"'
+    # No second quote: ordinary single-line string
+    accept(l, delim) || return parse_string_continue(l, false, quoted)
+    # Two quotes and no third: the empty string
+    accept(l, delim) || return ""
+    # Three quotes open a multiline string; an optional `\r` and an optional
+    # `\n` directly after the opening delimiter are dropped
+    accept(l, '\r')
+    accept(l, '\n')
+    return parse_string_continue(l, true, quoted)
+end
+
+@inline stop_candidates_multiline(x)         = !(x == '"'  || x == '\\')  # `true` => not a stop char
+@inline stop_candidates_singleline(x)        = !(x == '"'  || x == '\\' || x == '\n')  # ditto, "..."
+@inline stop_candidates_multiline_quoted(x)  = !(x == '\'' || x == '\\')  # ditto, '''...'''
+@inline stop_candidates_singleline_quoted(x) = !(x == '\'' || x == '\\' || x == '\n')  # ditto, '...'
+
+# Scan the body of a string whose opening delimiter has been consumed and
+# return its contents. Text is collected as byte ranges of `l.str` in
+# `l.chunks`; escape sequences are only validated here and are decoded by
+# `take_chunks` when at least one was seen.
+function parse_string_continue(l::Parser, multiline::Bool, quoted::Bool)::Err{String}
+    start_chunk = l.prevpos
+    q = quoted ? '\'' : '"'
+    contains_backslash = false
+    offset = multiline ? 3 : 1 # number of quote chars in the closing delimiter
+    while true
+        if peek(l) == EOF_CHAR
+            return ParserError(ErrUnexpectedEndString)
+        end
+        if quoted
+            accept_batch(l, multiline ? stop_candidates_multiline_quoted : stop_candidates_singleline_quoted)
+        else
+            accept_batch(l, multiline ? stop_candidates_multiline : stop_candidates_singleline)
+        end
+        if !multiline && peek(l) == '\n'
+            return ParserError(ErrNewLineInString)
+        end
+        next_slash = peek(l) == '\\'
+        if !next_slash
+            # TODO: quotes next to the closing delimiter, e.g. """"foo"""", are not handled
+            if accept(l, q) && (!multiline || (accept(l, q) && accept(l, q)))
+                push!(l.chunks, start_chunk:(l.prevpos-offset-1))
+                return take_chunks(l, contains_backslash)
+            end
+        end
+        # This shouldn't be needed?
+        c = eat_char(l) # eat the character we stopped at
+        next_slash = c == '\\'
+        if next_slash && !quoted
+            if peek(l) == '\n' || peek(l) == '\r'
+                push!(l.chunks, start_chunk:(l.prevpos-1-1)) # -1 due to eating the slash
+                skip_ws_nl_no_comment(l)
+                start_chunk = l.prevpos
+            else
+                c = eat_char(l) # eat the escaped character
+                if c == 'u'  || c == 'U'
+                    # TOML escapes are \uXXXX (4 hex digits) and \UXXXXXXXX (8)
+                    n = c == 'u' ? 4 : 8
+                    set_marker!(l)
+                    if !accept_n(l, n, isvalid_hex)
+                        return ParserError(ErrInvalidUnicodeScalar)
+                    end
+                    codepoint = parse_int(l, false, 16)
+                    # Only Unicode scalar values are allowed: any code point
+                    # except the surrogates, i.e. 0..0xD7FF and 0xE000..0x10FFFF
+                    if !(codepoint <= 0xD7FF || 0xE000 <= codepoint <= 0x10FFFF)
+                        return ParserError(ErrInvalidUnicodeScalar)
+                    end
+                elseif c != 'b' && c != 't' && c != 'n' && c != 'f' && c != 'r' && c != '"' && c!= '\\'
+                    return ParserError(ErrInvalidEscapeCharacter)
+                end
+                contains_backslash = true
+            end
+        end
+    end
+end
+
+# Concatenate the byte ranges of `l.str` stored in `l.chunks` into a new
+# `String`, empty `l.chunks`, and unescape the result if `unescape` is set.
+function take_chunks(l::Parser, unescape::Bool)::String
+    src = l.str
+    # `init=0` lets an empty chunk list produce "" instead of throwing
+    str = Base._string_n(mapreduce(length, +, l.chunks; init=0))
+    offset = 1
+    # Keep both strings rooted while raw pointers into them are in use
+    GC.@preserve str src for chunk in l.chunks
+        n = length(chunk)
+        unsafe_copyto!(pointer(str, offset), pointer(src, first(chunk)), n)
+        offset += n
+    end
+    empty!(l.chunks)
+    return unescape ? unescape_string(str) : str
+end