Skip to content

Commit

Permalink
tryparse: parse string to Nullable
Browse files Browse the repository at this point in the history
Introduces the tryparse method:
- tryparse{T<:Integer}(::Type{T},s::AbstractString)
- tryparse(::Type{Float..},s::AbstractString)
- a few variants of the above

And:
- tryparse(Float.., ...) calls the corresponding C functions jl_try_strtof, jl_try_substrtof, jl_try_strtod and jl_try_substrtod.
- The parseint, parsefloat, float64_isvalid and float32_isvalid methods wrap the corresponding tryparse methods.
- The jl_strtod, jl_strtof, ... functions are wrappers over the jl_try_str... functions.

This should fix JuliaLang#10498 as well.

Ref: discussions at JuliaLang#9316, JuliaLang#3631, JuliaLang#5704
  • Loading branch information
tanmaykm committed Mar 17, 2015
1 parent 5195cc8 commit de27f5e
Show file tree
Hide file tree
Showing 10 changed files with 277 additions and 130 deletions.
1 change: 0 additions & 1 deletion base/base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -277,4 +277,3 @@ immutable Nullable{T}
Nullable() = new(true)
Nullable(value::T) = new(false, value)
end

21 changes: 0 additions & 21 deletions base/combinatorics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,6 @@ const _fact_table64 =
87178291200,1307674368000,20922789888000,355687428096000,6402373705728000,
121645100408832000,2432902008176640000]

# Table of consecutive factorials stored as UInt128 values: the entries are
# 1!, 2!, 3!, ... (1, 2, 6, 0x18 = 24, 0x78 = 120, 0x2d0 = 720, ...), 34 entries in all.
# It is the `table` argument of the Int128/UInt128 factorial methods.
const _fact_table128 =
UInt128[0x00000000000000000000000000000001, 0x00000000000000000000000000000002,
0x00000000000000000000000000000006, 0x00000000000000000000000000000018,
0x00000000000000000000000000000078, 0x000000000000000000000000000002d0,
0x000000000000000000000000000013b0, 0x00000000000000000000000000009d80,
0x00000000000000000000000000058980, 0x00000000000000000000000000375f00,
0x00000000000000000000000002611500, 0x0000000000000000000000001c8cfc00,
0x0000000000000000000000017328cc00, 0x0000000000000000000000144c3b2800,
0x00000000000000000000013077775800, 0x00000000000000000000130777758000,
0x00000000000000000001437eeecd8000, 0x00000000000000000016beecca730000,
0x000000000000000001b02b9306890000, 0x000000000000000021c3677c82b40000,
0x0000000000000002c5077d36b8c40000, 0x000000000000003ceea4c2b3e0d80000,
0x000000000000057970cd7e2933680000, 0x00000000000083629343d3dcd1c00000,
0x00000000000cd4a0619fb0907bc00000, 0x00000000014d9849ea37eeac91800000,
0x00000000232f0fcbb3e62c3358800000, 0x00000003d925ba47ad2cd59dae000000,
0x0000006f99461a1e9e1432dcb6000000, 0x00000d13f6370f96865df5dd54000000,
0x0001956ad0aae33a4560c5cd2c000000, 0x0032ad5a155c6748ac18b9a580000000,
0x0688589cc0e9505e2f2fee5580000000, 0xde1bc4d19efcac82445da75b00000000]

function factorial_lookup(n::Integer, table, lim)
n < 0 && throw(DomainError())
n > lim && throw(OverflowError())
Expand All @@ -30,8 +11,6 @@ function factorial_lookup(n::Integer, table, lim)
return oftype(n, f)
end

# Table-driven factorial methods. Each forwards to factorial_lookup with a lookup
# table and a limit `lim`; factorial_lookup throws DomainError for n < 0 and
# OverflowError for n > lim.
factorial(n::Int128) = factorial_lookup(n, _fact_table128, 33)
factorial(n::UInt128) = factorial_lookup(n, _fact_table128, 34)
factorial(n::Union(Int64,UInt64)) = factorial_lookup(n, _fact_table64, 20)

if Int === Int32
Expand Down
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ export
fldmod,
flipsign,
float,
tryparse,
floor,
fma,
frexp,
Expand Down
18 changes: 13 additions & 5 deletions base/gmp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ export BigInt
import Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), ($),
binomial, cmp, convert, div, divrem, factorial, fld, gcd, gcdx, lcm, mod,
ndigits, promote_rule, rem, show, isqrt, string, isprime, powermod,
sum, trailing_zeros, trailing_ones, count_ones, base, parseint,
sum, trailing_zeros, trailing_ones, count_ones, base, parseint, tryparse_internal,
serialize, deserialize, bin, oct, dec, hex, isequal, invmod,
prevpow2, nextpow2, ndigits0z, widen, signed

Expand Down Expand Up @@ -76,15 +76,23 @@ signed(x::BigInt) = x
BigInt(x::BigInt) = x
BigInt(s::AbstractString) = parseint(BigInt,s)

# Parse `s` as a BigInt in the given `base` using GMP's __gmpz_set_str.
# The result is wrapped in a Nullable{BigInt}. On failure (no digits after the
# preamble, or a non-zero status from GMP) an ArgumentError is thrown when `raise`
# is true; otherwise the null value `_n` is returned.
# NOTE(review): this listing is a rendered diff without +/- markers, so pre- and
# post-commit lines appear together. The `Base.parseint_nocheck` header and the
# unconditional `err == 0 || throw(...)` / `return sgn < 0 ? -z : z` pair look like
# the pre-commit lines — confirm against the repository.
function Base.parseint_nocheck(::Type{BigInt}, s::AbstractString, base::Int)
function tryparse_internal(::Type{BigInt}, s::AbstractString, base::Int, raise::Bool)
_n = Nullable{BigInt}()
s = bytestring(s)
# sgn is the sign factor and i the index where the digits start; parseint_preamble
# reports "nothing left to parse" by returning i == 0.
sgn, base, i = Base.parseint_preamble(true,s,base)
if i == 0
raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
return _n
end
z = BigInt()
# GMP parses the digits (from index i onward) into z; a non-zero status means failure.
err = ccall((:__gmpz_set_str, :libgmp),
Int32, (Ptr{BigInt}, Ptr{UInt8}, Int32),
&z, SubString(s,i), base)
err == 0 || throw(ArgumentError("invalid BigInt: $(repr(s))"))
return sgn < 0 ? -z : z
if err != 0
raise && throw(ArgumentError("invalid BigInt: $(repr(s))"))
return _n
end
# The sign was stripped by the preamble, so it is applied here.
Nullable(sgn < 0 ? -z : z)
end

function BigInt(x::Union(Clong,Int32))
Expand Down Expand Up @@ -217,7 +225,7 @@ function serialize(s, n::BigInt)
serialize(s, base(62,n))
end

# Rebuild a BigInt by parsing the deserialized value in base 62, the same base used
# by the serialize method above (`base(62,n)`).
# NOTE(review): rendered diff — the parseint_nocheck line looks like the pre-commit
# definition and the tryparse_internal line its replacement; confirm against the repository.
deserialize(s, ::Type{BigInt}) = Base.parseint_nocheck(BigInt, deserialize(s), 62)
deserialize(s, ::Type{BigInt}) = get(tryparse_internal(BigInt, deserialize(s), 62, true))

# Binary ops
for (fJ, fC) in ((:+, :add), (:-,:sub), (:*, :mul),
Expand Down
4 changes: 2 additions & 2 deletions base/nullable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ convert( ::Type{Nullable }, ::Void) = Nullable{Union()}()

# Write a constructor-style representation of `x` to `io`:
# `Nullable{T}()` for a null value, `Nullable(value)` otherwise.
#
# `print` is used instead of `println`: `show` must not append a newline, otherwise
# a Nullable embedded in other output (string interpolation, containers, repr)
# carries a stray line break. `print` also avoids any dependency on the @printf macro.
function show{T}(io::IO, x::Nullable{T})
    if x.isnull
        print(io, "Nullable{", repr(T), "}()")
    else
        print(io, "Nullable(", repr(x.value), ")")
    end
end

Expand Down
129 changes: 81 additions & 48 deletions base/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1487,27 +1487,33 @@ parseint{T<:Integer}(::Type{T}, c::Char, base::Integer) = convert(T,parseint(c,b
parseint{T<:Integer}(::Type{T}, c::Char) = convert(T,parseint(c))

# Read one character of `s` at index `i` (default: start(s)).
# Returns the tuple (c, i, j): the character read, the index following it, and the
# index it was read from.
# NOTE(review): this listing is a rendered diff without +/- markers, so two
# end-of-string guards appear. The throwing guard looks like the pre-commit line and
# the `(Char(0), 0, 0)` sentinel return its replacement — confirm against the repository.
function parseint_next(s::AbstractString, i::Int=start(s))
done(s,i) && throw(ArgumentError("premature end of integer: $(repr(s))"))
done(s,i) && (return Char(0), 0, 0)
j = i
c, i = next(s,i)
c, i, j
end

function parseint_preamble(signed::Bool, s::AbstractString, base::Int)
c, i, j = parseint_next(s)

while isspace(c)
c, i, j = parseint_next(s,i)
end
(j == 0) && (return 0, 0, 0)

sgn = 1
if signed
if c == '-' || c == '+'
(c == '-') && (sgn = -1)
c, i, j = parseint_next(s,i)
end
end

while isspace(c)
c, i, j = parseint_next(s,i)
end
(j == 0) && (return 0, 0, 0)

if base == 0
if c == '0' && !done(s,i)
c, i = next(s,i)
Expand All @@ -1522,94 +1528,121 @@ function parseint_preamble(signed::Bool, s::AbstractString, base::Int)
return sgn, base, j
end

function parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int)
# Overflow-checked addition of two integers of the same type `T`.
# Returns Nullable{T}(n1 + n2), or the null Nullable{T} when the sum would fall
# outside typemin(T)..typemax(T).
function safe_add{T<:Integer}(n1::T, n2::T)
    if n2 > 0
        # adding a positive value can only overflow upwards
        overflows = n1 > (typemax(T) - n2)
    else
        # adding zero or a negative value can only overflow downwards
        overflows = n1 < (typemin(T) - n2)
    end
    overflows && return Nullable{T}()
    return Nullable{T}(n1 + n2)
end
# Overflow-checked multiplication of two integers of the same type `T`.
# Returns Nullable{T}(n1 * n2), or the null Nullable{T} when the product would fall
# outside typemin(T)..typemax(T). The bounds on n1 are obtained by dividing the type
# limits by n2, with separate cases for the sign of n2.
function safe_mul{T<:Integer}(n1::T, n2::T)
    if n2 > 0
        overflows = (n1 > div(typemax(T),n2)) || (n1 < div(typemin(T),n2))
    elseif n2 < -1
        # dividing by a negative n2 swaps the roles of typemin and typemax
        overflows = (n1 > div(typemin(T),n2)) || (n1 < div(typemax(T),n2))
    else
        # n2 is 0 or -1: only typemin(T) * -1 is out of range
        overflows = (n2 == -1) && n1 == typemin(T)
    end
    return overflows ? Nullable{T}() : Nullable{T}(n1 * n2)
end

# Parse `s` as an integer of type `T` in the given `base`; the result is wrapped in
# a Nullable{T}. `a` is the value assigned to the lowercase digit 'a' (see
# `c-'a'+a` below). On failure — nothing to parse, a character that is not a digit
# of `base`, overflow, or a non-whitespace character after trailing whitespace — an
# error is thrown when `raise` is true; otherwise the null value `_n` is returned.
# NOTE(review): this listing is a rendered diff without +/- markers, so several
# statements appear twice (the two `m::T` lines, `return n` next to
# `return Nullable{T}(n)`, the one-line `throw` guards next to the `if ... raise`
# blocks, checked_mul/checked_add next to safe_mul/safe_add). The unconditional
# throws and bare-`n` returns look like the pre-commit lines — confirm against the
# repository before relying on either form.
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int, raise::Bool)
_n = Nullable{T}()
# sgn is the sign factor, i the index of the first digit; i == 0 means the
# preamble ran out of input.
sgn, base, i = parseint_preamble(T<:Signed,s,base)
if i == 0
raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
return _n
end
# parseint_next returns index 0 when the string is exhausted.
c, i = parseint_next(s,i)
if i == 0
raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
return _n
end

base = convert(T,base)
## FIXME: remove 128-bit specific code once 128-bit div doesn't rely on BigInt
m::T = T===UInt128 || T===Int128 ? typemax(T) : div(typemax(T)-base+1,base)
# While n <= m, n*base + d (with d < base) cannot exceed typemax(T), so the first
# loop accumulates digits with plain arithmetic.
m::T = div(typemax(T)-base+1,base)
n::T = 0
while n <= m
# Digit value of c; a character outside the three ranges maps to `base`, which
# fails the validity check below.
d::T = '0' <= c <= '9' ? c-'0' :
'A' <= c <= 'Z' ? c-'A'+10 :
'a' <= c <= 'z' ? c-'a'+a : base
d < base || throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
if d >= base
raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
return _n
end
n *= base
n += d
if done(s,i)
n *= sgn
return n
return Nullable{T}(n)
end
c, i = next(s,i)
isspace(c) && break
end
# From here on the sign is carried in n itself, and each further digit is
# multiplied by sgn before being added.
(T <: Signed) && (n *= sgn)
# Remaining digits: every multiply/add step is overflow-checked.
while !isspace(c)
d::T = '0' <= c <= '9' ? c-'0' :
'A' <= c <= 'Z' ? c-'A'+10 :
'a' <= c <= 'z' ? c-'a'+a : base
d < base || throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
'A' <= c <= 'Z' ? c-'A'+10 :
'a' <= c <= 'z' ? c-'a'+a : base
if d >= base
raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
return _n
end
(T <: Signed) && (d *= sgn)
n = checked_mul(n,base)
n = checked_add(n,d)
done(s,i) && return n

# safe_mul / safe_add return a null Nullable on overflow instead of throwing.
safe_n = safe_mul(n, base)
isnull(safe_n) || (safe_n = safe_add(get(safe_n), d))
if isnull(safe_n)
raise && throw(OverflowError())
return _n
end
n = get(safe_n)
done(s,i) && return Nullable{T}(n)
c, i = next(s,i)
end
# Only whitespace may follow the digits.
while !done(s,i)
c, i = next(s,i)
isspace(c) || throw(ArgumentError("extra characters after whitespace in $(repr(s))"))
if !isspace(c)
raise && throw(ArgumentError("extra characters after whitespace in $(repr(s))"))
return _n
end
end
return n
return Nullable{T}(n)
end
parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int) =
parseint_nocheck(T, s, base, base <= 36 ? 10 : 36)
# Convenience method that chooses the value of the lowercase digit 'a' from the base:
# 10 for bases up to 36 (lowercase letters equal uppercase ones), 36 for larger bases
# (lowercase letters continue after 'Z'). Everything else is forwarded unchanged.
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, raise::Bool)
    lowercase_a = base <= 36 ? 10 : 36
    return tryparse_internal(T, s, base, lowercase_a, raise)
end
# Try to parse `s` as an integer of type `T` in the given `base` (2 to 62).
# Returns a Nullable{T} that is null when `s` is not a valid number; an invalid
# `base` is a caller error and throws ArgumentError.
# `base` accepts any Integer, matching parseint; it is converted to Int because
# tryparse_internal takes `base::Int`.
function tryparse{T<:Integer}(::Type{T}, s::AbstractString, base::Integer)
    (2 <= base <= 62) || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    return tryparse_internal(T, s, Int(base), false)
end

# Without an explicit base, 0 is passed so the base is determined while parsing
# the preamble of `s`.
tryparse{T<:Integer}(::Type{T}, s::AbstractString) = tryparse_internal(T,s,0,false)

parseint{T<:Integer}(::Type{T}, s::AbstractString, base::Integer) =
2 <= base <= 62 ? parseint_nocheck(T,s,Int(base)) : throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
parseint{T<:Integer}(::Type{T}, s::AbstractString) = parseint_nocheck(T,s,0)
# Parse `s` as an integer of type `T` in the given `base` (2 to 62).
# Throws ArgumentError for an invalid base; parsing failures are raised by
# tryparse_internal, which is called with raise = true.
function parseint{T<:Integer}(::Type{T}, s::AbstractString, base::Integer)
    (2 <= base <= 62) || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    # tryparse_internal takes `base::Int`; without the conversion any other Integer
    # type (e.g. UInt8 or Int32 on a 64-bit system) would fail with a MethodError.
    return get(tryparse_internal(T, s, Int(base), true))
end

# Without an explicit base, 0 is passed so the base is determined while parsing
# the preamble of `s`.
parseint{T<:Integer}(::Type{T}, s::AbstractString) = get(tryparse_internal(T, s, 0, true))
parseint(s::AbstractString, base::Integer) = parseint(Int,s,base)
parseint(s::AbstractString) = parseint_nocheck(Int,s,0)
parseint(s::AbstractString) = parseint(Int,s)

## stringifying integers more efficiently ##

string(x::Union(Int8,Int16,Int32,Int64,Int128)) = dec(x)

## string to float functions ##

# Each method calls the named C parsing function with `out` as the destination
# pointer and reports true when the call returns 0. The SubString methods pass the
# parent string together with the substring's offset and endof fields.
# NOTE(review): this listing is a rendered diff; these look like the pre-commit
# definitions, superseded by the float_isvalid-based wrappers further down — confirm
# against the repository.
float64_isvalid(s::AbstractString, out::Array{Float64,1}) =
ccall(:jl_strtod, Int32, (Ptr{UInt8},Ptr{Float64}), s, out) == 0
float32_isvalid(s::AbstractString, out::Array{Float32,1}) =
ccall(:jl_strtof, Int32, (Ptr{UInt8},Ptr{Float32}), s, out) == 0

float64_isvalid(s::SubString, out::Array{Float64,1}) =
ccall(:jl_substrtod, Int32, (Ptr{UInt8},Csize_t,Cint,Ptr{Float64}), s.string, s.offset, s.endof, out) == 0
float32_isvalid(s::SubString, out::Array{Float32,1}) =
ccall(:jl_substrtof, Int32, (Ptr{UInt8},Csize_t,Cint,Ptr{Float32}), s.string, s.offset, s.endof, out) == 0

begin
local tmp::Array{Float64,1} = Array(Float64,1)
local tmpf::Array{Float32,1} = Array(Float32,1)
global parsefloat
function parsefloat(::Type{Float64}, s::AbstractString)
if !float64_isvalid(s, tmp)
throw(ArgumentError("parsefloat(Float64,::AbstractString): invalid number format $(repr(s))"))
end
return tmp[1]
end
# Float64 parsing is delegated to the C functions jl_try_strtod / jl_try_substrtod,
# whose result is declared here as Nullable{Float64}. The SubString method passes
# the parent string together with the substring's offset and endof fields.
tryparse(::Type{Float64}, s::AbstractString) = ccall(:jl_try_strtod, Nullable{Float64}, (Ptr{UInt8},), s)
tryparse(::Type{Float64}, s::SubString) = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof)

function parsefloat(::Type{Float32}, s::AbstractString)
if !float32_isvalid(s, tmpf)
throw(ArgumentError("parsefloat(Float32,::AbstractString): invalid number format $(repr(s))"))
end
return tmpf[1]
end
# Float32 parsing is delegated to the C functions jl_try_strtof / jl_try_substrtof,
# whose result is declared here as Nullable{Float32}. The SubString method passes
# the parent string together with the substring's offset and endof fields.
tryparse(::Type{Float32}, s::AbstractString) = ccall(:jl_try_strtof, Nullable{Float32}, (Ptr{UInt8},), s)
tryparse(::Type{Float32}, s::SubString) = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof)

# Parse `s` as a floating-point number of type `T` (Float32 or Float64).
# Built on tryparse: a null result is turned into an ArgumentError, otherwise the
# parsed value is unwrapped and returned.
function parse{T<:Union(Float32,Float64)}(::Type{T}, s::AbstractString)
    parsed = tryparse(T, s)
    if isnull(parsed)
        throw(ArgumentError("invalid number format $(repr(s)) for $T"))
    end
    return get(parsed)
end

float(x::AbstractString) = parsefloat(x)
parsefloat(x::AbstractString) = parsefloat(Float64,x)
# parsefloat and float on strings forward to parse; when no type is given the string
# is parsed as Float64.
parsefloat{T<:Union(Float32,Float64)}(::Type{T}, s::AbstractString) = parse(T,s)

float(x::AbstractString) = parse(Float64,x)
parsefloat(x::AbstractString) = parse(Float64,x)

float{S<:AbstractString}(a::AbstractArray{S}) = map!(float, similar(a,typeof(float(0))), a)

# Check whether `s` parses as a float of type `T` (Float32 or Float64).
# Returns true and stores the parsed value in out[1] when parsing succeeds;
# returns false and leaves `out` untouched otherwise.
function float_isvalid{T<:Union(Float32,Float64)}(s::AbstractString, out::Array{T,1})
    parsed = tryparse(T, s)
    valid = !isnull(parsed)
    if valid
        out[1] = get(parsed)
    end
    return valid
end

# Type-specific entry points that forward to the generic float_isvalid; the element
# type of `out` selects Float32 or Float64 parsing.
float32_isvalid(s::AbstractString, out::Array{Float32,1}) = float_isvalid(s, out)
float64_isvalid(s::AbstractString, out::Array{Float64,1}) = float_isvalid(s, out)

# find the index of the first occurrence of a value in a byte array

typealias ByteArray Union(Array{UInt8,1},Array{Int8,1})
Expand Down
26 changes: 23 additions & 3 deletions base/sysimg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ using .Errno
include("path.jl")
include("intfuncs.jl")

# nullable types
include("nullable.jl")

# I/O
include("task.jl")
Expand Down Expand Up @@ -180,6 +182,27 @@ big(n::Integer) = convert(BigInt,n)
big(x::FloatingPoint) = convert(BigFloat,x)
big(q::Rational) = big(num(q))//big(den(q))

# Table of consecutive factorials stored as UInt128 values: the entries are
# 1!, 2!, 3!, ... (1, 2, 6, 0x18 = 24, 0x78 = 120, 0x2d0 = 720, ...), 34 entries in all.
# NOTE(review): presumably defined in sysimg.jl rather than combinatorics.jl so that
# it is evaluated after the big-number definitions just above — confirm the load-order
# requirement.
const _fact_table128 =
UInt128[0x00000000000000000000000000000001, 0x00000000000000000000000000000002,
0x00000000000000000000000000000006, 0x00000000000000000000000000000018,
0x00000000000000000000000000000078, 0x000000000000000000000000000002d0,
0x000000000000000000000000000013b0, 0x00000000000000000000000000009d80,
0x00000000000000000000000000058980, 0x00000000000000000000000000375f00,
0x00000000000000000000000002611500, 0x0000000000000000000000001c8cfc00,
0x0000000000000000000000017328cc00, 0x0000000000000000000000144c3b2800,
0x00000000000000000000013077775800, 0x00000000000000000000130777758000,
0x00000000000000000001437eeecd8000, 0x00000000000000000016beecca730000,
0x000000000000000001b02b9306890000, 0x000000000000000021c3677c82b40000,
0x0000000000000002c5077d36b8c40000, 0x000000000000003ceea4c2b3e0d80000,
0x000000000000057970cd7e2933680000, 0x00000000000083629343d3dcd1c00000,
0x00000000000cd4a0619fb0907bc00000, 0x00000000014d9849ea37eeac91800000,
0x00000000232f0fcbb3e62c3358800000, 0x00000003d925ba47ad2cd59dae000000,
0x0000006f99461a1e9e1432dcb6000000, 0x00000d13f6370f96865df5dd54000000,
0x0001956ad0aae33a4560c5cd2c000000, 0x0032ad5a155c6748ac18b9a580000000,
0x0688589cc0e9505e2f2fee5580000000, 0xde1bc4d19efcac82445da75b00000000]
# 128-bit factorial methods: forward to factorial_lookup (base/combinatorics.jl) with
# the table above and the limit beyond which factorial_lookup throws OverflowError
# (33 for Int128, 34 for UInt128).
factorial(n::Int128) = factorial_lookup(n, _fact_table128, 33)
factorial(n::UInt128) = factorial_lookup(n, _fact_table128, 34)

# more hashing definitions
include("hashing2.jl")

Expand All @@ -192,9 +215,6 @@ importall .Random
include("printf.jl")
importall .Printf

# nullable types
include("nullable.jl")

# concurrency and parallelism
include("serialize.jl")
include("multi.jl")
Expand Down
Loading

0 comments on commit de27f5e

Please sign in to comment.