Skip to content

Commit

Permalink
Optimize integer to string conversions (#36470)
Browse files Browse the repository at this point in the history
* Optimize integer-->string conversions

This avoids invalidations caused by invalidating `StringVector(::Integer)`.
This also makes `bin()`, `dec()` and `hex()` slightly faster,
but does not change `Printf`.
  • Loading branch information
kimikage authored Oct 1, 2020
1 parent a4bfb9c commit e7872b3
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 35 deletions.
108 changes: 73 additions & 35 deletions base/intfuncs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -615,75 +615,112 @@ ndigits(x::Integer; base::Integer=10, pad::Integer=1) = max(pad, ndigits0z(x, ba

## integer to string functions ##

function bin(x::Unsigned, pad::Integer, neg::Bool)
i = neg + max(pad,sizeof(x)<<3-leading_zeros(x))
a = StringVector(i)
function bin(x::Unsigned, pad::Int, neg::Bool)
m = 8 * sizeof(x) - leading_zeros(x)
n = neg + max(pad, m)
a = StringVector(n)
# for i in 0x0:UInt(n-1) # automatic vectorization produces redundant codes
# @inbounds a[n - i] = 0x30 + (((x >> i) % UInt8)::UInt8 & 0x1)
# end
i = n
@inbounds while i >= 4
b = UInt32((x % UInt8)::UInt8)
d = 0x30303030 + ((b * 0x08040201) >> 0x3) & 0x01010101
a[i-3] = (d >> 0x00) % UInt8
a[i-2] = (d >> 0x08) % UInt8
a[i-1] = (d >> 0x10) % UInt8
a[i] = (d >> 0x18) % UInt8
x >>= 0x4
i -= 4
end
while i > neg
@inbounds a[i] = 48+(x&0x1)
x >>= 1
@inbounds a[i] = 0x30 + ((x % UInt8)::UInt8 & 0x1)
x >>= 0x1
i -= 1
end
if neg; @inbounds a[1]=0x2d; end
String(a)
end

function oct(x::Unsigned, pad::Integer, neg::Bool)
i = neg + max(pad,div((sizeof(x)<<3)-leading_zeros(x)+2,3))
a = StringVector(i)
function oct(x::Unsigned, pad::Int, neg::Bool)
m = div(8 * sizeof(x) - leading_zeros(x) + 2, 3)
n = neg + max(pad, m)
a = StringVector(n)
i = n
while i > neg
@inbounds a[i] = 48+(x&0x7)
x >>= 3
@inbounds a[i] = 0x30 + ((x % UInt8)::UInt8 & 0x7)
x >>= 0x3
i -= 1
end
if neg; @inbounds a[1]=0x2d; end
String(a)
end

function dec(x::Unsigned, pad::Integer, neg::Bool)
i = neg + ndigits(x, base=10, pad=pad)
a = StringVector(i)
while i > neg
@inbounds a[i] = 48+rem(x,10)
x = oftype(x,div(x,10))
i -= 1
# Lookup table of the 100 two-character decimal pairs "00" through "99".
# Each UInt16 entry keeps the ASCII tens digit in its low byte and the ASCII
# ones digit in its high byte, so one lookup yields both output bytes.
const _dec_d100 = UInt16[((0x30 + rem(k, 10)) << 0x8) | (0x30 + div(k, 10)) for k in 0:99]

# Render the unsigned magnitude `x` in base 10 as a `String`.
#
# `pad` is the minimum number of digits (zero-filled on the left) and `neg`
# requests a leading '-' in front of those digits.
function dec(x::Unsigned, pad::Int, neg::Bool)
    len = neg + ndigits(x, pad=pad)
    buf = StringVector(len)
    pos = len
    # Fill the buffer from the right, two digits per division by 100.
    @inbounds while pos >= 2
        q, r = divrem(x, 0x64)
        pair = _dec_d100[(r % Int)::Int + 1]
        buf[pos - 1] = pair % UInt8
        buf[pos] = (pair >> 0x8) % UInt8
        x = oftype(x, q)
        pos -= 2
    end
    # At most one digit slot is left over; it is skipped when that slot is
    # the one reserved for the sign.
    if pos > neg
        @inbounds buf[pos] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8
    end
    # The sign is written last: the paired loop may have placed a padding
    # digit in the first slot.
    if neg
        @inbounds buf[1] = 0x2d
    end
    return String(buf)
end

function hex(x::Unsigned, pad::Integer, neg::Bool)
i = neg + max(pad,(sizeof(x)<<1)-(leading_zeros(x)>>2))
a = StringVector(i)
while i > neg
d = x & 0xf
@inbounds a[i] = 48+d+39*(d>9)
x >>= 4
i -= 1
# Render the unsigned magnitude `x` in lowercase hexadecimal as a `String`.
#
# `pad` is the minimum number of digits (zero-filled on the left) and `neg`
# requests a leading '-' in front of those digits.
function hex(x::Unsigned, pad::Int, neg::Bool)
    # Number of significant nibbles in `x` (zero when `x == 0`).
    ndig = 2 * sizeof(x) - (leading_zeros(x) >> 2)
    len = neg + max(pad, ndig)
    buf = StringVector(len)
    pos = len
    # Fill the buffer from the right, one byte (two hex digits) per pass.
    # Adding 0x30 maps 0-9 to '0'-'9'; adding 0x57 maps 10-15 to 'a'-'f'.
    @inbounds while pos >= 2
        byte = (x % UInt8)::UInt8
        hi = byte >> 0x4
        lo = byte & 0xf
        buf[pos - 1] = hi + ifelse(hi > 0x9, 0x57, 0x30)
        buf[pos] = lo + ifelse(lo > 0x9, 0x57, 0x30)
        x >>= 0x8
        pos -= 2
    end
    # At most one digit slot is left over; it is skipped when that slot is
    # the one reserved for the sign.
    if pos > neg
        nib = (x % UInt8)::UInt8 & 0xf
        @inbounds buf[pos] = nib + ifelse(nib > 0x9, 0x57, 0x30)
    end
    # The sign is written last: the paired loop may have placed a padding
    # digit in the first slot.
    if neg
        @inbounds buf[1] = 0x2d
    end
    return String(buf)
end

const base36digits = ['0':'9';'a':'z']
const base62digits = ['0':'9';'A':'Z';'a':'z']
const base36digits = UInt8['0':'9';'a':'z']
const base62digits = UInt8['0':'9';'A':'Z';'a':'z']

function _base(b::Integer, x::Integer, pad::Integer, neg::Bool)
(x >= 0) | (b < 0) || throw(DomainError(x, "For negative `x`, `b` must be negative."))
2 <= abs(b) <= 62 || throw(DomainError(b, "base must satisfy 2 ≤ abs(base) ≤ 62"))
function _base(base::Integer, x::Integer, pad::Int, neg::Bool)
(x >= 0) | (base < 0) || throw(DomainError(x, "For negative `x`, `base` must be negative."))
2 <= abs(base) <= 62 || throw(DomainError(base, "base must satisfy 2 ≤ abs(base) ≤ 62"))
b = (base % Int)::Int
digits = abs(b) <= 36 ? base36digits : base62digits
i = neg + ndigits(x, base=b, pad=pad)
a = StringVector(i)
n = neg + ndigits(x, base=b, pad=pad)
a = StringVector(n)
i = n
@inbounds while i > neg
if b > 0
a[i] = digits[1+rem(x,b)]
a[i] = digits[1 + (rem(x, b) % Int)::Int]
x = div(x,b)
else
a[i] = digits[1+mod(x,-b)]
a[i] = digits[1 + (mod(x, -b) % Int)::Int]
x = cld(x,b)
end
i -= 1
end
if neg; a[1]='-'; end
if neg; @inbounds a[1]=0x2d; end
String(a)
end

Expand All @@ -705,6 +742,7 @@ julia> string(13, base = 5, pad = 4)
```
"""
function string(n::Integer; base::Integer = 10, pad::Integer = 1)
pad = (min(max(pad, typemin(Int)), typemax(Int)) % Int)::Int
if base == 2
(n_positive, neg) = split_sign(n)
bin(n_positive, pad, neg)
Expand Down
3 changes: 3 additions & 0 deletions test/intfuncs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ end
@test string(3, base = 2) == "11"
@test string(3, pad = 2, base = 2) == "11"
@test string(3, pad = Int32(2), base = Int32(2)) == "11"
@test string(3, pad = typemin(Int128) + 3, base = 0x2) == "11"
@test string(3, pad = 3, base = 2) == "011"
@test string(-3, base = 2) == "-11"
@test string(-3, pad = 3, base = 2) == "-011"
Expand Down Expand Up @@ -338,6 +339,8 @@ end
@test digits(-3, base = 2) == -[1, 1]
@test digits(-42, base = 4) == -[2, 2, 2]

@test_throws DomainError string(5, base = typemin(Int128) + 10)

@testset "digits/base with bases powers of 2" begin
@test digits(4, base = 2) == [0, 0, 1]
@test digits(5, base = Int32(2), pad=Int32(3)) == [1, 0, 1]
Expand Down

0 comments on commit e7872b3

Please sign in to comment.