-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
inline ^ for literal powers of numbers #20637
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -219,6 +219,113 @@ const HWNumber = Union{HWReal, Complex{<:HWReal}, Rational{<:HWReal}} | |
@inline internal_pow(x::HWNumber, ::Type{Val{2}}) = x*x | ||
@inline internal_pow(x::HWNumber, ::Type{Val{3}}) = x*x*x | ||
|
||
# This table represents the optimal "power tree" | ||
# based on Knuth's "TAOCP vol 2: Seminumerical Algorithms", | ||
# third edition, section 4.6.3, Fig. 15. It was | ||
# transcribed into this array form in the gcc source | ||
# code (tree-ssa-math-opts.c), but since this is just | ||
# a table of mathematical facts it should not be copyrightable. | ||
# | ||
# To compute x^q in a minimal number of multiplications | ||
# for 1 ≤ q ≤ 255, you compute x^r * x^s for | ||
# r = power_tree[i] and s = i-q, recursively | ||
# (memoizing each power computation), and we implicitly | ||
# define power_tree[0] = 0. | ||
const power_tree = [ | ||
1, 1, 2, 2, 3, 3, 4, # 1 - 7 | ||
4, 6, 5, 6, 6, 10, 7, 9, # 8 - 15 | ||
8, 16, 9, 16, 10, 12, 11, 13, # 16 - 23 | ||
12, 17, 13, 18, 14, 24, 15, 26, # 24 - 31 | ||
16, 17, 17, 19, 18, 33, 19, 26, # 32 - 39 | ||
20, 25, 21, 40, 22, 27, 23, 44, # 40 - 47 | ||
24, 32, 25, 34, 26, 29, 27, 44, # 48 - 55 | ||
28, 31, 29, 34, 30, 60, 31, 36, # 56 - 63 | ||
32, 64, 33, 34, 34, 46, 35, 37, # 64 - 71 | ||
36, 65, 37, 50, 38, 48, 39, 69, # 72 - 79 | ||
40, 49, 41, 43, 42, 51, 43, 58, # 80 - 87 | ||
44, 64, 45, 47, 46, 59, 47, 76, # 88 - 95 | ||
48, 65, 49, 66, 50, 67, 51, 66, # 96 - 103 | ||
52, 70, 53, 74, 54, 104, 55, 74, # 104 - 111 | ||
56, 64, 57, 69, 58, 78, 59, 68, # 112 - 119 | ||
60, 61, 61, 80, 62, 75, 63, 68, # 120 - 127 | ||
64, 65, 65, 128, 66, 129, 67, 90, # 128 - 135 | ||
68, 73, 69, 131, 70, 94, 71, 88, # 136 - 143 | ||
72, 128, 73, 98, 74, 132, 75, 121, # 144 - 151 | ||
76, 102, 77, 124, 78, 132, 79, 106, # 152 - 159 | ||
80, 97, 81, 160, 82, 99, 83, 134, # 160 - 167 | ||
84, 86, 85, 95, 86, 160, 87, 100, # 168 - 175 | ||
88, 113, 89, 98, 90, 107, 91, 122, # 176 - 183 | ||
92, 111, 93, 102, 94, 126, 95, 150, # 184 - 191 | ||
96, 128, 97, 130, 98, 133, 99, 195, # 192 - 199 | ||
100, 128, 101, 123, 102, 164, 103, 138, # 200 - 207 | ||
104, 145, 105, 146, 106, 109, 107, 149, # 208 - 215 | ||
108, 200, 109, 146, 110, 170, 111, 157, # 216 - 223 | ||
112, 128, 113, 130, 114, 182, 115, 132, # 224 - 231 | ||
116, 200, 117, 132, 118, 158, 119, 206, # 232 - 239 | ||
120, 240, 121, 162, 122, 147, 123, 152, # 240 - 247 | ||
124, 166, 125, 214, 126, 138, 127, 153, # 248 - 255 | ||
] | ||
|
||
# return the inlined/unrolled expression to compute x^p | ||
# by repeated multiplications (using an optimal addition | ||
# chain for |p|<256 and power-by-squaring thereafter. | ||
function inlined_pow(x::Symbol, p::Int) | ||
p == 0 && return :(one($x)) | ||
ex = Union{Expr,Symbol}[] # expressions to compute intermediate powers | ||
if p < 0 | ||
x′ = gensym() | ||
push!(ex, :($x′ = inv($x))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't this likely to be more accurate if the inversion happens at the end? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe, for integers, but in that case it's also likelier to overflow. For floating-point values, I don't see why it would be more accurate to put the |
||
x = x′ | ||
p = -p | ||
end | ||
pows_computed = Dict(1 => x) # powers q => variable storing x^q | ||
pows_to_compute = [p] | ||
while !isempty(pows_to_compute) | ||
q = pop!(pows_to_compute) | ||
if !haskey(pows_computed, q) | ||
if q ≤ length(power_tree) # use optimal addition chain | ||
q1 = power_tree[q] | ||
q2 = q-q1 | ||
assert(q1 > 0 && q2 > 0) | ||
has1 = haskey(pows_computed, q1) | ||
has2 = haskey(pows_computed, q2) | ||
if has1 && has2 | ||
xq = gensym() | ||
push!(ex, :($xq = $(pows_computed[q1]) * $(pows_computed[q2]))) | ||
pows_computed[q] = xq | ||
else | ||
push!(pows_to_compute, q) # try again after computing q1, q2 | ||
has1 || push!(pows_to_compute, q1) | ||
has2 || push!(pows_to_compute, q2) | ||
end | ||
else # q too big, use power-by-squaring algorithm | ||
q1 = q >> 1 | ||
if haskey(pows_computed, q1) | ||
xq = gensym() | ||
xq1 = pows_computed[q1] | ||
push!(ex, iseven(q) ? :($xq = $xq1 * $xq1) : :($xq = $xq1 * $xq1 * $x)) | ||
pows_computed[q] = xq | ||
else | ||
push!(pows_to_compute, q) # try again after computing q1 | ||
push!(pows_to_compute, q1) | ||
end | ||
end | ||
end | ||
end | ||
push!(ex, pows_computed[p]) # the variable for the final result x^p | ||
return Expr(:block, ex...) | ||
end | ||
inlined_pow(x::Symbol, p::Integer) = inlined_pow(x, Int(p)) | ||
|
||
# for cases where we use power_by_squaring, inline | ||
# the unrolled expression for literal p | ||
# (this also allows integer^-p to give a floating-point result | ||
# in a type-stable fashion). | ||
@inline @generated internal_pow{p}(x::HWNumber, ::Type{Val{p}}) = inlined_pow(:x, p) | ||
|
||
# Float32/Float64 may not inline without @fastmath, for accuracy | ||
internal_pow{p}(x::Union{Float32,Float64}, ::Type{Val{p}}) = x^p | ||
|
||
# b^p mod m | ||
|
||
""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't we still call GMP in this case?