From a602ec02fa848ef2ddaeec7247517ce7410a2518 Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Wed, 23 Mar 2022 15:34:54 -0400 Subject: [PATCH 1/5] faster _log_ext --- base/special/log.jl | 195 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 164 insertions(+), 31 deletions(-) diff --git a/base/special/log.jl b/base/special/log.jl index bca0d7143db48..261f7539bc0d4 100644 --- a/base/special/log.jl +++ b/base/special/log.jl @@ -396,8 +396,6 @@ function log1p(x::Float32) throw_complex_domainerror(:log1p, x) end end - - @inline function log_ext_kernel(x_hi::Float64, x_lo::Float64) c1hi = 0.666666666666666629659233 hi_order = evalpoly(x_hi, (0.400000000000000077715612, 0.285714285714249172087875, @@ -412,36 +410,171 @@ end return ans_hi, ans_lo end +const t_log_ext_Float64 = ( +(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48), +(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46), +(0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45), +(0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49), +(0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47), +(0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46), +(0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50), +(0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45), +(0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45), +(0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45), +(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46), +(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46), +(0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46), +(0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46), +(0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46), +(0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45), +(0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47), +(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48), +(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48), +(0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47), +(0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45), +(0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46), +(0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45), +(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45), +(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45), +(0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46), +(0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52), +(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45), +(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45), +(0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45), +(0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45), +(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45), +(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45), +(0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46), +(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46), +(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46), +(0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45), +(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46), +(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46), +(0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48), +(0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45), +(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45), +(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45), +(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47), +(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47), +(0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45), +(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45), +(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45), +(0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46), +(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45), +(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45), +(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46), +(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46), +(0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45), +(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46), +(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46), +(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45), +(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45), +(0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46), +(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45), +(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45), +(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46), +(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46), +(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45), +(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45), +(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48), +(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48), +(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45), +(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45), +(0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45), +(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50), +(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50), +(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46), +(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46), +(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0), +(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0), +(0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46), +(0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45), +(0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45), +(0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47), +(0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45), +(0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46), +(0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46), +(0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47), +(0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45), +(0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45), +(0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45), +(0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49), +(0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45), +(0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46), +(0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45), +(0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45), +(0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45), +(0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45), +(0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45), +(0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47), +(0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51), +(0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45), +(0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45), +(0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46), +(0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45), +(0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46), +(0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47), +(0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47), +(0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45), +(0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47), +(0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45), +(0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48), +(0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45), +(0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51), +(0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51), +(0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46), +(0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48), +(0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45), +(0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45), +(0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45), +(0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45), +(0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47), +(0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45), +(0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45), +(0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46), +(0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46), +(0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47), +(0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45), +(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45), +(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45), +(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46), +(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)) + # Log implementation that returns 2 numbers which sum to give true value with about 68 bits of precision -# Implimentation adapted from SLEEFPirates.jl # Does not normalize results. # Must be caused with positive finite arguments -function _log_ext(d::Float64) - m, e = significand(d), exponent(d) - if m > 1.5 - m *= 0.5 - e += 1.0 - end - # x = (m-1)/(m+1) - mp1hi = m + 1.0 - mp1lo = m + (1.0 - mp1hi) - invy = inv(mp1hi) - xhi = (m - 1.0) * invy - xlo = fma(-xhi, mp1lo, fma(-xhi, mp1hi, m - 1.0)) * invy - x2hi, x2lo = two_mul(xhi, xhi) - x2lo = muladd(xhi, xlo * 2.0, x2lo) - thi, tlo = log_ext_kernel(x2hi, x2lo) - - shi = 0.6931471805582987 * e - xhi2 = xhi * 2.0 - shinew = muladd(xhi, 2.0, shi) - slo = muladd(1.6465949582897082e-12, e, muladd(xlo, 2.0, (((shi - shinew) + xhi2)))) - shi = shinew - x3hi, x3lo = two_mul(x2hi, xhi) - x3lo = muladd(x2hi, xlo, muladd(xhi, x2lo,x3lo)) - x3thi, x3tlo = two_mul(x3hi, thi) - x3tlo = muladd(x3hi, tlo, muladd(x3lo, thi, x3tlo)) - anshi = x3thi + shi - anslo = slo + x3tlo - ((anshi - shi) - x3thi) - return anshi, anslo +# Copyright (c) 2018-2020, Arm Limited. +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception +function _log_ext(x) + ix = reinterpret(UInt64, x) + # x = 2^k z; where z is in range [OFF,2*OFF) and exact. + # The range is split into N subintervals. + # The ith subinterval contains z and c is near its center. + tmp = reinterpret(Int64, ix - 0x3fe6955500000000) + i = (tmp >> 45) & 127 + z = reinterpret(Float64, ix - (tmp & 0xfff0000000000000)) + k = Float64(tmp >> 52) + + # log(x) = k*Ln2 + log(c) + log1p(z/c-1). + invc, logc, logctail = t_log_ext_Float64[i+1] + # Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and + # |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. + r = fma(z, invc, -1.0) + # k*Ln2 + log(c) + r. + t1 = muladd(k, 0.6931471805598903, logc) + t2 = t1 + r + lo1 = muladd(k, 5.497923018708371e-14, logctail) + lo2 = t1 - t2 + r + + ar = -0.5 * r + ar2, lo3 = two_mul(r, ar) + # k*Ln2 + log(c) + r + .5*r*r. + hi = t2 + ar2 + lo4 = t2 - hi + ar2 + #p = r*ar2*evalpoly(r, (0x1.555555555556p-2, -0x1.0000000000006p-2, 0x1.999999959554ep-3, -0x1.555555529a47ap-3, 0x1.2495b9b4845e9p-3,-0x1.0002b8b263fc3p-3)) + p = r*ar2 * muladd(ar2, muladd(ar2, muladd(r, 0x1.0002b8b263fc3p+0, -0x1.2495b9b4845e9p+0), muladd(r, -0x1.555555529a47ap-1, 0x1.999999959554ep-1)), (muladd(r, 0x1.0000000000006p-1, -0x1.555555555556p-1))) + lo = lo1 + lo2 + lo3 + lo4 + p + return hi, lo + #y = hi + lo + #return y, hi - y + lo end From d9289a66ab98584c3b3917a17aee1b6ab76df693 Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Thu, 24 Mar 2022 00:34:01 -0400 Subject: [PATCH 2/5] fix subnormal x --- base/math.jl | 18 ++++++++++++------ base/special/log.jl | 31 +++++++------------------------ 2 files changed, 19 insertions(+), 30 deletions(-) diff --git a/base/math.jl b/base/math.jl index 0d20c7bf1cca5..d534f852a6958 100644 --- a/base/math.jl +++ b/base/math.jl @@ -1001,15 +1001,21 @@ end y == yint && return x^yint #numbers greater than 2*inv(eps(T)) must be even, and the pow will overflow y >= 2*inv(eps()) && return x^(typemax(Int64)-1) + xu = reinterpret(UInt64, x) x<0 && y > -4e18 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer - x == 1 && return 1.0 - return pow_body(x, y) + x === 1.0 && return 1.0 + x==0 && return abs(y)*Inf*(!(y>0)) + !isfinite(x) && return x*(y>0 || isnan(x)) # x is inf or NaN + if xu < (UInt64(1)<<52) # x is subnormal + xu = reinterpret(UInt64, x * 0x1p52) # normalize x + xu &= ~sign_mask(Float64) + xu -= UInt64(52) << 52 # mess with the exponent + end + return pow_body(xu, y) end -@inline function pow_body(x::Float64, y::Float64) - !isfinite(x) && return x*(y>0 || isnan(x)) - x==0 && return abs(y)*Inf*(!(y>0)) - logxhi,logxlo = Base.Math._log_ext(x) +@inline function pow_body(xu::UInt64, y::Float64) + logxhi,logxlo = Base.Math._log_ext(xu) xyhi, xylo = two_mul(logxhi,y) xylo = muladd(logxlo, y, xylo) hi = xyhi+xylo diff --git a/base/special/log.jl b/base/special/log.jl index 261f7539bc0d4..bc8a19cbecc75 100644 --- a/base/special/log.jl +++ b/base/special/log.jl @@ -396,19 +396,6 @@ function log1p(x::Float32) throw_complex_domainerror(:log1p, x) end end -@inline function log_ext_kernel(x_hi::Float64, x_lo::Float64) - c1hi = 0.666666666666666629659233 - hi_order = evalpoly(x_hi, (0.400000000000000077715612, 0.285714285714249172087875, - 0.222222222230083560345903, 0.181818180850050775676507, - 0.153846227114512262845736, 0.13332981086846273921509, - 0.117754809412463995466069, 0.103239680901072952701192, - 0.116255524079935043668677)) - res_hi, res_lo = two_mul(hi_order, x_hi) - res_lo = fma(x_lo, hi_order, res_lo) - ans_hi = c1hi + res_hi - ans_lo = ((c1hi - ans_hi) + res_hi) + (res_lo + 3.80554962542412056336616e-17) - return ans_hi, ans_lo -end const t_log_ext_Float64 = ( (0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48), @@ -541,20 +528,19 @@ const t_log_ext_Float64 = ( (0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)) # Log implementation that returns 2 numbers which sum to give true value with about 68 bits of precision +# Since `log` only makes sense for positive exponents, we speed up the implimentation by stealing the sign bit +# of the input for an extra bit of the exponent which is used to normalize subnormal inputs. # Does not normalize results. -# Must be caused with positive finite arguments # Copyright (c) 2018-2020, Arm Limited. # SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception -function _log_ext(x) - ix = reinterpret(UInt64, x) +function _log_ext(xu) # x = 2^k z; where z is in range [OFF,2*OFF) and exact. # The range is split into N subintervals. # The ith subinterval contains z and c is near its center. - tmp = reinterpret(Int64, ix - 0x3fe6955500000000) + tmp = reinterpret(Int64, xu - 0x3fe6955500000000) i = (tmp >> 45) & 127 - z = reinterpret(Float64, ix - (tmp & 0xfff0000000000000)) + z = reinterpret(Float64, xu - (tmp & 0xfff0000000000000)) k = Float64(tmp >> 52) - # log(x) = k*Ln2 + log(c) + log1p(z/c-1). invc, logc, logctail = t_log_ext_Float64[i+1] # Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and @@ -571,10 +557,7 @@ function _log_ext(x) # k*Ln2 + log(c) + r + .5*r*r. hi = t2 + ar2 lo4 = t2 - hi + ar2 - #p = r*ar2*evalpoly(r, (0x1.555555555556p-2, -0x1.0000000000006p-2, 0x1.999999959554ep-3, -0x1.555555529a47ap-3, 0x1.2495b9b4845e9p-3,-0x1.0002b8b263fc3p-3)) - p = r*ar2 * muladd(ar2, muladd(ar2, muladd(r, 0x1.0002b8b263fc3p+0, -0x1.2495b9b4845e9p+0), muladd(r, -0x1.555555529a47ap-1, 0x1.999999959554ep-1)), (muladd(r, 0x1.0000000000006p-1, -0x1.555555555556p-1))) - lo = lo1 + lo2 + lo3 + lo4 + p + p = evalpoly(r, (-0x1.555555555556p-1, 0x1.0000000000006p-1, -0x1.999999959554ep-2, 0x1.555555529a47ap-2, -0x1.2495b9b4845e9p-2, 0x1.0002b8b263fc3p-2)) + lo = lo1 + lo2 + lo3 + muladd(r*ar2, p, lo4) return hi, lo - #y = hi + lo - #return y, hi - y + lo end From 5b95457aed9bfb565823bbb2c454ab9d03cff0ab Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Sun, 27 Mar 2022 23:27:06 -0400 Subject: [PATCH 3/5] use compact table --- base/special/log.jl | 303 ++++++++++++++++++++++++-------------------- 1 file changed, 163 insertions(+), 140 deletions(-) diff --git a/base/special/log.jl b/base/special/log.jl index bc8a19cbecc75..50e39c17df505 100644 --- a/base/special/log.jl +++ b/base/special/log.jl @@ -397,161 +397,184 @@ function log1p(x::Float32) end end -const t_log_ext_Float64 = ( -(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48), -(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46), -(0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45), -(0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49), -(0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47), -(0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46), -(0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50), -(0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45), -(0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45), -(0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45), -(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46), -(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46), -(0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46), -(0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46), -(0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46), -(0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45), -(0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47), -(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48), -(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48), -(0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47), -(0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45), -(0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46), -(0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45), -(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45), -(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45), -(0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46), -(0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52), -(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45), -(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45), -(0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45), -(0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45), -(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45), -(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45), -(0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46), -(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46), -(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46), -(0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45), -(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46), -(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46), -(0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48), -(0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45), -(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45), -(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45), -(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47), -(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47), -(0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45), -(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45), -(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45), -(0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46), -(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45), -(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45), -(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46), -(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46), -(0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45), -(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46), -(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46), -(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45), -(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45), -(0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46), -(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45), -(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45), -(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46), -(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46), -(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45), -(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45), -(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48), -(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48), -(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45), -(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45), -(0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45), -(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50), -(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50), -(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46), -(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46), -(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0), -(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0), -(0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46), -(0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45), -(0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45), -(0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47), -(0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45), -(0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46), -(0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46), -(0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47), -(0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45), -(0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45), -(0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45), -(0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49), -(0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45), -(0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46), -(0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45), -(0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45), -(0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45), -(0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45), -(0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45), -(0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47), -(0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51), -(0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45), -(0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45), -(0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46), -(0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45), -(0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46), -(0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47), -(0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47), -(0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45), -(0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47), -(0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45), -(0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48), -(0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45), -(0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51), -(0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51), -(0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46), -(0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48), -(0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45), -(0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45), -(0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45), -(0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45), -(0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47), -(0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45), -(0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45), -(0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46), -(0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46), -(0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47), -(0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45), -(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45), -(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45), -(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46), -(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)) +#function make_compact_table(N) +# table = Tuple{UInt64,Float64}[] +# lo, hi = 0x1.69555p-1, 0x1.69555p0 +# for i in 0:N-1 +# # I am not fully sure why this is the right formula to use, but it apparently is +# center = i/(2*N) + lo < 1 ? (i+.5)/(2*N) + lo : (i+.5)/N + hi -1 +# invc = Float64(center < 1 ? round(N/center)/N : round(2*N/center)/(N*2)) +# c = inv(big(invc)) +# logc = Float64(round(0x1p43*log(c))/0x1p43) +# logctail = reinterpret(Float64, Float64(log(c) - logc)) +# p1 = (reinterpret(UInt64,invc) >> 45) % UInt8 +# push!(table, (p1|reinterpret(UInt64,logc),logctail)) +# end +# return Tuple(table) +#end +#const t_log_table_compat = make_compact_table(128) +const t_log_table_compat = ( + (0xbfd62c82f2b9c8b5, 5.929407345889625e-15), + (0xbfd5d1bdbf5808b4, -2.544157440035963e-14), + (0xbfd57677174558b3, -3.443525940775045e-14), + (0xbfd51aad872df8b2, -2.500123826022799e-15), + (0xbfd4be5f957778b1, -8.929337133850617e-15), + (0xbfd4618bc21c60b0, 1.7625431312172662e-14), + (0xbfd404308686a8af, 1.5688303180062087e-15), + (0xbfd3a64c556948ae, 2.9655274673691784e-14), + (0xbfd347dd9a9880ad, 3.7923164802093147e-14), + (0xbfd2e8e2bae120ac, 3.993416384387844e-14), + (0xbfd2895a13de88ab, 1.9352855826489123e-14), + (0xbfd2895a13de88ab, 1.9352855826489123e-14), + (0xbfd22941fbcf78aa, -1.9852665484979036e-14), + (0xbfd1c898c16998a9, -2.814323765595281e-14), + (0xbfd1675cababa8a8, 2.7643769993528702e-14), + (0xbfd1058bf9ae48a7, -4.025092402293806e-14), + (0xbfd0a324e27390a6, -1.2621729398885316e-14), + (0xbfd0402594b4d0a5, -3.600176732637335e-15), + (0xbfd0402594b4d0a5, -3.600176732637335e-15), + (0xbfcfb9186d5e40a4, 1.3029797173308663e-14), + (0xbfcef0adcbdc60a3, 4.8230289429940886e-14), + (0xbfce27076e2af0a2, -2.0592242769647135e-14), + (0xbfcd5c216b4fc0a1, 3.149265065191484e-14), + (0xbfcc8ff7c79aa0a0, 4.169796584527195e-14), + (0xbfcc8ff7c79aa0a0, 4.169796584527195e-14), + (0xbfcbc286742d909f, 2.2477465222466186e-14), + (0xbfcaf3c94e80c09e, 3.6507188831790577e-16), + (0xbfca23bc1fe2b09d, -3.827767260205414e-14), + (0xbfca23bc1fe2b09d, -3.827767260205414e-14), + (0xbfc9525a9cf4509c, -4.7641388950792196e-14), + (0xbfc87fa06520d09b, 4.9278276214647115e-14), + (0xbfc7ab890210e09a, 4.9485167661250996e-14), + (0xbfc7ab890210e09a, 4.9485167661250996e-14), + (0xbfc6d60fe719d099, -1.5003333854266542e-14), + (0xbfc5ff3070a79098, -2.7194441649495324e-14), + (0xbfc5ff3070a79098, -2.7194441649495324e-14), + (0xbfc526e5e3a1b097, -2.99659267292569e-14), + (0xbfc44d2b6ccb8096, 2.0472357800461955e-14), + (0xbfc44d2b6ccb8096, 2.0472357800461955e-14), + (0xbfc371fc201e9095, 3.879296723063646e-15), + (0xbfc29552f81ff094, -3.6506824353335045e-14), + (0xbfc1b72ad52f6093, -5.4183331379008994e-14), + (0xbfc1b72ad52f6093, -5.4183331379008994e-14), + (0xbfc0d77e7cd09092, 1.1729485484531301e-14), + (0xbfc0d77e7cd09092, 1.1729485484531301e-14), + (0xbfbfec9131dbe091, -3.811763084710266e-14), + (0xbfbe27076e2b0090, 4.654729747598445e-14), + (0xbfbe27076e2b0090, 4.654729747598445e-14), + (0xbfbc5e548f5bc08f, -2.5799991283069902e-14), + (0xbfba926d3a4ae08e, 3.7700471749674615e-14), + (0xbfba926d3a4ae08e, 3.7700471749674615e-14), + (0xbfb8c345d631a08d, 1.7306161136093256e-14), + (0xbfb8c345d631a08d, 1.7306161136093256e-14), + (0xbfb6f0d28ae5608c, -4.012913552726574e-14), + (0xbfb51b073f06208b, 2.7541708360737882e-14), + (0xbfb51b073f06208b, 2.7541708360737882e-14), + (0xbfb341d7961be08a, 5.0396178134370583e-14), + (0xbfb341d7961be08a, 5.0396178134370583e-14), + (0xbfb16536eea38089, 1.8195060030168815e-14), + (0xbfaf0a30c0118088, 5.213620639136504e-14), + (0xbfaf0a30c0118088, 5.213620639136504e-14), + (0xbfab42dd71198087, 2.532168943117445e-14), + (0xbfab42dd71198087, 2.532168943117445e-14), + (0xbfa77458f632c086, -5.148849572685811e-14), + (0xbfa77458f632c086, -5.148849572685811e-14), + (0xbfa39e87b9fec085, 4.6652946995830086e-15), + (0xbfa39e87b9fec085, 4.6652946995830086e-15), + (0xbf9f829b0e780084, -4.529814257790929e-14), + (0xbf9f829b0e780084, -4.529814257790929e-14), + (0xbf97b91b07d58083, -4.361324067851568e-14), + (0xbf8fc0a8b0fc0082, -1.7274567499706107e-15), + (0xbf8fc0a8b0fc0082, -1.7274567499706107e-15), + (0xbf7fe02a6b100081, -2.298941004620351e-14), + (0xbf7fe02a6b100081, -2.298941004620351e-14), + (0x0000000000000080, 0.0), + (0x0000000000000080, 0.0), + (0x3f8010157589007e, -1.4902732911301337e-14), + (0x3f9020565893807c, -3.527980389655325e-14), + (0x3f98492528c9007a, -4.730054772033249e-14), + (0x3fa0415d89e74078, 7.580310369375161e-15), + (0x3fa466aed42e0076, -4.9893776716773285e-14), + (0x3fa894aa149fc074, -2.262629393030674e-14), + (0x3faccb73cdddc072, -2.345674491018699e-14), + (0x3faeea31c006c071, -1.3352588834854848e-14), + (0x3fb1973bd146606f, -3.765296820388875e-14), + (0x3fb3bdf5a7d1e06d, 5.1128335719851986e-14), + (0x3fb5e95a4d97a06b, -5.046674438470119e-14), + (0x3fb700d30aeac06a, 3.1218748807418837e-15), + (0x3fb9335e5d594068, 3.3871241029241416e-14), + (0x3fbb6ac88dad6066, -1.7376727386423858e-14), + (0x3fbc885801bc4065, 3.957125899799804e-14), + (0x3fbec739830a2063, -5.2849453521890294e-14), + (0x3fbfe89139dbe062, -3.767012502308738e-14), + (0x3fc1178e8227e060, 3.1859736349078334e-14), + (0x3fc1aa2b7e23f05f, 5.0900642926060466e-14), + (0x3fc2d1610c86805d, 8.710783796122478e-15), + (0x3fc365fcb015905c, 6.157896229122976e-16), + (0x3fc4913d8333b05a, 3.821577743916796e-14), + (0x3fc527e5e4a1b059, 3.9440046718453496e-14), + (0x3fc6574ebe8c1057, 2.2924522154618074e-14), + (0x3fc6f0128b757056, -3.742530094732263e-14), + (0x3fc7898d85445055, -2.5223102140407338e-14), + (0x3fc8beafeb390053, -1.0320443688698849e-14), + (0x3fc95a5adcf70052, 1.0634128304268335e-14), + (0x3fca93ed3c8ae050, -4.3425422595242564e-14), + (0x3fcb31d8575bd04f, -1.2527395755711364e-14), + (0x3fcbd087383be04e, -5.204008743405884e-14), + (0x3fcc6ffbc6f0104d, -3.979844515951702e-15), + (0x3fcdb13db0d4904b, -4.7955860343296286e-14), + (0x3fce530effe7104a, 5.015686013791602e-16), + (0x3fcef5ade4dd0049, -7.252318953240293e-16), + (0x3fcf991c6cb3b048, 2.4688324156011588e-14), + (0x3fd07138604d5846, 5.465121253624792e-15), + (0x3fd0c42d67616045, 4.102651071698446e-14), + (0x3fd1178e8227e844, -4.996736502345936e-14), + (0x3fd16b5ccbacf843, 4.903580708156347e-14), + (0x3fd1bf99635a6842, 5.089628039500759e-14), + (0x3fd214456d0eb841, 1.1782016386565151e-14), + (0x3fd2bef07cdc903f, 4.727452940514406e-14), + (0x3fd314f1e1d3603e, -4.4204083338755686e-14), + (0x3fd36b6776be103d, 1.548345993498083e-14), + (0x3fd3c2527733303c, 2.1522127491642888e-14), + (0x3fd419b423d5e83b, 1.1054030169005386e-14), + (0x3fd4718dc271c83a, -5.534326352070679e-14), + (0x3fd4c9e09e173039, -5.351646604259541e-14), + (0x3fd522ae0738a038, 5.4612144489920215e-14), + (0x3fd57bf753c8d037, 2.8136969901227338e-14), + (0x3fd5d5bddf596036, -1.156568624616423e-14)) + + @inline function log_tab_unpack(t::UInt64) + invc = UInt64(t&UInt64(0xff)|0x1ff00)<<45 + logc = t&(~UInt64(0xff)) + return (reinterpret(Float64, invc), reinterpret(Float64, logc)) +end # Log implementation that returns 2 numbers which sum to give true value with about 68 bits of precision # Since `log` only makes sense for positive exponents, we speed up the implimentation by stealing the sign bit # of the input for an extra bit of the exponent which is used to normalize subnormal inputs. # Does not normalize results. -# Copyright (c) 2018-2020, Arm Limited. -# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception -function _log_ext(xu) - # x = 2^k z; where z is in range [OFF,2*OFF) and exact. +# Adapted and modified from https://github.com/ARM-software/optimized-routines/blob/master/math/pow.c +# Copyright (c) 2018-2020, Arm Limited. (which is also MIT licensed) +# note that this isn't an exact translation as this version compacts the table to reduce cache pressure. +function _log_ext2(xu) + # x = 2^k z; where z is in range [0x1.69555p-1,0x1.69555p-0) and exact. # The range is split into N subintervals. - # The ith subinterval contains z and c is near its center. - tmp = reinterpret(Int64, xu - 0x3fe6955500000000) + # The ith subinterval contains z and c is near the center of the interval. + tmp = reinterpret(Int64, xu - 0x3fe6955500000000) #0x1.69555p-1 i = (tmp >> 45) & 127 z = reinterpret(Float64, xu - (tmp & 0xfff0000000000000)) k = Float64(tmp >> 52) # log(x) = k*Ln2 + log(c) + log1p(z/c-1). - invc, logc, logctail = t_log_ext_Float64[i+1] - # Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and + t, logctail = t_log_table_compat[i+1] + invc, logc = log_tab_unpack(t) + # Note: invc is j/N or j/N/2 where j is an integer in [N,2N) and # |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. r = fma(z, invc, -1.0) # k*Ln2 + log(c) + r. - t1 = muladd(k, 0.6931471805598903, logc) + t1 = muladd(k, 0.6931471805598903, logc) #ln(2) hi part t2 = t1 + r - lo1 = muladd(k, 5.497923018708371e-14, logctail) + lo1 = muladd(k, 5.497923018708371e-14, logctail) #ln(2) lo part lo2 = t1 - t2 + r - ar = -0.5 * r ar2, lo3 = two_mul(r, ar) # k*Ln2 + log(c) + r + .5*r*r. From e7008d4ef950ea3016ac8c3d61313c249cc860bb Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Sun, 27 Mar 2022 23:48:19 -0400 Subject: [PATCH 4/5] typo --- base/special/log.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/special/log.jl b/base/special/log.jl index 50e39c17df505..440a32f8da0f0 100644 --- a/base/special/log.jl +++ b/base/special/log.jl @@ -556,7 +556,7 @@ end # Adapted and modified from https://github.com/ARM-software/optimized-routines/blob/master/math/pow.c # Copyright (c) 2018-2020, Arm Limited. (which is also MIT licensed) # note that this isn't an exact translation as this version compacts the table to reduce cache pressure. -function _log_ext2(xu) +function _log_ext(xu) # x = 2^k z; where z is in range [0x1.69555p-1,0x1.69555p-0) and exact. # The range is split into N subintervals. # The ith subinterval contains z and c is near the center of the interval. From c5b2583afef393d2c940c8f742d6a7dafa33d405 Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Tue, 29 Mar 2022 15:52:16 -0400 Subject: [PATCH 5/5] test subnormals --- test/math.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/math.jl b/test/math.jl index 3ae8703c09100..00847ee283257 100644 --- a/test/math.jl +++ b/test/math.jl @@ -1332,6 +1332,13 @@ end @test abs(expected-got) <= 1.3*eps(T(expected)) || (x,y) end end + for _ in 1:2^10 + x=rand(T)*floatmin(T); y=rand(T)*2-1 + got, expected = x^y, widen(x)^y + if isfinite(eps(T(expected))) + @test abs(expected-got) <= 1.3*eps(T(expected)) || (x,y) + end + end # test (-x)^y for y larger than typemax(Int) @test T(-1)^floatmax(T) === T(1) @test prevfloat(T(-1))^floatmax(T) === T(Inf)