From fbe04a6527e6bb146d81a09eeae66ebceb0757b3 Mon Sep 17 00:00:00 2001 From: Ravenwater Date: Mon, 12 Aug 2024 20:03:10 -0400 Subject: [PATCH] initial check-in of quad-double skeleton --- include/universal/number/qd/attributes.hpp | 65 + include/universal/number/qd/exceptions.hpp | 64 + include/universal/number/qd/manipulators.hpp | 43 + include/universal/number/qd/math/classify.hpp | 55 + include/universal/number/qd/math/complex.hpp | 12 + .../number/qd/math/error_and_gamma.hpp | 21 + include/universal/number/qd/math/exponent.hpp | 105 ++ .../universal/number/qd/math/fractional.hpp | 21 + .../universal/number/qd/math/hyperbolic.hpp | 41 + include/universal/number/qd/math/hypot.hpp | 15 + .../universal/number/qd/math/logarithm.hpp | 209 +++ include/universal/number/qd/math/minmax.hpp | 19 + include/universal/number/qd/math/next.hpp | 73 + include/universal/number/qd/math/numerics.hpp | 38 + include/universal/number/qd/math/pow.hpp | 69 + include/universal/number/qd/math/sqrt.hpp | 120 ++ .../universal/number/qd/math/trigonometry.hpp | 440 ++++++ include/universal/number/qd/math/truncate.hpp | 23 + include/universal/number/qd/mathlib.hpp | 24 + .../universal/number/qd/numeric_limits.hpp | 82 ++ include/universal/number/qd/qd.hpp | 76 + include/universal/number/qd/qd_fwd.hpp | 24 + include/universal/number/qd/qd_impl.hpp | 1281 +++++++++++++++++ include/universal/traits/qd_traits.hpp | 31 + static/qd/CMakeLists.txt | 15 +- static/qd/api/api.cpp | 312 ++++ static/qd/api/experiments.cpp | 207 +++ 27 files changed, 3477 insertions(+), 8 deletions(-) create mode 100644 include/universal/number/qd/attributes.hpp create mode 100644 include/universal/number/qd/exceptions.hpp create mode 100644 include/universal/number/qd/manipulators.hpp create mode 100644 include/universal/number/qd/math/classify.hpp create mode 100644 include/universal/number/qd/math/complex.hpp create mode 100644 include/universal/number/qd/math/error_and_gamma.hpp create mode 100644 include/universal/number/qd/math/exponent.hpp create mode 100644 include/universal/number/qd/math/fractional.hpp create mode 100644 include/universal/number/qd/math/hyperbolic.hpp create mode 100644 include/universal/number/qd/math/hypot.hpp create mode 100644 include/universal/number/qd/math/logarithm.hpp create mode 100644 include/universal/number/qd/math/minmax.hpp create mode 100644 include/universal/number/qd/math/next.hpp create mode 100644 include/universal/number/qd/math/numerics.hpp create mode 100644 include/universal/number/qd/math/pow.hpp create mode 100644 include/universal/number/qd/math/sqrt.hpp create mode 100644 include/universal/number/qd/math/trigonometry.hpp create mode 100644 include/universal/number/qd/math/truncate.hpp create mode 100644 include/universal/number/qd/mathlib.hpp create mode 100644 include/universal/number/qd/numeric_limits.hpp create mode 100644 include/universal/number/qd/qd.hpp create mode 100644 include/universal/number/qd/qd_fwd.hpp create mode 100644 include/universal/number/qd/qd_impl.hpp create mode 100644 include/universal/traits/qd_traits.hpp create mode 100644 static/qd/api/api.cpp create mode 100644 static/qd/api/experiments.cpp diff --git a/include/universal/number/qd/attributes.hpp b/include/universal/number/qd/attributes.hpp new file mode 100644 index 000000000..7dbd3ca18 --- /dev/null +++ b/include/universal/number/qd/attributes.hpp @@ -0,0 +1,65 @@ +#pragma once +// attributes.hpp: information functions for quad-double floating-point type and value attributes +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + +// functions to provide details about the properties of a quad-double (qd) configuration + inline bool sign(const qd& a) { + return a.sign(); + } + + inline int scale(const qd& a) { + return a.scale(); + } + + // generate the maxneg through maxpos value range of a quad-double configuration + std::string qd_range() { + qd v; + std::stringstream s; + s << std::setw(80) << type_tag(v) << " : [ " + << v.maxneg() << " ... " + << v.minneg() << " " + << "0 " + << v.minpos() << " ... " + << v.maxpos() << " ]"; + return s.str(); + } + + /* + // report dynamic range of a type, specialized for a quad-double + std::string dynamic_range(const qd& a) { + std::stringstream s; + qd b(SpecificValue::maxneg), c(SpecificValue::minneg), d(SpecificValue::minpos), e(SpecificValue::maxpos); + s << type_tag(a) << ": "; + s << "minpos scale " << std::setw(10) << d.scale() << " "; + s << "maxpos scale " << std::setw(10) << e.scale() << '\n'; + s << "[" << b << " ... " << c << ", -0, +0, " << d << " ... " << e << "]\n"; + s << "[" << to_binary(b) << " ... " << to_binary(c) << ", -0, +0, " << to_binary(d) << " ... " << to_binary(e) << "]\n"; + qd ninf(SpecificValue::infneg), pinf(SpecificValue::infpos); + s << "inclusive range = (" << to_binary(ninf) << ", " << to_binary(pinf) << ")\n"; + s << "inclusive range = (" << ninf << ", " << pinf << ")\n"; + return s.str(); + } + */ + + int minpos_scale(const qd& b) { + qd c(b); + return c.minpos().scale(); + } + + int maxpos_scale(const qd& b) { + qd c(b); + return c.maxpos().scale(); + } + + int max_negative_scale(const qd& b) { + qd c(b); + return c.maxneg().scale(); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/exceptions.hpp b/include/universal/number/qd/exceptions.hpp new file mode 100644 index 000000000..aaa79ca4c --- /dev/null +++ b/include/universal/number/qd/exceptions.hpp @@ -0,0 +1,64 @@ +#pragma once +// exceptions.hpp: definition of arbitrary configuration quad-double exceptions +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +namespace sw { namespace universal { + +// base class for quad-double arithmetic exceptions +struct qd_arithmetic_exception : public universal_arithmetic_exception { + qd_arithmetic_exception(const std::string& err) : universal_arithmetic_exception(std::string("quad-double arithmetic exception: ") + err) {}; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////// +/// specialized exceptions to aid application level exception handling + +// invalid_argument is thrown when a mathematical function argument is invalid +struct qd_invalid_argument : public qd_arithmetic_exception { + qd_invalid_argument() : qd_arithmetic_exception("invalid argument") {} +}; + +// not_a_number is thrown when a rvar is NaN +struct qd_not_a_number : public qd_arithmetic_exception { + qd_not_a_number() : qd_arithmetic_exception("not a number") {} +}; + +// divide by zero arithmetic exception for reals +struct qd_divide_by_zero : public qd_arithmetic_exception { + qd_divide_by_zero() : qd_arithmetic_exception("divide by zero") {} +}; + +// divide_by_nan is thrown when the denominator in a division operator is NaN +struct qd_divide_by_nan : public qd_arithmetic_exception { + qd_divide_by_nan() : qd_arithmetic_exception("divide by nan") {} +}; + +// operand_is_nan is thrown when an rvar in a binary operator is NaN +struct qd_operand_is_nan : public qd_arithmetic_exception { + qd_operand_is_nan() : qd_arithmetic_exception("operand is nan") {} +}; + +// negative argument to sqrt +struct qd_negative_sqrt_arg : public qd_arithmetic_exception { + qd_negative_sqrt_arg() : qd_arithmetic_exception("negative sqrt argument") {} +}; + +// negative argument to nroot +struct qd_negative_nroot_arg : public qd_arithmetic_exception { + qd_negative_nroot_arg() : qd_arithmetic_exception("negative nroot argument") {} +}; + + +/////////////////////////////////////////////////////////////////////////////////////////////////// +/// REAL INTERNAL OPERATION EXCEPTIONS + +struct qd_internal_exception : public universal_internal_exception { + qd_internal_exception(const std::string& err) : universal_internal_exception(std::string("quad-double internal exception: ") + err) {}; +}; + + +}} // namespace sw::universal diff --git a/include/universal/number/qd/manipulators.hpp b/include/universal/number/qd/manipulators.hpp new file mode 100644 index 000000000..ed5af5265 --- /dev/null +++ b/include/universal/number/qd/manipulators.hpp @@ -0,0 +1,43 @@ +// manipulators.hpp: definitions of helper functions for quad-double type manipulation +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +#include +// pull in the color printing for shells utility +#include + +namespace sw { namespace universal { + + // Generate a type tag for a quad-double + inline std::string type_tag(const qd& = {}) { + return std::string("quad-double"); + } + + // generate a binary, color-coded representation of the quad-double + inline std::string color_print(qd const& r, bool nibbleMarker = false) { + //constexpr unsigned es = 11; + //constexpr unsigned fbits = 106; + std::stringstream s; + + /* + Color red(ColorCode::FG_RED); + Color yellow(ColorCode::FG_YELLOW); + Color blue(ColorCode::FG_BLUE); + Color magenta(ColorCode::FG_MAGENTA); + Color cyan(ColorCode::FG_CYAN); + Color white(ColorCode::FG_WHITE); + Color def(ColorCode::FG_DEFAULT); + */ + for (int i = 0; i < 4; ++i) { + s << color_print(r[i], nibbleMarker); + if (i < 3) s << ", "; + } + return s.str(); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/classify.hpp b/include/universal/number/qd/math/classify.hpp new file mode 100644 index 000000000..c7791e962 --- /dev/null +++ b/include/universal/number/qd/math/classify.hpp @@ -0,0 +1,55 @@ +#pragma once +// classify.hpp: classification functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + +// STD LIB function for IEEE floats: Categorizes floating point value arg into the following categories: zero, subnormal, normal, infinite, NAN, or implementation-defined category. +int fpclassify(const qd& a) { + return (std::fpclassify(a.high())); +} + +// STD LIB function for IEEE floats: Determines if the given floating point number arg is a cfloative or negative infinity. +// specialized for quad-double (qd) +inline bool isinf(const qd& a) { + return (std::fpclassify(a.high()) == FP_INFINITE); +} + +// STD LIB function for IEEE floats: Determines if the given floating point number arg is a not-a-number (NaN) value. +// specialized for quad-double (qd) +inline bool isnan(const qd& a) { + return (std::fpclassify(a.high()) == FP_NAN); +} + +// STD LIB function for IEEE floats: Determines if the given floating point number arg has finite value i.e. it is normal, subnormal or zero, but not infinite or NaN. +// specialized for quad-double (qd) +inline bool isfinite(const qd& a) { + return (std::fpclassify(a.high()) != FP_INFINITE) && (std::fpclassify(a.high()) != FP_NAN); +} + +// STD LIB function for IEEE floats: Determines if the given floating point number arg is normal, i.e. is neither zero, subnormal, infinite, nor NaN. +// specialized for quad-double (qd) +inline bool isnormal(const qd& a) { + return (std::fpclassify(a.high()) == FP_NORMAL); +} + +// STD LIB function for IEEE floats: Determines if the given floating point number arg is denormal, i.e. is neither zero, normal, infinite, nor NaN. +// specialized for quad-double (qd) +inline bool isdenorm(const qd& a) { + return (std::fpclassify(a.high()) == FP_SUBNORMAL); +} + +inline bool iszero(const qd& a) { + return (std::fpclassify(a.high()) == FP_ZERO); +} + +bool signbit(qd const& a) { + auto signA = std::copysign(1.0, a.high()); + return signA < 0.0; +} + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/complex.hpp b/include/universal/number/qd/math/complex.hpp new file mode 100644 index 000000000..79adf6c46 --- /dev/null +++ b/include/universal/number/qd/math/complex.hpp @@ -0,0 +1,12 @@ +#pragma once +// complex.hpp: complex support for double-double floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/error_and_gamma.hpp b/include/universal/number/qd/math/error_and_gamma.hpp new file mode 100644 index 000000000..0999f6945 --- /dev/null +++ b/include/universal/number/qd/math/error_and_gamma.hpp @@ -0,0 +1,21 @@ +#pragma once +// error_and_gamma.hpp: error/gamma function support for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + // Compute the error function erf(x) = 2 over sqrt(PI) times Integral from 0 to x of e ^ (-t)^2 dt + qd erf(qd x) { + return qd(std::erf(double(x.high()))); + } + + // Compute the complementary error function: 1 - erf(x) + qd erfc(qd x) { + return qd(std::erfc(double(x.high()))); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/exponent.hpp b/include/universal/number/qd/math/exponent.hpp new file mode 100644 index 000000000..087b5f0e9 --- /dev/null +++ b/include/universal/number/qd/math/exponent.hpp @@ -0,0 +1,105 @@ +#pragma once +// exponent.hpp: exponent functions for quad-double (qd) floating-point +// +// algorithms courtesy Scibuilders, Jack Poulson +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + // fwd reference + qd ldexp(qd const&, int); + +static const int n_inv_fact = 15; +static const double inv_fact[n_inv_fact][2] = { + { 1.66666666666666657e-01, 9.25185853854297066e-18}, + { 4.16666666666666644e-02, 2.31296463463574266e-18}, + { 8.33333333333333322e-03, 1.15648231731787138e-19}, + { 1.38888888888888894e-03, -5.30054395437357706e-20}, + { 1.98412698412698413e-04, 1.72095582934207053e-22}, + { 2.48015873015873016e-05, 2.15119478667758816e-23}, + { 2.75573192239858925e-06, -1.85839327404647208e-22}, + { 2.75573192239858883e-07, 2.37677146222502973e-23}, + { 2.50521083854417202e-08, -1.44881407093591197e-24}, + { 2.08767569878681002e-09, -1.20734505911325997e-25}, + { 1.60590438368216133e-10, 1.25852945887520981e-26}, + { 1.14707455977297245e-11, 2.06555127528307454e-28}, + { 7.64716373181981641e-13, 7.03872877733453001e-30}, + { 4.77947733238738525e-14, 4.39920548583408126e-31}, + { 2.81145725434552060e-15, 1.65088427308614326e-31} +}; + +// Base-e exponential function +qd exp(const qd& a) { + /* Strategy: We first reduce the size of x by noting that + + exp(kr + m * log(2)) = 2^m * exp(r)^k + + where m and k are integers. By choosing m appropriately + we can make |kr| <= log(2) / 2 = 0.347. Then exp(r) is + evaluated using the familiar Taylor series. Reducing the + argument substantially speeds up the convergence. */ + + const double k = 512.0; + const double inv_k = 1.0 / k; + + if (a.high() <= -709.0) return qd(0.0); + + if (a.high() >= 709.0) return qd(SpecificValue::infpos); + + if (a.iszero()) return qd(1.0); + + if (a.isone()) return qd_e; + + double m = std::floor(a.high() / qd_log2.high() + 0.5); + qd r = mul_pwr2(a - qd_log2 * m, inv_k); + qd s, t, p; + + p = sqr(r); + s = r + mul_pwr2(p, 0.5); + p *= r; + t = p * qd(inv_fact[0][0], inv_fact[0][1]); + int i = 0; + do { + s += t; + p *= r; + ++i; + t = p * qd(inv_fact[i][0], inv_fact[i][1]); + } while (std::abs(double(t)) > inv_k * d_eps && i < 5); + + s += t; + + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s = mul_pwr2(s, 2.0) + sqr(s); + s += 1.0; + + return ldexp(s, static_cast(m)); +} + +// Base-2 exponential function +qd exp2(qd x) { + return qd(std::exp2(double(x))); +} + +// Base-10 exponential function +qd exp10(qd x) { + return qd(std::exp10(double(x))); +} + +// Base-e exponential function exp(x)-1 +qd expm1(qd x) { + return qd(std::expm1(double(x))); +} + + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/fractional.hpp b/include/universal/number/qd/math/fractional.hpp new file mode 100644 index 000000000..d1ef7f9da --- /dev/null +++ b/include/universal/number/qd/math/fractional.hpp @@ -0,0 +1,21 @@ +#pragma once +// fractional.hpp: fractional support for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + // fmod retuns x - n*y where n = x/y with the fractional part truncated + qd fmod(qd x, qd y) { + return qd(std::fmod(double(x), double(y))); + } + + // shim to stdlib + qd remainder(qd x, qd y) { + return qd(std::remainder(double(x), double(y))); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/hyperbolic.hpp b/include/universal/number/qd/math/hyperbolic.hpp new file mode 100644 index 000000000..33f7b0a49 --- /dev/null +++ b/include/universal/number/qd/math/hyperbolic.hpp @@ -0,0 +1,41 @@ +#pragma once +// hyperbolic.hpp: hyperbolic function support for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + // hyperbolic sine of an angle of x radians + qd sinh(qd x) { + return qd(std::sinh(double(x))); + } + + // hyperbolic cosine of an angle of x radians + qd cosh(qd x) { + return qd(std::cosh(double(x))); + } + + // hyperbolic tangent of an angle of x radians + qd tanh(qd x) { + return qd(std::tanh(double(x))); + } + + // hyperbolic cotangent of an angle of x radians + qd atanh(qd x) { + return qd(std::atanh(double(x))); + } + + // hyperbolic cosecant of an angle of x radians + qd acosh(qd x) { + return qd(std::acosh(double(x))); + } + + // hyperbolic secant of an angle of x radians + qd asinh(qd x) { + return qd(std::asinh(double(x))); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/hypot.hpp b/include/universal/number/qd/math/hypot.hpp new file mode 100644 index 000000000..04e3f8e3e --- /dev/null +++ b/include/universal/number/qd/math/hypot.hpp @@ -0,0 +1,15 @@ +#pragma once +// hypot.hpp: hypot support for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + qd hypot(qd x, qd y) { + return qd(std::hypot(double(x), double(y))); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/logarithm.hpp b/include/universal/number/qd/math/logarithm.hpp new file mode 100644 index 000000000..4f3a57f5b --- /dev/null +++ b/include/universal/number/qd/math/logarithm.hpp @@ -0,0 +1,209 @@ +#pragma once +// logarithm.hpp: logarithm functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +namespace sw { namespace universal { + + + +// assumes 0.0 < a < inf +qd _log(qd const& a) { +#ifdef LATER + int k; + qd f = std::frexp(a, &k); // 0.5 <= |f| < 1.0 + if (f < _inv_sqrt2) { + f = std::ldexp(f, 1); + --k; + } + + // sqrt( 0.5 ) <= f < sqrt( 2.0 ) + // -0.1715... <= s < 0.1715... + // + double res[3]; + res[0] = two_sum(f.high(), 1.0, res[1]); + res[1] = two_sum(f.high(), res[1], res[2]); + qd f_plus = res[0] == 0.0 ? qd(res[1], res[2]) : qd(res[0], res[1]); + res[0] = two_sum(f.high(), -1.0, res[1]); + res[1] = two_sum(f.low(), res[1], res[2]); + qd f_minus = res[0] == 0.0 ? qd_real(res[1], res[2]) : qd(res[0], res[1]); + + qd s = f_minus / f_plus; + + // calculate log( f ) = log( 1 + s ) - log( 1 - s ) + // + // log( 1+s ) = s - s^2/2 + s^3/3 - s^4/4 ... + // log( 1-s ) = -s + s^2/2 - s^3/3 - s^4/4 ... + // log( f ) = 2*s + 2s^3/3 + 2s^5/5 + ... + // + qd s2 = s * s; + + // TODO - economize the power series using Chebyshev polynomials + // + qd x = inv_int[41]; + for (int i = 41; i > 1; i -= 2) { + x = std::Fma(x, s2, inv_int[i - 2]); + } + x *= std::ldexp(s, 1); // x *= 2*s + + return std::Fma(k, _ln2, x); +#endif + return qd(std::log(a.high()), 0.0); +} + +// assumes -1.0 < a < 2.0 +// +qd _log1p(qd const& a) { +#ifdef LATER + static const qd a_max = _sqrt2 - 1.0; + static const qd a_min = _inv_sqrt2 - 1.0; + + int ilog = std::ilogb(a) + 1; // 0.5 <= frac < 1.0 + + if (ilog < -std::numeric_limits::digits / 2) // |a| <= 2^-54 - error O( 2^-108) + return a; + if (ilog < -std::numeric_limits::digits / 3) // |a| <= 2^-36 - error O( 2^-108) + return a * std::Fma(a, -0.5, 1.0); + if (ilog < -std::numeric_limits::digits / 4) // |a| <= 2^-27 - error O( 2^-108) + return a * std::Fma(a, -std::Fma(a, -_third, 0.5), 1.0); + + qd f_minus = a; + int k = 0; + + if ((a > a_max) || (a < a_min)) + { + double res[3]; + res[0] = two_sum(a.high(), 1.0, res[1]); + res[1] = two_sum(a.low(), res[1], res[2]); + qd f_p1 = res[0] == 0.0 ? qd(res[1], res[2]) : qd_real(res[0], res[1]); + + f_p1 = std::frexp(f_p1, &k); // 0.5 <= |f_p1| < 1.0; k <= 2 + if (f_p1 < _inv_sqrt2) { + --k; + std::ldexp(f_p1, 1); + } + + // at this point, we have 2^k * ( 1.0 + f ) = 1.0 + a + // sqrt( 0.5 ) <= 1.0 + f <= sqrt( 2.0 ) + // + // f = 2^-k * a - ( 1.0 - 2^-k ) + double df[2]; + df[0] = two_sum(1.0, -std::ldexp(1.0, -k), df[1]); + f_minus = std::ldexp(a, -k) - qd_real(df[0], df[1]); + } + + qd f_plus = f_minus + 2.0; + qd s = f_minus / f_plus; + + // calculate log( f ) = log( 1 + s ) - log( 1 - s ) + // + // log( 1+s ) = s - s^2/2 + s^3/3 - s^4/4 ... + // log( 1-s ) = -s + s^2/2 - s^3/3 - s^4/4 ... + // log( f ) = 2*s + 2s^3/3 + 2s^5/5 + ... + // + qd s2 = s * s; + + // TODO - economize the power series using Chebyshev polynomials + // + qd x = inv_int[41]; + for (int i = 41; i > 1; i -= 2) { + x = std::Fma(x, s2, inv_int[i - 2]); + } + x *= std::ldexp(s, 1); // x *= 2*s + + return std::Fma(k, _ln2, x); +#endif + return qd(std::log1p(a.high()), 0.0); +} + +// Natural logarithm of x +qd log(qd const& a) { + if (a.isnan()) return a; + + if (a.iszero()) return -std::numeric_limits< qd >::infinity(); + + if (a.isone()) return 0.0; + + if (a.sign()) { + std::cerr << "log: non-positive argument"; + errno = EDOM; + return std::numeric_limits< qd >::quiet_NaN(); + } + + if (a.isinf()) return a; + + return _log(a); +} + +// Binary logarithm of x +qd log2(qd const& a) +{ + if (a.isnan()) return a; + + if (a.iszero()) return -std::numeric_limits< qd >::infinity(); + + if (a.isone()) return 0.0; + + if (a.sign()) { + std::cerr << "log2: non-positive argument"; + errno = EDOM; + return std::numeric_limits< qd >::quiet_NaN(); + } + + if (a.isinf()) return a; + + qd _lge{}; + return _lge * _log(a); +} + +// Decimal logarithm of x +qd log10(qd const& a) { + if (a.isnan()) return a; + + if (a.iszero()) return -std::numeric_limits< qd >::infinity(); + + if (a.isone()) return 0.0; + + if (a.sign()) { + std::cerr << "log10: non-positive argument"; + errno = EDOM; + return std::numeric_limits< qd >::quiet_NaN(); + } + + if (a.isinf()) return a; + + qd _loge{}; + return _loge * _log(a); +} + +// Natural logarithm of 1+x +qd log1p(qd const& a) +{ + if (a.isnan()) return a; + + if (a.iszero()) return 0.0; + + if (a == -1.0) return -std::numeric_limits< qd >::infinity(); + + if (a < -1.0) { + std::cerr << "log1p: non-positive argument"; + errno = EDOM; + return std::numeric_limits< qd >::quiet_NaN(); + } + + if (a.isinf()) return a; + + + if ((a >= 2.0) || (a <= -0.5)) // a >= 2.0 - no loss of significant bits - use log() + return _log(1.0 + a); + + // at this point, -1.0 < a < 2.0 + // + return _log1p(a); +} + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/minmax.hpp b/include/universal/number/qd/math/minmax.hpp new file mode 100644 index 000000000..bccbfdcaf --- /dev/null +++ b/include/universal/number/qd/math/minmax.hpp @@ -0,0 +1,19 @@ +#pragma once +// minmax.hpp: minmax support for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + qd min(qd x, qd y) { + return qd(std::min(double(x), double(y))); + } + + qd max(qd x, qd y) { + return qd(std::max(double(x), double(y))); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/next.hpp b/include/universal/number/qd/math/next.hpp new file mode 100644 index 000000000..e995082e3 --- /dev/null +++ b/include/universal/number/qd/math/next.hpp @@ -0,0 +1,73 @@ +#pragma once +// next.hpp: nextafter/nexttoward functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + +/* +Parameters + x Base value. + t Value toward which the return value is approximated. +If both parameters compare equal, the function returns t. + +Return Value + The next representable value after x in the direction of t. + + If x is the largest finite value representable in the type, + and the result is infinite or not representable, an overflow range error occurs. + + If an overflow range error occurs: + - And math_errhandling has MATH_ERRNO set: the global variable errno is set to ERANGE. + - And math_errhandling has MATH_ERREXCEPT set: FE_OVERFLOW is raised. + */ +qd nextafter(qd x, qd target) { + if (x == target) return target; + if (target.isnan()) { + if (x.isneg()) { + --x; + } + else { + ++x; + } + } + else { + if (x > target) { + --x; + } + else { + ++x; + } + } + return x; +} + +/* TODO +qd nexttoward(qd x, qd target) { + double _x(x); + if (_x == target) return x; + if (target.isnan()) { + if (_x.isneg()) { + --_x; + } + else { + ++_x; + } + } + else { + if (_x > target) { + --_x; + } + else { + ++_x; + } + } + x = _x; + return x; +} +*/ + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/numerics.hpp b/include/universal/number/qd/math/numerics.hpp new file mode 100644 index 000000000..d63d47f4f --- /dev/null +++ b/include/universal/number/qd/math/numerics.hpp @@ -0,0 +1,38 @@ +#pragma once +// numerics.hpp: numerics functions for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +namespace sw { namespace universal { + + // clang implementation is calling these functions so we need implementations for doubledouble (qd) + + + // copysign returns a value with the magnitude of a, and the sign of b + qd copysign(qd const& a, qd const& b) { + auto signA = std::copysign(1.0, a.high()); + auto signB = std::copysign(1.0, b.high()); + + return signA != signB ? -a : a; + } + + // decompose doubledouble into a fraction and an exponent + qd frexp(qd const& a, int* pexp) { + double hi = std::frexp(a.high(), pexp); + double lo = std::ldexp(a.low(), -*pexp); + return qd(hi, lo); + } + + // recompose doubledouble from a fraction and an exponent + qd ldexp(qd const& a, int exp) { + static_assert(std::numeric_limits< qd >::radix == 2, "CONFIGURATION: qd radix must be 2!"); + static_assert(std::numeric_limits< double >::radix == 2, "CONFIGURATION: double radix must be 2!"); + + return qd(std::ldexp(a.high(), exp), std::ldexp(a.low(), exp)); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/pow.hpp b/include/universal/number/qd/math/pow.hpp new file mode 100644 index 000000000..b20fc5ab7 --- /dev/null +++ b/include/universal/number/qd/math/pow.hpp @@ -0,0 +1,69 @@ +#pragma once +// pow.hpp: pow functions for quad-double (qd) floating-point +// +// algorithms courtesy Scibuilders, Jack Poulson +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +namespace sw { namespace universal { + + // fwd reference + qd exp(const qd&); + + // power function + qd pow(const qd& a, const qd& b) { + return exp(b * log(a)); + } + + // power function of a qd to double + qd pow(qd x, double y) { + return pow(x, qd(y)); + } + + // Computes the n-th power of a double-double number. + // NOTE: 0^0 causes an error. + qd npwr(const qd& a, int n) { + if (n == 0) { +#if DOUBLEDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (a.iszero()) throw qd_invalid_argument(); +#else // ! DOUBLEDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (a.iszero()) { + std::cerr << "(npwr): Invalid argument\n"; + return qd(SpecificValue::snan); + } +#endif // ! DOUBLEDOUBLE_THROW_ARITHMETIC_EXCEPTION + return 1.0; + } + + qd r = a; + qd s = 1.0; + int N = std::abs(n); + + if (N > 1) { + // Use binary exponentiation + while (N > 0) { + if (N % 2 == 1) { + s *= r; + } + N /= 2; + if (N > 0) r = sqr(r); + } + } else { + s = r; + } + + // if n is negative then compute the reciprocal + if (n < 0) return (1.0 / s); + return s; + } + + qd pow(const qd& a, int n) { + return npwr(a, n); + } + + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/sqrt.hpp b/include/universal/number/qd/math/sqrt.hpp new file mode 100644 index 000000000..14e6ee6ba --- /dev/null +++ b/include/universal/number/qd/math/sqrt.hpp @@ -0,0 +1,120 @@ +#pragma once +// sqrt.hpp: sqrt functions for quad-double (qd) floats +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +#ifndef QUADDOUBLE_NATIVE_SQRT +#define QUADDOUBLE_NATIVE_SQRT 1 +#endif + +namespace sw { namespace universal { + +#if QUADDOUBLE_NATIVE_SQRT + + // Computes the square root of the quad-double number qd. + // NOTE: qd must be a non-negative number + inline qd sqrt(qd a) { + /* Strategy: Use Karp's trick: if x is an approximation + to sqrt(a), then + + sqrt(a) = a*x + [a - (a*x)^2] * x / 2 (approx) + + The approximation is accurate to twice the accuracy of x. + Also, the multiplication (a*x) and [-]*x can be done with + only half the precision. + */ + + if (a.iszero()) return qd{}; + +#if QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (a.isneg()) throw qd_negative_sqrt_arg(); +#else + if (a.isneg()) std::cerr << "quad-double argument to sqrt is negative: " << a << std::endl; +#endif + + double x = 1.0 / std::sqrt(a.high()); + double ax = a.high() * x; + return aqd(ax, (a - sqr(qd(ax))).high() * (x * 0.5)); + } + +#else + + // sqrt shim for quad-double + inline qd sqrt(qd a) { +#if QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (a.isneg()) throw qd_negative_sqrt_arg(); +#else // ! QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (a.isneg()) std::cerr << "quad-double argument to sqrt is negative: " << a << std::endl; +#endif // ! QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (a.iszero()) return a; + return qd(std::sqrt(double(a))); + } + +#endif // ! QUADDOUBLE_NATIVE_SQRT + + // Computes the square root of a double in quad-double precision. + qd sqrt(double d) { + return sw::universal::sqrt(qd(d)); + } + + // reciprocal sqrt + inline qd rsqrt(qd a) { + qd v = sw::universal::sqrt(a); + return reciprocal(v); + } + + + /* Computes the n-th root of the quad-double number a. + NOTE: n must be a positive integer. + NOTE: If n is even, then a must not be negative. */ + qd nroot(const qd& a, int n) { + /* Strategy: Use Newton iteration for the function + + f(x) = x^(-n) - a + + to find its root a^{-1/n}. The iteration is thus + + x' = x + x * (1 - a * x^n) / n + + which converges quadratically. We can then find + a^{1/n} by taking the reciprocal. + */ + +#if QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (n <= 0) throw qd_negative_nroot_arg(); + + if (n % 2 == 0 && a.isneg()) throw qd_negative_nroot_arg(); + +#else // ! QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + if (n <= 0) { + std::cerr << "quad-double nroot argument is negative: " << n << std::endl; + } + + if (n % 2 == 0 && a.isneg()) { + std::cerr << "quad-double nroot argument is negative: " << n << std::endl; + return qd(SpecificValue::snan); + } + +#endif // ! QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION + + if (n == 1) return a; + if (n == 2) return sqrt(a); + + if (a.iszero()) return qd(0.0); + + // Note a^{-1/n} = exp(-log(a)/n) + qd r = abs(a); + qd x = std::exp(-std::log(r.high()) / n); + + // Perform Newton's iteration. + x += x * (1.0 - r * npwr(x, n)) / static_cast(n); + if (a.high() < 0.0) x = -x; + + return 1.0/x; + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/trigonometry.hpp b/include/universal/number/qd/math/trigonometry.hpp new file mode 100644 index 000000000..019298bb8 --- /dev/null +++ b/include/universal/number/qd/math/trigonometry.hpp @@ -0,0 +1,440 @@ +#pragma once +// trigonometry.hpp: trigonometry function support for quad-double (qd) floating-point +// +// algorithms and constants courtesy of Scibuilders, Jack Poulson +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + // pi/16 + static constexpr qd _pi16(1.963495408493620697e-01, 7.654042494670957545e-18); + + // Table of sin(k * pi/16) and cos(k * pi/16). + static const double sin_table[4][2] = { + {1.950903220161282758e-01, -7.991079068461731263e-18}, + {3.826834323650897818e-01, -1.005077269646158761e-17}, + {5.555702330196021776e-01, 4.709410940561676821e-17}, + {7.071067811865475727e-01, -4.833646656726456726e-17} + }; + + static const double cos_table[4][2] = { + {9.807852804032304306e-01, 1.854693999782500573e-17}, + {9.238795325112867385e-01, 1.764504708433667706e-17}, + {8.314696123025452357e-01, 1.407385698472802389e-18}, + {7.071067811865475727e-01, -4.833646656726456726e-17} + }; + + /* Computes sin(a) using Taylor series. + Assumes |a| <= pi/32. */ + inline qd sin_taylor(const qd& a) { + const double thresh = 0.5 * std::abs(double(a)) * d_eps; + + if (a.iszero()) return 0.0; + + qd x = -sqr(a); + qd s{ a }; + qd r{ a }; + qd t{}; + int i = 0; + do { + r *= x; + t = r * qd(inv_fact[i][0], inv_fact[i][1]); + s += t; + i += 2; + } while (i < n_inv_fact && std::abs(double(t)) > thresh); + + return s; + } + + inline qd cos_taylor(const qd& a) { + const double thresh = 0.5 * d_eps; + + if (a.iszero()) { + return 1.0; + } + + qd x = -sqr(a); + qd r{ x }; + qd s = 1.0 + mul_pwr2(r, 0.5); + qd t{}; + int i = 1; + do { + r *= x; + t = r * qd(inv_fact[i][0], inv_fact[i][1]); + s += t; + i += 2; + } while (i < n_inv_fact && std::abs(double(t)) > thresh); + + return s; + } + + inline void sincos_taylor(const qd& a, qd& sin_a, qd& cos_a) { + if (a.iszero()) { + sin_a = 0.0; + cos_a = 1.0; + return; + } + + sin_a = sin_taylor(a); + cos_a = sqrt(1.0 - sqr(sin_a)); + } + + + inline qd sin(const qd& a) { + + /* Strategy. To compute sin(x), we choose integers a, b so that + + x = s + a * (pi/2) + b * (pi/16) + + and |s| <= pi/32. Using the fact that + + sin(pi/16) = 0.5 * sqrt(2 - sqrt(2 + sqrt(2))) + + we can compute sin(x) from sin(s), cos(s). This greatly + increases the convergence of the sine Taylor series. */ + + if (a.iszero()) return 0.0; + + // approximately reduce modulo 2*pi + qd z = nint(a / qd_2pi); + qd r = a - qd_2pi * z; + + // approximately reduce modulo pi/2 and then modulo pi/16. + qd t; + double q = std::floor(r.high() / qd_pi2.high() + 0.5); + t = r - qd_pi2 * q; + int j = static_cast(q); + q = std::floor(t.high() / _pi16.high() + 0.5); + t -= _pi16 * q; + int k = static_cast(q); + int abs_k = std::abs(k); + + if (j < -2 || j > 2) { + std::cerr << "sin: Cannot reduce modulo pi/2\n"; + return qd(SpecificValue::snan); + } + + if (abs_k > 4) { + std::cerr << "(qd::sin): Cannot reduce modulo pi/16\n"; + return qd(SpecificValue::snan); + } + + if (k == 0) { + switch (j) { + case 0: + return sin_taylor(t); + case 1: + return cos_taylor(t); + case -1: + return -cos_taylor(t); + default: + return -sin_taylor(t); + } + } + + qd u(cos_table[abs_k - 1][0], cos_table[abs_k - 1][1]); + qd v(sin_table[abs_k - 1][0], sin_table[abs_k - 1][1]); + qd sin_t, cos_t; + sincos_taylor(t, sin_t, cos_t); + if (j == 0) { + if (k > 0) { + r = u * sin_t + v * cos_t; + } + else { + r = u * sin_t - v * cos_t; + } + } + else if (j == 1) { + if (k > 0) { + r = u * cos_t - v * sin_t; + } + else { + r = u * cos_t + v * sin_t; + } + } + else if (j == -1) { + if (k > 0) { + r = v * sin_t - u * cos_t; + } + else if (k < 0) { + r = -u * cos_t - v * sin_t; + } + } + else { + if (k > 0) { + r = -u * sin_t - v * cos_t; + } + else { + r = v * cos_t - u * sin_t; + } + } + + return r; + } + + inline qd cos(const qd& a) { + + if (a.iszero()) return 1.0; + + // approximately reduce modulo 2*pi + qd z = nint(a / qd_2pi); + qd r = a - z * qd_2pi; + + // approximately reduce modulo pi/2 and then modulo pi/16 + qd t; + double q = std::floor(r.high() / qd_pi2.high() + 0.5); + t = r - qd_pi2 * q; + int j = static_cast(q); + q = std::floor(t.high() / _pi16.high() + 0.5); + t -= _pi16 * q; + int k = static_cast(q); + int abs_k = std::abs(k); + + if (j < -2 || j > 2) { + std::cerr << "cos: Cannot reduce modulo pi/2\n"; + return qd(SpecificValue::snan); + } + + if (abs_k > 4) { + std::cerr << "cos: Cannot reduce modulo pi / 16\n"; + return qd(SpecificValue::snan); + } + + if (k == 0) { + switch (j) { + case 0: + return cos_taylor(t); + case 1: + return -sin_taylor(t); + case -1: + return sin_taylor(t); + default: + return -cos_taylor(t); + } + } + + qd sin_t, cos_t; + sincos_taylor(t, sin_t, cos_t); + qd u(cos_table[abs_k - 1][0], cos_table[abs_k - 1][1]); + qd v(sin_table[abs_k - 1][0], sin_table[abs_k - 1][1]); + + if (j == 0) { + if (k > 0) { + r = u * cos_t - v * sin_t; + } + else { + r = u * cos_t + v * sin_t; + } + } + else if (j == 1) { + if (k > 0) { + r = -u * sin_t - v * cos_t; + } + else { + r = v * cos_t - u * sin_t; + } + } + else if (j == -1) { + if (k > 0) { + r = u * sin_t + v * cos_t; + } + else { + r = u * sin_t - v * cos_t; + } + } + else { + if (k > 0) { + r = v * sin_t - u * cos_t; + } + else { + r = -u * cos_t - v * sin_t; + } + } + + return r; + } + + inline void sincos(const qd& a, qd& sin_a, qd& cos_a) { + + if (a.iszero()) { + sin_a = 0.0; + cos_a = 1.0; + return; + } + + // approximately reduce modulo 2*pi + qd z = nint(a / qd_2pi); + qd r = a - qd_2pi * z; + + // approximately reduce module pi/2 and pi/16 + qd t; + double q = std::floor(r.high() / qd_pi2.high() + 0.5); + t = r - qd_pi2 * q; + int j = static_cast(q); + int abs_j = std::abs(j); + q = std::floor(t.high() / _pi16.high() + 0.5); + t -= _pi16 * q; + int k = static_cast(q); + int abs_k = std::abs(k); + + if (abs_j > 2) { + std::cerr << "sincos: Cannot reduce modulo pi/2\n"; + cos_a = sin_a = qd(SpecificValue::snan); + return; + } + + if (abs_k > 4) { + std::cerr << "sincos: Cannot reduce modulo pi/16\n"; + cos_a = sin_a = qd(SpecificValue::snan); + return; + } + + qd sin_t, cos_t; + qd s, c; + + sincos_taylor(t, sin_t, cos_t); + + if (abs_k == 0) { + s = sin_t; + c = cos_t; + } + else { + qd u(cos_table[abs_k - 1][0], cos_table[abs_k - 1][1]); + qd v(sin_table[abs_k - 1][0], sin_table[abs_k - 1][1]); + + if (k > 0) { + s = u * sin_t + v * cos_t; + c = u * cos_t - v * sin_t; + } + else { + s = u * sin_t - v * cos_t; + c = u * cos_t + v * sin_t; + } + } + + if (abs_j == 0) { + sin_a = s; + cos_a = c; + } + else if (j == 1) { + sin_a = c; + cos_a = -s; + } + else if (j == -1) { + sin_a = -c; + cos_a = s; + } + else { + sin_a = -s; + cos_a = -c; + } + + } + + inline qd atan2(const qd& y, const qd& x) { + /* Strategy: Instead of using Taylor series to compute + arctan, we instead use Newton's iteration to solve + the equation + + sin(z) = y/r or cos(z) = x/r + + where r = sqrt(x^2 + y^2). + The iteration is given by + + z' = z + (y - sin(z)) / cos(z) (for equation 1) + z' = z - (x - cos(z)) / sin(z) (for equation 2) + + Here, x and y are normalized so that x^2 + y^2 = 1. + If |x| > |y|, then first iteration is used since the + denominator is larger. Otherwise, the second is used. + */ + + if (x.iszero()) { + + if (y.iszero()) { + /* Both x and y is zero. */ + std::cerr << "atan2: Both arguments zero\n"; + return qd(SpecificValue::snan); + } + + return (y.ispos()) ? qd_pi2 : -qd_pi2; + } + else if (y.iszero()) { + return (x.ispos()) ? qd(0.0) : qd_pi; + } + + if (x == y) { + return (y.ispos()) ? qd_pi4 : -qd_3pi4; + } + + if (x == -y) { + return (y.ispos()) ? qd_3pi4 : -qd_pi4; + } + + qd r = sqrt(sqr(x) + sqr(y)); + qd xx = x / r; + qd yy = y / r; + + // Compute double precision approximation to atan. + qd z = std::atan2(double(y), double(x)); + qd sin_z, cos_z; + + if (std::abs(xx.high()) > std::abs(yy.high())) { + // Use Newton iteration 1. z' = z + (y - sin(z)) / cos(z) + sincos(z, sin_z, cos_z); + z += (yy - sin_z) / cos_z; + } + else { + // Use Newton iteration 2. z' = z - (x - cos(z)) / sin(z) + sincos(z, sin_z, cos_z); + z -= (xx - cos_z) / sin_z; + } + + return z; + } + + inline qd atan(const qd& a) { + return atan2(a, qd(1.0)); + } + + inline qd tan(const qd& a) { + qd s, c; + sincos(a, s, c); + return s / c; + } + + inline qd asin(const qd& a) { + qd abs_a = abs(a); + + if (abs_a > 1.0) { + std::cerr << "asin: Argument out of domain\n"; + return qd(SpecificValue::snan); + } + + if (abs_a.isone()) { + return (a.ispos()) ? qd_pi2 : -qd_pi2; + } + + return atan2(a, sqrt(1.0 - sqr(a))); + } + + inline qd acos(const qd& a) { + qd abs_a = abs(a); + + if (abs_a > 1.0) { + std::cerr << "acos: Argument out of domain\n"; + return qd(SpecificValue::snan); + } + + if (abs_a.isone()) { + return (a.ispos()) ? qd(0.0) : qd_pi; + } + + return atan2(sqrt(1.0 - sqr(a)), a); + } + +}} // namespace sw::universal diff --git a/include/universal/number/qd/math/truncate.hpp b/include/universal/number/qd/math/truncate.hpp new file mode 100644 index 000000000..bcbcb3236 --- /dev/null +++ b/include/universal/number/qd/math/truncate.hpp @@ -0,0 +1,23 @@ +#pragma once +// truncate.hpp: truncate support for quad-double (qd) floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + // Truncate value by rounding toward zero, returning the nearest integral value that is not larger in magnitude than x + qd trunc(qd x) { + return qd(std::trunc(double(x))); + } + + // Round to nearest: returns the integral value that is nearest to x, with halfway cases rounded away from zero + qd round(qd x) { + return qd(std::round(double(x))); + } + + // floor and ceil are being used in the class definition and are defined in that file + +}} // namespace sw::universal diff --git a/include/universal/number/qd/mathlib.hpp b/include/universal/number/qd/mathlib.hpp new file mode 100644 index 000000000..ad52efcb8 --- /dev/null +++ b/include/universal/number/qd/mathlib.hpp @@ -0,0 +1,24 @@ +#pragma once +// mathlib.hpp: definition of mathematical functions for the double-double floating-point +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/include/universal/number/qd/numeric_limits.hpp b/include/universal/number/qd/numeric_limits.hpp new file mode 100644 index 000000000..53bba8c2d --- /dev/null +++ b/include/universal/number/qd/numeric_limits.hpp @@ -0,0 +1,82 @@ +#pragma once +// numeric_limits.hpp: definition of numeric_limits for quad-double types +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +namespace std { + +template<> +class numeric_limits< sw::universal::qd > { +public: + using QuadDouble = sw::universal::qd; + static constexpr bool is_specialized = true; + static constexpr QuadDouble min() { // return minimum value + // return QuadDouble(sw::universal::SpecificValue::minpos); + return QuadDouble(radix * (numeric_limits< double >::min() / numeric_limits< double >::epsilon())); + } + static constexpr QuadDouble max() { // return maximum value + //return QuadDouble(sw::universal::SpecificValue::maxpos); + return QuadDouble(numeric_limits< double >::max()); + } + static constexpr QuadDouble lowest() { // return most negative value + //return QuadDouble(sw::universal::SpecificValue::maxneg); + return (-(max)()); + } + static constexpr QuadDouble epsilon() { // return smallest effective increment from 1.0 + return numeric_limits< double >::epsilon() * numeric_limits< double >::epsilon() / radix; + } + static constexpr QuadDouble round_error() { // return largest rounding error + return QuadDouble(1.0 / radix); + } + static constexpr QuadDouble denorm_min() { // return minimum denormalized value + // return QuadDouble(sw::universal::SpecificValue::minpos); + return 0.0; + } + static constexpr QuadDouble infinity() { // return positive infinity + return QuadDouble(sw::universal::SpecificValue::infpos); + //return numeric_limits< double >::infinity(); + } + static constexpr QuadDouble quiet_NaN() { // return non-signaling NaN + return QuadDouble(sw::universal::SpecificValue::qnan); + //return numeric_limits< double >::quiet_NaN(); + } + static constexpr QuadDouble signaling_NaN() { // return signaling NaN + return QuadDouble(sw::universal::SpecificValue::snan); + //return numeric_limits< double >::signaling_NaN(); + } + + static constexpr int digits = 2 * std::numeric_limits::digits; + static constexpr int digits10 = static_cast(digits * 0.30103); + static constexpr int max_digits10 = digits10; + static constexpr bool is_signed = true; + static constexpr bool is_integer = false; + static constexpr bool is_exact = false; + static constexpr int radix = 2; + + // C++ specification: min_exponent is one more than the smallest negative power + // of the radix that is a valid normalized number + static constexpr int min_exponent = QuadDouble::MIN_EXP_NORMAL + 1; + static constexpr int min_exponent10 = static_cast(min_exponent * 0.30103); + // C++ specification: max_exponent is one more than the largest integer power + // of the radix that is a valid finite floating-point number + static constexpr int max_exponent = QuadDouble::MAX_EXP; + static constexpr int max_exponent10 = static_cast(max_exponent * 0.30103); + static constexpr bool has_infinity = true; + static constexpr bool has_quiet_NaN = true; + static constexpr bool has_signaling_NaN = true; + static constexpr float_denorm_style has_denorm = denorm_absent; + static constexpr bool has_denorm_loss = false; + + static constexpr bool is_iec559 = false; + static constexpr bool is_bounded = false; + static constexpr bool is_modulo = false; + static constexpr bool traps = false; + static constexpr bool tinyness_before = false; + static constexpr float_round_style round_style = round_toward_zero; +}; + +} diff --git a/include/universal/number/qd/qd.hpp b/include/universal/number/qd/qd.hpp new file mode 100644 index 000000000..a9694ee01 --- /dev/null +++ b/include/universal/number/qd/qd.hpp @@ -0,0 +1,76 @@ +// quad-double floating-point arithmetic standard header +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#ifndef _QUADDOUBLE_STANDARD_HEADER_ +#define _QUADDOUBLE_STANDARD_HEADER_ + +//////////////////////////////////////////////////////////////////////////////////////// +/// COMPILATION DIRECTIVES TO DIFFERENT COMPILERS +#include +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////////////// +/// required std libraries +#include +#include + +//////////////////////////////////////////////////////////////////////////////////////// +/// BEHAVIORAL COMPILATION SWITCHES + +//////////////////////////////////////////////////////////////////////////////////////// +// enable/disable the ability to use literals in binary logic and arithmetic operators +#if !defined(QUADDOUBLE_ENABLE_LITERALS) +// default is to enable them +#define QUADDOUBLE_ENABLE_LITERALS 1 +#endif + +//////////////////////////////////////////////////////////////////////////////////////// +// enable throwing specific exceptions for arithmetic errors +// left to application to enable +#if !defined(QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION) +// default is to use std::cerr for signalling an error +#define QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION 0 +#define QUADDOUBLE_EXCEPT noexcept +#else +#if QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION +#define QUADDOUBLE_EXCEPT +#else +#define QUADDOUBLE_EXCEPT noexcept +#endif +#endif + +//////////////////////////////////////////////////////////////////////////////////////// +// enable native sqrt implementation +// +#if !defined(QUADDOUBLE_NATIVE_SQRT) +#define QUADDOUBLE_NATIVE_SQRT 1 +#endif + +/////////////////////////////////////////////////////////////////////////////////////// +// bring in the trait functions +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////////////// +/// INCLUDE FILES that make up the library +#include +#include +#include +#include +#include + +// useful functions to work with doubledoubles +#include +#include + +/////////////////////////////////////////////////////////////////////////////////////// +/// elementary math functions library +// #include + +#endif diff --git a/include/universal/number/qd/qd_fwd.hpp b/include/universal/number/qd/qd_fwd.hpp new file mode 100644 index 000000000..f96ecf840 --- /dev/null +++ b/include/universal/number/qd/qd_fwd.hpp @@ -0,0 +1,24 @@ +#pragma once +// qd_fwd.hpp : forward declarations for the quad-double floating-point environment +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +namespace sw { namespace universal { + + // forward references + class qd; + + bool parse(const std::string& number, qd& v); + + qd abs(qd const&); + qd sqrt(qd); + qd fabs(qd); + + qd fma(qd const&, qd const&, qd const&); + +}} // namespace sw::universal + diff --git a/include/universal/number/qd/qd_impl.hpp b/include/universal/number/qd/qd_impl.hpp new file mode 100644 index 000000000..10a120a72 --- /dev/null +++ b/include/universal/number/qd/qd_impl.hpp @@ -0,0 +1,1281 @@ +#pragma once +// qd_impl.hpp: implementation of the double-double floating-point number system described in +// +// Sherry Li, David Bailey, LBNL, "Library for Double-Double and Quad-Double Arithmetic", 2008 +// https://www.researchgate.net/publication/228570156_Library_for_Double-Double_and_Quad-Double_Arithmetic +// +// Adapted core subroutines from QD library by Yozo Hida +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +#include +#include +#include +#include + +// supporting types and functions +#include +#include +#include +#include +// qd exception structure +#include +#include + +namespace sw { namespace universal { + +// fwd references to free functions +qd operator*(const qd&, const qd&); +qd operator/(const qd&, const qd&); +qd pown(qd const&, int); + +// qd is an unevaluated quadruple of IEEE-754 doubles that provides a (1,11,212) floating-point triple +class qd { +public: + static constexpr unsigned nbits = 256; + static constexpr unsigned es = 11; + static constexpr unsigned fbits = 212; // number of fraction digits + // exponent characteristics are the same as native double precision floating-point + static constexpr int EXP_BIAS = ((1 << (es - 1u)) - 1l); + static constexpr int MAX_EXP = (es == 1) ? 1 : ((1 << es) - EXP_BIAS - 1); + static constexpr int MIN_EXP_NORMAL = 1 - EXP_BIAS; + static constexpr int MIN_EXP_SUBNORMAL = 1 - EXP_BIAS - int(fbits); // the scale of smallest ULP + + /// trivial constructor + qd() = default; + + qd(const qd&) = default; + qd(qd&&) = default; + + qd& operator=(const qd&) = default; + qd& operator=(qd&&) = default; + + // converting constructors + qd(const std::string& stringRep) : x{0} { assign(stringRep); } + + // specific value constructor + constexpr qd(const SpecificValue code) noexcept : x{0.0} { + switch (code) { + case SpecificValue::maxpos: + maxpos(); + break; + case SpecificValue::minpos: + minpos(); + break; + case SpecificValue::zero: + default: + zero(); + break; + case SpecificValue::minneg: + minneg(); + break; + case SpecificValue::maxneg: + maxneg(); + break; + case SpecificValue::infpos: + setinf(false); + break; + case SpecificValue::infneg: + setinf(true); + break; + case SpecificValue::nar: // approximation as qds don't have a NaR + case SpecificValue::qnan: + setnan(NAN_TYPE_QUIET); + break; + case SpecificValue::snan: + setnan(NAN_TYPE_SIGNALLING); + break; + } + } + + // raw limb constructor: no argument checking + constexpr qd(double x0) noexcept : x{ 0 } { x[0] = x0; } + constexpr qd(double x0, double x1) noexcept : x{ 0 } { x[0] = x0; x[1] = x1; } + constexpr qd(double x0, double x1, double x2, double x3) noexcept : x{ 0 } { x[0] = x0; x[1] = x1; x[2] = x2; x[3] = x3; } + + // initializers for native types + constexpr qd(signed char iv) noexcept : x{0} { x[0] = static_cast(iv); } + constexpr qd(short iv) noexcept : x{0} { x[0] = static_cast(iv); } + constexpr qd(int iv) noexcept : x{0} { x[0] = static_cast(iv); } + constexpr qd(long iv) noexcept { *this = iv; } + constexpr qd(long long iv) noexcept { *this = iv; } + constexpr qd(char iv) noexcept : x{0} { x[0] = static_cast(iv); } + constexpr qd(unsigned short iv) noexcept : x{0} { x[0] = static_cast(iv); } + constexpr qd(unsigned int iv) noexcept : x{0} { x[0] = static_cast(iv); } + constexpr qd(unsigned long iv) noexcept { *this = iv; } + constexpr qd(unsigned long long iv) noexcept { *this = iv; } + constexpr qd(float iv) noexcept : x{0} { x[0] = iv; } + + // assignment operators for native types + constexpr qd& operator=(signed char rhs) noexcept { return convert_signed(rhs); } + constexpr qd& operator=(short rhs) noexcept { return convert_signed(rhs); } + constexpr qd& operator=(int rhs) noexcept { return convert_signed(rhs); } + constexpr qd& operator=(long rhs) noexcept { return convert_signed(rhs); } + constexpr qd& operator=(long long rhs) noexcept { return convert_signed(rhs); } + constexpr qd& operator=(unsigned char rhs) noexcept { return convert_unsigned(rhs); } + constexpr qd& operator=(unsigned short rhs) noexcept { return convert_unsigned(rhs); } + constexpr qd& operator=(unsigned int rhs) noexcept { return convert_unsigned(rhs); } + constexpr qd& operator=(unsigned long rhs) noexcept { return convert_unsigned(rhs); } + constexpr qd& operator=(unsigned long long rhs) noexcept { return convert_unsigned(rhs); } + constexpr qd& operator=(float rhs) noexcept { return convert_ieee754(rhs); } + constexpr qd& operator=(double rhs) noexcept { return convert_ieee754(rhs); } + + // conversion operators + explicit operator int() const noexcept { return convert_to_signed(); } + explicit operator long() const noexcept { return convert_to_signed(); } + explicit operator long long() const noexcept { return convert_to_signed(); } + explicit operator unsigned int() const noexcept { return convert_to_unsigned(); } + explicit operator unsigned long() const noexcept { return convert_to_unsigned(); } + explicit operator unsigned long long() const noexcept { return convert_to_unsigned(); } + explicit operator float() const noexcept { return convert_to_ieee754(); } + explicit operator double() const noexcept { return convert_to_ieee754(); } + + +#if LONG_DOUBLE_SUPPORT + // can't be constexpr as remainder calculation requires volatile designation + qd(long double iv) noexcept { *this = iv; } + qd& operator=(long double rhs) noexcept { return convert_ieee754(rhs); } + explicit operator long double() const noexcept { return convert_to_ieee754(); } +#endif + + // prefix operators + constexpr qd operator-() const noexcept { + qd negated(*this); + negated.x[0] = -negated.x[0]; + negated.x[1] = -negated.x[1]; + negated.x[2] = -negated.x[2]; + negated.x[3] = -negated.x[3]; + return negated; + } + + // arithmetic operators + qd& operator+=(const qd& rhs) { + + double s0 = x[0] + rhs[0]; + double s1 = x[1] + rhs[1]; + double s2 = x[2] + rhs[2]; + double s3 = x[3] + rhs[3]; + + double v0 = s0 - x[0]; + double v1 = s1 - x[1]; + double v2 = s2 - x[2]; + double v3 = s3 - x[3]; + + double u0 = s0 - v0; + double u1 = s1 - v1; + double u2 = s2 - v2; + double u3 = s3 - v3; + + double w0 = x[0] - u0; + double w1 = x[1] - u1; + double w2 = x[2] - u2; + double w3 = x[3] - u3; + + u0 = rhs[0] - v0; + u1 = rhs[1] - v1; + u2 = rhs[2] - v2; + u3 = rhs[3] - v3; + + double t0 = w0 + u0; + double t1 = w1 + u1; + double t2 = w2 + u2; + double t3 = w3 + u3; + + s1 = two_sum(s1, t0, t0); + three_sum(s2, t0, t1); + three_sum2(s3, t0, t2); + t0 = t0 + t1 + t3; + + renorm(s0, s1, s2, s3, t0); + x[0] = s0; + x[1] = s1; + x[2] = s2; + x[3] = s3;; + return *this; + } + qd& operator+=(double rhs) { + return operator+=(qd(rhs)); + } + qd& operator-=(const qd& rhs) { + + return *this; + } + qd& operator-=(double rhs) { + return operator-=(qd(rhs)); + } + qd& operator*=(const qd& rhs) { + + return *this; + } + qd& operator*=(double rhs) { + return operator*=(qd(rhs)); + } + qd& operator/=(const qd& rhs) { + if (isnan()) return *this; + + if (rhs.isnan()) { + *this = rhs; + return *this; + } + + if (rhs.iszero()) { + if (iszero()) { + *this = qd(SpecificValue::qnan); + } + else { + // auto signA = std::copysign(1.0, hi); + // auto signB = std::copysign(1.0, rhs.hi); + // *this = (signA * signB) * qd(SpecificValue::infpos); + *this = qd(SpecificValue::infpos); + } + return *this; + } + + + return *this; + } + qd& operator/=(double rhs) { + return operator/=(qd(rhs)); + } + + // unary operators + qd& operator++() { + return *this; + } + qd operator++(int) { + qd tmp(*this); + operator++(); + return tmp; + } + qd& operator--() { + return *this; + } + qd operator--(int) { + qd tmp(*this); + operator--(); + return tmp; + } + + // modifiers + constexpr void clear() noexcept { x[0] = 0.0; x[1] = 0.0; x[2] = 0.0; x[3] = 0.0; } + constexpr void setzero() noexcept { x[0] = 0.0; x[1] = 0.0; x[2] = 0.0; x[3] = 0.0; } + constexpr void setinf(bool sign = true) noexcept { x[0] = (sign ? -INFINITY : INFINITY); x[1] = 0.0; x[2] = 0.0; x[3] = 0.0; } + constexpr void setnan(int NaNType = NAN_TYPE_SIGNALLING) noexcept { x[0] = (NaNType == NAN_TYPE_SIGNALLING ? std::numeric_limits::signaling_NaN() : std::numeric_limits::quiet_NaN()); x[1] = 0.0; x[2] = 0.0; x[3] = 0.0; } + constexpr void setsign(bool sign = true) noexcept { if (sign && x[0] > 0.0) x[0] = -x[0]; } + + constexpr void setbit(unsigned index, bool b = true) noexcept { + if (index < 64) { // set bit in lowest limb + sw::universal::setbit(x[3], index, b); + } + else if (index < 128) { // set bit in second to lowest limb + sw::universal::setbit(x[2], index - 64, b); + } + else if (index < 192) { // set bit in second to upper limb + sw::universal::setbit(x[1], index - 128, b); + } + else if (index < 128) { // set bit in upper limb + sw::universal::setbit(x[0], index - 192, b); + } + else { + // NOP if index out of bounds + } + } + constexpr void setbits(uint64_t value) noexcept { + x[0] = static_cast(value); + x[1] = 0.0; x[2] = 0.0; x[3] = 0.0; + } + + double operator[](int index) const { + if (0 <= index && index < 4) return x[index]; + return 0.0; + } + double& operator[](int index) { + if (0 <= index && index < 4) return x[index]; + } + + // create specific number system values of interest + constexpr qd& maxpos() noexcept { + x[0] = 0.0; + x[1] = 0.0; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } + constexpr qd& minpos() noexcept { + x[0] = 0.0; + x[1] = 0.0; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } + constexpr qd& zero() noexcept { + x[0] = 0.0; + x[1] = 0.0; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } + constexpr qd& minneg() noexcept { + x[0] = 0.0; + x[1] = 0.0; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } + constexpr qd& maxneg() noexcept { + x[0] = 0.0; + x[1] = 0.0; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } + + qd& assign(const std::string& txt) { + qd v; + if (parse(txt, v)) *this = v; + return *this; // Is this what we want? when the string is not valid, keep the current value? + } + + // selectors + constexpr bool iszero() const noexcept { return x[0] == 0.0; } + constexpr bool isone() const noexcept { return x[0] == 1.0 && x[1] == 0.0; } + constexpr bool ispos() const noexcept { return x[0] > 0.0; } + constexpr bool isneg() const noexcept { return x[0] < 0.0; } + BIT_CAST_CONSTEXPR bool isnan(int NaNType = NAN_TYPE_EITHER) const noexcept { + bool negative = isneg(); + int nan_type; + bool isNaN = checkNaN(x[0], nan_type); + bool isNegNaN = isNaN && negative; + bool isPosNaN = isNaN && !negative; + return (NaNType == NAN_TYPE_EITHER ? (isNegNaN || isPosNaN) : + (NaNType == NAN_TYPE_SIGNALLING ? isNegNaN : + (NaNType == NAN_TYPE_QUIET ? isPosNaN : false))); + } + BIT_CAST_CONSTEXPR bool isinf(int InfType = INF_TYPE_EITHER) const noexcept { + bool negative = isneg(); + int inf_type; + bool isInf = checkInf(x[0], inf_type); + bool isNegInf = isInf && negative; + bool isPosInf = isInf && !negative; + return (InfType == INF_TYPE_EITHER ? (isNegInf || isPosInf) : + (InfType == INF_TYPE_NEGATIVE ? isNegInf : + (InfType == INF_TYPE_POSITIVE ? isPosInf : false))); + } + + constexpr bool sign() const noexcept { return (x[0] < 0.0); } + constexpr int scale() const noexcept { return _extractExponent(x[0]); } + constexpr int exponent() const noexcept { return _extractExponent(x[0]); } + + // precondition: string s must be all digits + void round_string(char* s, int precision, int* decimalPoint) const { + int nrDigits = precision; + // round decimal string and propagate carry + int lastDigit = nrDigits - 1; + if (s[lastDigit] >= '5') { + int i = nrDigits - 2; + s[i]++; + while (i > 0 && s[i] > '9') { + s[i] -= 10; + s[--i]++; + } + } + + // if first digit is 10, shift everything. + if (s[0] > '9') { + for (int i = precision; i >= 2; i--) s[i] = s[i - 1]; + s[0] = '1'; + s[1] = '0'; + + (*decimalPoint)++; // increment decimal point + ++precision; + } + + s[precision] = 0; // aqd termination null + } + + void append_exponent(std::string& str, int e) const { + str += (e < 0 ? '-' : '+'); + e = std::abs(e); + int k; + if (e >= 100) { + k = (e / 100); + str += static_cast('0' + k); + e -= 100 * k; + } + + k = (e / 10); + str += static_cast('0' + k); + e -= 10 * k; + + str += static_cast('0' + e); + } + + // convert to string containing digits number of digits + std::string to_string(std::streamsize precision = 7, std::streamsize width = 15, bool fixed = false, bool scientific = true, bool internal = false, bool left = false, bool showpos = false, bool uppercase = false, char fill = ' ') const { + std::string s; + bool negative = sign() ? true : false; + int e{ 0 }; + if (fixed && scientific) fixed = false; // scientific format takes precedence + if (isnan()) { + s = uppercase ? "NAN" : "nan"; + negative = false; + } + else { + if (negative) { s += '-'; } else { if (showpos) s += '+'; } + + if (isinf()) { + s += uppercase ? "INF" : "inf"; + } + else if (iszero()) { + s += '0'; + if (precision > 0) { + s += '.'; + s.append(static_cast(precision), '0'); + } + } + else { + int powerOfTenScale = static_cast(std::log10(std::fabs(x[0]))); + int integerDigits = (fixed ? (powerOfTenScale + 1) : 1); + int nrDigits = integerDigits + static_cast(precision); + + int nrDigitsForFixedFormat = nrDigits; + if (fixed) + nrDigitsForFixedFormat = std::max(60, nrDigits); // can be much longer than the max accuracy for double-double + + // a number in the range of [0.5, 1.0) to be printed with zero precision + // must be rounded up to 1 to print correctly + if (fixed && (precision == 0) && (std::abs(x[0]) < 1.0)) { + s += (std::abs(x[0]) >= 0.5) ? '1' : '0'; + return s; + } + + if (fixed && nrDigits <= 0) { + // process values with negative exponents (powerOfTenScale < 0) + s += '0'; + if (precision > 0) { + s += '.'; + s.append(static_cast(precision), '0'); + } + } + else { + char* t; + + if (fixed) { + t = new char[static_cast(nrDigitsForFixedFormat + 1)]; + to_digits(t, e, nrDigitsForFixedFormat); + } + else { + t = new char[static_cast(nrDigits + 1)]; + to_digits(t, e, nrDigits); + } + + if (fixed) { + // round the decimal string + round_string(t, nrDigits, &integerDigits); + + if (integerDigits > 0) { + int i; + for (i = 0; i < integerDigits; ++i) s += t[i]; + if (precision > 0) { + s += '.'; + for (int j = 0; j < precision; ++j, ++i) s += t[i]; + } + } + else { + s += "0."; + if (integerDigits < 0) s.append(static_cast(-integerDigits), '0'); + for (int i = 0; i < nrDigits; ++i) s += t[i]; + } + } + else { + s += t[0]; + if (precision > 0) s += '.'; + + for (int i = 1; i <= precision; ++i) + s += t[i]; + + } + delete[] t; + } + } + + // trap for improper offset with large values + // without this trap, output of values of the for 10^j - 1 fail for j > 28 + // and are output with the point in the wrong place, leading to a dramatically off value + if (fixed && (precision > 0)) { + // make sure that the value isn't dramatically larger + double from_string = atof(s.c_str()); + + // if this ratio is large, then we've got problems + if (std::fabs(from_string / x[0]) > 3.0) { + + // loop on the string, find the point, move it up one + // don't act on the first character + for (std::string::size_type i = 1; i < s.length(); ++i) { + if (s[i] == '.') { + s[i] = s[i - 1]; + s[i - 1] = '.'; + break; + } + } + + from_string = atof(s.c_str()); + // if this ratio is large, then the string has not been fixed + if (std::fabs(from_string / x[0]) > 3.0) { + //error("Re-rounding unsuccessful in large number fixed point trap."); + } + } + } + + if (!fixed && !isinf()) { + // construct the exponent + s += uppercase ? 'E' : 'e'; + append_exponent(s, e); + } + } + + // process any fill + size_t strLength = s.length(); + if (strLength < static_cast(width)) { + size_t nrCharsToFill = (width - strLength); + if (internal) { + if (negative) + s.insert(static_cast(1), nrCharsToFill, fill); + else + s.insert(static_cast(0), nrCharsToFill, fill); + } + else if (left) { + s.append(nrCharsToFill, fill); + } + else { + s.insert(static_cast(0), nrCharsToFill, fill); + } + } + + return s; + } + +protected: + double x[4]; // fixed four (4) limbs, x[0] is highest order limb + + // HELPER methods + + constexpr qd& convert_signed(int64_t v) noexcept { + if (0 == v) { + setzero(); + } + else { + x[0] = static_cast(v); + x[1] = static_cast(v - static_cast(x[0])); + } + return *this; + } + + constexpr qd& convert_unsigned(uint64_t v) noexcept { + if (0 == v) { + setzero(); + } + else { + x[0] = static_cast(v); + x[1] = static_cast(v - static_cast(x[0])); // difference is always positive + } + return *this; + } + + // no need to SFINAE this as it is an internal method that we ONLY call when we know the argument type is a native float + constexpr qd& convert_ieee754(float rhs) noexcept { + x[0] = double(rhs); + x[1] = 0.0; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } + constexpr qd& convert_ieee754(double rhs) noexcept { + x[0] = double(rhs); + x[1] = 0.0; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } +#if LONG_DOUBLE_SUPPORT + qd& convert_ieee754(long double rhs) { + volatile long double truncated = static_cast(double(rhs)); + volatile double remainder = static_cast(rhs - truncated); + x[0] = static_cast(truncated); + x[1] = remainder; + x[2] = 0.0; + x[3] = 0.0; + return *this; + } +#endif + + // convert to native unsigned integer, use C++ conversion rules to cast down to float and double + template + Unsigned convert_to_unsigned() const noexcept { + int64_t h = static_cast(x[0]); + int64_t l = static_cast(x[1]); + return Unsigned(h + l); + } + + // convert to native unsigned integer, use C++ conversion rules to cast down to float and double + template + Signed convert_to_signed() const noexcept { + int64_t h = static_cast(x[0]); + int64_t l = static_cast(x[1]); + return Signed(h + l); + } + + // convert to native floating-point, use C++ conversion rules to cast down to float and double + template + Real convert_to_ieee754() const noexcept { + return Real(x[0] + x[1] + x[2] + x[3]); + } + + /// + /// to_digits generates the decimal digits representing + /// + /// + /// + /// + void to_digits(char* s, int& exponent, int precision) const { + constexpr qd _one(1.0), _ten(10.0); + constexpr double _log2(0.301029995663981); + double hi = x[0]; + //double lo = x[1]; + + if (iszero()) { + std::cout << "I am zero\n"; + exponent = 0; + for (int i = 0; i < precision; ++i) s[i] = '0'; + s[precision] = 0; // termination null + return; + } + + // First determine the (approximate) exponent. + // std::frexp(*this, &e); // e is appropriate for 0.5 <= x < 1 + int e; + std::frexp(hi, &e); + --e; // adjust e as frexp gives a binary e that is 1 too big + e = static_cast(_log2 * e); // estimate the power of ten exponent + qd r = abs(*this); + if (e < 0) { + if (e < -300) { + //r = qd(std::ldexp(r[0], 53), std::ldexp(r[1], 53)); + r *= pown(_ten, -e); + //r = qd(std::ldexp(r[0], -53), std::ldexp(r[1], -53)); + } + else { + r *= pown(_ten, -e); + } + } + else { + if (e > 0) { + if (e > 300) { + //r = qd(std::ldexp(r[0], -53), std::ldexp(r[1], -53)); + r /= pown(_ten, e); + //r = qd(std::ldexp(r[0], 53), std::ldexp(r[1], 53)); + } + else { + r /= pown(_ten, e); + } + } + } + + // Fix exponent if we have gone too far + if (r >= _ten) { + r /= _ten; + ++e; + } + else { + if (r < 1.0) { + r *= _ten; + --e; + } + } + + if ((r >= _ten) || (r < _one)) { + std::cerr << "to_digits() failed to compute exponent\n"; + return; + } + + // at this point the value is normalized to a decimal value between (0, 10) + // generate the digits + int nrDigits = precision + 1; + for (int i = 0; i < nrDigits; ++i) { + int mostSignificantDigit = static_cast(r[0]); + r -= mostSignificantDigit; + r *= 10.0; + + s[i] = static_cast(mostSignificantDigit + '0'); + } + + // Fix out of range digits + for (int i = nrDigits - 1; i > 0; --i) { + if (s[i] < '0') { + s[i - 1]--; + s[i] += 10; + } + else { + if (s[i] > '9') { + s[i - 1]++; + s[i] -= 10; + } + } + } + + if (s[0] <= '0') { + std::cerr << "to_digits() non-positive leading digit\n"; + return; + } + + // Round and propagate carry + int lastDigit = nrDigits - 1; + if (s[lastDigit] >= '5') { + int i = nrDigits - 2; + s[i]++; + while (i > 0 && s[i] > '9') { + s[i] -= 10; + s[--i]++; + } + } + + // If first digit is 10, shift left and increment exponent + if (s[0] > '9') { + ++e; + for (int i = precision; i >= 2; --i) { + s[i] = s[i - 1]; + } + s[0] = '1'; + s[1] = '0'; + } + + s[precision] = 0; // termination null + exponent = e; + } + +private: + + // qd - qd logic comparisons + friend bool operator==(const qd& lhs, const qd& rhs); + friend bool operator!=(const qd& lhs, const qd& rhs); + friend bool operator<=(const qd& lhs, const qd& rhs); + friend bool operator>=(const qd& lhs, const qd& rhs); + friend bool operator<(const qd& lhs, const qd& rhs); + friend bool operator>(const qd& lhs, const qd& rhs); + + // qd - literal logic comparisons + friend bool operator==(const qd& lhs, const double rhs); + + // literal - qd logic comparisons + friend bool operator==(const double lhs, const qd& rhs); + + friend bool operator<(const qd& lhs, const qd& rhs); + +}; + +//////////////////////// precomputed constants of note ///////////////////////////////// + +// precomputed quad-double constants + +constexpr qd qd_2pi (6.283185307179586232e+00, 2.449293598294706414e-16); +constexpr qd qd_pi (3.141592653589793116e+00, 1.224646799147353207e-16); +constexpr qd qd_pi2 (1.570796326794896558e+00, 6.123233995736766036e-17); +constexpr qd qd_pi4 (7.853981633974482790e-01, 3.061616997868383018e-17); +constexpr qd qd_3pi4 (2.356194490192344837e+00, 9.1848509936051484375e-17); +constexpr qd qd_e (2.718281828459045091e+00, 1.445646891729250158e-16); +constexpr qd qd_log2 (6.931471805599452862e-01, 2.319046813846299558e-17); +constexpr qd qd_log10 (2.302585092994045901e+00, -2.170756223382249351e-16); + +constexpr double d_eps = 4.93038065763132e-32; // 2^-104 +constexpr double d_min_normalized = 2.0041683600089728e-292; // = 2^(-1022 + 53) +constexpr qd qd_max (1.79769313486231570815e+308, 9.97920154767359795037e+291); +constexpr qd qd_safe_max(1.7976931080746007281e+308, 9.97920154767359795037e+291); + + +// precomputed quad-double constants courtesy of constants example program + +constexpr qd qd_ln2 (0.69314718055994529e+00, 2.3190468138462996e-17); +constexpr qd qd_ln10 (2.30258509299404590e+00, -2.1707562233822494e-16); +constexpr qd qd_lge (1.44269504088896340e+00, 2.0355273740931027e-17); +constexpr qd qd_lg10 (3.32192809488736220e+00, 1.6616175169735918e-16); +constexpr qd qd_loge (0.43429448190325182e+00, 1.0983196502167652e-17); + +constexpr qd qd_sqrt2 (1.41421356237309510e+00, -9.6672933134529122e-17); + +constexpr qd qd_inv_pi (0.31830988618379069e+00, -1.9678676675182486e-17); +constexpr qd qd_inv_pi2 (0.63661977236758138e+00, -3.9357353350364972e-17); +constexpr qd qd_inv_e (0.36787944117144233e+00, -1.2428753672788364e-17); +constexpr qd qd_inv_sqrt2(0.70710678118654757e+00, -4.8336466567264561e-17); + +//////////////////////// helper functions ///////////////////////////////// + +inline std::string to_quad(const qd& v, int precision = 17) { + std::stringstream s; + s << std::setprecision(precision) << "( " << v[0] << ", " << v[1] << ", " << v[2] << ", " << v[3] << ')'; + return s.str(); +} + +inline std::string to_binary(const qd& number, bool bNibbleMarker = false) { + std::stringstream s; + for (int i = 0; i < 4; ++i) { + double_decoder decoder; + decoder.d = number[i]; + + s << "0b"; + // print sign bit + s << (decoder.parts.sign ? '1' : '0') << '.'; + + // print exponent bits + { + uint64_t mask = 0x400; + for (int bit = 10; bit >= 0; --bit) { + s << ((decoder.parts.exponent & mask) ? '1' : '0'); + if (bNibbleMarker && bit != 0 && (bit % 4) == 0) s << '\''; + mask >>= 1; + } + } + + s << '.'; + + // print hi fraction bits + uint64_t mask = (uint64_t(1) << 51); + for (int bit = 51; bit >= 0; --bit) { + s << ((decoder.parts.fraction & mask) ? '1' : '0'); + if (bNibbleMarker && bit != 0 && (bit % 4) == 0) s << '\''; + mask >>= 1; + } + + if (i < 3) s << ", "; + } + + return s.str(); +} + +//////////////////////// math functions ///////////////////////////////// + +inline qd abs(qd const& a) { + return (a[0] < 0.0) ? -a : a; +} + +inline qd ceil(qd const& a) { + double x0{ 0.0 }, x1{ 0.0 }, x2{ 0.0 }, x3{ 0.0 }; + x0 = std::ceil(a[0]); + + if (x0 == a[0]) { + x1 = std::ceil(a[1]); + + if (x1 == a[1]) { + x2 = std::ceil(a[2]); + + if (x2 == a[2]) { + x3 = std::ceil(a[3]); + } + } + + renorm(x0, x1, x2, x3); + return qd(x0, x1, x2, x3); + } + + return qd(x0, x1, x2, x3); +} + +inline qd floor(qd const& a) { + double x0{ 0.0 }, x1{ 0.0 }, x2{ 0.0 }, x3{ 0.0 }; + x0 = std::floor(a[0]); + + if (x0 == a[0]) { + x1 = std::floor(a[1]); + + if (x1 == a[1]) { + x2 = std::floor(a[2]); + + if (x2 == a[2]) { + x3 = std::floor(a[3]); + } + } + + renorm(x0, x1, x2, x3); + return qd(x0, x1, x2, x3); + } + + return qd(x0, x1, x2, x3); +} + +// Round to Nearest integer +qd nint(qd const& a) { + double x0{ 0.0 }, x1{ 0.0 }, x2{ 0.0 }, x3{ 0.0 }; + x0 = nint(a[0]); + + if (x0 == a[0]) { + // First double is already an integer + x1 = nint(a[1]); + + if (x1 == a[1]) { + // Second double is already an integer + x2 = nint(a[2]); + + if (x2 == a[2]) { + // Third double is already an integer + x3 = nint(a[3]); + } + else { + if (std::abs(x2 - a[2]) == 0.5 && a[3] < 0.0) { + x2 -= 1.0; + } + } + + } + else { + if (std::abs(x1 - a[1]) == 0.5 && a[2] < 0.0) { + x1 -= 1.0; + } + } + + } + else { + /* First double is not an integer. */ + if (std::abs(x0 - a[0]) == 0.5 && a[1] < 0.0) { + x0 -= 1.0; + } + } + + renorm(x0, x1, x2, x3); + return qd(x0, x1, x2, x3); +} + + +/* quad-double ^ 2 = (x0 + x1 + x2 + x3) ^ 2 + = x0 ^ 2 + 2 x0 * x1 + (2 x0 * x2 + x1 ^ 2) + + (2 x0 * x3 + 2 x1 * x2) */ +inline qd sqr(const qd& a) { + double p0, p1, p2, p3, p4, p5; + double q0, q1, q2, q3; + double s0, s1; + double t0, t1; + + p0 = two_sqr(a[0], q0); + p1 = two_prod(2.0 * a[0], a[1], q1); + p2 = two_prod(2.0 * a[0], a[2], q2); + p3 = two_sqr(a[1], q3); + + p1 = two_sum(q0, p1, q0); + + q0 = two_sum(q0, q1, q1); + p2 = two_sum(p2, p3, p3); + + s0 = two_sum(q0, p2, t0); + s1 = two_sum(q1, p3, t1); + + s1 = two_sum(s1, t0, t0); + t0 += t1; + + s1 = quick_two_sum(s1, t0, t0); + p2 = quick_two_sum(s0, s1, t1); + p3 = quick_two_sum(t1, t0, q0); + + p4 = 2.0 * a[0] * a[3]; + p5 = 2.0 * a[1] * a[2]; + + p4 = two_sum(p4, p5, p5); + q2 = two_sum(q2, q3, q3); + + t0 = two_sum(p4, q2, t1); + t1 = t1 + p5 + q3; + + p3 = two_sum(p3, t0, p4); + p4 = p4 + q0 + t1; + + renorm(p0, p1, p2, p3, p4); + return qd(p0, p1, p2, p3); +} + +// Computes pow(qd, n), where n is an integer +qd pown(qd const& a, int n) { + if (n == 0) + return 1.0; + + qd r{ a }; // odd-case multiplier + qd s{ 1.0 }; + int N = std::abs(n); + + if (N > 1) { + while (N > 0) { + if (N % 2 == 1) { + s *= r; + } + N /= 2; + if (N > 0) r = sqr(r); + } + } + else { + s = r; + } + + if (n < 0) + return (qd(1.0) / s); + + return s; +} + +//////////////////////// stream operators ///////////////////////////////// + +// stream out a decimal floating-point representation of the quad-double +inline std::ostream& operator<<(std::ostream& ostr, const qd& v) { + std::ios_base::fmtflags fmt = ostr.flags(); + std::streamsize precision = ostr.precision(); + std::streamsize width = ostr.width(); + char fillChar = ostr.fill(); + bool showpos = fmt & std::ios_base::showpos; + bool uppercase = fmt & std::ios_base::uppercase; + bool fixed = fmt & std::ios_base::fixed; + bool scientific = fmt & std::ios_base::scientific; + bool internal = fmt & std::ios_base::internal; + bool left = fmt & std::ios_base::left; + return ostr << v.to_string(precision, width, fixed, scientific, internal, left, showpos, uppercase, fillChar); +} + +// stream in an ASCII decimal floating-point format and assign it to a quad-double +inline std::istream& operator>>(std::istream& istr, qd& v) { + std::string txt; + istr >> txt; + if (!parse(txt, v)) { + std::cerr << "unable to parse -" << txt << "- into a quad-double value\n"; + } + return istr; +} + +////////////////// string operators + +// parse a decimal ASCII floating-point format and make a quad-double (qd) out of it +bool parse(const std::string& number, qd& value) { + char const* p = number.c_str(); + + // Skip any leading spaces + while (std::isspace(*p)) ++p; + + qd r{ 0.0 }; + int nrDigits{ 0 }; + int decimalPoint{ -1 }; + int sign{ 0 }, eSign{ 1 }; + int e{ 0 }; + bool done{ false }, parsingMantissa{ true }; + char ch; + while (!done && (ch = *p) != '\0') { + if (std::isdigit(ch)) { + if (parsingMantissa) { + int digit = ch - '0'; + r *= 10.0; + r += static_cast(digit); + ++nrDigits; + } + else { // parsing exponent section + int digit = ch - '0'; + e *= 10; + e += digit; + } + } + else { + switch (ch) { + case '.': + if (decimalPoint >= 0) return false; + decimalPoint = nrDigits; + break; + + case '-': + case '+': + if (parsingMantissa) { + if (sign != 0 || nrDigits > 0) return false; + sign = (ch == '-' ? -1 : 1); + } + else { + eSign = (ch == '-' ? -1 : 1); + } + break; + + case 'E': + case 'e': + parsingMantissa = false; + break; + + default: + return false; + } + } + + ++p; + } + e *= eSign; + + if (decimalPoint >= 0) e -= (nrDigits - decimalPoint); + qd _ten(10.0, 0.0); + if (e > 0) { + r *= pown(_ten, e); + } + else { + if (e < 0) r /= pown(_ten, -e); + } + value = (sign == -1) ? -r : r; + return true; +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// qd - qd binary logic operators + +// equal: precondition is that the storage is properly nulled in all arithmetic paths +inline bool operator==(const qd& lhs, const qd& rhs) { + return (lhs[0] == rhs[0]) && (lhs[1] == rhs[1] && lhs[2] == rhs[2]) && (lhs[3] == rhs[3]); +} + +inline bool operator!=(const qd& lhs, const qd& rhs) { + return !operator==(lhs, rhs); +} + +inline bool operator< (const qd& lhs, const qd& rhs) { + return (lhs[0] < rhs[0] || + (lhs[0] == rhs[0] && (lhs[1] < rhs[1] || + (lhs[1] == rhs[1] && (lhs[2] < rhs[2] || + (lhs[2] == rhs[2] && lhs[3] < rhs[3])))))); +} + +inline bool operator> (const qd& lhs, const qd& rhs) { + return operator< (rhs, lhs); +} + +inline bool operator<=(const qd& lhs, const qd& rhs) { + return operator< (lhs, rhs) || operator==(lhs, rhs); +} + +inline bool operator>=(const qd& lhs, const qd& rhs) { + return !operator< (lhs, rhs); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// qd - literal binary logic operators +// +// equal: precondition is that the storage is properly nulled in all arithmetic paths +inline bool operator==(const qd& lhs, double rhs) { + return operator==(lhs, qd(rhs)); +} + +inline bool operator!=(const qd& lhs, double rhs) { + return !operator==(lhs, rhs); +} + +inline bool operator< (const qd& lhs, double rhs) { + return operator<(lhs, qd(rhs)); +} + +inline bool operator> (const qd& lhs, double rhs) { + return operator< (qd(rhs), lhs); +} + +inline bool operator<=(const qd& lhs, double rhs) { + return operator< (lhs, rhs) || operator==(lhs, rhs); +} + +inline bool operator>=(const qd& lhs, double rhs) { + return !operator< (lhs, rhs); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// literal - qd binary logic operators +// +// equal: precondition is that the storage is properly nulled in all arithmetic paths +inline bool operator==(double lhs, const qd& rhs) { + return operator==(qd(lhs), rhs); +} + +inline bool operator!=(double lhs, const qd& rhs) { + return !operator==(lhs, rhs); +} + +inline bool operator< (double lhs, const qd& rhs) { + return operator<(qd(lhs), rhs); +} + +inline bool operator> (double lhs, const qd& rhs) { + return operator< (rhs, lhs); +} + +inline bool operator<=(double lhs, const qd& rhs) { + return operator< (lhs, rhs) || operator==(lhs, rhs); +} + +inline bool operator>=(double lhs, const qd& rhs) { + return !operator< (lhs, rhs); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// qd - qd binary arithmetic operators +// BINARY ADDITION +inline qd operator+(const qd& lhs, const qd& rhs) { + qd sum{ lhs }; + sum += rhs; + return sum; +} +// BINARY SUBTRACTION +inline qd operator-(const qd& lhs, const qd& rhs) { + qd diff = lhs; + diff -= rhs; + return diff; +} +// BINARY MULTIPLICATION +inline qd operator*(const qd& lhs, const qd& rhs) { + qd mul = lhs; + mul *= rhs; + return mul; +} +// BINARY DIVISION +inline qd operator/(const qd& lhs, const qd& rhs) { + qd ratio = lhs; + ratio /= rhs; + return ratio; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// qd - literal binary arithmetic operators +// BINARY ADDITION +inline qd operator+(const qd& lhs, double rhs) { + return operator+(lhs, qd(rhs)); +} +// BINARY SUBTRACTION +inline qd operator-(const qd& lhs, double rhs) { + return operator-(lhs, qd(rhs)); +} +// BINARY MULTIPLICATION +inline qd operator*(const qd& lhs, double rhs) { + return operator*(lhs, qd(rhs)); +} +// BINARY DIVISION +inline qd operator/(const qd& lhs, double rhs) { + return operator/(lhs, qd(rhs)); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// literal - qd binary arithmetic operators +// BINARY ADDITION +inline qd operator+(double lhs, const qd& rhs) { + return operator+(qd(lhs), rhs); +} +// BINARY SUBTRACTION +inline qd operator-(double lhs, const qd& rhs) { + return operator-(qd(lhs), rhs); +} +// BINARY MULTIPLICATION +inline qd operator*(double lhs, const qd& rhs) { + return operator*(qd(lhs), rhs); +} +// BINARY DIVISION +inline qd operator/(double lhs, const qd& rhs) { + return operator/(qd(lhs), rhs); +} + +}} // namespace sw::universal diff --git a/include/universal/traits/qd_traits.hpp b/include/universal/traits/qd_traits.hpp new file mode 100644 index 000000000..f1c9f58df --- /dev/null +++ b/include/universal/traits/qd_traits.hpp @@ -0,0 +1,31 @@ +#pragma once +// qd_traits.hpp : traits for quad-double (qd) arithmetic type +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +namespace sw { namespace universal { + +// define a trait for quad-double (qd) type +template +struct is_qd_trait + : false_type +{ +}; + +template<> +struct is_qd_trait< qd > + : true_type +{ +}; + +template +constexpr bool is_qd = is_qd_trait<_Ty>::value; + +template +using enable_if_qd = std::enable_if_t, _Ty>; + +}} // namespace sw::universal diff --git a/static/qd/CMakeLists.txt b/static/qd/CMakeLists.txt index 2072f6423..93f8d83a5 100644 --- a/static/qd/CMakeLists.txt +++ b/static/qd/CMakeLists.txt @@ -3,12 +3,11 @@ file (GLOB LOGIC_SRC "logic/*.cpp") file (GLOB CONVERSION_SRC "conversion/*.cpp") file (GLOB ARITHMETIC_SRC "arithmetic/*.cpp") file (GLOB MATH_SRC "./math/*.cpp") -#file (GLOB PERFORMANCE_SRC "./performance/*.cpp") - -compile_all("true" "dd" "Number Systems/static/floating-point/binary/dd/api" "${API_SRC}") -compile_all("true" "dd" "Number Systems/static/floating-point/binary/dd/logic" "${LOGIC_SRC}") -compile_all("true" "dd" "Number Systems/static/floating-point/binary/dd/conversion" "${CONVERSION_SRC}") -compile_all("true" "dd" "Number Systems/static/floating-point/binary/dd/arithmetic" "${ARITHMETIC_SRC}") -compile_all("true" "dd" "Number Systems/static/floating-point/binary/dd/math" "${MATH_SRC}") -#compile_all("true" "dd" "Number Systems/static/floating-point/binary/dd/performance" "${PERFORMANCE_SRC}") +file (GLOB PERFORMANCE_SRC "./performance/*.cpp") +compile_all("true" "qd" "Number Systems/static/floating-point/binary/qd/api" "${API_SRC}") +compile_all("true" "qd" "Number Systems/static/floating-point/binary/qd/logic" "${LOGIC_SRC}") +compile_all("true" "qd" "Number Systems/static/floating-point/binary/qd/conversion" "${CONVERSION_SRC}") +compile_all("true" "qd" "Number Systems/static/floating-point/binary/qd/arithmetic" "${ARITHMETIC_SRC}") +compile_all("true" "qd" "Number Systems/static/floating-point/binary/qd/math" "${MATH_SRC}") +compile_all("true" "qd" "Number Systems/static/floating-point/binary/qd/performance" "${PERFORMANCE_SRC}") diff --git a/static/qd/api/api.cpp b/static/qd/api/api.cpp new file mode 100644 index 000000000..fde519878 --- /dev/null +++ b/static/qd/api/api.cpp @@ -0,0 +1,312 @@ +// api.cpp: application programming interface tests for quad-double (qd) number system +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +// minimum set of include files to reflect source code dependencies +// Configure the qd template environment +// enable/disable arithmetic exceptions +#define QUADDOUBLE_THROW_ARITHMETIC_EXCEPTION 0 +#include +#include +#include +#include + +namespace sw { + namespace universal { + + template + void Progression(Real v) { + using namespace sw::universal; + + auto oldPrec = std::cout.precision(); + float f{ float(v) }; + std::cout << std::setprecision(7); + std::cout << to_binary(f, true) << " : " << f << '\n'; + + double d{ v }; + std::cout << std::setprecision(17); + std::cout << to_binary(d, true) << " : " << d << '\n'; + + qd a{ v }; + std::cout << std::setprecision(35); + std::cout << to_binary(a, true) << " : " << a << '\n'; + std::cout << std::setprecision(oldPrec); + } + + qd parse(const std::string& str) { + using namespace sw::universal; + + qd v(str); + auto oldPrec = std::cout.precision(); + std::cout << std::setprecision(std::numeric_limits::digits10); + std::cout << "string: " << str << " = ( " << v[0] << ", " << v[1] << ") "; + std::cout << std::setprecision(oldPrec); + return v; + } + + void print(std::ostream& ostr, qd const& v) { + std::ios_base::fmtflags fmt = ostr.flags(); + bool showpos = (fmt & std::ios_base::showpos) != 0; + bool uppercase = (fmt & std::ios_base::uppercase) != 0; + bool fixed = (fmt & std::ios_base::fixed) != 0; + bool scientific = (fmt & std::ios_base::scientific) != 0; + bool internal = (fmt & std::ios_base::internal) != 0; + bool left = (fmt & std::ios_base::left) != 0; + std::string str = v.to_string(ostr.precision(), ostr.width(), fixed, scientific, internal, left, showpos, uppercase, ostr.fill()); + ostr << str << '\n'; + } + + } +} + + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "quad-double (qd) API tests"; + int nrOfFailedTestCases = 0; + + auto oldPrec = std::cout.precision(); + + // important behavioral traits + { + using TestType = qd; + ReportTrivialityOfType(); + } + + // default behavior + std::cout << "+--------- Default qd has subnormals, but no supernormals\n"; + { + uint64_t big = (1ull << 53); + std::cout << to_binary(big) << " : " << big << '\n'; + qd a(big), b(1.0), c{}; + c = a + b; + ReportValue(a, "a"); + ReportValue(b, "b"); + ReportValue(c, "c"); + } + + // arithmetic behavior + std::cout << "+--------- Default qd has subnormals, but no supernormals\n"; + { + qd a(2.0), b(4.0); + ArithmeticOperators(a, b); + } + + std::cout << "+--------- fraction bit progressions \n"; + { + float fulp = ulp(1.0f); + Progression(1.0f + fulp); + Progression(1.0 + ulp(2.0)); + double v = ulp(1.0); + Progression( 1.0 - v/2.0 ); + std::cout << to_quad(qd(1.0 - v / 2.0)) << '\n'; + } + + // report on the dynamic range of some standard configurations + std::cout << "+--------- Dynamic range doubledouble configurations --------+\n"; + { + qd a; // uninitialized + + a.maxpos(); + std::cout << "maxpos doubledouble : " << to_binary(a) << " : " << a << '\n'; + a.setbits(0x0080); // positive min normal + std::cout << "minnorm doubledouble : " << to_binary(a) << " : " << a << '\n'; + a.minpos(); + std::cout << "minpos doubledouble : " << to_binary(a) << " : " << a << '\n'; + a.zero(); + std::cout << "zero : " << to_binary(a) << " : " << a << '\n'; + a.minneg(); + std::cout << "minneg doubledouble : " << to_binary(a) << " : " << a << '\n'; + a.maxneg(); + std::cout << "maxneg doubledouble : " << to_binary(a) << " : " << a << '\n'; + + std::cout << "---\n"; + } + + // constexpr and specific values + std::cout << "+--------- constexpr and specific values --------+\n"; + { + using Real = qd; + + CONSTEXPRESSION Real a{}; // zero constexpr + std::cout << type_tag(a) << '\n'; + + Real b(1.0f); // constexpr of a native type conversion + std::cout << to_binary(b) << " : " << b << '\n'; + + CONSTEXPRESSION Real c(SpecificValue::minpos); // constexpr of a special value in the encoding + std::cout << to_binary(c) << " : " << c << " == minpos" << '\n'; + + CONSTEXPRESSION Real d(SpecificValue::maxpos); // constexpr of a special value in the encoding + std::cout << to_binary(d) << " : " << d << " == maxpos" << '\n'; + } + + // set bit patterns + std::cout << "+--------- set bit patterns API --------+\n"; + { + using Real = qd; + + Real a; // uninitialized + std::cout << type_tag(a) << '\n'; + + a.setbits(0x0000); + std::cout << to_binary(a) << " : " << a << '\n'; + + a.setbit(8); + std::cout << to_binary(a) << " : " << a << " : set bit 8 assuming 0-based" << '\n'; + a.setbits(0xffff); + a.setbit(8, false); + std::cout << to_binary(a) << " : " << a << " : reset bit 8" << '\n'; + + a.setbits(0xAAAA); + std::cout << to_binary(a) << " : " << a << '\n'; + + a.assign(std::string("0b1.0101'0101.0101'010")); + std::cout << to_binary(a) << " : " << a << '\n'; + + a.assign(std::string("0b0.1010'1010.1010'101")); + std::cout << to_binary(a) << " : " << a << '\n'; + } + + // parse decimal strings + std::cout << "+--------- parse API --------+\n"; + { + std::string qdstr; + qd v; + + v = parse("0.0"); + qdstr = v.to_string(25, 25, true, false, false, false, true, false, ' '); + std::cout << qdstr << '\n'; + + std::cout << std::setprecision(7); + print(std::cout, parse("0.5")); + print(std::cout, parse("1.0")); + print(std::cout, parse("2.0")); + + // 100 digits of e + // 10 2.7182818284 + // 20 2.71828182845904523536 + // 30 2.718281828459045235360287471352 + // 40 2.7182818284590452353602874713526624977572 + // 50 2.71828182845904523536028747135266249775724709369995 + // 60 2.718281828459045235360287471352662497757247093699959574966967 + // 70 2.7182818284590452353602874713526624977572470936999595749669676277240766 + // 80 2.71828182845904523536028747135266249775724709369995957496696762772407663035354759 + // 90 2.718281828459045235360287471352662497757247093699959574966967627724076630353547594571382178 + // 100 2.7182818284590452353602874713526624977572470936999595749669676277240766303535475945713821785251664274 + ReportValue(std::numbers::e, "e", 10, 25); + std::cout << std::setprecision(10); + print(std::cout, parse("2.7182818284")); // 10 digits + std::cout << std::setprecision(15); + print(std::cout, parse("2.71828182845904")); // 15 digits + std::cout << std::setprecision(20); + print(std::cout, parse("2.71828182845904523536")); // 20 digits + std::cout << std::setprecision(30); + print(std::cout, parse("2.718281828459045235360287471352")); // 30 digits + std::cout << std::setprecision(40); + print(std::cout, parse("2.7182818284590452353602874713526624977572")); // 40 digits + + std::cout << std::setprecision(37); + print(std::cout, parse("2.718281828459045235360287471352662498")); //37 digits + std::cout << std::setprecision(oldPrec); + } + + std::cout << "+--------- set specific values of interest --------+\n"; + { + qd a{ 0 }; // initialized + std::cout << "maxpos : " << a.maxpos() << " : " << scale(a) << '\n'; + std::cout << "minpos : " << a.minpos() << " : " << scale(a) << '\n'; + std::cout << "zero : " << a.zero() << " : " << scale(a) << '\n'; + std::cout << "minneg : " << a.minneg() << " : " << scale(a) << '\n'; + std::cout << "maxneg : " << a.maxneg() << " : " << scale(a) << '\n'; + std::cout << dynamic_range() << std::endl; + } + + std::cout << "+--------- doubledouble subnormal behavior --------+\n"; + { + constexpr double minpos = std::numeric_limits::min(); + std::cout << to_binary(minpos) << " : " << minpos << '\n'; + double subnormal = minpos / 2.0; + std::cout << to_binary(subnormal) << " : " << subnormal << '\n'; + qd a(minpos); + for (int i = 0; i < 10/*106*/; ++i) { + std::string str = a.to_string(30, 40, false, true, false, false, false, false, ' '); + std::cout << to_binary(a) << " : " << a << " : " << str << '\n'; + a /= 2.0; + } + } + + std::cout << "+--------- special value properties doubledouble vs IEEE-754 --------+\n"; + { + float fa; + fa = NAN; + std::cout << "qNAN : " << to_binary(NAN) << '\n'; + std::cout << "sNAN : " << to_binary(-NAN) << '\n'; + if (fa < 0.0f && fa > 0.0f && fa != 0.0f) { + std::cout << "IEEE-754 is incorrectly implemented\n"; + } + else { + std::cout << "IEEE-754 NAN has no sign\n"; + } + + qd a(fa); + if ((a < 0.0f && a > 0.0f && a != 0.0f)) { + std::cout << "doubledouble (qd) is incorrectly implemented\n"; + ++nrOfFailedTestCases; + } + else { + std::cout << "qd NAN has no sign\n"; + } + } + + std::cout << "+--------- numeric_limits of doubledouble vs IEEE-754 --------+\n"; + { + std::cout << "qd(INFINITY): " << qd(INFINITY) << "\n"; + std::cout << "qd(-INFINITY): " << qd(-INFINITY) << "\n"; + + std::cout << "qd(std::numeric_limits::infinity()) : " << qd(std::numeric_limits::infinity()) << "\n"; + std::cout << "qd(-std::numeric_limits::infinity()) : " << qd(-std::numeric_limits::infinity()) << "\n"; + + std::cout << " 2 * std::numeric_limits::infinity() : " << 2 * std::numeric_limits::infinity() << "\n"; + std::cout << " 2 * std::numeric_limits::infinity() : " << 2 * std::numeric_limits::infinity() << "\n"; + std::cout << "-2 * std::numeric_limits::infinity() : " << -2 * std::numeric_limits::infinity() << "\n"; + +// std::cout << "sw::universal::nextafter(qd(0), std::numeric_limits::infinity()) : " << sw::universal::nextafter(qd(-0), std::numeric_limits::infinity()) << "\n"; + std::cout << "std::nextafter(float(0), std::numeric_limits::infinity()) : " << std::nextafter(float(-0), std::numeric_limits::infinity()) << "\n"; +// std::cout << "sw::universal::nextafter(qd(0), -std::numeric_limits::infinity()) : " << sw::universal::nextafter(qd(0), -std::numeric_limits::infinity()) << "\n"; + std::cout << "std::nextafter(float(0), -std::numeric_limits::infinity()) : " << std::nextafter(float(0), -std::numeric_limits::infinity()) << "\n"; + + std::cout << "qd(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_QUIET) : " << qd(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_QUIET) << "\n"; + std::cout << "qd(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_SIGNALLING) : " << qd(std::numeric_limits::signaling_NaN()).isnan(sw::universal::NAN_TYPE_SIGNALLING) << "\n"; + } + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/static/qd/api/experiments.cpp b/static/qd/api/experiments.cpp new file mode 100644 index 000000000..342c5f866 --- /dev/null +++ b/static/qd/api/experiments.cpp @@ -0,0 +1,207 @@ +// experiments.cpp: experiments with the double-double floating-point number system +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +// minimum set of include files to reflect source code dependencies +// Configure the dd template environment +// enable/disable arithmetic exceptions +#define DOUBLEDOUBLE_THROW_ARITHMETIC_EXCEPTION 0 +#include +#include +#include +#include + +namespace sw { + namespace universal { + + void Progression(double v) { + using namespace sw::universal; + + auto oldPrec = std::cout.precision(); + float f{ float(v) }; + std::cout << std::setprecision(7); + std::cout << to_binary(f, true) << " : " << f << '\n'; + + double d{ v }; + std::cout << std::setprecision(17); + std::cout << to_binary(d, true) << " : " << d << '\n'; + + dd a{ v }; + std::cout << std::setprecision(35); + std::cout << to_binary(a, true) << " : " << a << '\n'; + std::cout << std::setprecision(oldPrec); + } + + void dd_binary(dd const& v) { + std::cout << to_pair(v) << '\n'; + } + + void adjust(dd const& a) { + dd r = abs(a); + dd ten(10.0); + int e{ 0 }; + dd_binary(r); + frexp(r, &e); + std::cout << "exponent : " << e << '\n'; + + if (e < 0) { + if (e > 300) { + r = ldexp(r, 53); dd_binary(r); + r *= pown(ten, -e); dd_binary(r); + r = ldexp(r, -53); dd_binary(r); + } + else { + r *= pown(ten, -e); dd_binary(r); + } + } + else { + if (e > 0) { + if (e > 300) { + r = ldexp(r, -53); dd_binary(r); + r /= pown(ten, e); dd_binary(r); + r = ldexp(r, 53); dd_binary(r); + } + else { + r /= pown(ten, -e); dd_binary(r); + } + } + } + } + } +} + + + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "double-double (dd) experiments"; + int nrOfFailedTestCases = 0; + + auto oldPrec = std::cout.precision(); + + std::cout << "Smallest normal number progressions\n"; + { + constexpr double smallestNormal = std::numeric_limits::min(); + dd a(smallestNormal); + for (int i = 0; i < 10; ++i) { + ReportValue(a); + a *= 2.0; + } + + } + + std::cout << "Setting float bits\n"; + { + float v{0.0f}; + setbit(v,31); + ReportValue(v); + setbit(v,23); // set min normal + ReportValue(v); + setbit(v,23,false); setbit(v,0); // set smallest denorm + ReportValue(v); + } + std::cout << "Setting double bits\n"; + { + double v{0.0}; + setbit(v,63); + ReportValue(v); + setbit(v,52); // set min normal + ReportValue(v); + setbit(v,52,false); setbit(v,0); // set smallest denorm + ReportValue(v); + } + std::cout << "Setting double-double bits\n"; + { + dd v{0.0}; + v.setbit(127); + ReportValue(v); + v.setbit(116); // set min normal + ReportValue(v); + v.setbit(116,false); v.setbit(64); // set smallest denorm + ReportValue(v); + } + + std::cout << "subnormal exponent adjustment\n"; + { + constexpr double smallestNormal = std::numeric_limits::min(); + dd a{ smallestNormal }; + for (int i = 0; i < 5; ++i) { + adjust(a); + a /= 2.0; + } + a = smallestNormal; + for (int i = 0; i < 5; ++i) { + adjust(a); + a *= 2.0; + } + + } + + std::cout << "+--------- doubledouble subnormal behavior --------+\n"; + { + constexpr double smallestNormal = std::numeric_limits::min(); + ReportValue(smallestNormal, "smallest normal"); + double ulpAtSmallestNormal = ulp(smallestNormal); + ReportValue(ulpAtSmallestNormal, "ulpAtSmallestNormal"); + double subnormal = smallestNormal / 2.0; + std::cout << to_binary(subnormal) << " : " << subnormal << '\n'; + dd a(smallestNormal + ulpAtSmallestNormal); + for (int i = 0; i < 10/*106*/; ++i) { + std::string tag = "pow(a, -" + std::to_string(i) + ")"; + ReportValue(a, tag); + a /= 2.0; + } + } + + std::cout << "--------- decimal string rounding -------------\n"; + { + dd a{}; + int precision = 7; + int nrDigits = precision + 7; + char* s = new char[nrDigits + 1ull]; + int decimalPoint; + + s[0] = '1'; + for (int i = 1; i < nrDigits-1; ++i) { + s[i] = '5'; + } + s[nrDigits - 1] = '\0'; + std::cout << "input digits : " << s << '\n'; + decimalPoint = 7; // 15555.5 + a.round_string(s, precision, &decimalPoint); + std::cout << "rounded digits : " << s << " : decimal point at " << decimalPoint << '\n'; + delete[] s; + } + + std::cout << std::setprecision(oldPrec); + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (char const* msg) { + std::cerr << "Caught ad-hoc exception: " << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Caught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "caught unknown exception" << std::endl; + return EXIT_FAILURE; +}