From 9b45f14d1d634ade4c44e47bdb1a38b168bfb10b Mon Sep 17 00:00:00 2001 From: Ravenwater Date: Thu, 31 Oct 2024 16:24:29 -0400 Subject: [PATCH] WIP: efloat construction, initialization, and copying --- elastic/efloat/api/api.cpp | 39 +++++-- .../universal/number/efloat/attributes.hpp | 4 +- .../universal/number/efloat/efloat_impl.hpp | 107 +++++++++++++----- 3 files changed, 112 insertions(+), 38 deletions(-) diff --git a/elastic/efloat/api/api.cpp b/elastic/efloat/api/api.cpp index 092c2a55..dfab688e 100644 --- a/elastic/efloat/api/api.cpp +++ b/elastic/efloat/api/api.cpp @@ -32,20 +32,43 @@ try { } // construction, initialization, and copy construction + std::cout << "+--------- efloat construction, initialization, and copy construction\n"; { using TestType = efloat; - TestType a{ 1.0f }, b(2.0); + TestType a{ 1.5f }, b(2.5); #if LONG_DOUBLE_SUPPORT - TestType c{ 4.0l }; + TestType c{ 4.5l }; #else - TestType c{ 4.0 }; + TestType c{ 4.5 }; #endif - std::cout << "a : " << to_triple(a) << '\n'; - std::cout << "b : " << to_triple(b) << '\n'; - c -= a + b - 1.0; - std::cout << "c : " << to_triple(c) << '\n'; - + TestType d(c); + + std::cout << "a : " << to_triple(a) << " : " << a.significant() << " : " << double(a) << '\n'; + std::cout << "b : " << to_triple(b) << " : " << b.significant() << " : " << double(b) << '\n'; + std::cout << "c : " << to_triple(c) << " : " << c.significant() << " : " << double(c) << '\n'; + std::cout << "d : " << to_triple(d) << " : " << d.significant() << " : " << double(c) << '\n'; + } + + // manipulators + std::cout << "+--------- efloat manipulators\n"; + { + using TestType = efloat; + + float_decoder d; + d.parts.sign = false; + d.parts.exponent = ieee754_parameter::bias + 64; + d.parts.fraction = 0x7FFFFu << 8; // these are just the fraction bits, no hidden bit + std::cout << "fraction bits : " << to_binary(d.parts.fraction, true) << '\n'; + float f = d.f; + std::cout << "floating point : " << to_binary(f, true) << " : " << f << '\n'; + + TestType a{ f }; + std::cout << "efloat triple : " << to_triple(a) << " : " << a.significant() << " : " << double(a) << '\n'; + std::cout << "sign : " << sign(a) << '\n'; + std::cout << "scale : 2^" << scale(a) << '\n'; + std::cout << "significant : " << significant(a) << "f\n"; + std::cout << "significant : " << significant(a) << '\n'; } // default behavior diff --git a/include/universal/number/efloat/attributes.hpp b/include/universal/number/efloat/attributes.hpp index 27c36e89..168a499c 100644 --- a/include/universal/number/efloat/attributes.hpp +++ b/include/universal/number/efloat/attributes.hpp @@ -17,6 +17,8 @@ namespace sw { namespace universal { inline int64_t scale(const efloat& v) { return v.scale(); } - inline std::vector significant(const efloat& v) { return v.bits(); } + template::value, Real >::type> + inline Real significant(const efloat& v) { return Real(v); } }} // namespace sw::universal diff --git a/include/universal/number/efloat/efloat_impl.hpp b/include/universal/number/efloat/efloat_impl.hpp index 75640616..3e9aae6d 100644 --- a/include/universal/number/efloat/efloat_impl.hpp +++ b/include/universal/number/efloat/efloat_impl.hpp @@ -20,28 +20,29 @@ /* The efloat arithmetic can be configured to: - throw exceptions on invalid arguments and operations -- return a signalling NaN +- return a signaling NaN Compile-time configuration flags are used to select the exception mode. -Run-time configuration is used to select modular vs saturation arithmetic. -You need the exception types defined, but you have the option to throw them +The exception types are defined, but you have the option to throw them */ #include namespace sw { namespace universal { -// forward references -class efloat; -inline efloat& convert(int64_t v, efloat& result); -inline efloat& convert_unsigned(uint64_t v, efloat& result); -bool parse(const std::string& number, efloat& v); +enum class FloatingPointState { + Zero, + Normal, + SignalingNaN, // let's use the US English spelling + QuietNaN, + Infinite +}; // efloat is an adaptive precision linear floating-point type class efloat { public: - efloat() : _sign(false), exp(0), limb{ 0 } { } + efloat() : _state{ FloatingPointState::Zero }, _sign{ false }, _exponent{ 0 }, _limb{ 0 } { } efloat(const efloat&) = default; efloat(efloat&&) = default; @@ -120,7 +121,7 @@ class efloat { } // modifiers - void clear() { _sign = false; exp = 0; limb.clear(); } + void clear() { _state = FloatingPointState::Normal; _sign = false; _exponent = 0; _limb.clear(); } void setzero() { clear(); } efloat& assign(const std::string& txt) { @@ -128,22 +129,55 @@ class efloat { } // selectors - bool iszero() const { return !_sign && limb.size() == 0; } - bool isone() const { return true; } - bool isodd() const { return false; } - bool iseven() const { return !isodd(); } - bool ispos() const { return !_sign; } - bool isneg() const { return _sign; } + bool iszero() const noexcept { return _state == FloatingPointState::Zero; } + bool isone() const noexcept { return (_state == FloatingPointState::Normal && !_sign && _exponent == 0 && _limb.size() == 1 && _limb[0] == 0x8000'000); } + bool isodd() const noexcept { return false; } + bool iseven() const noexcept { return !isodd(); } + bool ispos() const noexcept { return (_state == FloatingPointState::Normal && !_sign); } + bool isneg() const noexcept { return (_state == FloatingPointState::Normal && _sign); } // value information selectors - int sign() const { return (_sign ? -1 : 1); } - int64_t scale() const { return exp; } - std::vector bits() const { return limb; } + int sign() const noexcept { return (_sign ? -1 : 1); } + int64_t scale() const noexcept { return _exponent; } + double significant() const noexcept { + // efloat is a normalized floating-point, thus the significant falls in the range [1.0, 2.0) + double v{ 0.0 }; + if (_state == FloatingPointState::Normal) { + // build a 64-bit bit representation + uint64_t raw{ 0 }; + switch (_limb.size()) { + case 0: + break; + case 1: + raw = _limb[0]; + raw <<= 32; + break; + case 2: + default: + raw = _limb[0]; + raw <<= 32; + raw |= _limb[1]; + break; + } + raw &= 0x7FFF'FFFF'FFFF'FFFF; // remove hidden bit + if (raw > 0) { + v = double(raw)/ 9223372036854775808.0; + } + v += 1.0; + } + // else { + // Zero, NaN or Infinity will return a significant value of 0.0 + // } + + return v; + } + std::vector bits() const { return _limb; } protected: - bool _sign; // sign of the number: -1 if true, +1 if false, zero is positive - int64_t exp; // exponent of the number - std::vector limb; // limbs of the representation + bool _sign; // sign of the number: -1 if true, +1 if false, zero is positive + int64_t _exponent; // exponent of the number + std::vector _limb; // limbs of the representation + FloatingPointState _state; // exceptional state // HELPER methods @@ -177,17 +211,17 @@ class efloat { efloat& convert_ieee754(Real rhs) noexcept { clear(); _sign = sw::universal::sign(rhs); - exp = sw::universal::scale(rhs); + _exponent = sw::universal::scale(rhs); if constexpr (sizeof(Real) == 4) { uint32_t bits = sw::universal::_extractSignificant(rhs); bits <<= 8; // 32 - 23 = 9 bits to get the hidden bit to land on bit 31 - limb.push_back(bits); + _limb.push_back(bits); } else if constexpr (sizeof(Real) == 8) { uint64_t bits = sw::universal::_extractSignificant(rhs); bits <<= 11; // 64 - 52 = 12 bits to get the hidden bit to land on bit 63 - limb.push_back(static_cast(bits >> 32)); - limb.push_back(static_cast(bits & 0xFFFF'FFFF)); + _limb.push_back(static_cast(bits >> 32)); + _limb.push_back(static_cast(bits & 0xFFFF'FFFF)); } else { static_assert(true); @@ -200,9 +234,24 @@ class efloat { template::value, Real >::type> Real convert_to_ieee754() const noexcept { - float f{ 0 }; - - return Real(f); + Real v{ 0.0 }; + switch (_state) { + case FloatingPointState::Zero: + break; + case FloatingPointState::QuietNaN: + v = std::numeric_limits::quiet_NaN(); + break; + case FloatingPointState::SignalingNaN: + v = std::numeric_limits::signaling_NaN(); + break; + case FloatingPointState::Infinite: + v = (_sign ? -std::numeric_limits::infinity() : +std::numeric_limits::infinity()); + break; + case FloatingPointState::Normal: + Real bla = Real(significant()); + v = Real(sign()) * std::pow(Real(2.0), Real(scale())) * Real(significant()); + } + return v; } private: