Skip to content

Commit

Permalink
WIP: conversion of subnormals to efloat
Browse files Browse the repository at this point in the history
  • Loading branch information
Ravenwater committed Nov 1, 2024
1 parent 9b45f14 commit 307ab83
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 29 deletions.
31 changes: 29 additions & 2 deletions elastic/efloat/api/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ try {
float_decoder d;
d.parts.sign = false;
d.parts.exponent = ieee754_parameter<float>::bias + 64;
d.parts.fraction = 0x7FFFFu << 8; // these are just the fraction bits, no hidden bit
d.parts.fraction = 0x7F'FF00u; // these are just the 23 fraction bits, no hidden bit
std::cout << "fraction bits : " << to_binary(d.parts.fraction, true) << '\n';
float f = d.f;
std::cout << "floating point : " << to_binary(f, true) << " : " << f << '\n';
Expand All @@ -72,9 +72,36 @@ try {
}

// default behavior
std::cout << "+--------- Default efloat has no subnormals, no supernormals and is not saturating\n";
std::cout << "+--------- Default efloat has no subnormals\n";
{
// Demonstration scope: build a subnormal float and a subnormal double from raw
// IEEE-754 fields, convert each into an efloat, and print what the efloat reports
// for its triple, sign, scale, and significant.
using TestType = efloat;

// create a subnormal
float v;
// arguments: sign = false, raw exponent field = 0, raw fraction field = 1
setFields(v, false, 0u, 0x00'0001u); // smallest subnormal single precision float
// disabled scaffolding for reading the fields back out of v
// bool s{ false };
// uint32_t e{ 0 };
// uint32_t f{ 0 };
// uint32_t bits{ 0 };
// extractFields(v, s, e, f, bits);
std::cout << "subnormal : " << to_binary(v) << " : " << v << '\n';

// construct the efloat directly from the subnormal float
TestType a{ v };

// print the efloat's components; the final column converts back to double for comparison
std::cout << "efloat triple : " << to_triple(a) << " : " << a.significant() << " : " << double(a) << '\n';
std::cout << "sign : " << sign(a) << '\n';
std::cout << "scale : 2^" << scale(a) << '\n';
std::cout << "significant : " << significant<float>(a) << "f\n";

// repeat with a double: sign = true, raw exponent field = 0, raw fraction field = 1,
// i.e. a negative value with the lowest fraction bit set and a zero exponent field
double dv;
setFields(dv, true, 0ull, 0x1ull);
std::cout << "floating point : " << to_binary(dv, true) << " : " << dv << '\n';
// reuse the same efloat object; NOTE(review): presumably this goes through the
// double conversion path of efloat -- confirm against the assignment operator
a = dv;

std::cout << "efloat triple : " << to_triple(a) << " : " << a.significant() << " : " << double(a) << '\n';
std::cout << "sign : " << sign(a) << '\n';
std::cout << "scale : 2^" << scale(a) << '\n';
std::cout << "significant : " << significant<float>(a) << "f\n";
}

// explicit configuration
Expand Down
24 changes: 6 additions & 18 deletions include/universal/native/extract_fields.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,8 @@ namespace sw { namespace universal {
#if BIT_CAST_IS_CONSTEXPR
#include <bit> // C++20 bit_cast

// Generic fallback for extractFields, used for any Real without a dedicated
// float/double/long double implementation.
//
// It cannot decode the exponent or fraction of an arbitrary type, so it only
// reports what can be derived from comparisons:
//   - s is always written: true when value < 0, false otherwise
//     (zero, positive values, and values that compare unordered such as NaN).
//   - when value == 0, rawExponentBits, rawFractionBits, and bits are set to 0.
//   - for any non-zero value, rawExponentBits, rawFractionBits, and bits are
//     left untouched; callers must not rely on them in that case.
template<typename Real>
inline BIT_CAST_CONSTEXPR void extractFields(Real value, bool& s, uint64_t& rawExponentBits, uint64_t& rawFractionBits, uint64_t& bits) noexcept {
	// always assign the sign so the caller never reads an indeterminate bool
	s = (value < 0);
	if (value == 0) {
		rawExponentBits = 0ull;
		rawFractionBits = 0ull;
		bits = 0ull; // zero is reported as the all-zero encoding, matching s == false
	}
}

// specialization to extract fields from a float
template<>
inline BIT_CAST_CONSTEXPR void extractFields(float value, bool& s, uint64_t& rawExponentBits, uint64_t& rawFractionBits, uint64_t& bits) noexcept {
inline BIT_CAST_CONSTEXPR void extractFields(float value, bool& s, uint32_t& rawExponentBits, uint32_t& rawFractionBits, uint32_t& bits) noexcept {
uint32_t bc = std::bit_cast<uint32_t, float>(value);
s = (ieee754_parameter<float>::smask & bc);
rawExponentBits = (ieee754_parameter<float>::emask & bc) >> ieee754_parameter<float>::fbits;
Expand All @@ -34,7 +23,6 @@ namespace sw { namespace universal {
}

// specialization to extract fields from a double
template<>
inline BIT_CAST_CONSTEXPR void extractFields(double value, bool& s, uint64_t& rawExponentBits, uint64_t& rawFractionBits, uint64_t& bits) noexcept {
uint64_t bc = std::bit_cast<uint64_t, double>(value);
s = (ieee754_parameter<double>::smask & bc);
Expand All @@ -48,7 +36,7 @@ namespace sw { namespace universal {
// Clang bit_cast<> can't deal with long double

#if defined(LONG_DOUBLE_DOWNCAST)
template<>

inline BIT_CAST_CONSTEXPR void extractFields(long double value, bool& s, uint64_t& rawExponentBits, uint64_t& rawFractionBits, uint64_t& bits) noexcept {
double d = static_cast<double>(value);
uint64_t bc = std::bit_cast<uint64_t, double>(d);
Expand All @@ -61,7 +49,7 @@ namespace sw { namespace universal {
/*
ETLO 8/1/2024: not able to make std::bit_cast<> work for long double
// specialization to extract fields from a long double
template<>
inline BIT_CAST_CONSTEXPR void extractFields(long double value, bool& s, uint64_t& rawExponentBits, uint64_t& rawFractionBits, uint64_t& bits) noexcept {
struct blob {
std::uint64_t hi;
Expand Down Expand Up @@ -92,12 +80,12 @@ namespace sw { namespace universal {
////////////////////////////////////////////////////////////////////////
// nonconst extractFields for single precision floating-point

inline void extractFields(float value, bool& s, uint64_t& rawExponentBits, uint64_t& rawFractionBits, uint64_t& bits) noexcept {
inline void extractFields(float value, bool& s, uint32_t& rawExponentBits, uint32_t& rawFractionBits, uint32_t& bits) noexcept {
float_decoder decoder;
decoder.f = value;
s = decoder.parts.sign ? true : false;
rawExponentBits = static_cast<uint64_t>(decoder.parts.exponent);
rawFractionBits = static_cast<uint64_t>(decoder.parts.fraction);
rawExponentBits = decoder.parts.exponent;
rawFractionBits = decoder.parts.fraction;
bits = uint64_t(decoder.bits);
}

Expand Down
24 changes: 23 additions & 1 deletion include/universal/native/set_fields.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,28 @@ namespace sw { namespace universal {
v = std::bit_cast<double, uint64_t>(raw);
}

////////////////////////////////////////////////////////////////////////
// constexpr setFields on single precision floating-point

// Assemble a single-precision value from raw IEEE-754 fields.
//   v               : receives the assembled float
//   s               : sign; true sets bit 31
//   rawExponentBits : biased exponent, only the low 8 bits are used (bits 30..23)
//   rawFractionBits : fraction without hidden bit, only the low 23 bits are used
inline void setFields(float& v, bool s, uint32_t rawExponentBits, uint32_t rawFractionBits) noexcept {
	const uint32_t signField     = s ? 0x8000'0000u : 0u;
	const uint32_t exponentField = (rawExponentBits & 0xFFu) << 23;
	const uint32_t fractionField = rawFractionBits & 0x7F'FFFFu;
	v = std::bit_cast<float, uint32_t>(signField | exponentField | fractionField);
}

////////////////////////////////////////////////////////////////////////
// constexpr setFields on double precision floating-point

// Assemble a double-precision value from raw IEEE-754 fields.
//   v               : receives the assembled double
//   s               : sign; true sets bit 63
//   rawExponentBits : biased exponent, only the low 11 bits are used (bits 62..52)
//   rawFractionBits : fraction without hidden bit, only the low 52 bits are used
inline void setFields(double& v, bool s, uint64_t rawExponentBits, uint64_t rawFractionBits) noexcept {
	// binary64 has an 11-bit exponent field: mask with 0x7FF, not the 8-bit float mask
	uint64_t raw = (rawExponentBits & 0x7FFull) << 52;
	raw |= (rawFractionBits & 0xF'FFFF'FFFF'FFFFull);
	if (s) raw |= 0x8000'0000'0000'0000ull;
	v = std::bit_cast<double, uint64_t>(raw);
}

#if LONG_DOUBLE_SUPPORT

// Clang bit_cast<> can't deal with long double
Expand Down Expand Up @@ -129,7 +151,7 @@ namespace sw { namespace universal {
////////////////////////////////////////////////////////////////////////
// nonconst setFields on single precision floating-point

inline void setFields(float& value, bool s, uint64_t rawExponentBits, uint64_t rawFractionBits) noexcept {
inline void setFields(float& value, bool s, uint32_t rawExponentBits, uint32_t rawFractionBits) noexcept {
float_decoder decoder;
decoder.parts.sign = s;
decoder.parts.exponent = rawExponentBits & 0xFF;
Expand Down
2 changes: 1 addition & 1 deletion include/universal/number/efloat/attributes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ namespace sw { namespace universal {

template<typename Real,
typename = typename std::enable_if< std::is_floating_point<Real>::value, Real >::type>
inline Real significant(const efloat& v) { return Real(v); }
inline Real significant(const efloat& v) { return static_cast<Real>(v.significant()); }

}} // namespace sw::universal
13 changes: 13 additions & 0 deletions include/universal/number/efloat/efloat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@
#ifndef _EFLOAT_STANDARD_HEADER_
#define _EFLOAT_STANDARD_HEADER_

////////////////////////////////////////////////////////////////////////////////////////
/// COMPILATION DIRECTIVES FOR DIFFERENT COMPILERS
#include <universal/utility/compiler.hpp>
#include <universal/utility/architecture.hpp>
#include <universal/utility/bit_cast.hpp>
#include <universal/utility/long_double.hpp>

////////////////////////////////////////////////////////////////////////////////////////
/// required std libraries
#include <iostream>
#include <iomanip>
#include <vector>

////////////////////////////////////////////////////////////////////////////////////////
/// BEHAVIORAL COMPILATION SWITCHES

Expand Down
63 changes: 56 additions & 7 deletions include/universal/number/efloat/efloat_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,10 @@ class efloat {
std::vector<uint32_t> bits() const { return _limb; }

protected:
FloatingPointState _state; // exceptional state
bool _sign; // sign of the number: -1 if true, +1 if false, zero is positive
int64_t _exponent; // exponent of the number
std::vector<uint32_t> _limb; // limbs of the representation
FloatingPointState _state; // exceptional state

// HELPER methods

Expand Down Expand Up @@ -210,16 +210,66 @@ class efloat {
typename = typename std::enable_if< std::is_floating_point<Real>::value, Real >::type>
efloat& convert_ieee754(Real rhs) noexcept {
clear();
bool isSubnormal{ false };
switch (std::fpclassify(rhs)) {
case FP_ZERO:
_state = FloatingPointState::Zero;
_sign = false;
_exponent = 0;
// stay limbless
return *this;
case FP_NAN:
_sign = sw::universal::sign(rhs);
_state = (_sign ? FloatingPointState::SignalingNaN : FloatingPointState::QuietNaN);
_exponent = 0;
// stay limbless
return *this;
case FP_INFINITE:
_state = FloatingPointState::Infinite;
_sign = false;
_exponent = 0;
// stay limbless
return *this;
case FP_SUBNORMAL:
isSubnormal = true;
break;
case FP_NORMAL:
default:
break;
}

_sign = sw::universal::sign(rhs);
_exponent = sw::universal::scale(rhs);
_exponent = sw::universal::scale(rhs); // scale already deals with subnormal numbers
if constexpr (sizeof(Real) == 4) {
uint32_t bits = sw::universal::_extractSignificant<uint32_t, Real>(rhs);
bits <<= 8; // 32 - 23 = 9 bits to get the hidden bit to land on bit 31
uint32_t bits{ 0 };
if (isSubnormal) { // subnormal number
bits = sw::universal::_extractFraction<uint32_t, Real>(rhs);
bits <<= 8; // 31 - 23 = 8 bits to get the hidden bit to land on bit 31
uint32_t mask = 0x8000'0000;
while ((mask & bits) == 0) {
bits <<= 1;
}
}
else {
bits = sw::universal::_extractSignificant<uint32_t, Real>(rhs);
bits <<= 8; // 31 - 23 = 8 bits to get the hidden bit to land on bit 31
}
_limb.push_back(bits);
}
else if constexpr (sizeof(Real) == 8) {
uint64_t bits = sw::universal::_extractSignificant<uint64_t, Real>(rhs);
bits <<= 11; // 64 - 52 = 12 bits to get the hidden bit to land on bit 63
uint64_t bits{ 0 };
if (isSubnormal) { // subnormal number
bits = sw::universal::_extractFraction<uint64_t, Real>(rhs);
bits <<= 11; // 63 - 52 = 11 bits to get the hidden bit to land on bit 63
uint64_t mask = 0x8000'0000'0000'0000;
while ((mask & bits) == 0) {
bits <<= 1;
}
}
else {
bits = sw::universal::_extractSignificant<uint64_t, Real>(rhs);
bits <<= 11; // 63 - 52 = 11 bits to get the hidden bit to land on bit 63
}
_limb.push_back(static_cast<uint32_t>(bits >> 32));
_limb.push_back(static_cast<uint32_t>(bits & 0xFFFF'FFFF));
}
Expand Down Expand Up @@ -248,7 +298,6 @@ class efloat {
v = (_sign ? -std::numeric_limits<Real>::infinity() : +std::numeric_limits<Real>::infinity());
break;
case FloatingPointState::Normal:
Real bla = Real(significant());
v = Real(sign()) * std::pow(Real(2.0), Real(scale())) * Real(significant());
}
return v;
Expand Down

0 comments on commit 307ab83

Please sign in to comment.