Simple C++ header-only library for Fixed-Point Number operations
C version here, designed to be compact
- include/FixedPointNumber.hpp
  - the header-only library of class FixedPointNumber
- src/FixedPointNumberTest.cpp
  - the test of class FixedPointNumber
- src/FixedPointNumberDemo.cpp
  - the demo of 1D-convolution using class FixedPointNumber
  - more examples will be listed below
- makefile
Include library
#include "FixedPointNumber.hpp"
MUST be compiled with C++17 or above, e.g.,
g++ YourCPP.cpp --std=c++17
- sign_bit is always of width 1
- int_bits is the width of the integer part, in bits
- frac_bits is the width of the fraction part, in bits
The example below instantiates a FixedPointNumber which has
- a 1-bit sign_bit
- X-bit int_bits
- Y-bit frac_bits
FixedPointNumber<X, Y> fp(...);
And the constructor arguments can be of type
- uint32_t
- int32_t
- double
- FixedPointNumber<V, W>
  - This will construct this FixedPointNumber<X, Y> from the value of another object which is a FixedPointNumber<V, W>
e.g.
FixedPointNumber<3, 16> fp1(0xFFF9EU);
FixedPointNumber<7, 8> fp2(0x0067);
FixedPointNumber<7, 8> fp3(1.2345678);
FixedPointNumber<3, 16> fp4(fp3);
// support operator=
FixedPointNumber<3, 16> fp5 = 1.2345678;
FixedPointNumber<3, 16> fp6 = fp1;
- to_double
FixedPointNumber<3, 16> fp(3.14159265359);
cout << setprecision(10) << fp.to_double() << endl;
// output:
// 3.141586304
- get_value
FixedPointNumber<7, 8> fp(0x1234);
cout << "0x" << hex << fp.get_value() << endl;
// output:
// 0x1234
- operator- (negative)
FixedPointNumber<3, 16> fp(3.14159265359);
cout << setprecision(10) << (-fp).to_double() << endl;
// output:
// -3.141586304
- operator+
FixedPointNumber<3, 16> fp1(3.14159265359);
FixedPointNumber<3, 16> fp2(2.71828182846);
cout << setprecision(10) << (fp1+fp2).to_double() << endl;
FixedPointNumber<3, 16> fp3(3.14159265359);
FixedPointNumber<7, 8> fp4(2.71828182846);
cout << setprecision(10) << (fp3+fp4).to_double() << endl; // result will be FixedPointNumber<3, 16>
cout << setprecision(10) << (fp4+fp3).to_double() << endl; // result will be FixedPointNumber<7, 8>
// output:
// 5.859863281
// 5.856430054
// 5.85546875
- operator*
FixedPointNumber<7, 8> fp1(3.14159265359);
FixedPointNumber<7, 8> fp2(2.71828182846);
cout << setprecision(10) << (fp1*fp2).to_double() << endl;
FixedPointNumber<7, 8> fp3(3.14159265359);
FixedPointNumber<5, 10> fp4(2.71828182846);
cout << setprecision(10) << (fp3*fp4).to_double() << endl; // result will be FixedPointNumber<7, 8>
cout << setprecision(10) << (fp4*fp3).to_double() << endl; // result will be FixedPointNumber<5, 10>
// output:
// 8.5234375
// 8.5234375
// 8.53515625
- operator<<
FixedPointNumber<7, 8> fp1(0x1234);
cout << fp1 << endl;
cout << fp1.to_double() << endl;
FixedPointNumber<7, 8> fp2(18.203125);
cout << fp2 << endl;
cout << fp2.to_double() << endl;
// output:
// 0x1234
// 18.203125
// 0x1234
// 18.203125