support compile-time bits on BitPacked mappings
bernhardmgruber committed Dec 23, 2021
1 parent e3c3a1c commit 4c98c67
Showing 14 changed files with 356 additions and 111 deletions.
2 changes: 1 addition & 1 deletion examples/bitpackfloat/bitpackfloat.cpp
@@ -21,7 +21,7 @@ auto main() -> int
constexpr auto exponentBits = 5;
constexpr auto mantissaBits = 13;
const auto mapping
= llama::mapping::BitPackedFloatSoA{exponentBits, mantissaBits, llama::ArrayExtents<llama::dyn>{N}, Vector{}};
= llama::mapping::BitPackedFloatSoA{llama::ArrayExtents<llama::dyn>{N}, exponentBits, mantissaBits, Vector{}};

auto view = llama::allocView(mapping);

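The reordered constructor above keeps the bit counts as runtime values; what this commit adds is the option to pass them as llama::Constant instead, so they become part of the mapping's type. A minimal sketch (not part of the diff), reusing N, Vector, exponentBits and mantissaBits from the example and mirroring the explicit template-argument style of the nbody change further down; the name compileTimeMapping is ours:

const auto compileTimeMapping = llama::mapping::BitPackedFloatSoA<
    llama::ArrayExtents<llama::dyn>,
    Vector,
    llama::Constant<exponentBits>,
    llama::Constant<mantissaBits>>{llama::ArrayExtents<llama::dyn>{N}}; // bit counts fixed at compile time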
3 changes: 2 additions & 1 deletion examples/bitpackint/bitpackint.cpp
@@ -21,7 +21,8 @@ auto main() -> int
{
constexpr auto N = 128;
constexpr auto bits = 7;
const auto mapping = llama::mapping::BitPackedIntSoA<llama::ArrayExtentsDynamic<1>, Vector>{bits, {N}};
const auto mapping
= llama::mapping::BitPackedIntSoA<llama::ArrayExtentsDynamic<1>, Vector, llama::Constant<bits>>{{N}};

auto view = llama::allocView(mapping);

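The bit count now appears as the template argument llama::Constant<bits> instead of as a constructor argument. llama::Constant is introduced in the Core.hpp hunk below and is nothing more than a std::integral_constant, so the 7 is carried in the mapping's type rather than stored at runtime. A small check (assumes <type_traits> is included):

static_assert(std::is_same_v<llama::Constant<bits>, std::integral_constant<int, 7>>);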
6 changes: 3 additions & 3 deletions examples/daxpy/daxpy.cpp
@@ -116,9 +116,9 @@ set ylabel "runtime [s]"
llama::mapping::PreconfiguredAoS<>::type,
boost::mp11::mp_list<boost::mp11::mp_list<double, float>>>{extents},
plotFile);
daxpy_llama("Bitpack 52^{11}", llama::mapping::BitPackedFloatSoA{11, 52, extents, double{}}, plotFile);
daxpy_llama("Bitpack 23^{8}", llama::mapping::BitPackedFloatSoA{8, 23, extents, double{}}, plotFile);
daxpy_llama("Bitpack 10^{5}", llama::mapping::BitPackedFloatSoA{5, 10, extents, double{}}, plotFile);
daxpy_llama("Bitpack 52^{11}", llama::mapping::BitPackedFloatSoA{extents, 11, 52, double{}}, plotFile);
daxpy_llama("Bitpack 23^{8}", llama::mapping::BitPackedFloatSoA{extents, 8, 23, double{}}, plotFile);
daxpy_llama("Bitpack 10^{5}", llama::mapping::BitPackedFloatSoA{extents, 5, 10, double{}}, plotFile);

plotFile << R"(EOD
plot $data using 2:xtic(1)
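The labels name the packed layouts as mantissa^{exponent}. Reading the calls above (arguments are extents, exponent bits, mantissa bits) and adding the implicit sign bit, each stored value occupies 1 + exponent + mantissa bits:

// "Bitpack 52^{11}": 1 + 11 + 52 = 64 bits, the same split as IEEE 754 double
// "Bitpack 23^{8}":  1 + 8 + 23  = 32 bits, the same split as IEEE 754 float (binary32)
// "Bitpack 10^{5}":  1 + 5 + 10  = 16 bits, the same split as IEEE 754 half (binary16)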
7 changes: 6 additions & 1 deletion examples/nbody/nbody.cpp
@@ -118,6 +118,8 @@ namespace usellama
return "ByteSplit SoA MB";
if(m == 7)
return "BitPack SoA 11e4";
if(m == 8)
return "BitPack SoA 11e4 CT";
std::abort();
};
auto title = "LLAMA " + mappingName(Mapping);
@@ -150,7 +152,10 @@ namespace usellama
return llama::mapping::Bytesplit<ArrayExtents, Particle, llama::mapping::PreconfiguredSoA<>::type>{
extents};
if constexpr(Mapping == 7)
return llama::mapping::BitPackedFloatSoA<ArrayExtents, Particle>{4, 11, extents};
return llama::mapping::BitPackedFloatSoA<ArrayExtents, Particle>{extents, 4, 11};
if constexpr(Mapping == 8)
return llama::mapping::
BitPackedFloatSoA<ArrayExtents, Particle, llama::Constant<4>, llama::Constant<11>>{extents};
}();
if constexpr(DUMP_MAPPING)
std::ofstream(title + ".svg") << llama::toSvg(mapping);
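Both mapping 7 and the new mapping 8 are computed mappings, so every element access goes through the BitPackedFloatRef proxy shown in the BitPackedFloatSoA.hpp hunks below, which repacks the value on each load and store. A hedged usage sketch; the tag names and the float literal are assumptions about the nbody record dimension, not taken from the diff:

auto view = llama::allocView(mapping);
view(0)(tag::Pos{}, tag::X{}) = 1.0f;          // goes through BitPackedFloatRef::operator=(Float), packing the value
const float x = view(0)(tag::Pos{}, tag::X{}); // goes through operator Float(), unpacking it again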
45 changes: 45 additions & 0 deletions include/llama/Core.hpp
@@ -693,4 +693,49 @@ namespace llama
/// Alias for ToT, adding `const` if FromT is const qualified.
template<typename FromT, typename ToT>
using CopyConst = std::conditional_t<std::is_const_v<FromT>, const ToT, ToT>;

/// Used as template argument to specify a constant/compile-time value.
template<auto V>
using Constant = std::integral_constant<decltype(V), V>;

namespace internal
{
/// Holds a value of type T. Is useful as a base class. Is specialized for llama::Constant to not store the
/// value at runtime. \tparam T Type of value to store. \tparam I Is used to disambiguate multiple BoxedValue
/// base classes.
template<typename T, int I = 0>
struct BoxedValue
{
BoxedValue() = default;

// we don't make this ctor explicit so a Value appearing in a ctor list can just be created by passing a T
BoxedValue(T value) : val(value)
{
}

constexpr auto value() const
{
return val;
}

private:
T val = {};
};

template<auto V, int I>
struct BoxedValue<Constant<V>, I>
{
BoxedValue() = default;

// we don't make this ctor explicit so a Value appearing in a ctor list can just be created by passing a T
BoxedValue(Constant<V>)
{
}

static constexpr auto value()
{
return V;
}
};
} // namespace internal
} // namespace llama
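A minimal sketch of the difference between the two BoxedValue flavours added above; the names are internal and shown only for illustration (assumes <type_traits> is included):

llama::internal::BoxedValue<unsigned> runtimeBits{7u};      // holds the 7 in a data member
llama::internal::BoxedValue<llama::Constant<7>> staticBits; // holds nothing, value() is static
static_assert(std::is_empty_v<decltype(staticBits)>);       // empty type, so it vanishes as a base class
// both expose the same interface: runtimeBits.value() == 7u, staticBits.value() == 7

This is what lets BitPackedFloatSoA below inherit privately from its two BoxedValue bases and pay no per-object storage for bit counts given as llama::Constant.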
130 changes: 82 additions & 48 deletions include/llama/mapping/BitPackedFloatSoA.hpp
@@ -4,7 +4,9 @@

#include "../ProxyRefOpMixin.hpp"
#include "BitPackedIntSoA.hpp"
#include "Common.hpp"

#include <algorithm>
#include <climits>
#include <cstdint>
#include <cstring>
@@ -78,12 +80,28 @@ namespace llama::mapping
return outFloat;
}

// TODO: Boost.Hana generalizes these sorts of computations on mixed constants and values
template<typename E, typename M>
auto integBits(E e, M m)
{
return llama::internal::BoxedValue{e.value() + m.value() + 1};
}

template<auto E, auto M>
auto integBits(llama::internal::BoxedValue<Constant<E>>, llama::internal::BoxedValue<Constant<M>>)
{
return llama::internal::BoxedValue<Constant<E + M + 1>>{};
}

/// A proxy type representing a reference to a reduced precision floating-point value, stored in a buffer at a
/// specified bit offset.
/// @tparam Integral Integral data type which can be loaded and stored through this reference.
/// @tparam Float Floating-point data type which can be loaded and stored through this reference.
/// @tparam StoredIntegralPointer Pointer to integral type used for storing the bits.
template<typename Float, typename StoredIntegralPointer>
struct BitPackedFloatRef : ProxyRefOpMixin<BitPackedFloatRef<Float, StoredIntegralPointer>, Float>
template<typename Float, typename StoredIntegralPointer, typename VHExp, typename VHMan>
struct LLAMA_DECLSPEC_EMPTY_BASES BitPackedFloatRef
: private VHExp
, private VHMan
, ProxyRefOpMixin<BitPackedFloatRef<Float, StoredIntegralPointer, VHExp, VHMan>, Float>
{
private:
static_assert(
@@ -95,52 +113,57 @@ namespace llama::mapping

using FloatBits = std::conditional_t<std::is_same_v<Float, float>, std::uint32_t, std::uint64_t>;

internal::BitPackedIntRef<FloatBits, StoredIntegralPointer> intref;
unsigned exponentBits = 0;
unsigned mantissaBits = 0;
BitPackedIntRef<
FloatBits,
StoredIntegralPointer,
decltype(integBits(std::declval<VHExp>(), std::declval<VHMan>()))>
intref;

public:
using value_type = Float;

LLAMA_FN_HOST_ACC_INLINE constexpr BitPackedFloatRef(
StoredIntegralPointer p,
std::size_t bitOffset,
unsigned exponentBits,
unsigned mantissaBits
StoredIntegralPointer p,
std::size_t bitOffset,
VHExp vhExp,
VHMan vhMan
#ifndef NDEBUG
,
StoredIntegralPointer endPtr
,
StoredIntegralPointer endPtr
#endif
)
: intref{p, bitOffset, exponentBits + mantissaBits + 1,
)
: VHExp{vhExp}
, VHMan{vhMan}
, intref{
p,
bitOffset,
integBits(vhExp, vhMan),
#ifndef NDEBUG
endPtr
endPtr
#endif
}
, exponentBits(exponentBits)
, mantissaBits(mantissaBits)
}
{
}

// NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions)
LLAMA_FN_HOST_ACC_INLINE constexpr operator Float() const
{
using Bits = internal::FloatBitTraits<Float>;
using Bits = FloatBitTraits<Float>;
const FloatBits packedFloat = intref;
const FloatBits unpackedFloat
= repackFloat(packedFloat, mantissaBits, exponentBits, Bits::mantissa, Bits::exponent);
= repackFloat(packedFloat, VHMan::value(), VHExp::value(), Bits::mantissa, Bits::exponent);
Float f;
std::memcpy(&f, &unpackedFloat, sizeof(Float));
return f;
}

LLAMA_FN_HOST_ACC_INLINE constexpr auto operator=(Float f) -> BitPackedFloatRef&
{
using Bits = internal::FloatBitTraits<Float>;
using Bits = FloatBitTraits<Float>;
FloatBits unpackedFloat = 0;
std::memcpy(&unpackedFloat, &f, sizeof(Float));
const FloatBits packedFloat
= repackFloat(unpackedFloat, Bits::mantissa, Bits::exponent, mantissaBits, exponentBits);
= repackFloat(unpackedFloat, Bits::mantissa, Bits::exponent, VHMan::value(), VHExp::value());
intref = packedFloat;
return *this;
}
@@ -152,33 +175,49 @@
/// IEEE 754. Infinity and NAN are supported. If the packed exponent bits are not big enough to hold a number, it
/// will be set to infinity (preserving the sign). If your record dimension contains non-floating-point types,
/// split them off using the \ref Split mapping first.
/// \tparam ExponentBits If ExponentBits is llama::Constant<N>, the compile-time N specifies the number of bits to
/// use to store the exponent. If ExponentBits is llama::Value<T>, the number of bits is specified at runtime,
/// passed to the constructor and stored as type T.
/// \tparam MantissaBits Like ExponentBits but for the mantissa bits.
/// \tparam LinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and how
/// big the linear domain gets.
/// \tparam StoredIntegral Integral type used as storage of reduced precision floating-point values.
// TODO(bgruber): we could also split each float in the record dimension into 3 integers, sign bit, exponent and
// mantissa. might not be efficient though
template<
typename TArrayExtents,
typename TRecordDim,
typename LinearizeArrayDimsFunctor = llama::mapping::LinearizeArrayDimsCpp,
typename ExponentBits = unsigned,
typename MantissaBits = unsigned,
typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
typename StoredIntegral = std::conditional_t<
boost::mp11::mp_contains<llama::FlatRecordDim<TRecordDim>, double>::value,
boost::mp11::mp_contains<FlatRecordDim<TRecordDim>, double>::value,
std::uint64_t,
std::uint32_t>>
struct BitPackedFloatSoA : TArrayExtents
struct LLAMA_DECLSPEC_EMPTY_BASES BitPackedFloatSoA
: TArrayExtents
, llama::internal::BoxedValue<ExponentBits, 0>
, llama::internal::BoxedValue<MantissaBits, 1>
{
private:
using VHExp = llama::internal::BoxedValue<ExponentBits, 0>;
using VHMan = llama::internal::BoxedValue<MantissaBits, 1>;

public:
using ArrayExtents = TArrayExtents;
using ArrayIndex = typename ArrayExtents::Index;
using RecordDim = TRecordDim;
static constexpr std::size_t blobCount = boost::mp11::mp_size<llama::FlatRecordDim<RecordDim>>::value;
static constexpr std::size_t blobCount = boost::mp11::mp_size<FlatRecordDim<RecordDim>>::value;

constexpr BitPackedFloatSoA() = default;

LLAMA_FN_HOST_ACC_INLINE
constexpr BitPackedFloatSoA(unsigned exponentBits, unsigned mantissaBits, ArrayExtents extents, RecordDim = {})
constexpr BitPackedFloatSoA(
ArrayExtents extents,
ExponentBits exponentBits = {},
MantissaBits mantissaBits = {},
RecordDim = {})
: ArrayExtents(extents)
, exponentBits{exponentBits}
, mantissaBits{mantissaBits}
, VHExp{exponentBits}
, VHMan{mantissaBits}
{
}

@@ -191,41 +230,36 @@
constexpr auto blobSize(std::size_t /*blobIndex*/) const -> std::size_t
{
constexpr auto bitsPerStoredIntegral = sizeof(StoredIntegral) * CHAR_BIT;
const auto bitsNeeded = LinearizeArrayDimsFunctor{}.size(extents()) * (exponentBits + mantissaBits + 1);
const auto bitsNeeded
= LinearizeArrayDimsFunctor{}.size(extents()) * (VHExp::value() + VHMan::value() + 1);
return roundUpToMultiple(bitsNeeded, bitsPerStoredIntegral) / CHAR_BIT;
}

template<std::size_t... RecordCoords>
static constexpr auto isComputed(llama::RecordCoord<RecordCoords...>)
static constexpr auto isComputed(RecordCoord<RecordCoords...>)
{
return true;
}

template<std::size_t... RecordCoords, typename Blobs>
LLAMA_FN_HOST_ACC_INLINE constexpr auto compute(
ArrayIndex ai,
llama::RecordCoord<RecordCoords...>,
Blobs& blobs) const
LLAMA_FN_HOST_ACC_INLINE constexpr auto compute(ArrayIndex ai, RecordCoord<RecordCoords...>, Blobs& blobs)
const
{
constexpr auto blob = llama::flatRecordCoord<RecordDim, llama::RecordCoord<RecordCoords...>>;
const auto bitOffset = LinearizeArrayDimsFunctor{}(ai, extents()) * (exponentBits + mantissaBits + 1);
constexpr auto blob = llama::flatRecordCoord<RecordDim, RecordCoord<RecordCoords...>>;
const auto bitOffset = LinearizeArrayDimsFunctor{}(ai, extents()) * (VHExp::value() + VHMan::value() + 1);

using QualifiedStoredIntegral = CopyConst<Blobs, StoredIntegral>;
using DstType = llama::GetType<RecordDim, llama::RecordCoord<RecordCoords...>>;
return internal::BitPackedFloatRef<DstType, QualifiedStoredIntegral*>{
using DstType = GetType<RecordDim, RecordCoord<RecordCoords...>>;
return internal::BitPackedFloatRef<DstType, QualifiedStoredIntegral*, VHExp, VHMan>{
reinterpret_cast<QualifiedStoredIntegral*>(&blobs[blob][0]),
bitOffset,
exponentBits,
mantissaBits
static_cast<VHExp>(*this),
static_cast<VHMan>(*this)
#ifndef NDEBUG
,
,
reinterpret_cast<QualifiedStoredIntegral*>(&blobs[blob][0] + blobSize(blob))
#endif
};
}

private:
unsigned exponentBits = 0;
unsigned mantissaBits = 0;
};
} // namespace llama::mapping
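To see how internal::integBits (defined earlier in this file's diff) propagates compile-time bit counts into the intref member, here is a sketch assuming the namespaces shown above (llama::internal for BoxedValue, llama::mapping::internal for integBits); the variable names are ours:

// runtime case: the total width 4 + 11 + 1 = 16 is computed and stored at runtime
auto rt = llama::mapping::internal::integBits(
    llama::internal::BoxedValue<unsigned>{4u},
    llama::internal::BoxedValue<unsigned>{11u}); // rt.value() == 16u
// compile-time case: the more specialized overload is chosen and returns a BoxedValue<Constant<16>>
auto ct = llama::mapping::internal::integBits(
    llama::internal::BoxedValue<llama::Constant<4>>{},
    llama::internal::BoxedValue<llama::Constant<11>>{});
static_assert(decltype(ct)::value() == 16);

BitPackedFloatRef then uses decltype(integBits(...)) as the third template argument of BitPackedIntRef, so a compile-time width flows through to the integer packing layer as well.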
(The remaining 8 changed files are not shown.)
