Skip to content

Commit

Permalink
[core] Convert operator supports u2, u3, u6 types (openvinotoolkit#…
Browse files Browse the repository at this point in the history
…23490)

### Details:
 - Add new types `u2, u3, u6` to Convert operator.
- Expand `element::Iterator` to support NF4 and `BitProxy` to support
NF4 conversion.
 - Update tensor to correctly calculate the byte size for `u3, u6` types.
- Fix NF4 conversion to always use byte pack/unpack and quantization
when converting to/from floating point. In the future, the conversion for NF4
will be limited to f32 -> NF4.

### Tickets:
 - [CVS-127000](https://jira.devtools.intel.com/browse/CVS-127000)
 - [CVS-128024](https://jira.devtools.intel.com/browse/CVS-128024)

---------

Co-authored-by: Michal Lukaszewski <michal.lukaszewski@intel.com>
  • Loading branch information
2 people authored and bbielawx committed Apr 12, 2024
1 parent 859d90a commit 4c396bd
Show file tree
Hide file tree
Showing 10 changed files with 1,061 additions and 133 deletions.
84 changes: 81 additions & 3 deletions src/core/dev_api/openvino/core/type/element_iterator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#pragma once

#include "openvino/core/type/element_type_traits.hpp"
#include "openvino/core/type/nf4.hpp"

namespace ov {
namespace util {
Expand Down Expand Up @@ -96,6 +97,11 @@ constexpr size_t bit_width<Type_t::u4>() {
return 4;
}

/**
* @brief Gets bit width of the nf4 element type.
* @return Number of bits used by a single nf4 element (4).
*/
template <>
constexpr size_t bit_width<Type_t::nf4>() {
return 4;
}

template <>
constexpr size_t bit_width<Type_t::i4>() {
return 4;
Expand Down Expand Up @@ -147,6 +153,12 @@ class BitProxy<T, ET, typename std::enable_if<is_bit_type(ET) || is_nibble_type(
return (*m_ptr >> m_bit_shift) & value_mask;
}

/**
* @brief Stores raw bit value in the element's bit field of the byte pointed by `m_ptr`.
*
* @note The value is not masked in this function. Bits of `value` above the mask width are OR-ed
* into the byte outside the element's bit field, so the caller has to pass an in-range value.
*
* @param value Raw bit value to store.
*/
void set_bit_value(uint8_t value) {
// NOTE(review): value_mask presumably has the `m_bits` lowest bits set - confirm in util::make_n_bit_mask.
constexpr auto value_mask = util::make_n_bit_mask(m_bits);
// Clear the element's bit field, bits outside the shifted mask are preserved.
*m_ptr &= ~(value_mask << m_bit_shift);
// Put new bits into the cleared field.
*m_ptr |= value << m_bit_shift;
}

public:
using value_type = typename std::decay<T>::type; //!< Fundamental type of bound to BitProxy.

Expand Down Expand Up @@ -182,6 +194,19 @@ class BitProxy<T, ET, typename std::enable_if<is_bit_type(ET) || is_nibble_type(
return static_cast<value_type>(get_bit_value());
}

/**
* @brief Converts stored NF4 bit value to float using de-quantization.
*
* The operator is enabled only when ET is nf4 (see the enable_if condition on ETT).
*
* @note Implementation aligned to ConvertNF4::unpack, de-quantization applied only when converting to
* floating point. For integral types get bit value.
*
* @return Result of ConvertNF4::dequantize called for the stored bit value.
*/
template <Type_t ETT = ET, typename std::enable_if<ETT == nf4>::type* = nullptr>
operator float() const {
return ConvertNF4::dequantize(get_bit_value());
}

/**
* @brief Converts to fundamental type.
*
Expand All @@ -207,8 +232,17 @@ class BitProxy<T, ET, typename std::enable_if<is_bit_type(ET) || is_nibble_type(
*/
BitProxy<T, ET>& operator=(const value_type v) {
constexpr auto value_mask = util::make_n_bit_mask(m_bits);
*m_ptr &= ~(value_mask << m_bit_shift);
*m_ptr |= (static_cast<uint8_t>(v) & value_mask) << m_bit_shift;
set_bit_value(static_cast<uint8_t>(v) & value_mask);
return *this;
}

/**
* @brief Sets current NF4 value from float using quantization.
*
* The operator is enabled only when ET is nf4 (see the enable_if condition on ETT).
*
* @param v Value to be set, it is quantized by ConvertNF4::quantize before it is stored.
* @return Reference to this proxy.
*/
template <Type_t ETT = ET, typename std::enable_if<ETT == nf4>::type* = nullptr>
BitProxy<T, ET>& operator=(const float v) {
set_bit_value(ConvertNF4::quantize(v));
return *this;
}
};
Expand Down Expand Up @@ -490,13 +524,57 @@ class Iterator {
*
* @tparam ET Type of ov::element::Type_t.
* @tparam T Type of pointer data. Must be fundamental type of ET.
*
* @param ptr Pointer to data.
* @return Element iterator for type ET.
*/
template <Type_t ET, class T, typename std::enable_if<!is_byte_type(ET) && ET != string>::type* = nullptr>
constexpr Iterator<ET, T> iterator(T* ptr) {
// Overload enabled for element types which are neither byte types nor string: wrap pointer in Iterator<ET, T>.
return {ptr};
}

/**
* @brief Make iterator from pointer for standard types.
*
* To have common interface for all ov::element::Type. Just return input pointer.
* This overload is enabled when ET is a byte type or string (see the enable_if condition).
*
* @tparam ET Type of ov::element::Type_t.
* @tparam T Type of pointer data. Must be fundamental type of ET.
*
* @param ptr Pointer to data.
* @return Element iterator same as input pointer.
*/
template <Type_t ET, class T, typename std::enable_if<is_byte_type(ET) || ET == string>::type* = nullptr>
constexpr T* iterator(T* ptr) {
return ptr;
}

/**
* @brief Make iterator from void pointer.
*
* Data will be reinterpreted using fundamental type for ov::element::Type.
*
* @tparam ET OpenVINO element type.
* @tparam T Type used to reinterpret the data. By default the fundamental type of ET.
* @param ptr Pointer to data.
* @return Iterator for given ET, the same as returned by iterator<ET, T> for the typed pointer.
*/
template <Type_t ET, class T = ov::fundamental_type_for<ET>>
constexpr auto iterator(void* ptr) -> decltype(iterator<ET, T>(reinterpret_cast<T*>(ptr))) {
return iterator<ET, T>(reinterpret_cast<T*>(ptr));
}

/**
* @brief Make iterator from constant void pointer.
*
* Data will be reinterpreted using fundamental type for ov::element::Type.
*
* @tparam ET OpenVINO element type.
* @tparam T Type used to reinterpret the data. By default the const-qualified fundamental type of ET,
* so the cast does not remove constness of the input pointer.
* @param ptr Pointer to data.
* @return Iterator for given ET, the same as returned by iterator<ET, T> for the typed pointer.
*/
template <Type_t ET, class T = typename std::add_const<ov::fundamental_type_for<ET>>::type>
constexpr auto iterator(const void* ptr) -> decltype(iterator<ET, T>(reinterpret_cast<T*>(ptr))) {
return iterator<ET, T>(reinterpret_cast<T*>(ptr));
}
} // namespace element
} // namespace ov
105 changes: 24 additions & 81 deletions src/core/reference/include/openvino/reference/convert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,94 +7,23 @@
#include <algorithm>
#include <cstddef>

#include "openvino/core/type/element_iterator.hpp"
#include "openvino/core/type/element_type.hpp"
#include "openvino/core/type/float16.hpp"
#include "openvino/core/type/nf4.hpp"

namespace ov {
namespace reference {
namespace detail {
/**
 * @brief Writes a single u1 element into a packed bit buffer.
 *
 * Bits are stored in reversed order inside each byte: element 0 occupies the most significant bit.
 *
 * @param buf Packed buffer, eight elements per byte.
 * @param idx Index of the element to write.
 * @param val Any non-zero value sets the bit, zero clears it.
 */
inline void set_u1(uint8_t* buf, size_t idx, uint8_t val) {
    uint8_t& target = buf[idx / 8];
    // 0x80 >> n selects bit (7 - n), i.e. the reversed bit order.
    const uint8_t bit_mask = static_cast<uint8_t>(0x80u >> (idx % 8));
    if (val != 0) {
        target = static_cast<uint8_t>(target | bit_mask);
        return;
    }
    target = static_cast<uint8_t>(target & ~bit_mask);
}

/**
 * @brief Reads a single u1 element from a packed bit buffer.
 *
 * Bits are stored in reversed order inside each byte: element 0 occupies the most significant bit.
 *
 * @param buf Packed buffer, eight elements per byte.
 * @param idx Index of the element to read.
 * @return 1 when the bit is set, otherwise 0.
 */
inline uint8_t get_u1(const uint8_t* buf, size_t idx) {
    const unsigned shift = 7u - static_cast<unsigned>(idx % 8);
    const unsigned packed = buf[idx / 8];
    return static_cast<uint8_t>((packed >> shift) & 1u);
}

/**
 * @brief Writes a single u4 element into a packed nibble buffer.
 *
 * Even indices occupy the low nibble of a byte, odd indices the high nibble.
 *
 * @param buf Packed buffer, two elements per byte.
 * @param idx Index of the element to write.
 * @param val Value to store, only the four lowest bits are used.
 */
inline void set_u4(uint8_t* buf, size_t idx, uint8_t val) {
    uint8_t& packed = buf[idx / 2];
    const uint8_t nibble = static_cast<uint8_t>(val & 0x0F);
    if (idx % 2 == 0) {
        // Keep the high nibble, replace the low one.
        packed = static_cast<uint8_t>((packed & 0xF0) | nibble);
    } else {
        // Keep the low nibble, replace the high one.
        packed = static_cast<uint8_t>((packed & 0x0F) | (nibble << 4));
    }
}

/**
 * @brief Reads a single u4 element from a packed nibble buffer.
 *
 * Even indices occupy the low nibble of a byte, odd indices the high nibble.
 *
 * @param buf Packed buffer, two elements per byte.
 * @param idx Index of the element to read.
 * @return Element value in range [0, 15].
 */
inline uint8_t get_u4(const uint8_t* buf, size_t idx) {
    const uint8_t packed = buf[idx / 2];
    if (idx % 2 == 0) {
        return static_cast<uint8_t>(packed & 0x0F);
    }
    return static_cast<uint8_t>(packed >> 4);
}

/**
 * @brief Writes a single i4 element into a packed nibble buffer.
 *
 * Even indices occupy the low nibble of a byte, odd indices the high nibble.
 * The value is stored as its four lowest bits (two's complement nibble).
 *
 * @param buf Packed buffer, two elements per byte.
 * @param idx Index of the element to write.
 * @param val Value to store, only the four lowest bits are used.
 */
inline void set_i4(uint8_t* buf, size_t idx, int8_t val) {
    const unsigned shift = (idx % 2 != 0) ? 4u : 0u;
    const unsigned nibble = static_cast<unsigned>(val) & 0xFu;
    uint8_t& packed = buf[idx / 2];
    // Drop the old nibble first, then merge the new one in its place.
    const unsigned kept = packed & ~(0xFu << shift);
    packed = static_cast<uint8_t>(kept | (nibble << shift));
}

/**
 * @brief Reads a single i4 element from a packed nibble buffer with sign extension.
 *
 * Even indices occupy the low nibble of a byte, odd indices the high nibble.
 *
 * @param buf Packed buffer, two elements per byte.
 * @param idx Index of the element to read.
 * @return Element value in range [-8, 7].
 */
inline int8_t get_i4(const uint8_t* buf, size_t idx) {
    const unsigned shift = (idx % 2 != 0) ? 4u : 0u;
    const unsigned nibble = (static_cast<unsigned>(buf[idx / 2]) >> shift) & 0xFu;
    // Bit 3 is the sign bit of the nibble: replicate it into the upper four bits.
    const bool is_negative = (nibble & 0x8u) != 0;
    const uint8_t extended = static_cast<uint8_t>(is_negative ? (nibble | 0xF0u) : nibble);
    return static_cast<int8_t>(extended);
}
template <typename TO, typename TI>
TO get_value(const uint8_t* buf, size_t idx, element::Type from_type) {
if (from_type == element::u1) {
return detail::get_u1(buf, idx);
}

if (from_type == element::u4) {
return detail::get_u4(buf, idx);
}

if (from_type == element::i4) {
return detail::get_i4(buf, idx);
}

auto v = reinterpret_cast<const TI*>(buf);
return static_cast<TO>(v[idx]);
template <class ElementIter>
constexpr bool is_nf4_iterator() {
using it = typename std::decay<ElementIter>::type;
using T = fundamental_type_for<element::nf4>;
return std::is_same<it, element::Iterator<element::nf4, const T>>::value ||
std::is_same<it, element::Iterator<element::nf4, T>>::value;
}

/**
* @brief Converts `count` elements from `arg` to `out`, choosing the element access by run-time element types.
*
* Both buffers are accessed as raw bytes. For each element the destination type is checked first:
* u1, u4 and i4 outputs are written with the packed setters. Otherwise NF4 input is unpacked with
* ov::ConvertNF4::unpack, and any other combination is a plain assignment of the value read by get_value.
*
* @tparam TI Type of input data pointer.
* @tparam TO Type of output data pointer.
* @param arg Pointer to input data.
* @param out Pointer to output data.
* @param count Number of elements to convert.
* @param src_type Element type of input data.
* @param dst_type Element type of output data.
*/
template <typename TI, typename TO>
void lp_convert(const TI* arg, TO* out, size_t count, element::Type_t src_type, element::Type_t dst_type) {
const uint8_t* input = reinterpret_cast<const uint8_t*>(arg);
uint8_t* output = reinterpret_cast<uint8_t*>(out);
// The type checks do not depend on `i`, but they are evaluated for every element.
for (size_t i = 0; i < count; ++i) {
if (dst_type == element::u1) {
detail::set_u1(output, i, detail::get_value<uint8_t, TI>(input, i, src_type));
} else if (dst_type == element::u4) {
detail::set_u4(output, i, detail::get_value<uint8_t, TI>(input, i, src_type));
} else if (dst_type == element::i4) {
// Signed intermediate type, so the value is read as int8_t before packing.
detail::set_i4(output, i, detail::get_value<int8_t, TI>(input, i, src_type));
} else if (src_type == element::nf4) {
// Reached only when the destination is not u1, u4 or i4.
ov::ConvertNF4::unpack(out, input, i);
} else {
out[i] = detail::get_value<TO, TI>(input, i, src_type);
}
}
}
namespace reference {
namespace detail {

template <typename TI, typename TO>
typename std::enable_if<!std::is_same<TO, char>::value, TO>::type convert(const TI v) {
Expand All @@ -107,6 +36,20 @@ typename std::enable_if<std::is_same<TO, char>::value, TO>::type convert(const T
}
} // namespace detail

template <typename InputIt, typename OutputIt>
void convert(InputIt arg, OutputIt out, const size_t count) {
using IN_T = typename std::iterator_traits<InputIt>::value_type;
using OUT_T = typename std::iterator_traits<OutputIt>::value_type;

// Deduce types for NF4 <-> floating point conversion to use quantization.
using From = typename std::
conditional<is_nf4_iterator<InputIt>() && !std::is_integral<OUT_T>::value, const float, IN_T>::type;
using To =
typename std::conditional<is_nf4_iterator<OutputIt>() && !std::is_integral<IN_T>::value, float, OUT_T>::type;

std::transform(arg, arg + count, out, detail::convert<From, To>);
}

template <typename TI, typename TO>
void convert(const TI* arg, TO* out, const size_t count) {
std::transform(arg, arg + count, out, detail::convert<TI, TO>);
Expand All @@ -130,7 +73,7 @@ void convert<float16, int8_t>(const float16* arg, int8_t* out, size_t count);
// Count how many f32 values are out of the normal finite number range when converted to f16
size_t count_out_of_f16_range(const float* arg, size_t count);

// Convert values from f32 to f16 with claming to f16 min/max when value is out of normal finite numbers range
// Convert values from f32 to f16 with clamping to f16 min/max when value is out of normal finite numbers range
void convert_from_f32_to_f16_with_clamp(const float* arg, float16* out, size_t count);
} // namespace reference
} // namespace ov
37 changes: 9 additions & 28 deletions src/core/src/op/convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,50 +15,31 @@ namespace ov {
namespace op {
namespace convert {

constexpr bool is_lp_type(const element::Type_t et) {
return (et == element::i4) || (et == element::u1) || (et == element::u4) || (et == element::nf4);
}

#define CONVERT_ET_LIST \
boolean, bf16, f16, f32, f64, i4, i8, i16, i32, i64, u1, u4, u8, u16, u32, u64, nf4, f8e4m3, f8e5m2
boolean, bf16, f16, f32, f64, i4, i8, i16, i32, i64, u1, u2, u3, u4, u6, u8, u16, u32, u64, nf4, f8e4m3, f8e5m2

struct Evaluate : public element::NoAction<bool> {
using element::NoAction<bool>::visit;
template <element::Type_t ET, class TI = fundamental_type_for<ET>>

template <element::Type_t ET_IN, class TI = fundamental_type_for<ET_IN>>
static result_type visit(const Tensor& arg, Tensor& out, const size_t count) {
using namespace ov::element;
return IF_TYPE_OF(Convert_out,
CONVERT_ET_LIST,
EvalByOutputType<is_lp_type(ET)>,
EvalByOutputType,
out.get_element_type(),
reinterpret_cast<const TI*>(arg.data()),
iterator<ET_IN>(reinterpret_cast<const TI*>(arg.data())),
out,
count,
ET);
count);
}

private:
template <bool IS_ARG_ET_LP>
struct EvalByOutputType : public element::NoAction<bool> {
using element::NoAction<bool>::visit;

template <element::Type_t ET,
class T,
class T_ET,
class U = ov::fundamental_type_for<ET>,
typename std::enable_if<is_lp_type(ET) || IS_ARG_ET_LP>::type* = nullptr>
static result_type visit(const T* arg, Tensor& out, const size_t count, T_ET&& arg_et) {
reference::detail::lp_convert(arg, reinterpret_cast<U*>(out.data()), count, arg_et, ET);
return true;
}

template <element::Type_t ET,
class T,
class T_ET,
class U = ov::fundamental_type_for<ET>,
typename std::enable_if<!is_lp_type(ET) && !IS_ARG_ET_LP>::type* = nullptr>
static result_type visit(const T* arg, Tensor& out, const size_t count, T_ET&&) {
reference::convert(arg, out.data<U>(), count);
template <element::Type_t ET_OUT, class InputIter, class TO = ov::fundamental_type_for<ET_OUT>>
static result_type visit(InputIter arg, Tensor& out, const size_t count) {
reference::convert(arg, element::iterator<ET_OUT>(out.data()), count);
return true;
}
};
Expand Down
16 changes: 15 additions & 1 deletion src/core/src/runtime/itensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@

#include "openvino/core/except.hpp"
#include "openvino/core/shape_util.hpp"
#include "openvino/core/type/element_iterator.hpp"
#include "openvino/runtime/allocator.hpp"
#include "openvino/runtime/iremote_tensor.hpp"
#include "openvino/runtime/make_tensor.hpp"
#include "openvino/runtime/properties.hpp"

namespace ov {
Expand All @@ -21,7 +23,19 @@ size_t ITensor::get_size() const {
}

size_t ITensor::get_byte_size() const {
return (get_size() * get_element_type().bitwidth() + 8 - 1) / 8;
const auto& et = get_element_type();
auto byte_size = get_size() * et.bitwidth();
if (element::is_split_bit_type(et)) {
constexpr size_t storage_unit_size = 24;
byte_size += storage_unit_size - 1;
byte_size /= storage_unit_size;
byte_size *= 3;
} else {
constexpr size_t storage_unit_size = 8;
byte_size += storage_unit_size - 1;
byte_size /= storage_unit_size;
}
return byte_size;
}

bool ITensor::is_continuous() const {
Expand Down
Loading

0 comments on commit 4c396bd

Please sign in to comment.