Skip to content

Commit

Permalink
MSVC: Resolve function overload resolution confusion
Browse files Browse the repository at this point in the history
MSVC would take the wrong function template overload and then error on
template instantiation. I didn't find a workaround to have it pick the
right overload other than refactoring the mechanism to not use enable_if
on L but rather to encode L in a tag argument.

Refs: gh-119
Signed-off-by: Matthias Kretz <kretz@kde.org>
  • Loading branch information
mattkretz committed Oct 7, 2016
1 parent d0f1f52 commit ee46cdc
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 90 deletions.
182 changes: 100 additions & 82 deletions common/simdarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -2415,88 +2415,106 @@ Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
// transpose_impl {{{1
namespace Common
{
// 4-output transpose for SimdArray<T, N, V, N>, enabled only for L == 4.
// Every output and every input is unwrapped to its V via internal_data() and
// the work is forwarded to the transpose_impl<L> overload taking V.
template <int L, typename T, std::size_t N, typename V>
inline enable_if<L == 4, void> transpose_impl(
    SimdArray<T, N, V, N> * Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
                         SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
{
    using InnerProxy = TransposeProxy<V, V, V, V>;
    const auto &inputs = proxy.in;

    // Addresses of the V objects stored inside the four destination arrays.
    V *Vc_RESTRICT unwrapped[L] = {
        &internal_data(*r[0]),
        &internal_data(*r[1]),
        &internal_data(*r[2]),
        &internal_data(*r[3]),
    };

    transpose_impl<L>(&unwrapped[0], InnerProxy{internal_data(std::get<0>(inputs)),
                                                internal_data(std::get<1>(inputs)),
                                                internal_data(std::get<2>(inputs)),
                                                internal_data(std::get<3>(inputs))});
}
// 2-output transpose, enabled only for L == 2: four SimdArray<T, 2, V, 1>
// inputs are written into two SimdArray<T, 4, V, 1> outputs.
// *r[0] collects the internal_data0 part of inputs 0..3 and *r[1] collects
// their internal_data1 part, one input per nested output slot.
template <int L, typename T, typename V>
inline enable_if<(L == 2), void> transpose_impl(
    SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
                         SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
{
    const auto &in0 = std::get<0>(proxy.in);
    const auto &in1 = std::get<1>(proxy.in);
    const auto &in2 = std::get<2>(proxy.in);
    const auto &in3 = std::get<3>(proxy.in);

    auto &first = *r[0];
    auto &second = *r[1];

    // first output <- internal_data0 of every input
    internal_data0(internal_data0(first)) = internal_data0(in0);
    internal_data1(internal_data0(first)) = internal_data0(in1);
    internal_data0(internal_data1(first)) = internal_data0(in2);
    internal_data1(internal_data1(first)) = internal_data0(in3);

    // second output <- internal_data1 of every input
    internal_data0(internal_data0(second)) = internal_data1(in0);
    internal_data1(internal_data0(second)) = internal_data1(in1);
    internal_data0(internal_data1(second)) = internal_data1(in2);
    internal_data1(internal_data1(second)) = internal_data1(in3);
}
// 4-output transpose for SimdArray<T, N, V, 1>, enabled for L == 4 and N > 1.
// The job is split into two transpose_impl<2> calls: outputs r[0], r[1] are
// produced from the internal_data0 parts of the four inputs, and outputs
// r[2], r[3] from their internal_data1 parts.
template <int L, typename T, std::size_t N, typename V>
inline enable_if<(L == 4 && N > 1), void> transpose_impl(
    SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
                         SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
{
    using Out = SimdArray<T, N, V, 1>;
    using Half = SimdArray<T, 2>;
    using HalfProxy = TransposeProxy<Half, Half, Half, Half>;

    const auto &in0 = std::get<0>(proxy.in);
    const auto &in1 = std::get<1>(proxy.in);
    const auto &in2 = std::get<2>(proxy.in);
    const auto &in3 = std::get<3>(proxy.in);

    Out *Vc_RESTRICT firstPair[L / 2] = {r[0], r[1]};
    Out *Vc_RESTRICT secondPair[L / 2] = {r[2], r[3]};

    transpose_impl<2>(&firstPair[0],
                      HalfProxy{internal_data0(in0), internal_data0(in1),
                                internal_data0(in2), internal_data0(in3)});
    transpose_impl<2>(&secondPair[0],
                      HalfProxy{internal_data1(in0), internal_data1(in1),
                                internal_data1(in2), internal_data1(in3)});
}
/* TODO:
template <typename T, std::size_t N, typename V, std::size_t VSize>
inline enable_if<(N > VSize), void> transpose_impl(
std::array<SimdArray<T, N, V, VSize> * Vc_RESTRICT, 4> & r,
const TransposeProxy<SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>,
SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>> &proxy)
{
typedef SimdArray<T, N, V, VSize> SA;
std::array<typename SA::storage_type0 * Vc_RESTRICT, 4> r0 = {
{&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]),
&internal_data0(*r[3])}};
transpose_impl(
r0, TransposeProxy<typename SA::storage_type0, typename SA::storage_type0,
typename SA::storage_type0, typename SA::storage_type0>{
internal_data0(std::get<0>(proxy.in)),
internal_data0(std::get<1>(proxy.in)),
internal_data0(std::get<2>(proxy.in)),
internal_data0(std::get<3>(proxy.in))});
std::array<typename SA::storage_type1 * Vc_RESTRICT, 4> r1 = {
{&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]),
&internal_data1(*r[3])}};
transpose_impl(
r1, TransposeProxy<typename SA::storage_type1, typename SA::storage_type1,
typename SA::storage_type1, typename SA::storage_type1>{
internal_data1(std::get<0>(proxy.in)),
internal_data1(std::get<1>(proxy.in)),
internal_data1(std::get<2>(proxy.in)),
internal_data1(std::get<3>(proxy.in))});
}
*/
// TransposeTag<4, 4> overload for SimdArray<T, N, V, N>.
// Every output and every input is unwrapped to its V via internal_data() and
// the work is forwarded to the TransposeTag<4, 4> overload taking V.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
                         SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
{
    using InnerProxy = TransposeProxy<V, V, V, V>;
    const auto &inputs = proxy.in;

    // Addresses of the V objects stored inside the four destination arrays.
    V *Vc_RESTRICT unwrapped[4] = {
        &internal_data(*r[0]),
        &internal_data(*r[1]),
        &internal_data(*r[2]),
        &internal_data(*r[3]),
    };

    transpose_impl(TransposeTag<4, 4>(), &unwrapped[0],
                   InnerProxy{internal_data(std::get<0>(inputs)),
                              internal_data(std::get<1>(inputs)),
                              internal_data(std::get<2>(inputs)),
                              internal_data(std::get<3>(inputs))});
}

// TransposeTag<2, 4> overload: four SimdArray<T, 2, V, 1> inputs are written
// into two SimdArray<T, 4, V, 1> outputs.
// *r[0] collects the internal_data0 part of inputs 0..3 and *r[1] collects
// their internal_data1 part, one input per nested output slot.
template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
                         SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
{
    const auto &in0 = std::get<0>(proxy.in);
    const auto &in1 = std::get<1>(proxy.in);
    const auto &in2 = std::get<2>(proxy.in);
    const auto &in3 = std::get<3>(proxy.in);

    auto &first = *r[0];
    auto &second = *r[1];

    // first output <- internal_data0 of every input
    internal_data0(internal_data0(first)) = internal_data0(in0);
    internal_data1(internal_data0(first)) = internal_data0(in1);
    internal_data0(internal_data1(first)) = internal_data0(in2);
    internal_data1(internal_data1(first)) = internal_data0(in3);

    // second output <- internal_data1 of every input
    internal_data0(internal_data0(second)) = internal_data1(in0);
    internal_data1(internal_data0(second)) = internal_data1(in1);
    internal_data0(internal_data1(second)) = internal_data1(in2);
    internal_data1(internal_data1(second)) = internal_data1(in3);
}

// TransposeTag<4, 4> overload spelled out for SimdArray<T, 1, V, 1>.
// Same unwrap-and-forward scheme as the SimdArray<T, N, V, N> overload.
// NOTE(review): presumably needed because SimdArray<T, 1, V, 1> would otherwise
// match both the <T, N, V, N> and the <T, N, V, 1> overloads - confirm.
template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
                         SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>> &proxy)
{
    using InnerProxy = TransposeProxy<V, V, V, V>;
    const auto &inputs = proxy.in;

    // Addresses of the V objects stored inside the four destination arrays.
    V *Vc_RESTRICT unwrapped[4] = {
        &internal_data(*r[0]),
        &internal_data(*r[1]),
        &internal_data(*r[2]),
        &internal_data(*r[3]),
    };

    transpose_impl(TransposeTag<4, 4>(), &unwrapped[0],
                   InnerProxy{internal_data(std::get<0>(inputs)),
                              internal_data(std::get<1>(inputs)),
                              internal_data(std::get<2>(inputs)),
                              internal_data(std::get<3>(inputs))});
}

// TransposeTag<4, 4> overload for SimdArray<T, N, V, 1>.
// The job is split into two TransposeTag<2, 4> calls: outputs r[0], r[1] are
// produced from the internal_data0 parts of the four inputs, and outputs
// r[2], r[3] from their internal_data1 parts.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
                         SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
{
    using Out = SimdArray<T, N, V, 1>;
    using Half = SimdArray<T, 2>;
    using HalfProxy = TransposeProxy<Half, Half, Half, Half>;

    const auto &in0 = std::get<0>(proxy.in);
    const auto &in1 = std::get<1>(proxy.in);
    const auto &in2 = std::get<2>(proxy.in);
    const auto &in3 = std::get<3>(proxy.in);

    Out *Vc_RESTRICT firstPair[2] = {r[0], r[1]};
    Out *Vc_RESTRICT secondPair[2] = {r[2], r[3]};

    transpose_impl(TransposeTag<2, 4>(), &firstPair[0],
                   HalfProxy{internal_data0(in0), internal_data0(in1),
                             internal_data0(in2), internal_data0(in3)});
    transpose_impl(TransposeTag<2, 4>(), &secondPair[0],
                   HalfProxy{internal_data1(in0), internal_data1(in1),
                             internal_data1(in2), internal_data1(in3)});
}

/* TODO:
template <typename T, std::size_t N, typename V, std::size_t VSize>
inline enable_if<(N > VSize), void> transpose_impl(
std::array<SimdArray<T, N, V, VSize> * Vc_RESTRICT, 4> & r,
const TransposeProxy<SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>,
SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>> &proxy)
{
typedef SimdArray<T, N, V, VSize> SA;
std::array<typename SA::storage_type0 * Vc_RESTRICT, 4> r0 = {
{&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]),
&internal_data0(*r[3])}};
transpose_impl(
r0, TransposeProxy<typename SA::storage_type0, typename SA::storage_type0,
typename SA::storage_type0, typename SA::storage_type0>{
internal_data0(std::get<0>(proxy.in)),
internal_data0(std::get<1>(proxy.in)),
internal_data0(std::get<2>(proxy.in)),
internal_data0(std::get<3>(proxy.in))});
std::array<typename SA::storage_type1 * Vc_RESTRICT, 4> r1 = {
{&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]),
&internal_data1(*r[3])}};
transpose_impl(
r1, TransposeProxy<typename SA::storage_type1, typename SA::storage_type1,
typename SA::storage_type1, typename SA::storage_type1>{
internal_data1(std::get<0>(proxy.in)),
internal_data1(std::get<1>(proxy.in)),
internal_data1(std::get<2>(proxy.in)),
internal_data1(std::get<3>(proxy.in))});
}
*/
} // namespace Common

// Traits static assertions {{{1
Expand Down
8 changes: 7 additions & 1 deletion common/transpose.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,20 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{
// Non-owning bundle of the arguments of a transpose request.
// Only const references are stored, so the referenced objects must stay alive
// for as long as the proxy is used.
template <typename... Inputs> struct TransposeProxy
{
    // One const reference per input, in argument order.
    std::tuple<const Inputs &...> in;

    // Binds every tuple element to the corresponding argument (no copies).
    TransposeProxy(const Inputs &... sources) : in{sources...} {}
};

template <typename... Vs> TransposeProxy<Vs...> transpose(Vs... vs)
// Empty tag type used to select a transpose_impl overload by argument instead
// of by enable_if: LhsLength is the number of destination vectors, RhsLength
// the number of inputs held by the TransposeProxy.
template <int LhsLength, size_t RhsLength> struct TransposeTag {};
} // namespace Common

// Bundles the given vectors into a Common::TransposeProxy, which is meant to be
// consumed by an assignment that calls transpose_impl.
//
// The proxy stores only const references to its inputs. The arguments are
// therefore taken by const reference, so that the proxy refers to the caller's
// objects (or to temporaries that live until the end of the caller's full
// expression). Taking them by value would make the proxy refer to this
// function's parameters, whose lifetime may already end when the function
// returns, leaving the proxy dangling before it is consumed.
//
// The returned proxy must not outlive the objects passed in.
template <typename... Vs> Common::TransposeProxy<Vs...> transpose(const Vs &... vs)
{
    return {vs...};
}
Expand Down
2 changes: 1 addition & 1 deletion common/vectortuple.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ template <int Length, typename V> class VectorReferenceArray

// Assignment from a TransposeProxy: hands the referenced vectors (&r[0]) and
// the proxy over to transpose_impl, which performs the actual transposition.
template <typename... Inputs> void operator=(TransposeProxy<Inputs...> &&proxy)
{
// NOTE(review): this is a rendered diff (1 addition & 1 deletion). The first
// call is the removed form, which passed Length as an explicit template
// argument; the second is its replacement, which encodes Length and the
// number of inputs in a TransposeTag argument.
transpose_impl<Length>(&r[0], proxy);
transpose_impl(TransposeTag<Length, sizeof...(Inputs)>(), &r[0], proxy);
}

template <typename T, typename IndexVector, typename Scale, bool Flag>
Expand Down
5 changes: 2 additions & 3 deletions scalar/vector.tcc
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,8 @@ template<> Vc_INTRINSIC void Scalar::double_v::setQnan(Scalar::double_v::Mask m)
namespace Common
{
// transpose_impl {{{1
// 1x1 transpose for Scalar::float_v.
// NOTE(review): this is a rendered diff (2 additions & 3 deletions). The first
// three lines are the removed signature, which selected the overload via
// enable_if on L; the two lines after it are the replacement signature, which
// takes a TransposeTag<1, 1> argument instead. Both share the body below.
template <int L>
Vc_ALWAYS_INLINE enable_if<L == 1, void> transpose_impl(
Scalar::float_v *Vc_RESTRICT r[], const TransposeProxy<Scalar::float_v> &proxy)
Vc_ALWAYS_INLINE void transpose_impl(TransposeTag<1, 1>, Scalar::float_v *Vc_RESTRICT r[],
const TransposeProxy<Scalar::float_v> &proxy)
{
// Single input, single output: assign the input's data() to *r[0].
*r[0] = std::get<0>(proxy.in).data();
}
Expand Down
5 changes: 2 additions & 3 deletions sse/vector.tcc
Original file line number Diff line number Diff line change
Expand Up @@ -775,9 +775,8 @@ template <> template <int Index> Vc_INTRINSIC SSE::double_v SSE::double_v::broad
namespace Common
{
// transpose_impl {{{1
template <int L>
Vc_ALWAYS_INLINE enable_if<L == 4, void> transpose_impl(
SSE::float_v *Vc_RESTRICT r[],
Vc_ALWAYS_INLINE void transpose_impl(
TransposeTag<4, 4>, SSE::float_v *Vc_RESTRICT r[],
const TransposeProxy<SSE::float_v, SSE::float_v, SSE::float_v, SSE::float_v> &proxy)
{
const auto in0 = std::get<0>(proxy.in).data();
Expand Down

0 comments on commit ee46cdc

Please sign in to comment.