MSVC: more ICE workarounds on loads
Refs: gh-119
Signed-off-by: Matthias Kretz <kretz@kde.org>
mattkretz committed Oct 6, 2016
1 parent 54ac95c commit 9cb6730
Showing 7 changed files with 2,761 additions and 79 deletions.
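
The bulk of the avx/detail.h diff below adds explicit when_streaming load overloads, including a block of non-template, per-type overloads guarded by Vc_MSVC; spelling the overloads out this way apparently works around internal compiler errors (ICEs) that MSVC produces on the more generic templates. As a rough, hedged sketch of the tag-dispatch pattern involved (the flag types and the load32 name mirror the diff; the streaming path here uses a raw AVX2 intrinsic rather than the library's AvxIntrinsics::stream_load wrapper):

#include <immintrin.h>

struct when_aligned {};
struct when_unaligned {};
struct when_streaming {};

// Overloads selected by flag type; the streaming variant performs a
// non-temporal (cache-bypassing) load, which requires AVX2 and a
// 32-byte-aligned pointer.
inline __m256 load32(const float *mem, when_aligned)   { return _mm256_load_ps(mem); }
inline __m256 load32(const float *mem, when_unaligned) { return _mm256_loadu_ps(mem); }
inline __m256 load32(const float *mem, when_streaming)
{
    // const_cast because some older intrinsics headers declare the
    // parameter of _mm256_stream_load_si256 as non-const.
    return _mm256_castsi256_ps(_mm256_stream_load_si256(
        reinterpret_cast<__m256i *>(const_cast<float *>(mem))));
}
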
127 changes: 127 additions & 0 deletions avx/debug.h.orig
@@ -0,0 +1,127 @@
/* This file is part of the Vc library. {{{
Copyright © 2011-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the names of contributing organizations nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_AVX_DEBUG_H_
#define VC_AVX_DEBUG_H_

#ifndef NDEBUG
#include "vector.h"
#include <iostream>
#include <iomanip>
#endif

namespace Vc_VERSIONED_NAMESPACE
{
namespace AVX
{
template <typename T, typename U> struct AddType {
const U &d;
};
template <typename T, typename U> AddType<T, U> addType(const U &x) { return {x}; }

#ifdef NDEBUG
class DebugStream
{
public:
DebugStream(const char *, const char *, int) {}
template<typename T> inline DebugStream &operator<<(const T &) { return *this; }
};
#else
class DebugStream
{
private:
template<typename T, typename V> static void printVector(V _x)
{
enum { Size = sizeof(V) / sizeof(T) };
union { V v; T m[Size]; } x = { _x };
std::cerr << '[' << std::setprecision(24) << x.m[0];
for (int i = 1; i < Size; ++i) {
std::cerr << ", " << std::setprecision(24) << x.m[i];
}
std::cerr << ']';
}
public:
DebugStream(const char *func, const char *file, int line)
{
std::cerr << "\033[1;40;33mDEBUG: " << file << ':' << line << ' ' << func << ' ';
}

template<typename T> DebugStream &operator<<(const T &x) { std::cerr << x; return *this; }

template <typename T, typename U> DebugStream &operator<<(AddType<T, U> &&x)
{
printVector<T, U>(x.d);
return *this;
}
DebugStream &operator<<(__m128 x) {
printVector<float, __m128>(x);
return *this;
}
DebugStream &operator<<(__m256 x) {
printVector<float, __m256>(x);
return *this;
}
DebugStream &operator<<(__m128d x) {
printVector<double, __m128d>(x);
return *this;
}
DebugStream &operator<<(__m256d x) {
printVector<double, __m256d>(x);
return *this;
}
DebugStream &operator<<(__m128i x) {
printVector<unsigned int, __m128i>(x);
return *this;
}
DebugStream &operator<<(__m256i x) {
printVector<unsigned int, __m256i>(x);
return *this;
}

~DebugStream()
{
std::cerr << "\033[0m" << std::endl;
}
};
#endif

#ifdef Vc_DEBUG
#undef Vc_DEBUG
#endif
#ifdef Vc_MSVC
#define Vc_DEBUG Vc::AVX::DebugStream(__FUNCSIG__, __FILE__, __LINE__)
#else
#define Vc_DEBUG Vc::AVX::DebugStream(__PRETTY_FUNCTION__, __FILE__, __LINE__)
#endif

} // namespace AVX
} // namespace Vc

#endif // VC_AVX_DEBUG_H_
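
The DebugStream above prints SIMD registers element-wise to stderr and collapses to a no-op stub when NDEBUG is defined. A minimal, hypothetical use of the Vc_DEBUG macro it defines (the trace_load function is made up for illustration; the include path is assumed):

#include <immintrin.h>
#include "avx/debug.h"   // assumed include path for the header shown above

__m256 trace_load(const float *mem)
{
    __m256 v = _mm256_loadu_ps(mem);
    // In a debug build this prints "DEBUG: file:line function [x0, ..., x7]"
    // to stderr; with NDEBUG the whole statement effectively compiles away.
    Vc_DEBUG << "loaded " << v;
    return v;
}
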
101 changes: 84 additions & 17 deletions avx/detail.h
@@ -102,6 +102,10 @@ Vc_INTRINSIC __m256 load32(const float *mem, when_unaligned)
{
return _mm256_loadu_ps(mem);
}
Vc_INTRINSIC __m256 load32(const float *mem, when_streaming)
{
return AvxIntrinsics::stream_load<__m256>(mem);
}
Vc_INTRINSIC __m256d load32(const double *mem, when_aligned)
{
return _mm256_load_pd(mem);
@@ -110,6 +114,10 @@ Vc_INTRINSIC __m256d load32(const double *mem, when_unaligned)
{
return _mm256_loadu_pd(mem);
}
Vc_INTRINSIC __m256d load32(const double *mem, when_streaming)
{
return AvxIntrinsics::stream_load<__m256d>(mem);
}
template <class T> Vc_INTRINSIC __m256i load32(const T *mem, when_aligned)
{
static_assert(std::is_integral<T>::value, "load32<T> is only intended for integral T");
@@ -120,6 +128,11 @@ template <class T> Vc_INTRINSIC __m256i load32(const T *mem, when_unaligned)
static_assert(std::is_integral<T>::value, "load32<T> is only intended for integral T");
return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(mem));
}
template <class T> Vc_INTRINSIC __m256i load32(const T *mem, when_streaming)
{
static_assert(std::is_integral<T>::value, "load32<T> is only intended for integral T");
return AvxIntrinsics::stream_load<__m256i>(mem);
}

// MSVC workarounds{{{2
#ifdef Vc_MSVC
@@ -150,6 +163,14 @@ Vc_INTRINSIC __m256 load(const float *mem, when_unaligned,
return _mm256_loadu_ps(mem);
}

template <typename V, typename DstT>
Vc_INTRINSIC __m256 load(const float *mem, when_streaming,
enable_if<(std::is_same<DstT, float>::value &&
std::is_same<V, __m256>::value)> = nullarg)
{
return AvxIntrinsics::stream_load<__m256>(mem);
}

template <typename V, typename DstT>
Vc_INTRINSIC __m256d load(const double *mem, when_aligned,
enable_if<(std::is_same<DstT, double>::value &&
@@ -166,6 +187,14 @@ Vc_INTRINSIC __m256d load(const double *mem, when_unaligned,
return _mm256_loadu_pd(mem);
}

template <typename V, typename DstT>
Vc_INTRINSIC __m256d load(const double *mem, when_streaming,
enable_if<(std::is_same<DstT, double>::value &&
std::is_same<V, __m256d>::value)> = nullarg)
{
return AvxIntrinsics::stream_load<__m256d>(mem);
}

template <typename V, typename DstT>
Vc_INTRINSIC __m256i load(const uint *mem, when_aligned,
enable_if<(std::is_same<DstT, uint>::value &&
@@ -182,6 +211,14 @@ Vc_INTRINSIC __m256i load(const uint *mem, when_unaligned,
return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(mem));
}

template <typename V, typename DstT>
Vc_INTRINSIC __m256i load(const uint *mem, when_streaming,
enable_if<(std::is_same<DstT, uint>::value &&
std::is_same<V, __m256i>::value)> = nullarg)
{
return AvxIntrinsics::stream_load<__m256i>(mem);
}

template <typename V, typename DstT>
Vc_INTRINSIC __m256i load(const int *mem, when_unaligned,
enable_if<(std::is_same<DstT, int>::value &&
@@ -198,6 +235,14 @@ Vc_INTRINSIC __m256i load(const int *mem, when_aligned,
return _mm256_load_si256(reinterpret_cast<const __m256i *>(mem));
}

template <typename V, typename DstT>
Vc_INTRINSIC __m256i load(const int *mem, when_streaming,
enable_if<(std::is_same<DstT, int>::value &&
std::is_same<V, __m256i>::value)> = nullarg)
{
return AvxIntrinsics::stream_load<__m256i>(mem);
}

template <typename V, typename DstT>
Vc_INTRINSIC __m256i load(const short *mem, when_unaligned,
enable_if<(std::is_same<DstT, short>::value &&
@@ -214,6 +259,14 @@ Vc_INTRINSIC __m256i load(const short *mem, when_aligned,
return _mm256_load_si256(reinterpret_cast<const __m256i *>(mem));
}

template <typename V, typename DstT>
Vc_INTRINSIC __m256i load(const short *mem, when_streaming,
enable_if<(std::is_same<DstT, short>::value &&
std::is_same<V, __m256i>::value)> = nullarg)
{
return AvxIntrinsics::stream_load<__m256i>(mem);
}

template <typename V, typename DstT>
Vc_INTRINSIC __m256i load(const ushort *mem, when_unaligned,
enable_if<(std::is_same<DstT, ushort>::value &&
@@ -229,30 +282,39 @@ Vc_INTRINSIC __m256i load(const ushort *mem, when_aligned,
{
return _mm256_load_si256(reinterpret_cast<const __m256i *>(mem));
}

template <typename V, typename DstT>
Vc_INTRINSIC __m256i load(const ushort *mem, when_streaming,
enable_if<(std::is_same<DstT, ushort>::value &&
std::is_same<V, __m256i>::value)> = nullarg)
{
return AvxIntrinsics::stream_load<__m256i>(mem);
}

#endif // Vc_MSVC

// short {{{2
template <typename Flags>
Vc_INTRINSIC __m256i load(const ushort *mem, Flags f, LoadTag<__m256i, short>)
{
return load(mem, f, LoadTag<__m256i, ushort>());
return load32(mem, f);
}
template <typename Flags>
Vc_INTRINSIC __m256i load(const uchar *mem, Flags f, LoadTag<__m256i, short>)
{
return AVX::cvtepu8_epi16(load(mem, f, LoadTag<__m128i, uchar>()));
return AVX::cvtepu8_epi16(load16(mem, f));
}
template <typename Flags>
Vc_INTRINSIC __m256i load(const schar *mem, Flags f, LoadTag<__m256i, short>)
{
return AVX::cvtepi8_epi16(load(mem, f, LoadTag<__m128i, schar>()));
return AVX::cvtepi8_epi16(load16(mem, f));
}

// ushort {{{2
template <typename Flags>
Vc_INTRINSIC __m256i load(const uchar *mem, Flags f, LoadTag<__m256i, ushort>)
{
return AVX::cvtepu8_epi16(load(mem, f, LoadTag<__m128i, uchar>()));
return AVX::cvtepu8_epi16(load16(mem, f));
}

// int {{{2
@@ -264,12 +326,12 @@ Vc_INTRINSIC __m256i load(const uint *mem, Flags f, LoadTag<__m256i, int>)
template <typename Flags>
Vc_INTRINSIC __m256i load(const ushort *mem, Flags f, LoadTag<__m256i, int>)
{
return AVX::cvtepu16_epi32(load(mem, f, LoadTag<__m128i, ushort>()));
return AVX::cvtepu16_epi32(load16(mem, f));
}
template <typename Flags>
Vc_INTRINSIC __m256i load(const short *mem, Flags f, LoadTag<__m256i, int>)
{
return AVX::cvtepi16_epi32(load(mem, f, LoadTag<__m128i, short>()));
return AVX::cvtepi16_epi32(load16(mem, f));
}
template <typename Flags>
Vc_INTRINSIC __m256i load(const uchar *mem, Flags, LoadTag<__m256i, int>)
@@ -286,7 +348,7 @@ Vc_INTRINSIC __m256i load(const schar *mem, Flags, LoadTag<__m256i, int>)
template <typename Flags>
Vc_INTRINSIC __m256i load(const ushort *mem, Flags f, LoadTag<__m256i, uint>)
{
return AVX::cvtepu16_epi32(load(mem, f, LoadTag<__m128i, ushort>()));
return AVX::cvtepu16_epi32(load16(mem, f));
}
template <typename Flags>
Vc_INTRINSIC __m256i load(const uchar *mem, Flags, LoadTag<__m256i, uint>)
@@ -298,37 +360,37 @@ Vc_INTRINSIC __m256i load(const uchar *mem, Flags, LoadTag<__m256i, uint>)
template <typename Flags>
Vc_INTRINSIC __m256d load(const float *mem, Flags f, LoadTag<__m256d, double>)
{
return AVX::convert<float, double>(load<__m128, float>(mem, f));
return AVX::convert<float, double>(load16(mem, f));
}
template <typename Flags>
Vc_INTRINSIC __m256d load(const uint *mem, Flags f, LoadTag<__m256d, double>)
{
return AVX::convert<uint, double>(load<__m128i, uint>(mem, f));
return AVX::convert<uint, double>(load16(mem, f));
}
template <typename Flags>
Vc_INTRINSIC __m256d load(const int *mem, Flags f, LoadTag<__m256d, double>)
{
return AVX::convert<int, double>(load<__m128i, int>(mem, f));
return AVX::convert<int, double>(load16(mem, f));
}
template <typename Flags>
Vc_INTRINSIC __m256d load(const ushort *mem, Flags f, LoadTag<__m256d, double>)
{
return AVX::convert<int, double>(load<__m128i, int>(mem, f));
return AVX::convert<int, double>(load16(mem, f));
}
template <typename Flags>
Vc_INTRINSIC __m256d load(const short *mem, Flags f, LoadTag<__m256d, double>)
{
return AVX::convert<int, double>(load<__m128i, int>(mem, f));
return AVX::convert<int, double>(load16(mem, f));
}
template <typename Flags>
Vc_INTRINSIC __m256d load(const uchar *mem, Flags f, LoadTag<__m256d, double>)
{
return AVX::convert<int, double>(load<__m128i, int>(mem, f));
return AVX::convert<int, double>(load16(mem, f));
}
template <typename Flags>
Vc_INTRINSIC __m256d load(const schar *mem, Flags f, LoadTag<__m256d, double>)
{
return AVX::convert<int, double>(load<__m128i, int>(mem, f));
return AVX::convert<int, double>(load16(mem, f));
}

// float {{{2
@@ -341,13 +403,18 @@ Vc_INTRINSIC __m256 load(const double *mem, Flags f, LoadTag<__m256, float>)
template <typename Flags>
Vc_INTRINSIC __m256 load(const uint *mem, Flags f, LoadTag<__m256, float>)
{
const auto v = load(mem, f, LoadTag<__m256i, uint>());
const auto v = load32(mem, f);
return _mm256_blendv_ps(
_mm256_cvtepi32_ps(v),
_mm256_add_ps(_mm256_cvtepi32_ps(AVX::sub_epi32(v, AVX::set2power31_epu32())),
AVX::set2power31_ps()),
_mm256_castsi256_ps(AVX::cmplt_epi32(v, _mm256_setzero_si256())));
}
template <typename Flags>
Vc_INTRINSIC __m256 load(const int *mem, Flags f, LoadTag<__m256, float>)
{
return AVX::convert<int, float>(load32(mem, f));
}
template <typename T, typename Flags,
typename = enable_if<!std::is_same<T, float>::value>>
Vc_INTRINSIC __m256 load(const T *mem, Flags f, LoadTag<__m256, float>)
@@ -357,12 +424,12 @@ Vc_INTRINSIC __m256 load(const T *mem, Flags f, LoadTag<__m256, float>)
template <typename Flags>
Vc_INTRINSIC __m256 load(const ushort *mem, Flags f, LoadTag<__m256, float>)
{
return AVX::convert<ushort, float>(load<__m128i, ushort>(mem, f));
return AVX::convert<ushort, float>(load16(mem, f));
}
template <typename Flags>
Vc_INTRINSIC __m256 load(const short *mem, Flags f, LoadTag<__m256, float>)
{
return AVX::convert<short, float>(load<__m128i, short>(mem, f));
return AVX::convert<short, float>(load16(mem, f));
}
/*
template<typename Flags> struct LoadHelper<float, unsigned char, Flags> {
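
To close, a hedged sketch of how user code would reach the when_streaming overloads added in this commit, assuming the Vc 1.x load/store flag API (Vc::float_v, Vc::Streaming); the function and its alignment assumptions are made up for illustration:

#include <Vc/Vc>
#include <cstddef>

// Hypothetical call site: the Vc::Streaming flag requests non-temporal
// (cache-bypassing) loads and stores, which is what ends up in the
// when_streaming paths of avx/detail.h. Assumes 'data' and 'out' are
// suitably aligned and 'n' is a multiple of the vector width.
void copy_without_cache_pollution(const float *data, float *out, std::size_t n)
{
    using Vc::float_v;
    for (std::size_t i = 0; i < n; i += float_v::Size) {
        float_v v(data + i, Vc::Streaming);  // streaming load
        v.store(out + i, Vc::Streaming);     // streaming store
    }
}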