diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 96569404e..6890903a5 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -4,8 +4,6 @@ on: pull_request: schedule: - cron: '0 3 * * *' -env: - build_type: Release jobs: #clang-format: # runs-on: ubuntu-latest @@ -20,34 +18,27 @@ jobs: runs-on: ubuntu-latest env: dashboard_model: Experimental + build_type: ${{ matrix.build_type }} NUMBER_OF_PROCESSORS: 2 - CXX: ${{ matrix.CXX }} - name: ${{ matrix.name }} + CXX: ${{ matrix.cxx }} strategy: fail-fast: false matrix: + build_type: [Debug, Release] + cxx: [g++-9, g++-10, g++-11, clang++-10, clang++-11, clang++-12, icpc] include: - - name: build-ubuntu-gcc9 - CXX: g++-9 - - name: build-ubuntu-gcc10 - CXX: g++-10 - - name: build-ubuntu-gcc11 - CXX: g++-11 + - cxx: g++-11 INSTALL_EXTRA: g++-11 - - name: build-ubuntu-clang10 - CXX: clang++-10 - - name: build-ubuntu-clang11 - CXX: clang++-11 + - cxx: clang++-11 INSTALL_EXTRA: clang-11 - - name: build-ubuntu-clang12 - CXX: clang++-12 + - cxx: clang++-12 INSTALL_EXTRA: clang-12 - - name: build-ubuntu-icpc - CXX: icpc + - cxx: icpc INSTALL_ONEAPI: true - #- name: build-ubuntu-icpx - # CXX: icpx - # INSTALL_ONEAPI: true + exclude: + # icpc in debug mode runs out of memory in CI + - cxx: icpc + build_type: Debug steps: - uses: actions/checkout@v2 with: @@ -77,10 +68,12 @@ jobs: build-windows: runs-on: ${{ matrix.os }} - name: build-${{ matrix.os }} + env: + build_type: ${{ matrix.build_type }} strategy: fail-fast: false matrix: + build_type: [Debug, Release] os: [windows-2019] steps: - uses: actions/checkout@v2 diff --git a/src/avx_sorthelper.cpp b/src/avx_sorthelper.cpp index fba2480d0..78d2ad503 100644 --- a/src/avx_sorthelper.cpp +++ b/src/avx_sorthelper.cpp @@ -71,71 +71,71 @@ Vc_CONST AVX2::short_v sorted(AVX2::short_v x_ // sort pairs (one min/max) auto x = AVX::lo128(x_.data()); auto y = AVX::hi128(x_.data()); - Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); + // 
Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); auto l = _mm_min_epi16(x, y); auto h = _mm_max_epi16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); // merge left & right quads (two min/max) x = _mm_unpacklo_epi16(l, h); y = _mm_unpackhi_epi16(h, l); - Vc_DEBUG << "8x2 sorted xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "8x2 sorted xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epi16(x, y); h = _mm_max_epi16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); x = Mem::permuteLo(Mem::blend(l, h)); y = Mem::permuteHi(Mem::blend(h, l)); - Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epi16(x, y); h = _mm_max_epi16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); // merge quads into octs (three min/max) x = _mm_unpacklo_epi16(h, l); y = _mm_unpackhi_epi16(l, h); - Vc_DEBUG << "4x4 sorted xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "4x4 sorted xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epi16(x, y); h = _mm_max_epi16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); x = Mem::permuteLo(Mem::blend(h, l)); y = Mem::permuteHi(Mem::blend(l, h)); - Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epi16(x, y); h = _mm_max_epi16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); x = Mem::permuteHi(Mem::blend(l, h)); y = Mem::permuteLo(Mem::blend(h, l)); - Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "xy: " << 
AVX::addType(x) << AVX::addType(y); l = _mm_min_epi16(x, y); h = _mm_max_epi16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h) << " done?"; + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h) << " done?"; // merge octs into hexa (four min/max) x = _mm_unpacklo_epi16(l, h); y = _mm_unpackhi_epi16(h, l); - Vc_DEBUG << "2x8 sorted xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "2x8 sorted xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epi16(x, y); h = _mm_max_epi16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); x = _mm_unpacklo_epi64(l, h); y = _mm_unpackhi_epi64(l, h); - Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epi16(x, y); h = _mm_max_epi16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); x = _mm_castps_si128(Mem::permute(Mem::blend(_mm_castsi128_ps(h), _mm_castsi128_ps(l)))); y = _mm_castps_si128(Mem::blend(_mm_castsi128_ps(l), _mm_castsi128_ps(h))); - Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epi16(x, y); h = _mm_max_epi16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); x = Mem::blend(l, h); y = Mem::permuteLo( Mem::permuteHi(Mem::blend(h, l))); - Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epi16(x, y); h = _mm_max_epi16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); x = _mm_unpacklo_epi16(l, h); y = _mm_unpackhi_epi16(l, h); return AVX::concat(x, y); @@ -147,71 +147,71 @@ Vc_CONST AVX2::ushort_v sorted(AVX2::ushort_v // 
sort pairs (one min/max) auto x = AVX::lo128(x_.data()); auto y = AVX::hi128(x_.data()); - Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); auto l = _mm_min_epu16(x, y); auto h = _mm_max_epu16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); // merge left & right quads (two min/max) x = _mm_unpacklo_epi16(l, h); y = _mm_unpackhi_epi16(h, l); - Vc_DEBUG << "8x2 sorted xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "8x2 sorted xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epu16(x, y); h = _mm_max_epu16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); x = Mem::permuteLo(Mem::blend(l, h)); y = Mem::permuteHi(Mem::blend(h, l)); - Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epu16(x, y); h = _mm_max_epu16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); // merge quads into octs (three min/max) x = _mm_unpacklo_epi16(h, l); y = _mm_unpackhi_epi16(l, h); - Vc_DEBUG << "4x4 sorted xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "4x4 sorted xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epu16(x, y); h = _mm_max_epu16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); x = Mem::permuteLo(Mem::blend(h, l)); y = Mem::permuteHi(Mem::blend(l, h)); - Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epu16(x, y); h = _mm_max_epu16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); x = 
Mem::permuteHi(Mem::blend(l, h)); y = Mem::permuteLo(Mem::blend(h, l)); - Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epu16(x, y); h = _mm_max_epu16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h) << " done?"; + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h) << " done?"; // merge octs into hexa (four min/max) x = _mm_unpacklo_epi16(l, h); y = _mm_unpackhi_epi16(h, l); - Vc_DEBUG << "2x8 sorted xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "2x8 sorted xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epu16(x, y); h = _mm_max_epu16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); x = _mm_unpacklo_epi64(l, h); y = _mm_unpackhi_epi64(l, h); - Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epu16(x, y); h = _mm_max_epu16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); x = _mm_castps_si128(Mem::permute(Mem::blend(_mm_castsi128_ps(h), _mm_castsi128_ps(l)))); y = _mm_castps_si128(Mem::blend(_mm_castsi128_ps(l), _mm_castsi128_ps(h))); - Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epu16(x, y); h = _mm_max_epu16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); x = Mem::blend(l, h); y = Mem::permuteLo( Mem::permuteHi(Mem::blend(h, l))); - Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); + // Vc_DEBUG << "xy: " << AVX::addType(x) << AVX::addType(y); l = _mm_min_epu16(x, y); h = _mm_max_epu16(x, y); - Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); + // Vc_DEBUG << "lh: " << AVX::addType(l) << AVX::addType(h); 
x = _mm_unpacklo_epi16(l, h); y = _mm_unpackhi_epi16(l, h); return AVX::concat(x, y); diff --git a/tests/ulp.cpp b/tests/ulp.cpp index a7fa3e436..3c1d6c89a 100644 --- a/tests/ulp.cpp +++ b/tests/ulp.cpp @@ -29,6 +29,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. TEST_TYPES(V, testUlpDiff, concat) //{{{1 { + // MSVC takes too long in debug mode +#if defined _MSC_VER && defined _DEBUG + const auto range = 1000; +#else + const auto range = 10000; +#endif + typedef typename V::EntryType T; using vir::detail::ulpDiffToReference; @@ -41,7 +48,7 @@ TEST_TYPES(V, testUlpDiff, concat) //{{{1 frexp(base, &exp); const V eps = ldexp(V(std::numeric_limits<T>::epsilon()), exp - 1); //std::cout << base << ", " << exp << ", " << eps << std::endl; - for (int i = -10000; i <= 10000; ++i) { + for (int i = -range; i <= range; ++i) { const V i_v = V(T(i)); const V diff = base + i_v * eps;