Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run debug builds in CI #338

Merged
merged 3 commits into from
May 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 15 additions & 22 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ on:
pull_request:
schedule:
- cron: '0 3 * * *'
env:
build_type: Release
jobs:
#clang-format:
# runs-on: ubuntu-latest
Expand All @@ -20,34 +18,27 @@ jobs:
runs-on: ubuntu-latest
env:
dashboard_model: Experimental
build_type: ${{ matrix.build_type }}
NUMBER_OF_PROCESSORS: 2
CXX: ${{ matrix.CXX }}
name: ${{ matrix.name }}
CXX: ${{ matrix.cxx }}
strategy:
fail-fast: false
matrix:
build_type: [Debug, Release]
cxx: [g++-9, g++-10, g++-11, clang++-10, clang++-11, clang++-12, icpc]
include:
- name: build-ubuntu-gcc9
CXX: g++-9
- name: build-ubuntu-gcc10
CXX: g++-10
- name: build-ubuntu-gcc11
CXX: g++-11
- cxx: g++-11
INSTALL_EXTRA: g++-11
- name: build-ubuntu-clang10
CXX: clang++-10
- name: build-ubuntu-clang11
CXX: clang++-11
- cxx: clang++-11
INSTALL_EXTRA: clang-11
- name: build-ubuntu-clang12
CXX: clang++-12
- cxx: clang++-12
INSTALL_EXTRA: clang-12
- name: build-ubuntu-icpc
CXX: icpc
- cxx: icpc
INSTALL_ONEAPI: true
#- name: build-ubuntu-icpx
# CXX: icpx
# INSTALL_ONEAPI: true
exclude:
# icpc in debug mode runs out of memory in CI
- cxx: icpc
build_type: Debug
steps:
- uses: actions/checkout@v2
with:
Expand Down Expand Up @@ -77,10 +68,12 @@ jobs:

build-windows:
runs-on: ${{ matrix.os }}
name: build-${{ matrix.os }}
env:
build_type: ${{ matrix.build_type }}
strategy:
fail-fast: false
matrix:
build_type: [Debug, Release]
os: [windows-2019]
steps:
- uses: actions/checkout@v2
Expand Down
80 changes: 40 additions & 40 deletions src/avx_sorthelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,71 +71,71 @@ Vc_CONST AVX2::short_v sorted<CurrentImplementation::current()>(AVX2::short_v x_
// sort pairs (one min/max)
auto x = AVX::lo128(x_.data());
auto y = AVX::hi128(x_.data());
Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
auto l = _mm_min_epi16(x, y);
auto h = _mm_max_epi16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);

// merge left & right quads (two min/max)
x = _mm_unpacklo_epi16(l, h);
y = _mm_unpackhi_epi16(h, l);
Vc_DEBUG << "8x2 sorted xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "8x2 sorted xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epi16(x, y);
h = _mm_max_epi16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
x = Mem::permuteLo<X1, X0, X3, X2>(Mem::blend<X0, Y1, X2, Y3, X4, Y5, X6, Y7>(l, h));
y = Mem::permuteHi<X5, X4, X7, X6>(Mem::blend<X0, Y1, X2, Y3, X4, Y5, X6, Y7>(h, l));
Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epi16(x, y);
h = _mm_max_epi16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);

// merge quads into octs (three min/max)
x = _mm_unpacklo_epi16(h, l);
y = _mm_unpackhi_epi16(l, h);
Vc_DEBUG << "4x4 sorted xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "4x4 sorted xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epi16(x, y);
h = _mm_max_epi16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
x = Mem::permuteLo<X2, X3, X0, X1>(Mem::blend<X0, X1, Y2, Y3, X4, X5, Y6, Y7>(h, l));
y = Mem::permuteHi<X6, X7, X4, X5>(Mem::blend<X0, X1, Y2, Y3, X4, X5, Y6, Y7>(l, h));
Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epi16(x, y);
h = _mm_max_epi16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
x = Mem::permuteHi<X5, X4, X7, X6>(Mem::blend<X0, Y1, X2, Y3, X4, Y5, X6, Y7>(l, h));
y = Mem::permuteLo<X1, X0, X3, X2>(Mem::blend<X0, Y1, X2, Y3, X4, Y5, X6, Y7>(h, l));
Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epi16(x, y);
h = _mm_max_epi16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h) << " done?";
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h) << " done?";

// merge octs into hexa (four min/max)
x = _mm_unpacklo_epi16(l, h);
y = _mm_unpackhi_epi16(h, l);
Vc_DEBUG << "2x8 sorted xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "2x8 sorted xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epi16(x, y);
h = _mm_max_epi16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
x = _mm_unpacklo_epi64(l, h);
y = _mm_unpackhi_epi64(l, h);
Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epi16(x, y);
h = _mm_max_epi16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
x = _mm_castps_si128(Mem::permute<X1, X0, X3, X2>(Mem::blend<X0, Y1, X2, Y3>(_mm_castsi128_ps(h), _mm_castsi128_ps(l))));
y = _mm_castps_si128(Mem::blend<X0, Y1, X2, Y3>(_mm_castsi128_ps(l), _mm_castsi128_ps(h)));
Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epi16(x, y);
h = _mm_max_epi16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
x = Mem::blend<X0, Y1, X2, Y3, X4, Y5, X6, Y7>(l, h);
y = Mem::permuteLo<X1, X0, X3, X2>(
Mem::permuteHi<X5, X4, X7, X6>(Mem::blend<X0, Y1, X2, Y3, X4, Y5, X6, Y7>(h, l)));
Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epi16(x, y);
h = _mm_max_epi16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
x = _mm_unpacklo_epi16(l, h);
y = _mm_unpackhi_epi16(l, h);
return AVX::concat(x, y);
Expand All @@ -147,71 +147,71 @@ Vc_CONST AVX2::ushort_v sorted<CurrentImplementation::current()>(AVX2::ushort_v
// sort pairs (one min/max)
auto x = AVX::lo128(x_.data());
auto y = AVX::hi128(x_.data());
Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
auto l = _mm_min_epu16(x, y);
auto h = _mm_max_epu16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);

// merge left & right quads (two min/max)
x = _mm_unpacklo_epi16(l, h);
y = _mm_unpackhi_epi16(h, l);
Vc_DEBUG << "8x2 sorted xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "8x2 sorted xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epu16(x, y);
h = _mm_max_epu16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
x = Mem::permuteLo<X1, X0, X3, X2>(Mem::blend<X0, Y1, X2, Y3, X4, Y5, X6, Y7>(l, h));
y = Mem::permuteHi<X5, X4, X7, X6>(Mem::blend<X0, Y1, X2, Y3, X4, Y5, X6, Y7>(h, l));
Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epu16(x, y);
h = _mm_max_epu16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);

// merge quads into octs (three min/max)
x = _mm_unpacklo_epi16(h, l);
y = _mm_unpackhi_epi16(l, h);
Vc_DEBUG << "4x4 sorted xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "4x4 sorted xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epu16(x, y);
h = _mm_max_epu16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
x = Mem::permuteLo<X2, X3, X0, X1>(Mem::blend<X0, X1, Y2, Y3, X4, X5, Y6, Y7>(h, l));
y = Mem::permuteHi<X6, X7, X4, X5>(Mem::blend<X0, X1, Y2, Y3, X4, X5, Y6, Y7>(l, h));
Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epu16(x, y);
h = _mm_max_epu16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
x = Mem::permuteHi<X5, X4, X7, X6>(Mem::blend<X0, Y1, X2, Y3, X4, Y5, X6, Y7>(l, h));
y = Mem::permuteLo<X1, X0, X3, X2>(Mem::blend<X0, Y1, X2, Y3, X4, Y5, X6, Y7>(h, l));
Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epu16(x, y);
h = _mm_max_epu16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h) << " done?";
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h) << " done?";

// merge octs into hexa (four min/max)
x = _mm_unpacklo_epi16(l, h);
y = _mm_unpackhi_epi16(h, l);
Vc_DEBUG << "2x8 sorted xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "2x8 sorted xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epu16(x, y);
h = _mm_max_epu16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
x = _mm_unpacklo_epi64(l, h);
y = _mm_unpackhi_epi64(l, h);
Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epu16(x, y);
h = _mm_max_epu16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
x = _mm_castps_si128(Mem::permute<X1, X0, X3, X2>(Mem::blend<X0, Y1, X2, Y3>(_mm_castsi128_ps(h), _mm_castsi128_ps(l))));
y = _mm_castps_si128(Mem::blend<X0, Y1, X2, Y3>(_mm_castsi128_ps(l), _mm_castsi128_ps(h)));
Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epu16(x, y);
h = _mm_max_epu16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
x = Mem::blend<X0, Y1, X2, Y3, X4, Y5, X6, Y7>(l, h);
y = Mem::permuteLo<X1, X0, X3, X2>(
Mem::permuteHi<X5, X4, X7, X6>(Mem::blend<X0, Y1, X2, Y3, X4, Y5, X6, Y7>(h, l)));
Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
// Vc_DEBUG << "xy: " << AVX::addType<short>(x) << AVX::addType<short>(y);
l = _mm_min_epu16(x, y);
h = _mm_max_epu16(x, y);
Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
// Vc_DEBUG << "lh: " << AVX::addType<short>(l) << AVX::addType<short>(h);
x = _mm_unpacklo_epi16(l, h);
y = _mm_unpackhi_epi16(l, h);
return AVX::concat(x, y);
Expand Down
9 changes: 8 additions & 1 deletion tests/ulp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

TEST_TYPES(V, testUlpDiff, concat<RealVectors, RealSimdArrayList>) //{{{1
{
// MSVC takes too long in debug mode
#if defined _MSC_VER && defined _DEBUG
const auto range = 1000;
#else
const auto range = 10000;
#endif

typedef typename V::EntryType T;

using vir::detail::ulpDiffToReference;
Expand All @@ -41,7 +48,7 @@ TEST_TYPES(V, testUlpDiff, concat<RealVectors, RealSimdArrayList>) //{{{1
frexp(base, &exp);
const V eps = ldexp(V(std::numeric_limits<T>::epsilon()), exp - 1);
//std::cout << base << ", " << exp << ", " << eps << std::endl;
for (int i = -10000; i <= 10000; ++i) {
for (int i = -range; i <= range; ++i) {
const V i_v = V(T(i));
const V diff = base + i_v * eps;

Expand Down