From 01e00e740319b0a0420194a2c9d3e7517830f322 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Thu, 13 Feb 2020 14:32:24 +0100 Subject: [PATCH] Fix implementation of iamax_sse.S as reported in #2116. There was a typo in iamax_sse.S where one of the comparisons was cmpeqps instead of cmpeqss. That misdetected the index for sequences where the minimum value was 0. --- kernel/x86_64/KERNEL | 4 ++-- kernel/x86_64/iamax_sse.S | 6 +----- utest/CMakeLists.txt | 1 + utest/Makefile | 2 +- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL index 92d121ab2d..4874711bbe 100644 --- a/kernel/x86_64/KERNEL +++ b/kernel/x86_64/KERNEL @@ -171,7 +171,7 @@ IXAMAXKERNEL = izamax.S endif ifndef ISAMINKERNEL -ISAMINKERNEL = iamax.S +ISAMINKERNEL = iamax_sse.S endif ifndef IDAMINKERNEL @@ -207,7 +207,7 @@ IQMAXKERNEL = iamax.S endif ifndef ISMINKERNEL -ISMINKERNEL = iamax.S +ISMINKERNEL = iamax_sse.S endif ifndef IDMINKERNEL diff --git a/kernel/x86_64/iamax_sse.S b/kernel/x86_64/iamax_sse.S index d50c1699cb..9c7af1fd7b 100644 --- a/kernel/x86_64/iamax_sse.S +++ b/kernel/x86_64/iamax_sse.S @@ -36,10 +36,6 @@ /* or implied, of The University of Texas at Austin. */ /*********************************************************************/ -/* This kernel was found to give wrong results when used for ISMIN/ISAMIN with increment != 1, although it appears to be correct for corresponding MAX operations. 
See issue 2116 */ - #define ASSEMBLER #include "common.h" @@ -863,7 +859,7 @@ #ifdef USE_ABS andps %xmm15, %xmm5 #endif - cmpeqps %xmm0, %xmm5 + cmpeqss %xmm0, %xmm5 movss 0 * SIZE(X), %xmm6 addq INCX, X diff --git a/utest/CMakeLists.txt b/utest/CMakeLists.txt index 1e3051a8fd..c63b6d3494 100644 --- a/utest/CMakeLists.txt +++ b/utest/CMakeLists.txt @@ -7,6 +7,7 @@ else () set(OpenBLAS_utest_src utest_main.c test_amax.c + test_ismin.c test_rotmg.c test_rot.c test_axpy.c diff --git a/utest/Makefile b/utest/Makefile index bd4bdf3ae8..65865be3b4 100644 --- a/utest/Makefile +++ b/utest/Makefile @@ -11,7 +11,7 @@ UTESTBIN=openblas_utest include $(TOPDIR)/Makefile.system -OBJS=utest_main.o test_amax.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o +OBJS=utest_main.o test_amax.o test_ismin.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o #test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o ifneq ($(NO_LAPACK), 1)