Skip to content

Commit

Permalink
Merge pull request #2100 from xianyi/develop
Browse files Browse the repository at this point in the history
Merge develop in preparation of 0.3.6 release
  • Loading branch information
martin-frbg authored Apr 29, 2019
2 parents eebc189 + 97d5034 commit 15cb124
Show file tree
Hide file tree
Showing 386 changed files with 37,196 additions and 9,621 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ matrix:

- &test-macos
os: osx
osx_image: xcode8
osx_image: xcode10.1
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
- brew update
Expand All @@ -160,6 +160,7 @@ matrix:
- BTYPE="BINARY=64 INTERFACE64=1"

- <<: *test-macos
osx_image: xcode8.3
env:
- BTYPE="BINARY=32"

Expand Down
44 changes: 30 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
project(OpenBLAS C ASM)
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 5)
set(OpenBLAS_PATCH_VERSION 6)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")

# Adhere to GNU filesystem layout conventions
Expand Down Expand Up @@ -42,6 +42,19 @@ endif()

#######

if(MSVC AND MSVC_STATIC_CRT)
set(CompilerFlags
CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_RELEASE
CMAKE_C_FLAGS
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
)
foreach(CompilerFlag ${CompilerFlags})
string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
endforeach()
endif()

message(WARNING "CMake support is experimental. It does not yet support all build options and may not produce the same Makefiles that OpenBLAS ships with.")

Expand All @@ -62,10 +75,10 @@ endif ()

set(SUBDIRS ${BLASDIRS})
if (NOT NO_LAPACK)
list(APPEND SUBDIRS lapack)
if(BUILD_RELAPACK)
list(APPEND SUBDIRS relapack/src)
endif()
list(APPEND SUBDIRS lapack)
endif ()

# set which float types we want to build for
Expand Down Expand Up @@ -134,7 +147,7 @@ endif ()

# Only generate .def for dll on MSVC and always produce pdb files for debug and release
if(MSVC)
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4)
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4)
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
endif()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
Expand All @@ -149,15 +162,9 @@ if (${DYNAMIC_ARCH})
endforeach()
endif ()

# Only build shared libs for MSVC
if (MSVC)
set(BUILD_SHARED_LIBS ON)
endif()


# add objects to the openblas lib
add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${RELA_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
target_include_directories(${OpenBLAS_LIBNAME} INTERFACE $<INSTALL_INTERFACE:include>)
target_include_directories(${OpenBLAS_LIBNAME} INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)

# Android needs to explicitly link against libm
if(ANDROID)
Expand All @@ -166,7 +173,7 @@ endif()

# Handle MSVC exports
if(MSVC AND BUILD_SHARED_LIBS)
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4)
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4)
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
else()
# Creates verbose .def file (51KB vs 18KB)
Expand Down Expand Up @@ -217,6 +224,14 @@ set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
SOVERSION ${OpenBLAS_MAJOR_VERSION}
)

if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
if (NOT MSVC)
target_link_libraries(${OpenBLAS_LIBNAME} "-Wl,-allow-multiple-definition")
else()
target_link_libraries(${OpenBLAS_LIBNAME} "/FORCE:MULTIPLE")
endif()
endif()

if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFIX} STREQUAL "")
if (NOT DEFINED ARCH)
set(ARCH_IN "x86_64")
Expand Down Expand Up @@ -314,7 +329,7 @@ install (FILES ${OPENBLAS_CONFIG_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
if(NOT NOFORTRAN)
message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}")

set(F77BLAS_H ${CMAKE_BINARY_DIR}/f77blas.h)
set(F77BLAS_H ${CMAKE_BINARY_DIR}/generated/f77blas.h)
file(WRITE ${F77BLAS_H} "#ifndef OPENBLAS_F77BLAS_H\n")
file(APPEND ${F77BLAS_H} "#define OPENBLAS_F77BLAS_H\n")
file(APPEND ${F77BLAS_H} "#include \"openblas_config.h\"\n")
Expand All @@ -327,10 +342,11 @@ endif()
if(NOT NO_CBLAS)
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")

set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
file(WRITE ${CMAKE_BINARY_DIR}/cblas.tmp "${CBLAS_H_CONTENTS_NEW}")
install (FILES ${CMAKE_BINARY_DIR}/cblas.tmp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} RENAME cblas.h)
file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()

if(NOT NO_LAPACKE)
Expand Down
78 changes: 78 additions & 0 deletions Changelog.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,82 @@
OpenBLAS ChangeLog
====================================================================
Version 0.3.6
29-Apr-2019

common:
* the build tools now check that a given cpu TARGET is actually valid
* the build-time check of system features (c_check) has been made
less dependent on particular perl features (this should mainly
benefit building on Windows)
* several problem with the ReLAPACK integration were fixed,
including INTERFACE64 support and building a shared library
* building with CMAKE on BSD systems was improved
* a non-absolute SUM function was added based on the
existing optimized code for ASUM
* CBLAS interfaces to the IxMIN and IxMAX functions were added
* a name clash between LAPACKE and BOOST headers was resolved
* CMAKE builds with OpenMP failed to include the appropriate getrf_parallel
kernels
* a crash on thread (key) deletion with the USE_TLS=1 memory management
option was fixed
* restored several earlier fixes, in particular for OpenMP performance,
building on BSD, and calling fork on CYGWIN, which had inadvertently
been dropped in the 0.3.3 rewrite of the memory management code.

x86_64:
* the AVX512 DGEMM kernel has been disabled again due to unsolved problems
* building with old versions of MSVC was fixed
* it is now possible to build a static library on Windows with CMAKE
* accessing environment variables on CYGWIN at run time was fixed
* the CMAKE build system now recognizes 32bit userspace on 64bit hardware
* Intel "Denverton" atom and Hygon "Dhyana" zen CPUs are now autodetected
* building for DYNAMIC_ARCH with a DYNAMIC_LIST of targets is now supported
with CMAKE as well
* building for DYNAMIC_ARCH with GENERIC as the default target is now supported
* a buffer overflow in the SSE GEMM kernel for Intel Nano targets was fixed
* assembly bugs involving undeclared modification of input operands were fixed
in the AXPY, DOT, GEMV, GER, SCAL, SYMV and TRSM microkernels for Nehalem,
Sandybridge, Haswell, Bulldozer and Piledriver. These would typically cause
test failures or segfaults when compiled with recent versions of gcc from 8 onward.
* a similar bug was fixed in the blas_quickdivide code used to split workloads
in most functions
* a bug in the IxMIN implementation for the GENERIC target made it return the result of IxMAX
* fixed building on SkylakeX systems when either the compiler or the (emulated) operating
environment does not support AVX512
* improved GEMM performance on ZEN targets

x86:
* build failures caused by the recently added checks for AVX512 were fixed
* an inline assembly bug involving undeclared modification of an input argument was
fixed in the blas_quickdivide code used to split workloads in most functions
* a bug in the IMIN implementation for the GENERIC target made it return the result of IMAX

MIPS32:
* a bug in the IMIN implementation made it return the result of IMAX

POWER:
* single precision BLAS1/2 functions have received optimized POWER8 kernels
* POWER9 is now a separate target, with an optimized DGEMM/DTRMM kernel
* building on PPC970 systems under OSX Leopard or Tiger is now supported
* out-of-bounds memory accesses in the gemm_beta microkernels were fixed
* building a shared library on AIX is now supported for POWER6
* DYNAMIC_ARCH support has been added for POWER6 and newer

ARMv7:
* corrected xDOT behaviour with zero INC_X or INC_Y
* a bug in the IMIN implementation made it return the result of IMAX

ARMv8:
* added support for HiSilicon TSV110 cpus
* the CMAKE build system now recognizes 32bit userspace on 64bit hardware
* cross-compilation with CMAKE now works again
* a bug in the IMIN implementation made it return the result of IMAX
* ARMV8 builds with the BINARY=32 option are now automatically handled as ARMV7

IBM Z:
* optimized microkernels for single precicion BLAS1/2 functions have been added
for both Z13 and Z14

====================================================================
Version 0.3.5
31-Dec-2018
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ endif
@echo

shared :
ifndef NO_SHARED
ifneq ($(NO_SHARED), 1)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
@$(MAKE) -C exports so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
Expand Down
5 changes: 5 additions & 0 deletions Makefile.arm64
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,8 @@ ifeq ($(CORE), THUNDERX2T99)
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
endif

ifeq ($(CORE), TSV110)
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
endif
10 changes: 5 additions & 5 deletions Makefile.install
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,14 @@ ifndef NO_LAPACKE
endif

#for install static library
ifndef NO_STATIC
ifneq ($(NO_STATIC),1)
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@install -pm644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
endif
#for install shared library
ifndef NO_SHARED
ifneq ($(NO_SHARED),1)
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
Expand Down Expand Up @@ -106,14 +106,14 @@ ifndef NO_LAPACKE
endif

#for install static library
ifndef NO_STATIC
ifneq ($(NO_STATIC),1)
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
endif
#for install shared library
ifndef NO_SHARED
ifneq ($(NO_SHARED),1)
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
Expand All @@ -138,7 +138,7 @@ endif
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"

ifndef NO_SHARED
ifneq ($(NO_SHARED),1)
#ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
Expand Down
10 changes: 9 additions & 1 deletion Makefile.power
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,15 @@ else
USE_OPENMP = 1
endif


ifeq ($(CORE), POWER9)
ifeq ($(USE_OPENMP), 1)
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
else
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math
endif
endif

ifeq ($(CORE), POWER8)
ifeq ($(USE_OPENMP), 1)
Expand Down
Loading

0 comments on commit 15cb124

Please sign in to comment.