diff --git a/README.md b/README.md index 00c5f79d..24bd5188 100644 --- a/README.md +++ b/README.md @@ -2,14 +2,12 @@ clFFT ===== [![Build Status](https://travis-ci.org/clMathLibraries/clFFT.png)](https://travis-ci.org/clMathLibraries/clFFT) -clMath is a software library containing FFT and BLAS functions written +clFFT is a software library containing FFT functions written in OpenCL. In addition to GPU devices, the libraries also support -running on CPU devices to facilitate debugging and multicore +running on CPU devices to facilitate debugging and heterogeneous programming. -clMath 2.1 is the latest version and is available as source only. -clMath's predecessor APPML 1.10 has pre-built binaries available for -download on both Linux and Windows platforms. +Pre-built binaries are available [here][binary_release]. ## Introduction to clFFT @@ -210,3 +208,4 @@ int main( void ) [clmath-developers@googlegroups.com]: https://github.com/clMathLibraries/clFFT/wiki/Build [Contributing]: CONTRIBUTING.md [Apache License, Version 2.0]: http://www.apache.org/licenses/LICENSE-2.0 + [binary_release]: https://github.com/clMathLibraries/clFFT/releases diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f9617943..ec5fc4ba 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,12 +1,12 @@ # ######################################################################## # Copyright 2013 Advanced Micro Devices, Inc. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -40,7 +40,7 @@ if( NOT DEFINED CLFFT_VERSION_MAJOR ) endif( ) if( NOT DEFINED CLFFT_VERSION_MINOR ) - set( CLFFT_VERSION_MINOR 2 ) + set( CLFFT_VERSION_MINOR 4 ) endif( ) if( NOT DEFINED CLFFT_VERSION_PATCH ) @@ -72,7 +72,7 @@ option( BUILD_TEST "Build the library testing suite (dependency on google test, option( BUILD_LOADLIBRARIES "Build the optional dynamic load libraries that the FFT runtime will search for" ON ) option( BUILD_SHARED_LIBRARY "Build shared libraries." ON) -# If BOOST_ROOT is defined as an environment value, use that value and cache it so it's visible in the cmake-gui. +# If BOOST_ROOT is defined as an environment value, use that value and cache it so it's visible in the cmake-gui. # Otherwise, create a sensible default that the user can change if( DEFINED ENV{BOOST_ROOT} ) set( BOOST_ROOT $ENV{BOOST_ROOT} CACHE PATH "Environment variable defining the root of the Boost installation" ) @@ -105,8 +105,8 @@ else() endif() endif() -# These variables are meant to contain string which should be appended to the installation paths -# of library and executable binaries, respectively. They are meant to be user configurable/overridable. +# These variables are meant to contain string which should be appended to the installation paths +# of library and executable binaries, respectively. They are meant to be user configurable/overridable. set( SUFFIX_LIB_DEFAULT "" ) set( SUFFIX_BIN_DEFAULT "" ) @@ -155,7 +155,7 @@ find_package( OpenCL ) # This will define FFTW_FOUND find_package( FFTW ) -if( (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 2.8) ) +if( (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 2.8) ) message( STATUS "Cmake version 2.8 or greater needed to use GTest" ) else() # This will define GTEST_FOUND @@ -195,7 +195,7 @@ if( MSVC ) # CMake sets huge stack frames for windows, for whatever reason. We go with compiler default. 
string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}" ) string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}" ) - string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS}" ) + string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS}" ) elseif( CMAKE_COMPILER_IS_GNUCXX ) message( STATUS "Detected GNU fortran compiler." ) @@ -208,7 +208,12 @@ elseif( CMAKE_COMPILER_IS_GNUCXX ) set( CMAKE_CXX_FLAGS "-pthread ${CMAKE_CXX_FLAGS}" ) set( CMAKE_C_FLAGS "-pthread ${CMAKE_C_FLAGS}" ) - + + # For linux debug builds, define the same preprocessing symbol (_DEBUG) as win to keep it simple; -D is the GNU spelling of the switch + if( CMAKE_BUILD_TYPE MATCHES "Debug" ) + add_definitions( "-D_DEBUG" ) + endif( ) + if( BUILD64 ) set( CMAKE_CXX_FLAGS "-m64 ${CMAKE_CXX_FLAGS}" ) set( CMAKE_C_FLAGS "-m64 ${CMAKE_C_FLAGS}" ) @@ -237,12 +242,12 @@ message( STATUS "CMAKE_EXE_LINKER link flags: " ${CMAKE_EXE_LINKER_FLAGS} ) # configure a header file to pass the CMake version settings to the source, and package the header files in the output archive configure_file( "${PROJECT_SOURCE_DIR}/include/clFFT.version.h.in" "${PROJECT_BINARY_DIR}/include/clFFT.version.h" ) -install( FILES - "${PROJECT_BINARY_DIR}/include/clFFT.version.h" +install( FILES + "${PROJECT_BINARY_DIR}/include/clFFT.version.h" "include/clFFT.h" "include/clAmdFft.h" - "include/clAmdFft.version.h" - DESTINATION + "include/clAmdFft.version.h" + DESTINATION "./include" ) @@ -278,7 +283,7 @@ else( ) message( "GoogleTest unit tests will NOT be built" ) endif( ) -# The following code is setting variables to control the behavior of CPack to generate our +# The following code is setting variables to control the behavior of CPack to generate our if( WIN32 ) set( CPACK_SOURCE_GENERATOR "ZIP" ) set( CPACK_GENERATOR "ZIP" ) diff --git a/src/FindOpenCL.cmake b/src/FindOpenCL.cmake index 8725612f..ca83e5f8 100644 ---
a/src/FindOpenCL.cmake +++ b/src/FindOpenCL.cmake @@ -56,6 +56,8 @@ find_path(OPENCL_INCLUDE_DIRS PATHS /usr/include /usr/local/include + /usr/local/cuda/include + /opt/cuda/include DOC "OpenCL header file path" ) mark_as_advanced( OPENCL_INCLUDE_DIRS ) @@ -70,6 +72,8 @@ if( LIB64 ) ${OPENCL_ROOT}/lib $ENV{AMDAPPSDKROOT}/lib $ENV{CUDA_PATH}/lib + /usr/local/cuda/lib + /opt/cuda/lib DOC "OpenCL dynamic library path" PATH_SUFFIXES x86_64 x64 PATHS @@ -82,6 +86,8 @@ else( ) ${OPENCL_ROOT}/lib $ENV{AMDAPPSDKROOT}/lib $ENV{CUDA_PATH}/lib + /usr/local/cuda/lib + /opt/cuda/lib DOC "OpenCL dynamic library path" PATH_SUFFIXES x86 Win32 PATHS diff --git a/src/library/private.h b/src/library/private.h index 5a31d2f2..7c00ca32 100644 --- a/src/library/private.h +++ b/src/library/private.h @@ -270,6 +270,8 @@ inline tstring clfftErrorStatusAsString( const cl_int& status ) // This is used to either wrap an OpenCL function call, or to explicitly check a variable for an OpenCL error condition. // If an error occurs, we issue a return statement to exit the calling function. 
+#if defined( _DEBUG ) + #define OPENCL_V( fn, msg ) \ { \ clfftStatus vclStatus = static_cast< clfftStatus >( fn ); \ @@ -288,6 +290,23 @@ inline tstring clfftErrorStatusAsString( const cl_int& status ) } \ } +#else + +#define OPENCL_V( fn, msg ) \ +{ \ + clfftStatus vclStatus = static_cast< clfftStatus >( fn ); \ + switch( vclStatus ) \ + { \ + case CL_SUCCESS: /**< No error */ \ + break; \ + default: \ + { \ + return vclStatus; \ + } \ + } \ +} +#endif + static inline bool IsPo2 (size_t u) { return (u != 0) && (0 == (u & (u-1))); } diff --git a/src/tests/cl_transform.h b/src/tests/cl_transform.h index 2c2036ce..a0214ca8 100644 --- a/src/tests/cl_transform.h +++ b/src/tests/cl_transform.h @@ -151,7 +151,6 @@ class clfft { // OpenCL resources that need to be carefully managed std::unique_ptr< _cl_context, clContext_deleter > context; std::unique_ptr< _cl_command_queue, clCommQueue_deleter > queue; - std::unique_ptr< _cl_event, clEvent_deleter > an_event; std::vector< std::unique_ptr< _cl_mem, clMem_deleter > > cl_mem_input; std::vector< std::unique_ptr< _cl_mem, clMem_deleter > > cl_mem_output; std::vector< cl_device_id > device_id; @@ -630,7 +629,6 @@ class clfft { // In order to call clfftEnqueueTransform, we need to pass naked pointers cl_command_queue tempQueue = queue.get( ); - cl_event tempEvent = an_event.get( ); size_t buffer_size = 0; EXPECT_EQ( CLFFT_SUCCESS, clfftBakePlan(*plan_handle, 1, &tempQueue, NULL, NULL )); @@ -683,7 +681,7 @@ class clfft { // In order to call clfftEnqueueTransform, we need to pass naked pointers cl_command_queue tempQueue = queue.get( ); - cl_event tempEvent = an_event.get( ); + std::unique_ptr< _cl_event, clEvent_deleter > tempEvent; std::unique_ptr< _cl_mem, clMem_deleter > intermediate_buffer; throw_if_total_memory_footprint_is_too_large_for_device(); @@ -726,6 +724,7 @@ class clfft { for( cl_uint i = 0; i < cl_mem_output.size( ); ++i ) tempOutput[ i ] = cl_mem_output[ i ].get( ); + cl_event tevent = NULL; if( buffer_size ) 
{ status = clfftEnqueueTransform(*plan_handle, @@ -734,7 +733,7 @@ class clfft { &tempQueue, 0, NULL, - &tempEvent, + &tevent, &tempInput[ 0 ], &tempOutput[ 0 ], intermediate_buffer.get() ); @@ -747,12 +746,13 @@ class clfft { &tempQueue, 0, NULL, - &tempEvent, + &tevent, &tempInput[ 0 ], &tempOutput[ 0 ], NULL ); } clFinish(tempQueue); + tempEvent.reset(tevent); tevent = NULL; if( status != CLFFT_SUCCESS ) { @@ -760,11 +760,12 @@ class clfft { } // wait for the kernel call to finish execution - cl_int wait_status = clWaitForEvents(1, &tempEvent); + const cl_event revent = tempEvent.get(); + cl_int wait_status = clWaitForEvents(1, &revent); if( wait_status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST ) { cl_int error_code; - clGetEventInfo( tempEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &error_code, NULL ); + clGetEventInfo( revent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &error_code, NULL ); throw std::runtime_error(prettyPrintclFFTStatus(error_code).c_str()); } else if( wait_status != CL_SUCCESS )