diff --git a/3rdparty/mshadow/cmake/Cuda.cmake b/3rdparty/mshadow/cmake/Cuda.cmake deleted file mode 100644 index bc09a3905076..000000000000 --- a/3rdparty/mshadow/cmake/Cuda.cmake +++ /dev/null @@ -1,324 +0,0 @@ -if(NOT USE_CUDA) - return() -endif() - -include(CheckCXXCompilerFlag) -check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11) - -################################################################################################ -# A function for automatic detection of GPUs installed (if autodetection is enabled) -# Usage: -# mshadow_detect_installed_gpus(out_variable) -function(mshadow_detect_installed_gpus out_variable) -set(CUDA_gpu_detect_output "") - if(NOT CUDA_gpu_detect_output) - message(STATUS "Running GPU architecture autodetection") - set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu) - - file(WRITE ${__cufile} "" - "#include \n" - "#include \n" - "using namespace std;\n" - "int main()\n" - "{\n" - " int count = 0;\n" - " if (cudaSuccess != cudaGetDeviceCount(&count)) { return -1; }\n" - " if (count == 0) { cerr << \"No cuda devices detected\" << endl; return -1; }\n" - " for (int device = 0; device < count; ++device)\n" - " {\n" - " cudaDeviceProp prop;\n" - " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" - " std::printf(\"%d.%d \", prop.major, prop.minor);\n" - " }\n" - " return 0;\n" - "}\n") - if(MSVC) - #find vcvarsall.bat and run it building msvc environment - get_filename_component(MY_COMPILER_DIR ${CMAKE_CXX_COMPILER} DIRECTORY) - find_file(MY_VCVARSALL_BAT vcvarsall.bat "${MY_COMPILER_DIR}/.." 
"${MY_COMPILER_DIR}/../..") - execute_process(COMMAND ${MY_VCVARSALL_BAT} && ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile} - WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" - RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out - OUTPUT_STRIP_TRAILING_WHITESPACE) - else() - if(CUDA_LIBRARY_PATH) - set(CUDA_LINK_LIBRARY_PATH "-L${CUDA_LIBRARY_PATH}") - endif() - execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile} ${CUDA_LINK_LIBRARY_PATH} - WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" - RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out - OUTPUT_STRIP_TRAILING_WHITESPACE) - endif() - if(__nvcc_res EQUAL 0) - # nvcc outputs text containing line breaks when building with MSVC. - # The line below prevents CMake from inserting a variable with line - # breaks in the cache - message(STATUS "Found CUDA arch ${__nvcc_out}") - string(REGEX MATCH "([1-9].[0-9])" __nvcc_out "${__nvcc_out}") - string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}") - set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from mshadow_detect_gpus tool" FORCE) - else() - message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out}") - endif() - endif() - - if(NOT CUDA_gpu_detect_output) - message(WARNING "Automatic GPU detection failed. 
Building for all known architectures (${mshadow_known_gpu_archs}).") - set(${out_variable} ${mshadow_known_gpu_archs} PARENT_SCOPE) - else() - set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE) - endif() -endfunction() - - -################################################################################################ -# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME -# Usage: -# mshadow_select_nvcc_arch_flags(out_variable) -function(mshadow_select_nvcc_arch_flags out_variable) - # List of arch names - set(__archs_names "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "All" "Manual") - set(__archs_name_default "All") - if(NOT CMAKE_CROSSCOMPILING) - list(APPEND __archs_names "Auto") - set(__archs_name_default "Auto") - endif() - - # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui) - set(CUDA_ARCH_NAME ${__archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.") - set_property( CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${__archs_names} ) - mark_as_advanced(CUDA_ARCH_NAME) - - # verify CUDA_ARCH_NAME value - if(NOT ";${__archs_names};" MATCHES ";${CUDA_ARCH_NAME};") - string(REPLACE ";" ", " __archs_names "${__archs_names}") - message(FATAL_ERROR "Only ${__archs_names} architeture names are supported.") - endif() - - if(${CUDA_ARCH_NAME} STREQUAL "Manual") - set(CUDA_ARCH_BIN ${mshadow_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") - set(CUDA_ARCH_PTX "50" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") - mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX) - else() - unset(CUDA_ARCH_BIN CACHE) - unset(CUDA_ARCH_PTX CACHE) - endif() - - if(${CUDA_ARCH_NAME} STREQUAL "Fermi") - set(__cuda_arch_bin "20 21(20)") - elseif(${CUDA_ARCH_NAME} STREQUAL "Kepler") - set(__cuda_arch_bin "30 35") - elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") - set(__cuda_arch_bin "50") - elseif(${CUDA_ARCH_NAME} STREQUAL 
"Pascal") - set(__cuda_arch_bin "60 61") - elseif(${CUDA_ARCH_NAME} STREQUAL "Volta") - set(__cuda_arch_bin "70") - elseif(${CUDA_ARCH_NAME} STREQUAL "All") - set(__cuda_arch_bin ${mshadow_known_gpu_archs}) - elseif(${CUDA_ARCH_NAME} STREQUAL "Auto") - mshadow_detect_installed_gpus(__cuda_arch_bin) - else() # (${CUDA_ARCH_NAME} STREQUAL "Manual") - set(__cuda_arch_bin ${CUDA_ARCH_BIN}) - endif() - - # remove dots and convert to lists - string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}") - string(REGEX REPLACE "\\." "" __cuda_arch_ptx "${CUDA_ARCH_PTX}") - string(REGEX MATCHALL "[0-9()]+" __cuda_arch_bin "${__cuda_arch_bin}") - string(REGEX MATCHALL "[0-9]+" __cuda_arch_ptx "${__cuda_arch_ptx}") - mshadow_list_unique(__cuda_arch_bin __cuda_arch_ptx) - - set(__nvcc_flags "") - set(__nvcc_archs_readable "") - - # Tell NVCC to add binaries for the specified GPUs - foreach(__arch ${__cuda_arch_bin}) - if(__arch MATCHES "([0-9]+)\\(([0-9]+)\\)") - # User explicitly specified PTX for the concrete BIN - list(APPEND __nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) - list(APPEND __nvcc_archs_readable sm_${CMAKE_MATCH_1}) - else() - # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN - list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=sm_${__arch}) - list(APPEND __nvcc_archs_readable sm_${__arch}) - endif() - endforeach() - - # Tell NVCC to add PTX intermediate code for the specified architectures - foreach(__arch ${__cuda_arch_ptx}) - list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=compute_${__arch}) - list(APPEND __nvcc_archs_readable compute_${__arch}) - endforeach() - - string(REPLACE ";" " " __nvcc_archs_readable "${__nvcc_archs_readable}") - set(${out_variable} ${__nvcc_flags} PARENT_SCOPE) - set(${out_variable}_readable ${__nvcc_archs_readable} PARENT_SCOPE) -endfunction() - -################################################################################################ -# 
Short command for cuda comnpilation -# Usage: -# mshadow_cuda_compile( ) -macro(mshadow_cuda_compile objlist_variable) - foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) - set(${var}_backup_in_cuda_compile_ "${${var}}") - - # we remove /EHa as it generates warnings under windows - string(REPLACE "/EHa" "" ${var} "${${var}}") - - endforeach() - if(UNIX OR APPLE) - list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC) - endif() - - if(APPLE) - list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-unused-function) - endif() - - set(CUDA_NVCC_FLAGS_DEBUG "${CUDA_NVCC_FLAGS_DEBUG} -G") - - if(MSVC) - # disable noisy warnings: - # 4819: The file contains a character that cannot be represented in the current code page (number). - list(APPEND CUDA_NVCC_FLAGS -Xcompiler "/wd4819") - foreach(flag_var - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) - if(${flag_var} MATCHES "/MD") - string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") - endif(${flag_var} MATCHES "/MD") - endforeach(flag_var) - endif() - - # If the build system is a container, make sure the nvcc intermediate files - # go into the build output area rather than in /tmp, which may run out of space - if(IS_CONTAINER_BUILD) - set(CUDA_NVCC_INTERMEDIATE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - message(STATUS "Container build enabled, so nvcc intermediate files in: ${CUDA_NVCC_INTERMEDIATE_DIR}") - list(APPEND CUDA_NVCC_FLAGS "--keep --keep-dir ${CUDA_NVCC_INTERMEDIATE_DIR}") - endif() - - cuda_compile(cuda_objcs ${ARGN}) - - foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) - set(${var} "${${var}_backup_in_cuda_compile_}") - unset(${var}_backup_in_cuda_compile_) - endforeach() - - set(${objlist_variable} ${cuda_objcs}) -endmacro() - -################################################################################################ -# Short command for cuDNN detection. 
Believe it soon will be a part of CUDA toolkit distribution. -# That's why not FindcuDNN.cmake file, but just the macro -# Usage: -# detect_cuDNN() -function(detect_cuDNN) - set(CUDNN_ROOT "" CACHE PATH "CUDNN root folder") - - find_path(CUDNN_INCLUDE cudnn.h - PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDA_TOOLKIT_INCLUDE} - DOC "Path to cuDNN include directory." ) - - get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH) - find_library(CUDNN_LIBRARY NAMES libcudnn.so cudnn.lib # libcudnn_static.a - PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} ${__libpath_hist} - DOC "Path to cuDNN library.") - - if(CUDNN_INCLUDE AND CUDNN_LIBRARY) - set(HAVE_CUDNN TRUE PARENT_SCOPE) - set(CUDNN_FOUND TRUE PARENT_SCOPE) - - mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY CUDNN_ROOT) - message(STATUS "Found cuDNN (include: ${CUDNN_INCLUDE}, library: ${CUDNN_LIBRARY})") - endif() -endfunction() - - -################################################################################################ -### Non macro section -################################################################################################ - -# Try to prime CUDA_TOOLKIT_ROOT_DIR by looking for libcudart.so -if(NOT CUDA_TOOLKIT_ROOT_DIR) - find_library(CUDA_LIBRARY_PATH libcudart.so PATHS ENV LD_LIBRARY_PATH PATH_SUFFIXES lib lib64) - if(CUDA_LIBRARY_PATH) - get_filename_component(CUDA_LIBRARY_PATH ${CUDA_LIBRARY_PATH} DIRECTORY) - set(CUDA_TOOLKIT_ROOT_DIR "${CUDA_LIBRARY_PATH}/..") - endif() -endif() - -find_package(CUDA 5.5 QUIET REQUIRED) -find_cuda_helper_libs(curand) # cmake 2.8.7 compartibility which doesn't search for curand - -if(NOT CUDA_FOUND) - return() -endif() - -set(HAVE_CUDA TRUE) -message(STATUS "CUDA detected: " ${CUDA_VERSION}) -include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) -list(APPEND mshadow_LINKER_LIBS ${CUDA_CUDART_LIBRARY} - ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) - -# Known NVIDIA GPU achitectures mshadow can be compiled for. 
-# This list will be used for CUDA_ARCH_NAME = All option -if(CUDA_ARCH_ALL) - set(mshadow_known_gpu_archs "${CUDA_ARCH_ALL}") -else() - if(${CUDA_VERSION} EQUAL 9.0 OR ${CUDA_VERSION} GREATER 9.0) - set(mshadow_known_gpu_archs "30 35 50 52 60 61 70") - elseif(${CUDA_VERSION} EQUAL 8.0 OR ${CUDA_VERSION} GREATER 8.0) - set(mshadow_known_gpu_archs "30 35 50 52 60 61") - else() - set(mshadow_known_gpu_archs "30 35 50 52") - endif() -endif() - -# cudnn detection -if(USE_CUDNN) - detect_cuDNN() - if(HAVE_CUDNN) - add_definitions(-DUSE_CUDNN) - include_directories(SYSTEM ${CUDNN_INCLUDE}) - list(APPEND mshadow_LINKER_LIBS ${CUDNN_LIBRARY}) - endif() -endif() - -# setting nvcc arch flags -mshadow_select_nvcc_arch_flags(NVCC_FLAGS_EXTRA) -list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA}) -message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}") - -# Boost 1.55 workaround, see https://svn.boost.org/trac/boost/ticket/9392 or -# https://github.com/ComputationalRadiationPhysics/picongpu/blob/master/src/picongpu/CMakeLists.txt -if(Boost_VERSION EQUAL 105500) - message(STATUS "Cuda + Boost 1.55: Applying noinline work around") - # avoid warning for CMake >= 2.8.12 - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} \"-DBOOST_NOINLINE=__attribute__((noinline))\" ") -endif() - -# disable some nvcc diagnostic that apears in boost, glog, glags, opencv, etc. 
-foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration set_but_not_used) - list(APPEND CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=${diag}) -endforeach() - -# setting default testing device -if(NOT CUDA_TEST_DEVICE) - set(CUDA_TEST_DEVICE -1) -endif() - -mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD) -mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION) - -# Handle clang/libc++ issue -if(APPLE) - mshadow_detect_darwin_version(OSX_VERSION) - - # OSX 10.9 and higher uses clang/libc++ by default which is incompartible with old CUDA toolkits - if(OSX_VERSION VERSION_GREATER 10.8) - # enabled by default if and only if CUDA version is less than 7.0 - mshadow_option(USE_libstdcpp "Use libstdc++ instead of libc++" (CUDA_VERSION VERSION_LESS 7.0)) - endif() -endif() diff --git a/3rdparty/mshadow/cmake/Utils.cmake b/3rdparty/mshadow/cmake/Utils.cmake deleted file mode 100644 index dc464f0092f5..000000000000 --- a/3rdparty/mshadow/cmake/Utils.cmake +++ /dev/null @@ -1,398 +0,0 @@ -################################################################################################ -# Command alias for debugging messages -# Usage: -# dmsg() -function(dmsg) - message(STATUS ${ARGN}) -endfunction() - -################################################################################################ -# Removes duplicates from list(s) -# Usage: -# mshadow_list_unique( [] [...]) -macro(mshadow_list_unique) - foreach(__lst ${ARGN}) - if(${__lst}) - list(REMOVE_DUPLICATES ${__lst}) - endif() - endforeach() -endmacro() - -################################################################################################ -# Clears variables from list -# Usage: -# mshadow_clear_vars() -macro(mshadow_clear_vars) - foreach(_var ${ARGN}) - unset(${_var}) - endforeach() -endmacro() - -################################################################################################ -# Removes duplicates from string -# Usage: -# 
mshadow_string_unique() -function(mshadow_string_unique __string) - if(${__string}) - set(__list ${${__string}}) - separate_arguments(__list) - list(REMOVE_DUPLICATES __list) - foreach(__e ${__list}) - set(__str "${__str} ${__e}") - endforeach() - set(${__string} ${__str} PARENT_SCOPE) - endif() -endfunction() - -################################################################################################ -# Prints list element per line -# Usage: -# mshadow_print_list() -function(mshadow_print_list) - foreach(e ${ARGN}) - message(STATUS ${e}) - endforeach() -endfunction() - -################################################################################################ -# Function merging lists of compiler flags to single string. -# Usage: -# mshadow_merge_flag_lists(out_variable [] [] ...) -function(mshadow_merge_flag_lists out_var) - set(__result "") - foreach(__list ${ARGN}) - foreach(__flag ${${__list}}) - string(STRIP ${__flag} __flag) - set(__result "${__result} ${__flag}") - endforeach() - endforeach() - string(STRIP ${__result} __result) - set(${out_var} ${__result} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Converts all paths in list to absolute -# Usage: -# mshadow_convert_absolute_paths() -function(mshadow_convert_absolute_paths variable) - set(__dlist "") - foreach(__s ${${variable}}) - get_filename_component(__abspath ${__s} ABSOLUTE) - list(APPEND __list ${__abspath}) - endforeach() - set(${variable} ${__list} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Reads set of version defines from the header file -# Usage: -# mshadow_parse_header( ..) 
-macro(mshadow_parse_header FILENAME FILE_VAR) - set(vars_regex "") - set(__parnet_scope OFF) - set(__add_cache OFF) - foreach(name ${ARGN}) - if("${name}" STREQUAL "PARENT_SCOPE") - set(__parnet_scope ON) - elseif("${name}" STREQUAL "CACHE") - set(__add_cache ON) - elseif(vars_regex) - set(vars_regex "${vars_regex}|${name}") - else() - set(vars_regex "${name}") - endif() - endforeach() - if(EXISTS "${FILENAME}") - file(STRINGS "${FILENAME}" ${FILE_VAR} REGEX "#define[ \t]+(${vars_regex})[ \t]+[0-9]+" ) - else() - unset(${FILE_VAR}) - endif() - foreach(name ${ARGN}) - if(NOT "${name}" STREQUAL "PARENT_SCOPE" AND NOT "${name}" STREQUAL "CACHE") - if(${FILE_VAR}) - if(${FILE_VAR} MATCHES ".+[ \t]${name}[ \t]+([0-9]+).*") - string(REGEX REPLACE ".+[ \t]${name}[ \t]+([0-9]+).*" "\\1" ${name} "${${FILE_VAR}}") - else() - set(${name} "") - endif() - if(__add_cache) - set(${name} ${${name}} CACHE INTERNAL "${name} parsed from ${FILENAME}" FORCE) - elseif(__parnet_scope) - set(${name} "${${name}}" PARENT_SCOPE) - endif() - else() - unset(${name} CACHE) - endif() - endif() - endforeach() -endmacro() - -################################################################################################ -# Reads single version define from the header file and parses it -# Usage: -# mshadow_parse_header_single_define( ) -function(mshadow_parse_header_single_define LIBNAME HDR_PATH VARNAME) - set(${LIBNAME}_H "") - if(EXISTS "${HDR_PATH}") - file(STRINGS "${HDR_PATH}" ${LIBNAME}_H REGEX "^#define[ \t]+${VARNAME}[ \t]+\"[^\"]*\".*$" LIMIT_COUNT 1) - endif() - - if(${LIBNAME}_H) - string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MAJOR "${${LIBNAME}_H}") - string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MINOR "${${LIBNAME}_H}") - string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_PATCH "${${LIBNAME}_H}") - set(${LIBNAME}_VERSION_MAJOR 
${${LIBNAME}_VERSION_MAJOR} ${ARGN} PARENT_SCOPE) - set(${LIBNAME}_VERSION_MINOR ${${LIBNAME}_VERSION_MINOR} ${ARGN} PARENT_SCOPE) - set(${LIBNAME}_VERSION_PATCH ${${LIBNAME}_VERSION_PATCH} ${ARGN} PARENT_SCOPE) - set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}" PARENT_SCOPE) - - # append a TWEAK version if it exists: - set(${LIBNAME}_VERSION_TWEAK "") - if("${${LIBNAME}_H}" MATCHES "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+).*$") - set(${LIBNAME}_VERSION_TWEAK "${CMAKE_MATCH_1}" ${ARGN} PARENT_SCOPE) - endif() - if(${LIBNAME}_VERSION_TWEAK) - set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}.${${LIBNAME}_VERSION_TWEAK}" ${ARGN} PARENT_SCOPE) - else() - set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}" ${ARGN} PARENT_SCOPE) - endif() - endif() -endfunction() - -######################################################################################################## -# An option that the user can select. Can accept condition to control when option is available for user. 
-# Usage: -# mshadow_option( "doc string" [IF ]) -function(mshadow_option variable description value) - set(__value ${value}) - set(__condition "") - set(__varname "__value") - foreach(arg ${ARGN}) - if(arg STREQUAL "IF" OR arg STREQUAL "if") - set(__varname "__condition") - else() - list(APPEND ${__varname} ${arg}) - endif() - endforeach() - unset(__varname) - if("${__condition}" STREQUAL "") - set(__condition 2 GREATER 1) - endif() - - if(${__condition}) - if("${__value}" MATCHES ";") - if(${__value}) - option(${variable} "${description}" ON) - else() - option(${variable} "${description}" OFF) - endif() - elseif(DEFINED ${__value}) - if(${__value}) - option(${variable} "${description}" ON) - else() - option(${variable} "${description}" OFF) - endif() - else() - option(${variable} "${description}" ${__value}) - endif() - else() - unset(${variable} CACHE) - endif() -endfunction() - -################################################################################################ -# Utility macro for comparing two lists. Used for CMake debugging purposes -# Usage: -# mshadow_compare_lists( [description]) -function(mshadow_compare_lists list1 list2 desc) - set(__list1 ${${list1}}) - set(__list2 ${${list2}}) - list(SORT __list1) - list(SORT __list2) - list(LENGTH __list1 __len1) - list(LENGTH __list2 __len2) - - if(NOT ${__len1} EQUAL ${__len2}) - message(FATAL_ERROR "Lists are not equal. ${__len1} != ${__len2}. ${desc}") - endif() - - foreach(__i RANGE 1 ${__len1}) - math(EXPR __index "${__i}- 1") - list(GET __list1 ${__index} __item1) - list(GET __list2 ${__index} __item2) - if(NOT ${__item1} STREQUAL ${__item2}) - message(FATAL_ERROR "Lists are not equal. Differ at element ${__index}. 
${desc}") - endif() - endforeach() -endfunction() - -################################################################################################ -# Command for disabling warnings for different platforms (see below for gcc and VisualStudio) -# Usage: -# mshadow_warnings_disable( -Wshadow /wd4996 ..,) -macro(mshadow_warnings_disable) - set(_flag_vars "") - set(_msvc_warnings "") - set(_gxx_warnings "") - - foreach(arg ${ARGN}) - if(arg MATCHES "^CMAKE_") - list(APPEND _flag_vars ${arg}) - elseif(arg MATCHES "^/wd") - list(APPEND _msvc_warnings ${arg}) - elseif(arg MATCHES "^-W") - list(APPEND _gxx_warnings ${arg}) - endif() - endforeach() - - if(NOT _flag_vars) - set(_flag_vars CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - endif() - - if(MSVC AND _msvc_warnings) - foreach(var ${_flag_vars}) - foreach(warning ${_msvc_warnings}) - set(${var} "${${var}} ${warning}") - endforeach() - endforeach() - elseif((CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) AND _gxx_warnings) - foreach(var ${_flag_vars}) - foreach(warning ${_gxx_warnings}) - if(NOT warning MATCHES "^-Wno-") - string(REPLACE "${warning}" "" ${var} "${${var}}") - string(REPLACE "-W" "-Wno-" warning "${warning}") - endif() - set(${var} "${${var}} ${warning}") - endforeach() - endforeach() - endif() - mshadow_clear_vars(_flag_vars _msvc_warnings _gxx_warnings) -endmacro() - -################################################################################################ -# Helper function get current definitions -# Usage: -# mshadow_get_current_definitions() -function(mshadow_get_current_definitions definitions_var) - get_property(current_definitions DIRECTORY PROPERTY COMPILE_DEFINITIONS) - set(result "") - - foreach(d ${current_definitions}) - list(APPEND result -D${d}) - endforeach() - - mshadow_list_unique(result) - set(${definitions_var} ${result} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Helper function get current 
includes/definitions -# Usage: -# mshadow_get_current_cflags() -function(mshadow_get_current_cflags cflags_var) - get_property(current_includes DIRECTORY PROPERTY INCLUDE_DIRECTORIES) - mshadow_convert_absolute_paths(current_includes) - mshadow_get_current_definitions(cflags) - - foreach(i ${current_includes}) - list(APPEND cflags "-I${i}") - endforeach() - - mshadow_list_unique(cflags) - set(${cflags_var} ${cflags} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Helper function to parse current linker libs into link directories, libflags and osx frameworks -# Usage: -# mshadow_parse_linker_libs( ) -function(mshadow_parse_linker_libs mshadow_LINKER_LIBS_variable folders_var flags_var frameworks_var) - - set(__unspec "") - set(__debug "") - set(__optimized "") - set(__framework "") - set(__varname "__unspec") - - # split libs into debug, optimized, unspecified and frameworks - foreach(list_elem ${${mshadow_LINKER_LIBS_variable}}) - if(list_elem STREQUAL "debug") - set(__varname "__debug") - elseif(list_elem STREQUAL "optimized") - set(__varname "__optimized") - elseif(list_elem MATCHES "^-framework[ \t]+([^ \t].*)") - list(APPEND __framework -framework ${CMAKE_MATCH_1}) - else() - list(APPEND ${__varname} ${list_elem}) - set(__varname "__unspec") - endif() - endforeach() - - # attach debug or optimized libs to unspecified according to current configuration - if(CMAKE_BUILD_TYPE MATCHES "Debug") - set(__libs ${__unspec} ${__debug}) - else() - set(__libs ${__unspec} ${__optimized}) - endif() - - set(libflags "") - set(folders "") - - # convert linker libraries list to link flags - foreach(lib ${__libs}) - if(TARGET ${lib}) - list(APPEND folders $) - list(APPEND libflags -l${lib}) - elseif(lib MATCHES "^-l.*") - list(APPEND libflags ${lib}) - elseif(IS_ABSOLUTE ${lib}) - get_filename_component(name_we ${lib} NAME_WE) - get_filename_component(folder ${lib} PATH) - - string(REGEX MATCH 
"^lib(.*)" __match ${name_we}) - list(APPEND libflags -l${CMAKE_MATCH_1}) - list(APPEND folders ${folder}) - else() - message(FATAL_ERROR "Logic error. Need to update cmake script") - endif() - endforeach() - - mshadow_list_unique(libflags folders) - - set(${folders_var} ${folders} PARENT_SCOPE) - set(${flags_var} ${libflags} PARENT_SCOPE) - set(${frameworks_var} ${__framework} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Helper function to detect Darwin version, i.e. 10.8, 10.9, 10.10, .... -# Usage: -# mshadow_detect_darwin_version() -function(mshadow_detect_darwin_version output_var) - if(APPLE) - execute_process(COMMAND /usr/bin/sw_vers -productVersion - RESULT_VARIABLE __sw_vers OUTPUT_VARIABLE __sw_vers_out - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - - set(${output_var} ${__sw_vers_out} PARENT_SCOPE) - else() - set(${output_var} "" PARENT_SCOPE) - endif() -endfunction() - -################################################################################################ -# Convenient command to setup source group for IDEs that support this feature (VS, XCode) -# Usage: -# caffe_source_group( GLOB[_RECURSE] ) -function(mshadow_source_group group) - cmake_parse_arguments(CAFFE_SOURCE_GROUP "" "" "GLOB;GLOB_RECURSE" ${ARGN}) - if(CAFFE_SOURCE_GROUP_GLOB) - file(GLOB srcs1 ${CAFFE_SOURCE_GROUP_GLOB}) - source_group(${group} FILES ${srcs1}) - endif() - - if(CAFFE_SOURCE_GROUP_GLOB_RECURSE) - file(GLOB_RECURSE srcs2 ${CAFFE_SOURCE_GROUP_GLOB_RECURSE}) - source_group(${group} FILES ${srcs2}) - endif() -endfunction() \ No newline at end of file diff --git a/3rdparty/mshadow/cmake/mshadow.cmake b/3rdparty/mshadow/cmake/mshadow.cmake deleted file mode 100644 index 1ef76988d8d0..000000000000 --- a/3rdparty/mshadow/cmake/mshadow.cmake +++ /dev/null @@ -1,91 +0,0 @@ -set(mshadow_LINKER_LIBS "") - -set(BLAS "Open" CACHE STRING "Selected BLAS library") -set_property(CACHE BLAS 
PROPERTY STRINGS "Atlas;Open;MKL") - -if(DEFINED USE_BLAS) - set(BLAS "${USE_BLAS}") -else() - if(USE_MKL_IF_AVAILABLE) - if(NOT MKL_FOUND) - find_package(MKL) - endif() - if(MKL_FOUND) - set(BLAS "MKL") - endif() - endif() -endif() - -if(BLAS STREQUAL "Atlas" OR BLAS STREQUAL "atlas") - find_package(Atlas REQUIRED) - include_directories(SYSTEM ${Atlas_INCLUDE_DIR}) - list(APPEND mshadow_LINKER_LIBS ${Atlas_LIBRARIES}) - add_definitions(-DMSHADOW_USE_CBLAS=1) - add_definitions(-DMSHADOW_USE_MKL=0) -elseif(BLAS STREQUAL "Open" OR BLAS STREQUAL "open") - find_package(OpenBLAS REQUIRED) - include_directories(SYSTEM ${OpenBLAS_INCLUDE_DIR}) - list(APPEND mshadow_LINKER_LIBS ${OpenBLAS_LIB}) - add_definitions(-DMSHADOW_USE_CBLAS=1) - add_definitions(-DMSHADOW_USE_MKL=0) -elseif(BLAS STREQUAL "MKL" OR BLAS STREQUAL "mkl") - find_package(MKL REQUIRED) - include_directories(SYSTEM ${MKL_INCLUDE_DIR}) - list(APPEND mshadow_LINKER_LIBS ${MKL_LIBRARIES}) - add_definitions(-DMSHADOW_USE_CBLAS=0) - add_definitions(-DMSHADOW_USE_MKL=1) -elseif(BLAS STREQUAL "apple") - find_package(Accelerate REQUIRED) - include_directories(SYSTEM ${Accelerate_INCLUDE_DIR}) - list(APPEND mshadow_LINKER_LIBS ${Accelerate_LIBRARIES}) - add_definitions(-DMSHADOW_USE_MKL=0) - add_definitions(-DMSHADOW_USE_CBLAS=1) -endif() - -if(SUPPORT_MSSE2) - add_definitions(-DMSHADOW_USE_SSE=1) -else() - add_definitions(-DMSHADOW_USE_SSE=0) -endif() - -if(NOT DEFINED SUPPORT_F16C AND NOT MSVC) - check_cxx_compiler_flag("-mf16c" COMPILER_SUPPORT_MF16C) - if(CMAKE_SYSTEM_NAME STREQUAL "Linux") - execute_process(COMMAND cat /proc/cpuinfo - COMMAND grep flags - COMMAND grep f16c - OUTPUT_VARIABLE CPU_SUPPORT_F16C) - elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") - execute_process(COMMAND sysctl -a - COMMAND grep machdep.cpu.features - COMMAND grep F16C - OUTPUT_VARIABLE CPU_SUPPORT_F16C) - endif() - if(NOT CPU_SUPPORT_F16C) - message("CPU does not support F16C instructions") - endif() - if(CPU_SUPPORT_F16C AND 
COMPILER_SUPPORT_MF16C) - set(SUPPORT_F16C TRUE) - endif() -endif() - -if(SUPPORT_F16C) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c") -else() - add_definitions(-DMSHADOW_USE_F16C=0) -endif() - -if(USE_CUDA) - find_package(CUDA 5.5 QUIET) - find_cuda_helper_libs(curand) - if(NOT CUDA_FOUND) - message(FATAL_ERROR "-- CUDA is disabled.") - endif() - add_definitions(-DMSHADOW_USE_CUDA=1) - add_definitions(-DMSHADOW_FORCE_STREAM) - include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) - list(APPEND mshadow_LINKER_LIBS ${CUDA_CUDART_LIBRARY} - ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) -else() - add_definitions(-DMSHADOW_USE_CUDA=0) -endif() diff --git a/3rdparty/mshadow/cmake/mshadowUtils.cmake b/3rdparty/mshadow/cmake/mshadowUtils.cmake deleted file mode 100644 index d4b8bfc89b7a..000000000000 --- a/3rdparty/mshadow/cmake/mshadowUtils.cmake +++ /dev/null @@ -1,2 +0,0 @@ -include("${CMAKE_CURRENT_LIST_DIR}/Utils.cmake") - diff --git a/CMakeLists.txt b/CMakeLists.txt index f0779abd06d4..6f3031502668 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,14 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Utils.cmake) include(CMakeDependentOption) #Some things have order. This must be put in front alone option(USE_CUDA "Build with CUDA support" ON) -option(USE_OLDCMAKECUDA "Build with old cmake cuda" OFF) +set(MXNET_CUDA_ARCH "Auto" CACHE STRING "Target NVIDIA GPU achitecture. +Format: Auto | Common | All | LIST(ARCH_AND_PTX ...) +- \"Auto\" detects local machine GPU compute arch at runtime. +- \"Common\" and \"All\" cover common and entire subsets of architectures +- ARCH_AND_PTX : NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX +- NAME: Fermi Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal Volta Turing +- NUM: Any number. 
Only those pairs are currently accepted by NVCC though: + 2.0 2.1 3.0 3.2 3.5 3.7 5.0 5.2 5.3 6.0 6.2 7.0 7.2 7.5") option(USE_NCCL "Use NVidia NCCL with CUDA" OFF) option(USE_OPENCV "Build with OpenCV support" ON) option(USE_OPENMP "Build with Openmp support" ON) @@ -71,28 +78,25 @@ if(USE_TVM_OP) add_definitions(-DMXNET_USE_TVM_OP=1) endif() -if(USE_CUDA AND NOT USE_OLDCMAKECUDA) - message(STATUS "CMake version '${CMAKE_VERSION}' using generator '${CMAKE_GENERATOR}'") - if( - ( - (${CMAKE_GENERATOR} MATCHES "Visual Studio.*") - OR (${CMAKE_GENERATOR} MATCHES "Xcode.*") - OR (${CMAKE_GENERATOR} STREQUAL "Unix Makefiles") - ) AND ( - (${CMAKE_VERSION} VERSION_GREATER "3.9.0") OR (${CMAKE_VERSION} VERSION_EQUAL "3.9.0") - ) - ) - set(FIRST_CUDA TRUE) - project(mxnet C CXX CUDA) - else() - set(FIRST_CUDA FALSE) - set(USE_OLDCMAKECUDA TRUE) - project(mxnet C CXX) +message(STATUS "CMake version '${CMAKE_VERSION}' using generator '${CMAKE_GENERATOR}'") +project(mxnet C CXX) +if(USE_CUDA) + cmake_minimum_required(VERSION 3.13.2) # CUDA 10 (Turing) detection available starting 3.13.2 + enable_language(CUDA) + set(CMAKE_CUDA_STANDARD 11) + include(CheckCXXCompilerFlag) + if(USE_CXX14_IF_AVAILABLE) + check_cxx_compiler_flag("-std=c++14" SUPPORT_CXX14) + if (SUPPORT_CXX14) + set(CMAKE_CUDA_STANDARD 14) + endif() endif() -else() - project(mxnet C CXX) + set(CMAKE_CUDA_STANDARD_REQUIRED ON) endif() +if(UNIX) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) +endif() if(MSVC) set(SYSTEM_ARCHITECTURE x86_64) @@ -129,7 +133,7 @@ if(MSVC) endif() set(CMAKE_C_FLAGS "/MP") set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} /bigobj") -else(MSVC) +else() include(CheckCXXCompilerFlag) if(USE_CXX14_IF_AVAILABLE) check_cxx_compiler_flag("-std=c++14" SUPPORT_CXX14) @@ -142,6 +146,7 @@ else(MSVC) check_cxx_compiler_flag("-msse3" SUPPORT_MSSE3) check_cxx_compiler_flag("-msse2" SUPPORT_MSSE2) else() + set(SUPPORT_MSSE3 FALSE) set(SUPPORT_MSSE2 FALSE) endif() # For cross complication, turn off flag if target 
device does not support it @@ -158,7 +163,6 @@ else(MSVC) else() add_definitions(-DMSHADOW_USE_F16C=0) endif() - set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-unknown-pragmas -Wno-sign-compare") if ("${CMAKE_CXX_COMPILER_ID}" MATCHES ".*Clang$") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-braced-scalar-init") @@ -176,8 +180,12 @@ else(MSVC) endif() if(SUPPORT_MSSE3) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3") + add_definitions(-DMSHADOW_USE_SSE=1) elseif(SUPPORT_MSSE2) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse2") + add_definitions(-DMSHADOW_USE_SSE=1) + else() + add_definitions(-DMSHADOW_USE_SSE=0) endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_FLAGS}") if(SUPPORT_CXX14) @@ -290,45 +298,6 @@ endif() include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) -if(USE_CUDA) - find_package(CUDA REQUIRED) - add_definitions(-DMSHADOW_USE_CUDA=1) - if(FIRST_CUDA AND (NOT USE_OLDCMAKECUDA)) - if(NOT CUDA_TOOLSET) - set(CUDA_TOOLSET "${CUDA_VERSION_STRING}") - endif() - else() - set(FIRST_CUDA FALSE) - endif() - if(USE_NCCL) - find_package(NCCL) - if(NCCL_FOUND) - include_directories(${NCCL_INCLUDE_DIRS}) - list(APPEND mxnet_LINKER_LIBS ${NCCL_LIBRARIES}) - else() - message(WARNING "Could not find NCCL libraries") - endif() - endif() - if(UNIX) - find_package(NVTX) - if(NVTX_FOUND) - include_directories(${NVTX_INCLUDE_DIRS}) - list(APPEND mxnet_LINKER_LIBS ${NVTX_LIBRARIES}) - add_definitions(-DMXNET_USE_NVTX=1) - else() - message(WARNING "Could not find NVTX libraries") - endif() - endif() -else() - add_definitions(-DMSHADOW_USE_CUDA=0) -endif() - -if(NCCL_FOUND) - add_definitions(-DMXNET_USE_NCCL=1) -else() - add_definitions(-DMXNET_USE_NCCL=0) -endif() - if (USE_INT64_TENSOR_SIZE) message(STATUS "Using 64-bit integer for tensor size") add_definitions(-DMSHADOW_INT64_TENSOR_SIZE=1) @@ -337,21 +306,6 @@ else() endif() include(cmake/ChooseBlas.cmake) -if(USE_CUDA AND 
FIRST_CUDA) - include(3rdparty/mshadow/cmake/Utils.cmake) - include(cmake/FirstClassLangCuda.cmake) - include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) -else() - if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/mshadow/cmake) - include(3rdparty/mshadow/cmake/mshadow.cmake) - include(3rdparty/mshadow/cmake/Utils.cmake) - include(3rdparty/mshadow/cmake/Cuda.cmake) - else() - include(mshadowUtils) - include(Cuda) - include(mshadow) - endif() -endif() if(USE_ASAN) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-omit-frame-pointer -fsanitize=address") @@ -534,9 +488,7 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/dmlc-core/cmake) add_subdirectory("3rdparty/dmlc-core") endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/mshadow/cmake) - add_subdirectory("3rdparty/mshadow") -endif() +add_subdirectory("3rdparty/mshadow") FILE(GLOB_RECURSE SOURCE "src/*.cc" "src/*.h" "include/*.h") FILE(GLOB_RECURSE CUDA "src/*.cu" "src/*.cuh") @@ -636,61 +588,55 @@ if(MSVC) endif() if(USE_CUDA) - if(FIRST_CUDA) - mshadow_select_nvcc_arch_flags(NVCC_FLAGS_ARCH) - string(REPLACE ";" " " NVCC_FLAGS_ARCH "${NVCC_FLAGS_ARCH}") - set(CMAKE_CUDA_FLAGS "${NVCC_FLAGS_ARCH}") - list(APPEND mxnet_LINKER_LIBS cublas cufft cusolver curand) - if(ENABLE_CUDA_RTC) - list(APPEND mxnet_LINKER_LIBS nvrtc cuda) - add_definitions(-DMXNET_ENABLE_CUDA_RTC=1) - endif() - list(APPEND SOURCE ${CUDA}) - add_definitions(-DMXNET_USE_CUDA=1) - link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64) - else() - list(APPEND CUDA_INCLUDE_DIRS ${INCLUDE_DIRECTORIES}) - # define preprocessor macro so that we will not include the generated forcelink header - if(ENABLE_CUDA_RTC) + # CUDA_SELECT_NVCC_ARCH_FLAGS is not deprecated, though part of deprecated + # FindCUDA https://gitlab.kitware.com/cmake/cmake/issues/19199 + include(${CMAKE_ROOT}/Modules/FindCUDA/select_compute_arch.cmake) + CUDA_SELECT_NVCC_ARCH_FLAGS(CUDA_ARCH_FLAGS ${MXNET_CUDA_ARCH}) + message("-- CUDA: Using the following NVCC architecture flags 
${CUDA_ARCH_FLAGS}") + string(REPLACE ";" " " CUDA_ARCH_FLAGS_SPACES "${CUDA_ARCH_FLAGS}") + string(APPEND CMAKE_CUDA_FLAGS " ${CUDA_ARCH_FLAGS_SPACES}") + + find_package(CUDAToolkit REQUIRED cublas cufft cusolver curand + OPTIONAL_COMPONENTS nvToolsExt nvrtc) + + list(APPEND mxnet_LINKER_LIBS CUDA::cudart CUDA::cublas CUDA::cufft CUDA::cusolver CUDA::curand) + if(ENABLE_CUDA_RTC) + if(CUDA_nvrtc_LIBRARY) + list(APPEND mxnet_LINKER_LIBS CUDA::nvrtc cuda) add_definitions(-DMXNET_ENABLE_CUDA_RTC=1) + else() + message(FATAL_ERROR "ENABLE_CUDA_RTC=ON, but failed to find NVRTC. CMake will exit." ) endif() - # Create '.cmake' files for cuda compiles given definitions added thus far - mshadow_cuda_compile(cuda_objs ${CUDA}) - if(MSVC) - if(ENABLE_CUDA_RTC) - FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") - list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY}) - set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib") - list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY}) - endif() - FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") - list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # For fft operator - FIND_LIBRARY(CUDA_cusolver_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") - list(APPEND mxnet_LINKER_LIBS "${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver - link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib/win32) - link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib/x64) - else(MSVC) - list(APPEND mxnet_LINKER_LIBS cufft cusolver) - if(ENABLE_CUDA_RTC) - list(APPEND mxnet_LINKER_LIBS nvrtc cuda) - endif() - link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") + endif() + list(APPEND SOURCE ${CUDA}) + add_definitions(-DMXNET_USE_CUDA=1) + add_definitions(-DMSHADOW_USE_CUDA=1) + add_definitions(-DMSHADOW_FORCE_STREAM) + + if(USE_NCCL) + find_package(NCCL) + if(NCCL_FOUND) + 
include_directories(${NCCL_INCLUDE_DIRS}) + list(APPEND mxnet_LINKER_LIBS ${NCCL_LIBRARIES}) + add_definitions(-DMXNET_USE_NCCL=1) + else() + add_definitions(-DMXNET_USE_NCCL=0) + message(WARNING "Could not find NCCL libraries") endif() - list(APPEND SOURCE ${cuda_objs} ${CUDA}) - add_definitions(-DMXNET_USE_CUDA=1) - if(CUDA_LIBRARY_PATH) - if(IS_CONTAINER_BUILD) - # In case of building on a production-like build container which may not have Cuda installed - if(NOT CMAKE_SYSTEM_HAS_CUDA) - # Assuming building in a container that doesn't have CUDA installed (ie CPU-only build machine) - # so use the stub cuda driver shared library - if(EXISTS ${CUDA_LIBRARY_PATH}/stubs/libcuda.so) - link_directories(${CUDA_LIBRARY_PATH}/stubs) - endif() - endif() - endif() + endif() + if(UNIX) + if(CUDA_nvToolsExt_LIBRARY) + list(APPEND mxnet_LINKER_LIBS CUDA::nvToolsExt) + add_definitions(-DMXNET_USE_NVTX=1) + else() + message("Building without NVTX support.") endif() - endif() + endif() + + include_directories(${CUDAToolkit_INCLUDE_DIRS}) + link_directories(${CUDAToolkit_LIBRARY_DIR}) +else() + add_definitions(-DMSHADOW_USE_CUDA=0) endif() # unsupported: if caffe is a subdirectory of mxnet, load its CMakeLists.txt as well @@ -738,11 +684,9 @@ else() set_target_properties(sample_lib PROPERTIES PREFIX "lib") endif() -if(USE_CUDA) - if(FIRST_CUDA AND MSVC) - target_compile_options(mxnet PUBLIC "$<$:-Xcompiler=-MTd -Gy>") - target_compile_options(mxnet PUBLIC "$<$:-Xcompiler=-MT -Gy>") - endif() +if(USE_CUDA AND MSVC) + target_compile_options(mxnet PUBLIC "$<$:-Xcompiler=-MTd -Gy>") + target_compile_options(mxnet PUBLIC "$<$:-Xcompiler=-MT -Gy>") endif() if(USE_DIST_KVSTORE) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/ps-lite/CMakeLists.txt) @@ -780,8 +724,8 @@ if(USE_TVM_OP) endif() set(TVM_OP_COMPILE_OPTIONS "-o${CMAKE_CURRENT_BINARY_DIR}/libtvmop.so" "--config" "${CMAKE_CURRENT_BINARY_DIR}/tvmop.conf") - if(CUDA_ARCH_BIN) - set(TVM_OP_COMPILE_OPTIONS "${TVM_OP_COMPILE_OPTIONS}" 
"--cuda-arch" "${CUDA_ARCH_BIN}") + if(USE_CUDA) + set(TVM_OP_COMPILE_OPTIONS "${TVM_OP_COMPILE_OPTIONS}" "--cuda-arch" "\"${CUDA_ARCH_FLAGS}\"") endif() add_custom_command(TARGET mxnet POST_BUILD COMMAND ${CMAKE_COMMAND} -E env diff --git a/ci/build_windows.py b/ci/build_windows.py index 5839e8d793d1..b334b68fef2c 100755 --- a/ci/build_windows.py +++ b/ci/build_windows.py @@ -114,9 +114,7 @@ class BuildFlavour(Enum): '-DUSE_BLAS=open ' '-DUSE_LAPACK=ON ' '-DUSE_DIST_KVSTORE=OFF ' - '-DCUDA_ARCH_NAME=Manual ' - '-DCUDA_ARCH_BIN=52 ' - '-DCUDA_ARCH_PTX=52 ' + '-DMXNET_CUDA_ARCH="5.2" ' '-DCMAKE_CXX_FLAGS="/FS /MD /O2 /Ob2" ' '-DUSE_MKL_IF_AVAILABLE=OFF ' '-DCMAKE_BUILD_TYPE=Release') @@ -130,9 +128,7 @@ class BuildFlavour(Enum): '-DUSE_BLAS=open ' '-DUSE_LAPACK=ON ' '-DUSE_DIST_KVSTORE=OFF ' - '-DCUDA_ARCH_NAME=Manual ' - '-DCUDA_ARCH_BIN=52 ' - '-DCUDA_ARCH_PTX=52 ' + '-DMXNET_CUDA_ARCH="5.2" ' '-DUSE_MKLDNN=ON ' '-DCMAKE_CXX_FLAGS="/FS /MD /O2 /Ob2" ' '-DCMAKE_BUILD_TYPE=Release') diff --git a/ci/docker/install/ubuntu_core.sh b/ci/docker/install/ubuntu_core.sh index bd5d1f6fdf6f..70bc285d163b 100755 --- a/ci/docker/install/ubuntu_core.sh +++ b/ci/docker/install/ubuntu_core.sh @@ -53,7 +53,7 @@ apt-get install -y \ wget # Use libturbojpeg package as it is correctly compiled with -fPIC flag -# https://github.com/HaxeFoundation/hashlink/issues/147 +# https://github.com/HaxeFoundation/hashlink/issues/147 ln -s /usr/lib/x86_64-linux-gnu/libturbojpeg.so.0.1.0 /usr/lib/x86_64-linux-gnu/libturbojpeg.so diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index e078b2a8f89c..328a008c07a5 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -25,7 +25,7 @@ set -ex NOSE_COVERAGE_ARGUMENTS="--with-coverage --cover-inclusive --cover-xml --cover-branches --cover-package=mxnet" NOSE_TIMER_ARGUMENTS="--with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error" 
CI_CUDA_COMPUTE_CAPABILITIES="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_70,code=sm_70" -CI_CMAKE_CUDA_ARCH_BIN="52,70" +CI_CMAKE_CUDA_ARCH="5.2 7.0" clean_repo() { set -ex @@ -753,8 +753,7 @@ build_ubuntu_gpu_tensorrt() { -DUSE_OPENMP=0 \ -DUSE_MKLDNN=0 \ -DUSE_MKL_IF_AVAILABLE=OFF \ - -DCUDA_ARCH_NAME=Manual \ - -DCUDA_ARCH_BIN=$CI_CMAKE_CUDA_ARCH_BIN \ + -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \ -G Ninja \ /work/mxnet @@ -872,8 +871,7 @@ build_ubuntu_gpu_cmake_mkldnn() { -DPython3_EXECUTABLE=/usr/bin/python3 \ -DUSE_MKLML_MKL=1 \ -DCMAKE_BUILD_TYPE=Release \ - -DCUDA_ARCH_NAME=Manual \ - -DCUDA_ARCH_BIN=$CI_CMAKE_CUDA_ARCH_BIN \ + -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \ -G Ninja \ /work/mxnet @@ -898,8 +896,7 @@ build_ubuntu_gpu_cmake() { -DUSE_MKLDNN=OFF \ -DUSE_DIST_KVSTORE=ON \ -DCMAKE_BUILD_TYPE=Release \ - -DCUDA_ARCH_NAME=Manual \ - -DCUDA_ARCH_BIN=$CI_CMAKE_CUDA_ARCH_BIN \ + -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \ -DBUILD_CYTHON_MODULES=1 \ -G Ninja \ /work/mxnet @@ -925,8 +922,7 @@ build_ubuntu_gpu_cmake_no_tvm_op() { -DUSE_MKLDNN=OFF \ -DUSE_DIST_KVSTORE=ON \ -DCMAKE_BUILD_TYPE=Release \ - -DCUDA_ARCH_NAME=Manual \ - -DCUDA_ARCH_BIN=$CI_CMAKE_CUDA_ARCH_BIN \ + -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \ -DBUILD_CYTHON_MODULES=1 \ -G Ninja \ /work/mxnet @@ -972,8 +968,7 @@ build_ubuntu_gpu_large_tensor() { -DUSE_MKLDNN=OFF \ -DUSE_DIST_KVSTORE=ON \ -DCMAKE_BUILD_TYPE=Release \ - -DCUDA_ARCH_NAME=Manual \ - -DCUDA_ARCH_BIN=$CI_CMAKE_CUDA_ARCH_BIN \ + -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \ -DUSE_INT64_TENSOR_SIZE=ON \ -G Ninja \ /work/mxnet diff --git a/cmake/BuildTVM.cmake b/cmake/BuildTVM.cmake index 4bb749552f01..2c2f573cddbd 100644 --- a/cmake/BuildTVM.cmake +++ b/cmake/BuildTVM.cmake @@ -98,6 +98,21 @@ set(USE_RANDOM OFF) # Whether use NNPack set(USE_NNPACK OFF) +# First-class Cuda in modern CMake provides us with CMAKE_CUDA_COMPILER But TVM +# uses the deprecated findCUDA functionality which requires +# CUDA_TOOLKIT_ROOT_DIR We 
follow the FindCUDAToolkit.cmake logic to compute +# CUDA_TOOLKIT_ROOT_DIR for TVM https://gitlab.kitware.com/cmake/cmake/merge_requests/4093/ +if(USE_CUDA) + get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY) + set(CUDA_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE) + unset(cuda_dir) + get_filename_component(CUDA_TOOLKIT_ROOT_DIR ${CUDA_BIN_DIR} DIRECTORY ABSOLUTE) + + message("CMAKE_CUDA_COMPILER: ${CMAKE_CUDA_COMPILER}") + message("Inferred CUDA_TOOLKIT_ROOT_DIR for TVM as: ${CUDA_TOOLKIT_ROOT_DIR}") + set(USE_CUDA ${CUDA_TOOLKIT_ROOT_DIR}) +endif() + # Whether use cuBLAS set(USE_CUBLAS OFF) diff --git a/cmake/FirstClassLangCuda.cmake b/cmake/FirstClassLangCuda.cmake deleted file mode 100644 index 0eca1aff78d4..000000000000 --- a/cmake/FirstClassLangCuda.cmake +++ /dev/null @@ -1,249 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -#this file is CUDA help function with CMAKE first class CUDA - -include(CheckCXXCompilerFlag) -check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11) -if(USE_CXX14_IF_AVAILABLE) - check_cxx_compiler_flag("-std=c++14" SUPPORT_CXX14) -endif() - - -################################################################################################ -# A function for automatic detection of GPUs installed (if autodetection is enabled) -# Usage: -# mshadow_detect_installed_gpus(out_variable) -function(mshadow_detect_installed_gpus out_variable) - if(NOT CUDA_gpu_detect_output) - set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu) - - file(WRITE ${__cufile} "" - "#include \n" - "int main()\n" - "{\n" - " int count = 0;\n" - " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n" - " if (count == 0) return -1;\n" - " for (int device = 0; device < count; ++device)\n" - " {\n" - " cudaDeviceProp prop;\n" - " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" - " std::printf(\"%d.%d \", prop.major, prop.minor);\n" - " }\n" - " return 0;\n" - "}\n") - enable_language(CUDA) - - try_run(__nvcc_res __compile_result ${PROJECT_BINARY_DIR} ${__cufile} - COMPILE_OUTPUT_VARIABLE __compile_out - RUN_OUTPUT_VARIABLE __nvcc_out) - - if(__nvcc_res EQUAL 0 AND __compile_result) - # nvcc outputs text containing line breaks when building with MSVC. - # The line below prevents CMake from inserting a variable with line - # breaks in the cache - string(REGEX MATCH "([1-9].[0-9])" __nvcc_out "${__nvcc_out}") - string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}") - set(CUDA_gpu_detect_output ${__nvcc_out}) - else() - message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out} ${__compile_out}") - endif() - endif() - - if(NOT CUDA_gpu_detect_output) - message(WARNING "Automatic GPU detection failed. 
Building for all known architectures (${mxnet_known_gpu_archs}).") - set(${out_variable} ${mxnet_known_gpu_archs} PARENT_SCOPE) - else() - set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE) - endif() -endfunction() - - -# This list will be used for CUDA_ARCH_NAME = All option -set(CUDA_KNOWN_GPU_ARCHITECTURES "Kepler" "Maxwell") - -# This list will be used for CUDA_ARCH_NAME = Common option (enabled by default) -set(CUDA_COMMON_GPU_ARCHITECTURES "3.0" "3.5" "5.0") - -if (CUDA_TOOLSET VERSION_GREATER "6.5") - list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Kepler+Tegra" "Kepler+Tesla" "Maxwell+Tegra") - list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2" "3.7") -endif () - -if (CUDA_TOOLSET VERSION_GREATER "7.5") - list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Pascal") - list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "6.0" "6.1" "6.1+PTX") -else() - list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2+PTX") -endif () - -if (CUDA_TOOLSET VERSION_GREATER "9.0") - list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Volta") - list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "7.0") -endif() - -if (CUDA_TOOLSET VERSION_GREATER "10.0") - list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Turing") - list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "7.5") -endif() - -################################################################################################ -# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME -# Usage: -# mshadow_select_nvcc_arch_flags(out_variable) -function(mshadow_select_nvcc_arch_flags out_variable) - - set(CUDA_ARCH_LIST "Auto" CACHE STRING "Select target NVIDIA GPU achitecture.") - set_property( CACHE CUDA_ARCH_LIST PROPERTY STRINGS "" "Auto" "All" "Common" ${CUDA_KNOWN_GPU_ARCHITECTURES} ) - mark_as_advanced(CUDA_ARCH_NAME) - - - if("X${CUDA_ARCH_LIST}" STREQUAL "X" ) - set(CUDA_ARCH_LIST "All") - endif() - - set(cuda_arch_bin) - set(cuda_arch_ptx) - - message(STATUS " CUDA_ARCH_LIST: ${CUDA_ARCH_LIST}") - if("${CUDA_ARCH_LIST}" STREQUAL "All") - set(CUDA_ARCH_LIST 
${CUDA_KNOWN_GPU_ARCHITECTURES}) - elseif("${CUDA_ARCH_LIST}" STREQUAL "Common") - set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES}) - elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto" OR "${CUDA_ARCH_LIST}" STREQUAL "") - set(mxnet_known_gpu_archs ${CUDA_COMMON_GPU_ARCHITECTURES}) - mshadow_detect_installed_gpus(CUDA_ARCH_LIST) - message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}") - endif() - - # Now process the list and look for names - string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}") - list(REMOVE_DUPLICATES CUDA_ARCH_LIST) - foreach(arch_name ${CUDA_ARCH_LIST}) - set(arch_bin) - set(arch_ptx) - set(add_ptx FALSE) - # Check to see if we are compiling PTX - if(arch_name MATCHES "(.*)\\+PTX$") - set(add_ptx TRUE) - set(arch_name ${CMAKE_MATCH_1}) - endif() - if(arch_name MATCHES "^([0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$") - set(arch_bin ${CMAKE_MATCH_1}) - set(arch_ptx ${arch_bin}) - else() - # Look for it in our list of known architectures - if(${arch_name} STREQUAL "Fermi") - if (CUDA_TOOLSET VERSION_LESS "8.0") - set(arch_bin 2.0 "2.1(2.0)") - endif() - elseif(${arch_name} STREQUAL "Kepler+Tegra") - set(arch_bin 3.2) - elseif(${arch_name} STREQUAL "Kepler+Tesla") - set(arch_bin 3.7) - elseif(${arch_name} STREQUAL "Kepler") - set(arch_bin 3.0 3.5) - set(arch_ptx 3.5) - elseif(${arch_name} STREQUAL "Maxwell+Tegra") - set(arch_bin 5.3) - elseif(${arch_name} STREQUAL "Maxwell") - set(arch_bin 5.0 5.2) - set(arch_ptx 5.2) - elseif(${arch_name} STREQUAL "Pascal") - set(arch_bin 6.0 6.1) - set(arch_ptx 6.1) - elseif(${arch_name} STREQUAL "Volta") - set(arch_bin 7.0) - set(arch_ptx 7.0) - elseif(${arch_name} STREQUAL "Turing") - set(arch_bin 7.5) - set(arch_ptx 7.5) - else() - message(SEND_ERROR "Unknown CUDA Architecture Name ${arch_name} in CUDA_SELECT_NVCC_ARCH_FLAGS") - endif() - endif() - list(APPEND cuda_arch_bin ${arch_bin}) - if(add_ptx) - if (NOT arch_ptx) - set(arch_ptx ${arch_bin}) - endif() - list(APPEND cuda_arch_ptx 
${arch_ptx}) - endif() - endforeach() - - # remove dots and convert to lists - string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}") - string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}") - string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}") - string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}") - - if(cuda_arch_bin) - list(REMOVE_DUPLICATES cuda_arch_bin) - endif() - if(cuda_arch_ptx) - list(REMOVE_DUPLICATES cuda_arch_ptx) - endif() - - message(STATUS "cuda arch bin: ${cuda_arch_bin}") - message(STATUS "cuda arch ptx: ${cuda_arch_ptx}") - set(nvcc_flags "") - set(nvcc_archs_readable "") - - # Tell NVCC to add binaries for the specified GPUs - foreach(arch ${cuda_arch_bin}) - if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)") - # User explicitly specified ARCH for the concrete CODE - list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) - list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1}) - else() - # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE - list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch}) - list(APPEND nvcc_archs_readable sm_${arch}) - endif() - endforeach() - - # Tell NVCC to add PTX intermediate code for the specified architectures - foreach(arch ${cuda_arch_ptx}) - list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch}) - list(APPEND nvcc_archs_readable compute_${arch}) - endforeach() - - if(NOT MSVC) - if(SUPPORT_CXX14) - list(APPEND nvcc_flags "-std=c++14") - elseif(SUPPORT_CXX11) - list(APPEND nvcc_flags "-std=c++11") - endif() - endif() - - string (REPLACE " " ";" CMAKE_CXX_FLAGS_STR "${CMAKE_CXX_FLAGS}") - foreach(_flag ${CMAKE_CXX_FLAGS_STR}) - # Remove -std=c++XX flags - if(NOT "${_flag}" MATCHES "-std=.+") - # Remove link flags - if(NOT "${_flag}" MATCHES "-Wl,.+") - list(APPEND nvcc_flags "-Xcompiler ${_flag}") - endif() - endif() - endforeach() - - string(REPLACE ";" " " nvcc_archs_readable 
"${nvcc_archs_readable}") - set(${out_variable} ${nvcc_flags} PARENT_SCOPE) - set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE) -endfunction() - diff --git a/cmake/Modules/FindCUDAToolkit.cmake b/cmake/Modules/FindCUDAToolkit.cmake new file mode 100644 index 000000000000..1d9af2f548d0 --- /dev/null +++ b/cmake/Modules/FindCUDAToolkit.cmake @@ -0,0 +1,833 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Original license notice, prior to modification by MXNet Contributors: +# +# Copyright 2000-2019 Kitware, Inc. and Contributors +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of Kitware, Inc. 
nor the names of Contributors +# may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#[=======================================================================[.rst: +FindCUDAToolkit +--------------- + +This script locates the NVIDIA CUDA toolkit and the associated libraries, but +does not require the ``CUDA`` language be enabled for a given project. This +module does not search for the NVIDIA CUDA Samples. + +Search Behavior +^^^^^^^^^^^^^^^ + +Finding the CUDA Toolkit requires finding the ``nvcc`` executable, which is +searched for in the following order: + +1. If the ``CUDA`` language has been enabled we will use the directory + containing the compiler as the first search location for ``nvcc``. + +2. If the ``CUDAToolkit_ROOT`` cmake configuration variable (e.g., + ``-DCUDAToolkit_ROOT=/some/path``) *or* environment variable is defined, it + will be searched. If both an environment variable **and** a + configuration variable are specified, the *configuration* variable takes + precedence. 
+ + The directory specified here must be such that the executable ``nvcc`` can be + found underneath the directory specified by ``CUDAToolkit_ROOT``. If + ``CUDAToolkit_ROOT`` is specified, but no ``nvcc`` is found underneath, this + package is marked as **not** found. No subsequent search attempts are + performed. + +3. If the CUDA_PATH environment variable is defined, it will be searched. + +4. The user's path is searched for ``nvcc`` using :command:`find_program`. If + this is found, no subsequent search attempts are performed. Users are + responsible for ensuring that the first ``nvcc`` to show up in the path is + the desired path in the event that multiple CUDA Toolkits are installed. + +5. On Unix systems, if the symbolic link ``/usr/local/cuda`` exists, this is + used. No subsequent search attempts are performed. No default symbolic link + location exists for the Windows platform. + +6. The platform specific default install locations are searched. If exactly one + candidate is found, this is used. The default CUDA Toolkit install locations + searched are: + + +-------------+-------------------------------------------------------------+ + | Platform | Search Pattern | + +=============+=============================================================+ + | macOS | ``/Developer/NVIDIA/CUDA-X.Y`` | + +-------------+-------------------------------------------------------------+ + | Other Unix | ``/usr/local/cuda-X.Y`` | + +-------------+-------------------------------------------------------------+ + | Windows | ``C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y`` | + +-------------+-------------------------------------------------------------+ + + Where ``X.Y`` would be a specific version of the CUDA Toolkit, such as + ``/usr/local/cuda-9.0`` or + ``C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0`` + + .. 
note:: + + When multiple CUDA Toolkits are installed in the default location of a + system (e.g., both ``/usr/local/cuda-9.0`` and ``/usr/local/cuda-10.0`` + exist but the ``/usr/local/cuda`` symbolic link does **not** exist), this + package is marked as **not** found. + + There are too many factors involved in making an automatic decision in + the presence of multiple CUDA Toolkits being installed. In this + situation, users are encouraged to either (1) set ``CUDAToolkit_ROOT`` or + (2) ensure that the correct ``nvcc`` executable shows up in ``$PATH`` for + :command:`find_program` to find. + +Options +^^^^^^^ + +``VERSION`` + If specified, describes the version of the CUDA Toolkit to search for. + +``REQUIRED`` + If specified, configuration will error if a suitable CUDA Toolkit is not + found. + +``QUIET`` + If specified, the search for a suitable CUDA Toolkit will not produce any + messages. + +``EXACT`` + If specified, the CUDA Toolkit is considered found only if the exact + ``VERSION`` specified is recovered. + +Imported targets +^^^^^^^^^^^^^^^^ + +An :ref:`imported target ` named ``CUDA::toolkit`` is provided. + +This module defines :prop_tgt:`IMPORTED` targets for each +of the following libraries that are part of the CUDAToolkit: + +- :ref:`CUDA Runtime Library` +- :ref:`CUDA Driver Library` +- :ref:`cuBLAS` +- :ref:`cuFFT` +- :ref:`cuRAND` +- :ref:`cuSOLVER` +- :ref:`cuSPARSE` +- :ref:`NPP` +- :ref:`nvBLAS` +- :ref:`nvGRAPH` +- :ref:`nvJPEG` +- :ref:`nvidia-ML` +- :ref:`nvRTC` +- :ref:`nvToolsExt` +- :ref:`OpenCL` +- :ref:`cuLIBOS` + +.. _`cuda_toolkit_rt_lib`: + +CUDA Runtime Library +"""""""""""""""""""" + +The CUDA Runtime library (cudart) are what most applications will typically +need to link against to make any calls such as `cudaMalloc`, and `cudaFree`. +They are an explicit dependency of almost every library. + +Targets Created: + +- ``CUDA::cudart`` +- ``CUDA::cudart_static`` + +.. 
_`cuda_toolkit_driver_lib`: + +CUDA Driver Library +"""""""""""""""""""" + +The CUDA Driver library (cuda) is used by applications that use calls +such as `cuMemAlloc`, and `cuMemFree`. This is generally used by advanced +users. + +Targets Created: + +- ``CUDA::cuda_driver`` +- ``CUDA::cuda_driver`` + +.. _`cuda_toolkit_cuBLAS`: + +cuBLAS +"""""" + +The `cuBLAS `_ library. + +Targets Created: + +- ``CUDA::cublas`` +- ``CUDA::cublas_static`` + +.. _`cuda_toolkit_cuFFT`: + +cuFFT +""""" + +The `cuFFT `_ library. + +Targets Created: + +- ``CUDA::cufft`` +- ``CUDA::cufftw`` +- ``CUDA::cufft_static`` +- ``CUDA::cufftw_static`` + +cuRAND +"""""" + +The `cuRAND `_ library. + +Targets Created: + +- ``CUDA::curand`` +- ``CUDA::curand_static`` + +.. _`cuda_toolkit_cuSOLVER`: + +cuSOLVER +"""""""" + +The `cuSOLVER `_ library. + +Targets Created: + +- ``CUDA::cusolver`` +- ``CUDA::cusolver_static`` + +.. _`cuda_toolkit_cuSPARSE`: + +cuSPARSE +"""""""" + +The `cuSPARSE `_ library. + +Targets Created: + +- ``CUDA::cusparse`` +- ``CUDA::cusparse_static`` + +.. _`cuda_toolkit_NPP`: + +NPP +""" + +The `NPP `_ libraries. 
+ +Targets Created: + +- `nppc`: + + - ``CUDA::nppc`` + - ``CUDA::nppc_static`` + +- `nppial`: Arithmetic and logical operation functions in `nppi_arithmetic_and_logical_operations.h` + + - ``CUDA::nppial`` + - ``CUDA::nppial_static`` + +- `nppicc`: Color conversion and sampling functions in `nppi_color_conversion.h` + + - ``CUDA::nppicc`` + - ``CUDA::nppicc_static`` + +- `nppicom`: JPEG compression and decompression functions in `nppi_compression_functions.h` + + - ``CUDA::nppicom`` + - ``CUDA::nppicom_static`` + +- `nppidei`: Data exchange and initialization functions in `nppi_data_exchange_and_initialization.h` + + - ``CUDA::nppidei`` + - ``CUDA::nppidei_static`` + +- `nppif`: Filtering and computer vision functions in `nppi_filter_functions.h` + + - ``CUDA::nppif`` + - ``CUDA::nppif_static`` + +- `nppig`: Geometry transformation functions found in `nppi_geometry_transforms.h` + + - ``CUDA::nppig`` + - ``CUDA::nppig_static`` + +- `nppim`: Morphological operation functions found in `nppi_morphological_operations.h` + + - ``CUDA::nppim`` + - ``CUDA::nppim_static`` + +- `nppist`: Statistics and linear transform in `nppi_statistics_functions.h` and `nppi_linear_transforms.h` + + - ``CUDA::nppist`` + - ``CUDA::nppist_static`` + +- `nppisu`: Memory support functions in `nppi_support_functions.h` + + - ``CUDA::nppisu`` + - ``CUDA::nppisu_static`` + +- `nppitc`: Threshold and compare operation functions in `nppi_threshold_and_compare_operations.h` + + - ``CUDA::nppitc`` + - ``CUDA::nppitc_static`` + +- `npps`: + + - ``CUDA::npps`` + - ``CUDA::npps_static`` + +.. _`cuda_toolkit_nvBLAS`: + +nvBLAS +"""""" + +The `nvBLAS `_ libraries. +This is a shared library only. + +Targets Created: + +- ``CUDA::nvblas`` + +.. _`cuda_toolkit_nvGRAPH`: + +nvGRAPH +""""""" + +The `nvGRAPH `_ library. + +Targets Created: + +- ``CUDA::nvgraph`` +- ``CUDA::nvgraph_static`` + + +.. _`cuda_toolkit_nvJPEG`: + +nvJPEG +"""""" + +The `nvJPEG `_ library. +Introduced in CUDA 10. 
+ +Targets Created: + +- ``CUDA::nvjpeg`` +- ``CUDA::nvjpeg_static`` + +.. _`cuda_toolkit_nvRTC`: + +nvRTC +""""" + +The `nvRTC `_ (Runtime Compilation) library. +This is a shared library only. + +Targets Created: + +- ``CUDA::nvrtc`` + +.. _`cuda_toolkit_nvml`: + +nvidia-ML +""""""""" + +The `NVIDIA Management Library `_. +This is a shared library only. + +Targets Created: + +- ``CUDA::nvml`` + +.. _`cuda_toolkit_opencl`: + +.. _`cuda_toolkit_nvToolsExt`: + +nvToolsExt +"""""""""" + +The `NVIDIA Tools Extension `_. +This is a shared library only. + +Targets Created: + +- ``CUDA::nvToolsExt`` + +OpenCL +"""""" + +The `NVIDIA OpenCL Library `_. +This is a shared library only. + +Targets Created: + +- ``CUDA::OpenCL`` + +.. _`cuda_toolkit_cuLIBOS`: + +cuLIBOS +""""""" + +The cuLIBOS library is a backend thread abstraction layer library which is +static only. The ``CUDA::cublas_static``, ``CUDA::cusparse_static``, +``CUDA::cufft_static``, ``CUDA::curand_static``, and (when implemented) NPP +libraries all automatically have this dependency linked. + +Target Created: + +- ``CUDA::culibos`` + +**Note**: direct usage of this target by consumers should not be necessary. + +.. _`cuda_toolkit_cuRAND`: + + + +Result variables +^^^^^^^^^^^^^^^^ + +``CUDAToolkit_FOUND`` + A boolean specifying whether or not the CUDA Toolkit was found. + +``CUDAToolkit_VERSION`` + The exact version of the CUDA Toolkit found (as reported by + ``nvcc --version``). + +``CUDAToolkit_VERSION_MAJOR`` + The major version of the CUDA Toolkit. + +``CUDAToolkit_VERSION_MINOR`` + The minor version of the CUDA Toolkit. + +``CUDAToolkit_VERSION_PATCH`` + The patch version of the CUDA Toolkit. + +``CUDAToolkit_BIN_DIR`` + The path to the CUDA Toolkit library directory that contains the CUDA + executable ``nvcc``. + +``CUDAToolkit_INCLUDE_DIRS`` + The path to the CUDA Toolkit ``include`` folder containing the header files + required to compile a project linking against CUDA. 
+ +``CUDAToolkit_LIBRARY_DIR`` + The path to the CUDA Toolkit library directory that contains the CUDA + Runtime library ``cudart``. + +``CUDAToolkit_NVCC_EXECUTABLE`` + The path to the NVIDIA CUDA compiler ``nvcc``. Note that this path may + **not** be the same as + :variable:`CMAKE_CUDA_COMPILER <CMAKE_<LANG>_COMPILER>`. ``nvcc`` must be + found to determine the CUDA Toolkit version as well as determining other + features of the Toolkit. This variable is set for the convenience of + modules that depend on this one. + + +#]=======================================================================] + +# NOTE: much of this was simply extracted from FindCUDA.cmake. + +# James Bigler, NVIDIA Corp (nvidia.com - jbigler) +# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html +# +# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. +# +# Copyright (c) 2007-2009 +# Scientific Computing and Imaging Institute, University of Utah +# +# This code is licensed under the MIT License. See the FindCUDA.cmake script +# for the text of the license. + +# The MIT License +# +# License for the specific language governing rights and limitations under +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+#
+###############################################################################
+
+# ---------------------------------------------------------------------------
+# nvcc discovery and toolkit version detection.
+#
+# Lookup order for CUDAToolkit_NVCC_EXECUTABLE:
+#   1. CUDAToolkit_BIN_DIR (seeded from CMAKE_CUDA_COMPILER when the CUDA
+#      language is already loaded), searched with NO_DEFAULT_PATH;
+#   2. a regular find_program() search that additionally looks in the
+#      CUDA_PATH environment variable;
+#   3. platform default install prefixes, newest version first.
+# On failure the script sets CUDAToolkit_FOUND to FALSE and return()s, or
+# raises FATAL_ERROR when the package was requested as REQUIRED.
+# ---------------------------------------------------------------------------
+
+# When the CUDA language is enabled and no bin directory is known yet, use the
+# directory of the already detected compiler as CUDAToolkit_BIN_DIR (cached).
+if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR)
+  get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY)
+  # use the already detected cuda compiler
+  set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "")
+  unset(cuda_dir)
+endif()
+
+# Try language- or user-provided path first.
+# NO_DEFAULT_PATH restricts this lookup to CUDAToolkit_BIN_DIR.
+if(CUDAToolkit_BIN_DIR)
+  find_program(CUDAToolkit_NVCC_EXECUTABLE
+    NAMES nvcc nvcc.exe
+    PATHS ${CUDAToolkit_BIN_DIR}
+    NO_DEFAULT_PATH
+  )
+endif()
+
+# Search using CUDAToolkit_ROOT
+# NOTE(review): CUDAToolkit_ROOT is not named in this call; presumably it is
+# honoured through find_program()'s <PackageName>_ROOT handling when this file
+# runs inside find_package(CUDAToolkit) -- confirm against the CMake version in
+# use. The CUDA_PATH environment variable is searched explicitly, with `bin`
+# tried as a sub-directory.
+find_program(CUDAToolkit_NVCC_EXECUTABLE
+  NAMES nvcc nvcc.exe
+  PATHS ENV CUDA_PATH
+  PATH_SUFFIXES bin
+)
+
+# If the user specified CUDAToolkit_ROOT but nvcc could not be found, this is an error.
+if (NOT CUDAToolkit_NVCC_EXECUTABLE AND (DEFINED CUDAToolkit_ROOT OR DEFINED ENV{CUDAToolkit_ROOT}))
+  # Declare error messages now, print later depending on find_package args.
+  set(fail_base "Could not find nvcc executable in path specified by")
+  set(cuda_root_fail "${fail_base} CUDAToolkit_ROOT=${CUDAToolkit_ROOT}")
+  set(env_cuda_root_fail "${fail_base} environment variable CUDAToolkit_ROOT=$ENV{CUDAToolkit_ROOT}")
+
+  # REQUIRED: abort configuration. The CMake variable takes precedence over
+  # the environment variable when choosing which message to print.
+  if (CUDAToolkit_FIND_REQUIRED)
+    if (DEFINED CUDAToolkit_ROOT)
+      message(FATAL_ERROR ${cuda_root_fail})
+    elseif (DEFINED ENV{CUDAToolkit_ROOT})
+      message(FATAL_ERROR ${env_cuda_root_fail})
+    endif()
+  else()
+    # Optional package: report (unless QUIET), mark as not found, drop the
+    # temporaries and stop processing this file.
+    if (NOT CUDAToolkit_FIND_QUIETLY)
+      if (DEFINED CUDAToolkit_ROOT)
+        message(STATUS ${cuda_root_fail})
+      elseif (DEFINED ENV{CUDAToolkit_ROOT})
+        message(STATUS ${env_cuda_root_fail})
+      endif()
+    endif()
+    set(CUDAToolkit_FOUND FALSE)
+    unset(fail_base)
+    unset(cuda_root_fail)
+    unset(env_cuda_root_fail)
+    return()
+  endif()
+endif()
+
+# CUDAToolkit_ROOT cmake / env variable not specified, try platform defaults.
+#
+# - Linux: /usr/local/cuda-X.Y
+# - macOS: /Developer/NVIDIA/CUDA-X.Y
+# - Windows: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y
+#
+# We will also search the default symlink location /usr/local/cuda first since
+# if CUDAToolkit_ROOT is not specified, it is assumed that the symlinked
+# directory is the desired location.
+if (NOT CUDAToolkit_NVCC_EXECUTABLE)
+  # platform_base is the install prefix up to (not including) the version.
+  if (UNIX)
+    if (NOT APPLE)
+      set(platform_base "/usr/local/cuda-")
+    else()
+      set(platform_base "/Developer/NVIDIA/CUDA-")
+    endif()
+  else()
+    set(platform_base "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v")
+  endif()
+
+  # Build out a descending list of possible cuda installations, e.g.
+  # 10.2, 10.1, 9.2. Start from every path that begins with platform_base.
+  file(GLOB possible_paths "${platform_base}*")
+  # Iterate the glob results and create a descending list.
+  set(possible_versions)
+  foreach (p ${possible_paths})
+    # Extract version number from end of string
+    # NOTE(review): the pattern accepts a one- or two-digit major and exactly
+    # one minor digit at the end of the path, so a directory with a two-digit
+    # minor version would not yield a usable version here -- confirm whether
+    # that matters for the toolkits being targeted.
+    string(REGEX MATCH "[0-9][0-9]?\\.[0-9]$" p_version ${p})
+    if (IS_DIRECTORY ${p} AND p_version)
+      list(APPEND possible_versions ${p_version})
+    endif()
+  endforeach()
+
+  # Cannot use list(SORT) because that is alphabetical, we need numerical.
+  # NOTE: this is not an efficient sorting strategy. But even if a user had
+  # every possible version of CUDA installed, this wouldn't create any
+  # significant overhead.
+  # (Insertion sort into `versions`, compared with VERSION_GREATER.)
+  set(versions)
+  foreach (v ${possible_versions})
+    list(LENGTH versions num_versions)
+    # First version, nothing to compare with so just append.
+    if (num_versions EQUAL 0)
+      list(APPEND versions ${v})
+    else()
+      # Loop through list. Insert at an index when comparison is
+      # VERSION_GREATER since we want a descending list. Duplicates will not
+      # happen since this came from a glob list of directories.
+      set(i 0)
+      set(early_terminate FALSE)
+      while (i LESS num_versions)
+        list(GET versions ${i} curr)
+        if (v VERSION_GREATER curr)
+          list(INSERT versions ${i} ${v})
+          set(early_terminate TRUE)
+          break()
+        endif()
+        math(EXPR i "${i} + 1")
+      endwhile()
+      # If it did not get inserted, place it at the end.
+      if (NOT early_terminate)
+        list(APPEND versions ${v})
+      endif()
+    endif()
+  endforeach()
+
+  # With a descending list of versions, populate possible paths to search.
+  set(search_paths)
+  foreach (v ${versions})
+    list(APPEND search_paths "${platform_base}${v}")
+  endforeach()
+
+  # Force the global default /usr/local/cuda to the front on Unix.
+  if (UNIX)
+    list(INSERT search_paths 0 "/usr/local/cuda")
+  endif()
+
+  # Now search for nvcc again using the platform default search paths.
+  find_program(CUDAToolkit_NVCC_EXECUTABLE
+    NAMES nvcc nvcc.exe
+    PATHS ${search_paths}
+    PATH_SUFFIXES bin
+  )
+
+  # We are done with these variables now, cleanup for caller.
+  # NOTE(review): p_version, num_versions and curr are also written above but
+  # are not unset here.
+  unset(platform_base)
+  unset(possible_paths)
+  unset(possible_versions)
+  unset(versions)
+  unset(i)
+  unset(early_terminate)
+  unset(search_paths)
+
+  # Every lookup failed: fatal for REQUIRED, otherwise report (unless QUIET)
+  # and stop with CUDAToolkit_FOUND set to FALSE.
+  if (NOT CUDAToolkit_NVCC_EXECUTABLE)
+    if (CUDAToolkit_FIND_REQUIRED)
+      message(FATAL_ERROR "Could not find nvcc, please set CUDAToolkit_ROOT.")
+    elseif(NOT CUDAToolkit_FIND_QUIETLY)
+      message(STATUS "Could not find nvcc, please set CUDAToolkit_ROOT.")
+    endif()
+
+    set(CUDAToolkit_FOUND FALSE)
+    return()
+  endif()
+endif()
+
+# nvcc was found but the bin directory is still unknown: record the directory
+# that contains nvcc as CUDAToolkit_BIN_DIR (written to the cache with FORCE).
+if(NOT CUDAToolkit_BIN_DIR AND CUDAToolkit_NVCC_EXECUTABLE)
+  get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY)
+  set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE)
+  unset(cuda_dir)
+endif()
+
+# Determine CUDAToolkit_VERSION and its MAJOR/MINOR/PATCH components. When the
+# nvcc found here is the enabled CUDA compiler, reuse the version CMake
+# already computed; otherwise run nvcc and parse its output.
+if(CUDAToolkit_NVCC_EXECUTABLE AND
+   CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER)
+  # Need to set these based off the already computed CMAKE_CUDA_COMPILER_VERSION value
+  # This if statement will always match, but is used to provide variables for MATCH 1,2,3...
+  if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=])
+    set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
+    set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
+    set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
+    set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}")
+  endif()
+else()
+  # Compute the version by invoking nvcc
+  # The output is searched for " V<major>.<minor>.<patch>"; if nothing matches
+  # the version variables are simply left unset.
+  execute_process (COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
+  if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=])
+    set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
+    set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
+    set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
+    set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
+  endif()
+  unset(NVCC_OUT)
+endif()
+
+
+# The toolkit root is taken to be the parent directory of the bin directory.
+get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE)
+
+# Now that we have the real ROOT_DIR, find components inside it.
+# Temporarily add the toolkit root to CMAKE_PREFIX_PATH so that the find_path()
+# and find_library() calls below look inside it; the entry is removed again
+# once both lookups are done.
+list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR})
+
+# Find the include/ directory
+find_path(CUDAToolkit_INCLUDE_DIR
+  NAMES cuda_runtime.h
+)
+
+# And find the CUDA Runtime Library libcudart
+find_library(CUDA_CUDART
+  NAMES cudart
+  PATH_SUFFIXES lib64 lib/x64
+)
+if (NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY)
+  message(STATUS "Unable to find cudart library.")
+endif()
+
+unset(CUDAToolkit_ROOT_DIR)
+list(REMOVE_AT CMAKE_PREFIX_PATH -1)
+
+#-----------------------------------------------------------------------------
+# Perform version comparison and validate all required variables are set.
+# MXNET NOTE: This differs from CMake source by ${CMAKE_CURRENT_LIST_DIR}
+# replaced with ${CMAKE_ROOT}/Modules
+include(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)
+find_package_handle_standard_args(CUDAToolkit
+  REQUIRED_VARS
+    CUDAToolkit_INCLUDE_DIR
+    CUDA_CUDART
+    CUDAToolkit_NVCC_EXECUTABLE
+  VERSION_VAR
+    CUDAToolkit_VERSION
+)
+
+#-----------------------------------------------------------------------------
+# Construct result variables
+if(CUDAToolkit_FOUND)
+  set(CUDAToolkit_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIR})
+  get_filename_component(CUDAToolkit_LIBRARY_DIR ${CUDA_CUDART} DIRECTORY ABSOLUTE)
+endif()
+
+#-----------------------------------------------------------------------------
+# Construct import targets
+if(CUDAToolkit_FOUND)
+
+  # find_and_add_cuda_import_lib(<lib_name> [<search_name>...])
+  #
+  # Locates a toolkit library and, when it is found, defines the imported
+  # interface target CUDA::<lib_name> carrying the toolkit include directory
+  # and the library path. The result is cached in CUDA_<lib_name>_LIBRARY.
+  #
+  #   lib_name      suffix of the target to create (CUDA::<lib_name>)
+  #   search_name   optional library file names to look for; defaults to
+  #                 <lib_name> when none are given
+  #
+  # Nothing is created when the library is not found or when the target
+  # already exists (e.g. find_package(CUDAToolkit) was called before).
+  function(find_and_add_cuda_import_lib lib_name)
+
+    if(ARGC GREATER 1)
+      set(search_names ${ARGN})
+    else()
+      set(search_names ${lib_name})
+    endif()
+
+    find_library(CUDA_${lib_name}_LIBRARY
+      NAMES ${search_names}
+      PATHS ${CUDAToolkit_LIBRARY_DIR}
+            ENV CUDA_PATH
+      PATH_SUFFIXES nvidia/current lib64 lib/x64 lib
+    )
+
+    # `if(NOT TARGET ...)` is required here: a bare `if(NOT CUDA::name)` tests
+    # a variable of that name, is always true, and would let a repeated call
+    # try to define the same target twice.
+    if (NOT TARGET CUDA::${lib_name} AND CUDA_${lib_name}_LIBRARY)
+      add_library(CUDA::${lib_name} IMPORTED INTERFACE)
+      target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}")
+      target_link_libraries(CUDA::${lib_name} INTERFACE "${CUDA_${lib_name}_LIBRARY}")
+    endif()
+  endfunction()
+
+  # add_cuda_link_dependency(<lib_name> <dependency>...)
+  #
+  # Makes CUDA::<lib_name> link (INTERFACE) against each CUDA::<dependency>.
+  # Libraries that were not found have no target, so both ends are checked
+  # and missing ones are skipped silently.
+  function(add_cuda_link_dependency lib_name)
+    if(NOT TARGET CUDA::${lib_name})
+      return()
+    endif()
+    # Iterate the values of ARGN itself. `IN LISTS ${ARGN}` would treat every
+    # dependency name as the name of a list variable and iterate nothing.
+    foreach(dependency IN LISTS ARGN)
+      if(TARGET CUDA::${dependency})
+        target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dependency})
+      endif()
+    endforeach()
+  endfunction()
+
+  # Guarded so that a second find_package(CUDAToolkit) in the same scope does
+  # not attempt to redefine the target.
+  if(NOT TARGET CUDA::toolkit)
+    add_library(CUDA::toolkit IMPORTED INTERFACE)
+    target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}")
+    target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}")
+  endif()
+
+
+  find_and_add_cuda_import_lib(cuda_driver cuda)
+
+  find_and_add_cuda_import_lib(cudart)
+  find_and_add_cuda_import_lib(cudart_static)
+
+  # cuLIBOS is looked up before the libraries that depend on it so that
+  # CUDA::culibos already exists when those dependencies are declared.
+  find_and_add_cuda_import_lib(culibos)
+
+  foreach (cuda_lib cublas cufft cufftw curand cusolver cusparse nvgraph nvjpeg)
+    find_and_add_cuda_import_lib(${cuda_lib})
+    add_cuda_link_dependency(${cuda_lib} cudart)
+
+    find_and_add_cuda_import_lib(${cuda_lib}_static)
+    add_cuda_link_dependency(${cuda_lib}_static cudart_static)
+  endforeach()
+
+  # cuSOLVER depends on cuBLAS, and cuSPARSE
+  # (the static variant must pull in the static cuSPARSE, not the shared one)
+  add_cuda_link_dependency(cusolver cublas cusparse)
+  add_cuda_link_dependency(cusolver_static cublas_static cusparse_static)
+
+  # nvGRAPH depends on cuRAND, and cuSOLVER.
+  add_cuda_link_dependency(nvgraph curand cusolver)
+  add_cuda_link_dependency(nvgraph_static curand_static cusolver_static)
+
+  find_and_add_cuda_import_lib(nppc)
+  find_and_add_cuda_import_lib(nppc_static)
+
+  add_cuda_link_dependency(nppc cudart)
+  add_cuda_link_dependency(nppc_static cudart_static culibos)
+
+  # Process the majority of the NPP libraries.
+  foreach (cuda_lib nppial nppicc nppidei nppif nppig nppim nppist nppitc npps nppicom nppisu)
+    find_and_add_cuda_import_lib(${cuda_lib})
+    find_and_add_cuda_import_lib(${cuda_lib}_static)
+    add_cuda_link_dependency(${cuda_lib} nppc)
+    add_cuda_link_dependency(${cuda_lib}_static nppc_static)
+  endforeach()
+
+  find_and_add_cuda_import_lib(nvrtc)
+  add_cuda_link_dependency(nvrtc cuda_driver)
+
+  find_and_add_cuda_import_lib(nvml nvidia-ml nvml)
+
+  if(WIN32)
+    # nvtools can be installed outside the CUDA toolkit directory
+    # so prefer the NVTOOLSEXT_PATH windows only environment variable
+    # In addition on windows the most common name is nvToolsExt64_1
+    find_library(CUDA_nvToolsExt_LIBRARY
+      NAMES nvToolsExt64_1 nvToolsExt64 nvToolsExt
+      PATHS ENV NVTOOLSEXT_PATH
+            ENV CUDA_PATH
+      PATH_SUFFIXES lib/x64 lib
+    )
+  endif()
+  find_and_add_cuda_import_lib(nvToolsExt nvToolsExt nvToolsExt64)
+
+  add_cuda_link_dependency(nvToolsExt cudart)
+
+  find_and_add_cuda_import_lib(OpenCL)
+
+  # Static libraries that rely on the cuLIBOS thread abstraction layer.
+  if(TARGET CUDA::culibos)
+    foreach (cuda_lib cublas cufft cusparse curand nvjpeg)
+      add_cuda_link_dependency(${cuda_lib}_static culibos)
+    endforeach()
+  endif()
+
+endif()
diff --git a/contrib/tvmop/compile.py b/contrib/tvmop/compile.py index b0254218077a..43657f274348 100644 --- a/contrib/tvmop/compile.py +++ b/contrib/tvmop/compile.py @@ -50,6 +50,11 @@ def get_cuda_arch(arch): if len(arch) == 0: return None + # the arch string is of format '-gencode;arch=compute_XX,code=sm_XX' + # this format is computed by CMake CUDA_SELECT_NVCC_ARCH_FLAGS + if arch.startswith('-gencode;'): + return arch.split(';') + # the arch string contains '-arch=sm_xx' flags = arch.split() for flag in flags: