diff --git a/examples/alpaka/asyncblur/CMakeLists.txt b/examples/alpaka/asyncblur/CMakeLists.txt index b0e9b32438..f32e99fc39 100644 --- a/examples/alpaka/asyncblur/CMakeLists.txt +++ b/examples/alpaka/asyncblur/CMakeLists.txt @@ -5,6 +5,11 @@ if (NOT TARGET llama::llama) find_package(llama REQUIRED) endif() find_package(alpaka 0.9.0 REQUIRED) +if (alpaka_ACC_GPU_CUDA_ENABLE AND (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") AND + (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.3) AND (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.4)) + message(WARNING "nvcc 11.3 fails to compile the alpaka asyncblur example, so it is disabled.") + return() +endif() alpaka_add_executable(${PROJECT_NAME} asyncblur.cpp ../../common/alpakaHelpers.hpp ../../common/Stopwatch.hpp) target_include_directories(${PROJECT_NAME} SYSTEM PRIVATE ../../../thirdparty/stb/include) target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama alpaka::alpaka) diff --git a/examples/alpaka/asyncblur/asyncblur.cpp b/examples/alpaka/asyncblur/asyncblur.cpp index 4d7c215ce0..afd3f325a2 100644 --- a/examples/alpaka/asyncblur/asyncblur.cpp +++ b/examples/alpaka/asyncblur/asyncblur.cpp @@ -145,12 +145,6 @@ struct BlurKernel auto main(int argc, char** argv) -> int try { -#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 3 && __CUDACC_VER_MINOR__ < 4 -// nvcc 11.3 fails to generate the template signature for llama::View, if it has a forward declaration with a default -// argument (which we need for the default accessor) -# warning "alpaka nbody example disabled for nvcc 11.3, because it generates invalid C++ code for the host compiler" - return -1; -#else // ALPAKA using Dim = alpaka::DimInt<2>; @@ -398,7 +392,6 @@ try } return 0; -#endif } catch(const std::exception& e) { diff --git a/examples/alpaka/nbody/CMakeLists.txt b/examples/alpaka/nbody/CMakeLists.txt index 57650a9831..7a48265420 100644 --- a/examples/alpaka/nbody/CMakeLists.txt +++ b/examples/alpaka/nbody/CMakeLists.txt @@ 
-11,6 +11,12 @@ if (NOT TARGET llama::llama) find_package(llama REQUIRED) endif() find_package(alpaka 0.9.0 REQUIRED) +if (alpaka_ACC_GPU_CUDA_ENABLE AND (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") AND + (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.6)) + # nvcc <= 11.5 chokes on `pis(tag::Pos{}, tag::X{})` inside `pPInteraction()` and segfaults + message(WARNING "nvcc < 11.6 fails to compile the alpaka n-body example, so it is disabled.") + return() +endif() alpaka_add_executable(${PROJECT_NAME} nbody.cpp ../../common/Stopwatch.hpp) target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_17) target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama fmt::fmt alpaka::alpaka xsimd) diff --git a/examples/alpaka/nbody/nbody.cpp b/examples/alpaka/nbody/nbody.cpp index 1dac58fc1c..89c35d7533 100644 --- a/examples/alpaka/nbody/nbody.cpp +++ b/examples/alpaka/nbody/nbody.cpp @@ -314,11 +314,6 @@ void run(std::ostream& plotFile) auto main() -> int try { -#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 5 -// nvcc <= 11.5 chokes on `pis(tag::Pos{}, tag::X{})` inside `pPInteraction()` -# warning "alpaka nbody example disabled for nvcc <= 11.5, because the compiler segfaults" - return -1; -#else std::cout << problemSize / 1000 << "k particles (" << problemSize * llama::sizeOf / 1024 << "kiB)\n" << "Caching " << threadsPerBlock << " particles (" << threadsPerBlock * llama::sizeOf / 1024 << " kiB) in shared memory\n" @@ -363,7 +358,6 @@ plot $data using 2:xtic(1) ti col axis x1y1, "" using 3 ti col axis x1y2 std::cout << "Plot with: ./nbody_alpaka.sh\n"; return 0; -#endif } catch(const std::exception& e) { diff --git a/examples/alpaka/vectoradd/vectoradd.cpp b/examples/alpaka/vectoradd/vectoradd.cpp index 067d3f2420..76c4fba327 100644 --- a/examples/alpaka/vectoradd/vectoradd.cpp +++ b/examples/alpaka/vectoradd/vectoradd.cpp @@ -192,12 +192,6 @@ catch(const std::exception& e) auto main() -> int { -#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 &&
__CUDACC_VER_MINOR__ >= 3 && __CUDACC_VER_MINOR__ < 4 -// nvcc 11.3 fails to generate the template signature for llama::View, if it has a forward declaration with a default -// argument (which we need for the default accessor) -# warning "alpaka nbody example disabled for nvcc 11.3, because it generates invalid C++ code for the host compiler" - return -1; -#else std::cout << problemSize / 1000 / 1000 << "M values " << "(" << problemSize * sizeof(float) / 1024 << "kiB)\n"; @@ -224,5 +218,4 @@ plot $data using 2:xtic(1) ti "compute kernel" )"; std::cout << "Plot with: ./vectoradd_alpaka.sh\n"; return 0; -#endif } diff --git a/examples/cuda/pitch/CMakeLists.txt b/examples/cuda/pitch/CMakeLists.txt index d7523b2712..58c91c5143 100644 --- a/examples/cuda/pitch/CMakeLists.txt +++ b/examples/cuda/pitch/CMakeLists.txt @@ -1,6 +1,14 @@ cmake_minimum_required(VERSION 3.18.3) project(llama-cuda-pitch CUDA) +if ((CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") AND + (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.3) AND (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.4)) + # nvcc 11.3 fails to generate the template signature for llama::View, if it has a forward declaration with a default + # argument (which we need for the default accessor), and generates invalid C++ code for the host compiler + message(WARNING "nvcc 11.3 fails to compile the CUDA pitch example, so it is disabled.") + return() +endif() + find_package(CUDAToolkit) # for include directories find_package(fmt CONFIG REQUIRED) if (NOT TARGET llama::llama) diff --git a/examples/cuda/pitch/pitch.cu b/examples/cuda/pitch/pitch.cu index 7c9e4bd0df..98bf84c3df 100644 --- a/examples/cuda/pitch/pitch.cu +++ b/examples/cuda/pitch/pitch.cu @@ -124,12 +124,6 @@ namespace llamaex auto main() -> int try { -#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 3 && __CUDACC_VER_MINOR__ < 4 -// nvcc 11.3 fails to generate the template signature for llama::View, if it has a forward declaration with a default 
-// argument (which we need for the default accessor) -# warning "alpaka nbody example disabled for nvcc 11.3, because it generates invalid C++ code for the host compiler" - return -1; -#else int device = 0; checkError(cudaGetDevice(&device)); cudaDeviceProp prop{}; @@ -169,8 +163,6 @@ try stbi_write_png("pitch1.png", extents[1], extents[0], 3, host1.data(), 0); } - // nvcc 11.3 fails to compile the AoS mapping here -# if !(defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 3) { std::byte* mem = nullptr; checkError(cudaMalloc(&mem, widthBytes * extents[0])); @@ -185,13 +177,11 @@ try stbi_write_png("pitch2.png", extents[1], extents[0], 3, host2.data(), 0); } -# endif if(host1 != host2) fmt::print("ERROR: produced two different images"); return 0; -#endif } catch(const std::exception& e) {