Skip to content

Commit

Permalink
Move example disabling because of nvcc bugs into cmake
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Dec 15, 2022
1 parent 3cfc64d commit 38ebb45
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 30 deletions.
5 changes: 5 additions & 0 deletions examples/alpaka/asyncblur/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ if (NOT TARGET llama::llama)
find_package(llama REQUIRED)
endif()
find_package(alpaka 0.9.0 REQUIRED)
if (alpaka_ACC_GPU_CUDA_ENABLE AND (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") AND
(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.3) AND (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.4))
message(WARNING "nvcc 11.3 fails to compile the alpaka asyncblur example, so it is disabled.")
return()
endif()
alpaka_add_executable(${PROJECT_NAME} asyncblur.cpp ../../common/alpakaHelpers.hpp ../../common/Stopwatch.hpp)
target_include_directories(${PROJECT_NAME} SYSTEM PRIVATE ../../../thirdparty/stb/include)
target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama alpaka::alpaka)
7 changes: 0 additions & 7 deletions examples/alpaka/asyncblur/asyncblur.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,6 @@ struct BlurKernel
auto main(int argc, char** argv) -> int
try
{
#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 3 && __CUDACC_VER_MINOR__ < 4
// nvcc 11.3 fails to generate the template signature for llama::View, if it has a forward declaration with a default
// argument (which we need for the default accessor)
# warning "alpaka nbody example disabled for nvcc 11.3, because it generates invalid C++ code for the host compiler"
return -1;
#else
// ALPAKA
using Dim = alpaka::DimInt<2>;

Expand Down Expand Up @@ -398,7 +392,6 @@ try
}

return 0;
#endif
}
catch(const std::exception& e)
{
Expand Down
6 changes: 6 additions & 0 deletions examples/alpaka/nbody/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ if (NOT TARGET llama::llama)
find_package(llama REQUIRED)
endif()
find_package(alpaka 0.9.0 REQUIRED)
if (alpaka_ACC_GPU_CUDA_ENABLE AND (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") AND
(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.6))
# nvcc <= 11.5 chokes on `pis(tag::Pos{}, tag::X{})` inside `pPInteraction()` and segfaults
message(WARNING "nvcc < 11.6 fails to compile the alpaka n-body example, so it is disabled.")
return()
endif()
alpaka_add_executable(${PROJECT_NAME} nbody.cpp ../../common/Stopwatch.hpp)
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_17)
target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama fmt::fmt alpaka::alpaka xsimd)
Expand Down
6 changes: 0 additions & 6 deletions examples/alpaka/nbody/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,11 +314,6 @@ void run(std::ostream& plotFile)
auto main() -> int
try
{
#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 5
// nvcc <= 11.5 chokes on `pis(tag::Pos{}, tag::X{})` inside `pPInteraction()`
# warning "alpaka nbody example disabled for nvcc <= 11.5, because the compiler segfaults"
return -1;
#else
std::cout << problemSize / 1000 << "k particles (" << problemSize * llama::sizeOf<Particle> / 1024 << "kiB)\n"
<< "Caching " << threadsPerBlock << " particles (" << threadsPerBlock * llama::sizeOf<Particle> / 1024
<< " kiB) in shared memory\n"
Expand Down Expand Up @@ -363,7 +358,6 @@ plot $data using 2:xtic(1) ti col axis x1y1, "" using 3 ti col axis x1y2
std::cout << "Plot with: ./nbody_alpaka.sh\n";

return 0;
#endif
}
catch(const std::exception& e)
{
Expand Down
7 changes: 0 additions & 7 deletions examples/alpaka/vectoradd/vectoradd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,12 +192,6 @@ catch(const std::exception& e)

auto main() -> int
{
#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 3 && __CUDACC_VER_MINOR__ < 4
// nvcc 11.3 fails to generate the template signature for llama::View, if it has a forward declaration with a default
// argument (which we need for the default accessor)
# warning "alpaka nbody example disabled for nvcc 11.3, because it generates invalid C++ code for the host compiler"
return -1;
#else
std::cout << problemSize / 1000 / 1000 << "M values "
<< "(" << problemSize * sizeof(float) / 1024 << "kiB)\n";

Expand All @@ -224,5 +218,4 @@ plot $data using 2:xtic(1) ti "compute kernel"
)";
std::cout << "Plot with: ./vectoradd_alpaka.sh\n";
return 0;
#endif
}
8 changes: 8 additions & 0 deletions examples/cuda/pitch/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
cmake_minimum_required(VERSION 3.18.3)
project(llama-cuda-pitch CUDA)

if ((CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") AND
(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.3) AND (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.4))
# nvcc 11.3 fails to generate the template signature for llama::View, if it has a forward declaration with a default
# argument (which we need for the default accessor), and generates invalid C++ code for the host compiler
message(WARNING "nvcc 11.3 fails to compile the CUDA pitch example, so it is disabled.")
return()
endif()

find_package(CUDAToolkit) # for include directories
find_package(fmt CONFIG REQUIRED)
if (NOT TARGET llama::llama)
Expand Down
10 changes: 0 additions & 10 deletions examples/cuda/pitch/pitch.cu
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,6 @@ namespace llamaex
auto main() -> int
try
{
#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 3 && __CUDACC_VER_MINOR__ < 4
// nvcc 11.3 fails to generate the template signature for llama::View, if it has a forward declaration with a default
// argument (which we need for the default accessor)
# warning "alpaka nbody example disabled for nvcc 11.3, because it generates invalid C++ code for the host compiler"
return -1;
#else
int device = 0;
checkError(cudaGetDevice(&device));
cudaDeviceProp prop{};
Expand Down Expand Up @@ -169,8 +163,6 @@ try
stbi_write_png("pitch1.png", extents[1], extents[0], 3, host1.data(), 0);
}

// nvcc 11.3 fails to compile the AoS mapping here
# if !(defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 3)
{
std::byte* mem = nullptr;
checkError(cudaMalloc(&mem, widthBytes * extents[0]));
Expand All @@ -185,13 +177,11 @@ try

stbi_write_png("pitch2.png", extents[1], extents[0], 3, host2.data(), 0);
}
# endif

if(host1 != host2)
fmt::print("ERROR: produced two different images");

return 0;
#endif
}
catch(const std::exception& e)
{
Expand Down

0 comments on commit 38ebb45

Please sign in to comment.