Refactor benchmark commons and drop clang-11 #802

Merged 3 commits on Nov 21, 2023
Changes from all commits
3 changes: 0 additions & 3 deletions .github/workflows/ci.yaml
@@ -189,9 +189,6 @@ jobs:
add_toolchain_repo: true
asan: ON
install_extra: g++-13
- name: build-ubuntu-clang11
cxx: clang++-11
install_extra: clang-11 libomp-11-dev
- name: build-ubuntu-clang12
cxx: clang++-12
install_extra: clang-12 libomp-12-dev
2 changes: 1 addition & 1 deletion README.md
@@ -38,7 +38,7 @@ The following compilers are supported by LLAMA and tested as part of our CI:

| Linux | Windows | MacOS |
|----------------------------------------------------------------------------------------------|-----------------------------------------------------|----------------------------------|
| g++ 9 - 13 </br> clang++ 11 - 17 </br> icpx (latest) </br> nvc++ 23.5 </br> nvcc 11.6 - 12.3 | Visual Studio 2022 </br> (latest on GitHub actions) | clang++ </br> (latest from brew) |
| g++ 9 - 13 </br> clang++ 12 - 17 </br> icpx (latest) </br> nvc++ 23.5 </br> nvcc 11.6 - 12.3 | Visual Studio 2022 </br> (latest on GitHub actions) | clang++ </br> (latest from brew) |


Single header
2 changes: 1 addition & 1 deletion examples/alpaka/asyncblur/CMakeLists.txt
@@ -8,6 +8,6 @@ if (NOT TARGET llama::llama)
find_package(llama REQUIRED)
endif()
find_package(alpaka 1.0 REQUIRED)
alpaka_add_executable(${PROJECT_NAME} asyncblur.cpp ../../common/alpakaHelpers.hpp ../../common/Stopwatch.hpp)
alpaka_add_executable(${PROJECT_NAME} asyncblur.cpp ../../common/Stopwatch.hpp)
target_include_directories(${PROJECT_NAME} SYSTEM PRIVATE ../../../thirdparty/stb/include)
target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama alpaka::alpaka)
35 changes: 34 additions & 1 deletion examples/alpaka/asyncblur/asyncblur.cpp
@@ -30,6 +30,39 @@ constexpr auto elemsPerBlock = 16; /// number of elements per direction(!) every

using FP = float;

constexpr auto threadElemDistMinElem = 2;

/** Returns a good guess for an optimal number of threads and elements in a
* block based on the total number of elements in the block.
*/
template<typename T_Acc, std::size_t BlockSize, std::size_t HardwareThreads>
struct ThreadsElemsDistribution
{
/// number of elements per thread
static constexpr std::size_t elemCount = BlockSize;
/// number of threads per block
static constexpr std::size_t threadCount = 1u;
};

#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
template<std::size_t BlockSize, std::size_t HardwareThreads, typename Dim, typename Size>
struct ThreadsElemsDistribution<alpaka::AccGpuCudaRt<Dim, Size>, BlockSize, HardwareThreads>
{
static constexpr std::size_t elemCount = threadElemDistMinElem;
static constexpr std::size_t threadCount = BlockSize / threadElemDistMinElem;
};
#endif

#ifdef ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED
template<std::size_t BlockSize, std::size_t HardwareThreads, typename Dim, typename Size>
struct ThreadsElemsDistribution<alpaka::AccCpuOmp2Threads<Dim, Size>, BlockSize, HardwareThreads>
{
static constexpr std::size_t elemCount = (BlockSize + HardwareThreads - 1u) / HardwareThreads;
static constexpr std::size_t threadCount = HardwareThreads;
};
#endif


// clang-format off
namespace tag
{
@@ -157,7 +190,7 @@ try
int bufferY = defaultImgY + 2 * kernelSize;

constexpr int hardwareThreads = 2; // relevant for OpenMP2Threads
using Distribution = common::ThreadsElemsDistribution<Acc, elemsPerBlock, hardwareThreads>;
using Distribution = ThreadsElemsDistribution<Acc, elemsPerBlock, hardwareThreads>;
constexpr int elemCount = Distribution::elemCount;
constexpr int threadCount = Distribution::threadCount;

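With the values used in this example (elemsPerBlock = 16, hardwareThreads = 2), the three ThreadsElemsDistribution cases above resolve to 1 thread × 16 elements, 8 × 2 (CUDA) and 2 × 8 (OpenMP2 threads). A minimal compile-time sketch of that arithmetic, without the alpaka accelerator types:

```cpp
// Standalone sketch of the ThreadsElemsDistribution arithmetic above,
// using this example's values: BlockSize = 16, HardwareThreads = 2, minElem = 2.
#include <cstddef>

constexpr std::size_t blockSize = 16; // elemsPerBlock
constexpr std::size_t hwThreads = 2;  // hardwareThreads
constexpr std::size_t minElem = 2;    // threadElemDistMinElem

// primary template (e.g. sequential back-ends): 1 thread handles all 16 elements
static_assert(blockSize == 16);
// CUDA specialization: 16 / 2 = 8 threads, 2 elements each
static_assert(blockSize / minElem == 8);
// OpenMP2-threads specialization: 2 threads, ceil(16 / 2) = 8 elements each
static_assert((blockSize + hwThreads - 1) / hwThreads == 8);

int main() {} // nothing to run; the checks are compile-time only
```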
2 changes: 1 addition & 1 deletion examples/alpaka/daxpy/CMakeLists.txt
@@ -9,7 +9,7 @@ if (NOT TARGET llama::llama)
find_package(llama REQUIRED)
endif()
find_package(alpaka 1.0 REQUIRED)
alpaka_add_executable(${PROJECT_NAME} daxpy.cpp ../../common/Stopwatch.hpp ../../common/hostname.hpp)
alpaka_add_executable(${PROJECT_NAME} daxpy.cpp ../../common/Stopwatch.hpp ../../common/env.hpp)
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_17)
target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama OpenMP::OpenMP_CXX alpaka::alpaka)

24 changes: 8 additions & 16 deletions examples/alpaka/daxpy/daxpy.cpp
@@ -2,7 +2,7 @@
// SPDX-License-Identifier: LGPL-3.0-or-later

#include "../../common/Stopwatch.hpp"
#include "../../common/hostname.hpp"
#include "../../common/env.hpp"

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
@@ -156,26 +156,20 @@ void daxpyAlpakaLlama(std::string mappingName, std::ofstream& plotFile, Mapping
auto main() -> int
try
{
const auto numThreads = static_cast<std::size_t>(omp_get_max_threads());
const char* affinity = std::getenv("GOMP_CPU_AFFINITY"); // NOLINT(concurrency-mt-unsafe)
affinity = affinity == nullptr ? "NONE - PLEASE PIN YOUR THREADS!" : affinity;
const auto env = common::captureEnv();

fmt::print(
R"({}Mi doubles ({}MiB data)
Threads: {}
Affinity: {}
)",
"{}Mi doubles ({}MiB data)\n{}\n",
problemSize / 1024 / 1024,
problemSize * sizeof(double) / 1024 / 1024,
numThreads,
affinity);
env);

std::ofstream plotFile{"daxpy.sh"};
plotFile.exceptions(std::ios::badbit | std::ios::failbit);
plotFile << fmt::format(
R"(#!/usr/bin/gnuplot -p
# threads: {} affinity: {}
set title "daxpy CPU {}Mi doubles on {}"
# {}
set title "daxpy CPU {}Mi doubles"
set style data histograms
set style fill solid
set xtics rotate by 45 right
@@ -184,10 +178,8 @@ set yrange [0:*]
set ylabel "runtime [s]"
$data << EOD
)",
numThreads,
affinity,
problemSize / 1024 / 1024,
common::hostname());
env,
problemSize / 1024 / 1024);

daxpy(plotFile);

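The thread-count and affinity reporting removed above is replaced by common::captureEnv() from the new examples/common/env.hpp, whose contents are not part of the hunks shown here. A rough sketch of what such a helper could gather, based only on the information the old code printed (the actual implementation may collect more and format it differently):

```cpp
// Hypothetical sketch of a captureEnv()-style helper; the real one lives in
// examples/common/env.hpp and is not shown in this diff.
#include <cstdlib>
#include <string>

#include <fmt/format.h>
#include <omp.h>

namespace common
{
    inline auto captureEnv() -> std::string
    {
        const auto numThreads = omp_get_max_threads();
        const char* affinity = std::getenv("GOMP_CPU_AFFINITY"); // NOLINT(concurrency-mt-unsafe)
        return fmt::format(
            "Threads: {}; Affinity: {}",
            numThreads,
            affinity == nullptr ? "NONE - PLEASE PIN YOUR THREADS!" : affinity);
    }
} // namespace common

int main()
{
    fmt::print("{}\n", common::captureEnv()); // e.g. "Threads: 8; Affinity: ..."
}
```

The nbody, pic and vectoradd examples below additionally call captureEnv<Acc>(), presumably so that accelerator/device information ends up in the same string.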
16 changes: 8 additions & 8 deletions examples/alpaka/nbody/nbody.cpp
@@ -2,7 +2,7 @@
// SPDX-License-Identifier: LGPL-3.0-or-later

#include "../../common/Stopwatch.hpp"
#include "../../common/hostname.hpp"
#include "../../common/env.hpp"

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
@@ -356,21 +356,21 @@ try
using Size = int;
using Acc = alpaka::ExampleDefaultAcc<Dim, Size>;

const auto env = common::captureEnv<Acc>();
std::cout << problemSize / 1000 << "k particles (" << problemSize * llama::sizeOf<Particle> / 1024 << "kiB)\n"
<< "Caching " << threadsPerBlock << " particles (" << threadsPerBlock * llama::sizeOf<Particle> / 1024
<< " kiB) in shared memory\n"
<< "Reducing on " << elementsPerThread << " particles per thread\n"
<< "Using " << threadsPerBlock << " threads per block\n";
const auto dev = alpaka::getDevByIdx(alpaka::Platform<Acc>{}, 0);
const auto props = alpaka::getAccDevProps<Acc>(dev);
std::cout << "Running on " << alpaka::getName(dev) << ", " << props.m_sharedMemSizeBytes / 1024 << "kiB SM\n";
<< "Using " << threadsPerBlock << " threads per block\n"
<< env << '\n';
std::cout << std::fixed;

std::ofstream plotFile{"nbody_alpaka.sh"};
plotFile.exceptions(std::ios::badbit | std::ios::failbit);
plotFile << fmt::format(
R"(#!/usr/bin/gnuplot -p
set title "nbody alpaka {}ki particles on {} on {}"
# {}
set title "nbody alpaka {}ki particles on {}"
set style data histograms
set style fill solid
set xtics rotate by 45 right
@@ -382,9 +382,9 @@
set y2tics auto
$data << EOD
)",
env,
problemSize / 1024,
alpaka::getAccName<Acc>(),
common::hostname());
alpaka::getAccName<Acc>());
plotFile << "\"\"\t\"update\"\t\"move\"\n";

run<Acc, AoS, AoS>(plotFile);
30 changes: 8 additions & 22 deletions examples/alpaka/pic/pic.cpp
@@ -5,7 +5,7 @@
#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !defined(ALPAKA_ACC_GPU_CUDA_ONLY_MODE)
# define ALPAKA_ACC_GPU_CUDA_ONLY_MODE
#endif
#include "../../common/hostname.hpp"
#include "../../common/env.hpp"

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
Expand Down Expand Up @@ -877,29 +877,18 @@ void run(std::ostream& plotFile)
auto main() -> int
try
{
const auto numThreads = static_cast<std::size_t>(omp_get_max_threads());
const char* affinity = std::getenv("GOMP_CPU_AFFINITY"); // NOLINT(concurrency-mt-unsafe)
affinity = affinity == nullptr ? "NONE - PLEASE PIN YOUR THREADS!" : affinity;

using Acc = alpaka::ExampleDefaultAcc<Dim, Size>;
auto accName = alpaka::getName(alpaka::getDevByIdx(alpaka::Platform<Acc>{}, 0u));
while(static_cast<bool>(std::isspace(accName.back())))
accName.pop_back();
fmt::print(
"Running {} steps with grid {}x{} and {}k particles on {}\n",
nsteps,
gridX,
gridY,
numpart / 1000,
accName);
const auto env = common::captureEnv<Acc>();
const auto accName = common::trim(alpaka::getName(alpaka::getDevByIdx(alpaka::Platform<Acc>{}, 0u)));
fmt::print("Running {} steps with grid {}x{} and {}k particles\n{}\n", nsteps, gridX, gridY, numpart / 1000, env);

std::ofstream plotFile{"pic.sh"};
plotFile.exceptions(std::ios::badbit | std::ios::failbit);
fmt::print(
plotFile,
R"aa(#!/usr/bin/gnuplot -p
# threads: {} affinity: {}
set title "PIC grid {}x{} {}k particles on {} ({})"
# {}
set title "PIC grid {}x{} {}k particles on {}"
set style data histograms
set style fill solid
set xtics rotate by 45 right
@@ -909,14 +898,11 @@
$data << EOD
"" "clr J" "integr" " dep J" " bnd J" "adv B1" "bnd B1" " adv E" " bnd E" "adv B2" "bnd B2" "total"
)aa",
numThreads,
affinity,
env,
gridX,
gridY,
numpart / 1000,
accName,
common::hostname());

accName);

// FieldMapping: AoS RM, AoS CM, AoS Mo,
// SoA RM, SoA CM, SoA Mo,
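common::trim() used above comes from the refactored common headers and is likewise not shown in this diff. The code it replaces only stripped trailing whitespace from the accelerator name, so a plausible sketch (implementation assumed) is:

```cpp
// Hypothetical sketch of common::trim(), mirroring the trailing-whitespace loop
// that the old pic.cpp used inline; the actual helper may do more.
#include <cassert>
#include <cctype>
#include <string>

namespace common
{
    inline auto trim(std::string s) -> std::string
    {
        // Drop trailing whitespace characters, if any.
        while(!s.empty() && std::isspace(static_cast<unsigned char>(s.back())) != 0)
            s.pop_back();
        return s;
    }
} // namespace common

int main()
{
    assert(common::trim("some device name \n") == "some device name");
}
```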
2 changes: 1 addition & 1 deletion examples/alpaka/vectoradd/CMakeLists.txt
@@ -8,5 +8,5 @@ if (NOT TARGET llama::llama)
find_package(llama REQUIRED)
endif()
find_package(alpaka 1.0 REQUIRED)
alpaka_add_executable(${PROJECT_NAME} vectoradd.cpp ../../common/alpakaHelpers.hpp ../../common/Stopwatch.hpp)
alpaka_add_executable(${PROJECT_NAME} vectoradd.cpp ../../common/Stopwatch.hpp)
target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama alpaka::alpaka)
14 changes: 9 additions & 5 deletions examples/alpaka/vectoradd/vectoradd.cpp
@@ -2,7 +2,7 @@
// SPDX-License-Identifier: CC0-1.0

#include "../../common/Stopwatch.hpp"
#include "../../common/hostname.hpp"
#include "../../common/env.hpp"

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
@@ -189,24 +189,28 @@ catch(const std::exception& e)

auto main() -> int
{
using Acc = alpaka::ExampleDefaultAcc<alpaka::DimInt<1>, Size>;
const auto env = common::captureEnv<Acc>();
std::cout << problemSize / 1000 / 1000 << "M values "
<< "(" << problemSize * sizeof(float) / 1024 << "kiB)\n";
<< "(" << problemSize * sizeof(float) / 1024 << "kiB)\n"
<< env << '\n';

std::ofstream plotFile{"vectoradd_alpaka.sh"};
plotFile.exceptions(std::ios::badbit | std::ios::failbit);
plotFile << fmt::format(
R"(#!/usr/bin/gnuplot -p
set title "vectoradd alpaka {}Mi elements on {} on {}"
# {}
set title "vectoradd alpaka {}Mi elements on {}"
set style data histograms
set style fill solid
set xtics rotate by 45 right
set yrange [0:*]
set ylabel "runtime [s]"
$data << EOD
)",
env,
problemSize / 1024 / 1024,
alpaka::getAccName<alpaka::ExampleDefaultAcc<alpaka::DimInt<1>, Size>>(),
common::hostname());
alpaka::getAccName<Acc>());

boost::mp11::mp_for_each<boost::mp11::mp_iota_c<6>>([&](auto ic) { run<decltype(ic)::value>(plotFile); });

43 changes: 0 additions & 43 deletions examples/common/alpakaHelpers.hpp

This file was deleted.
