Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#0: Faster builds by enabling Unity build for TTNN and tests #14461

Merged
merged 14 commits into from
Oct 30, 2024
Merged
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ endif()
list(PREPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

include(project_options)
include(unity)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
Expand Down Expand Up @@ -121,6 +122,7 @@ message(STATUS "Build Python bindings: ${WITH_PYTHON_BINDINGS}")
message(STATUS "Build Programming Examples: ${BUILD_PROGRAMMING_EXAMPLES}")
message(STATUS "Build TT METAL Tests: ${TT_METAL_BUILD_TESTS}")
message(STATUS "Build TTNN Tests: ${TTNN_BUILD_TESTS}")
message(STATUS "Build with Unity builds: ${TT_UNITY_BUILDS}")
############################################################################################################################

if(ENABLE_BUILD_TIME_TRACE)
Expand Down
17 changes: 15 additions & 2 deletions build_metal.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ show_help() {
echo " --debug Set the build type as Debug."
echo " --clean Remove build workspaces."
echo " --build-static-libs Build tt_metal (not ttnn) as a static lib (BUILD_SHARED_LIBS=OFF)"
echo " --disable-unity-builds Disable Unity builds"
}

clean() {
Expand All @@ -49,11 +50,12 @@ build_metal_tests="OFF"
build_umd_tests="OFF"
build_programming_examples="OFF"
build_static_libs="OFF"
unity_builds="ON"

declare -a cmake_args

OPTIONS=h,e,c,t,a,m,s,u,b:,p
LONGOPTIONS=help,export-compile-commands,enable-ccache,enable-time-trace,enable-asan,enable-msan,enable-tsan,enable-ubsan,build-type:,enable-profiler,install-prefix:,build-tests,build-ttnn-tests,build-metal-tests,build-umd-tests,build-programming-examples,build-static-libs,release,development,debug,clean
LONGOPTIONS=help,export-compile-commands,enable-ccache,enable-time-trace,enable-asan,enable-msan,enable-tsan,enable-ubsan,build-type:,enable-profiler,install-prefix:,build-tests,build-ttnn-tests,build-metal-tests,build-umd-tests,build-programming-examples,build-static-libs,disable-unity-builds,release,development,debug,clean

# Parse the options
PARSED=$(getopt --options=$OPTIONS --longoptions=$LONGOPTIONS --name "$0" -- "$@")
Expand All @@ -70,7 +72,7 @@ while true; do
-h|--help)
show_help;exit 0;;
-e|--export-compile-commands)
export_compile_commands="ON";;
export_compile_commands="ON";unity_builds="OFF";;
-c|--enable-ccache)
enable_ccache="ON";;
-t|--enable-time-trace)
Expand Down Expand Up @@ -101,6 +103,8 @@ while true; do
build_programming_examples="ON";;
--build-static-libs)
build_static_libs="ON";;
--disable-unity-builds)
unity_builds="OFF";;
--release)
build_type="Release";;
--development)
Expand Down Expand Up @@ -156,6 +160,7 @@ echo "INFO: Enable UndefinedBehaviorSanitizer: $enable_ubsan"
echo "INFO: Build directory: $build_dir"
echo "INFO: Install Prefix: $cmake_install_prefix"
echo "INFO: Build tests: $build_tests"
echo "INFO: Enable Unity builds: $unity_builds"

# Prepare cmake arguments
cmake_args+=("-B" "$build_dir")
Expand Down Expand Up @@ -194,6 +199,8 @@ fi

# Forward the compile-commands choice to CMake explicitly in both cases, so
# the -D value passed always reflects $export_compile_commands.
if [ "$export_compile_commands" = "ON" ]; then
cmake_args+=("-DCMAKE_EXPORT_COMPILE_COMMANDS=ON")
else
cmake_args+=("-DCMAKE_EXPORT_COMPILE_COMMANDS=OFF")
fi

if [ "$build_tests" = "ON" ]; then
Expand Down Expand Up @@ -222,6 +229,12 @@ if [ "$build_static_libs" = "ON" ]; then
cmake_args+=("-DBUILD_SHARED_LIBS=OFF")
fi

# Forward the Unity-build choice to CMake as TT_UNITY_BUILDS. The flag is
# always passed (ON or OFF) based on $unity_builds.
if [ "$unity_builds" = "ON" ]; then
cmake_args+=("-DTT_UNITY_BUILDS=ON")
else
cmake_args+=("-DTT_UNITY_BUILDS=OFF")
fi

# Create and link the build directory
mkdir -p $build_dir
ln -nsf $build_dir build
Expand Down
1 change: 1 addition & 0 deletions cmake/helper_functions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ function(CREATE_EAGER_TEST_EXE TESTLIST)
set(TEST_TARGET ${TEST_NAME})
endif()
add_executable(${TEST_TARGET} ${TEST_SRC_PATH})
TT_ENABLE_UNITY_BUILD(${TEST_TARGET})

target_link_libraries(
${TEST_TARGET}
Expand Down
12 changes: 12 additions & 0 deletions cmake/project_options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,16 @@ option(BUILD_PROGRAMMING_EXAMPLES "Enables build of tt_metal programming example
option(TT_METAL_BUILD_TESTS "Enables build of tt_metal tests" OFF)
option(TTNN_BUILD_TESTS "Enables build of ttnn tests" OFF)
option(ENABLE_CCACHE "Build with compiler cache" FALSE)
option(TT_UNITY_BUILDS "Build with Unity builds" ON)
###########################################################################################

# Post-process the TT_UNITY_BUILDS option: even when it was requested, Unity
# builds are switched off in the two situations checked below.
if(TT_UNITY_BUILDS)
# Case 1: compile-commands export is enabled. This block treats it as
# incompatible with Unity builds and overrides the option with a plain
# variable set to OFF.
if(CMAKE_EXPORT_COMPILE_COMMANDS)
message(STATUS "Disabling Unity builds because CMAKE_EXPORT_COMPILE_COMMANDS is ON")
set(TT_UNITY_BUILDS OFF)
endif()
# Case 2: the running CMake is older than 3.20.0, the minimum this project
# gates Unity builds on.
if(CMAKE_VERSION VERSION_LESS "3.20.0")
message(STATUS "CMake 3.20 or newer is required for Unity builds, disabling")
set(TT_UNITY_BUILDS OFF)
endif()
endif()
12 changes: 12 additions & 0 deletions cmake/unity.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# TT_ENABLE_UNITY_BUILD(TARGET)
#
# Opts a single target into CMake Unity builds, but only when TT_UNITY_BUILDS
# is truthy; otherwise the target is left untouched.
#
# Properties set on TARGET:
#   UNITY_BUILD           ON
#   UNITY_BUILD_UNIQUE_ID "CMAKE_UNIQUE_NAMESPACE"
#     (per CMake's documentation for this property, the value names a
#     preprocessor definition that receives a unique per-file value)
#
# Arguments:
#   TARGET - name of an existing target to configure.
function(TT_ENABLE_UNITY_BUILD TARGET)
if(TT_UNITY_BUILDS)
set_target_properties(
${TARGET}
PROPERTIES
UNITY_BUILD
ON
UNITY_BUILD_UNIQUE_ID
sminakov-tt marked this conversation as resolved.
Show resolved Hide resolved
"CMAKE_UNIQUE_NAMESPACE"
)
endif()
endfunction()
sminakov-tt marked this conversation as resolved.
Show resolved Hide resolved
47 changes: 0 additions & 47 deletions tests/tt_metal/test_utils/df/bfloat16.hpp

This file was deleted.

1 change: 0 additions & 1 deletion tests/tt_metal/test_utils/df/df.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,4 @@
// SPDX-License-Identifier: Apache-2.0

#pragma once
#include "tt_metal/test_utils/df/bfloat16.hpp"
#include "tt_metal/test_utils/df/float32.hpp"
1 change: 1 addition & 0 deletions tests/tt_metal/tt_metal/unit_tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ add_executable(
${UNIT_TESTS_SRC}
$<TARGET_OBJECTS:unit_tests_common_o>
)
TT_ENABLE_UNITY_BUILD(unit_tests)
add_executable(unit_tests_galaxy ${CMAKE_CURRENT_SOURCE_DIR}/multichip/galaxy_cluster_api.cpp)

target_link_libraries(
Expand Down
4 changes: 2 additions & 2 deletions tests/tt_metal/tt_metal/unit_tests/buffer/test_banked.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,8 @@ bool reader_datacopy_writer(Device* device, const BankedConfig& cfg) {
////////////////////////////////////////////////////////////////////////////
// Stimulus Generation
////////////////////////////////////////////////////////////////////////////
std::vector<uint32_t> input_packed = tt::test_utils::generate_packed_uniform_random_vector<uint32_t, tt::test_utils::df::bfloat16>(
-1.0f, 1.0f, cfg.size_bytes / tt::test_utils::df::bfloat16::SIZEOF, std::chrono::system_clock::now().time_since_epoch().count());
std::vector<uint32_t> input_packed = tt::test_utils::generate_packed_uniform_random_vector<uint32_t, bfloat16>(
-1.0f, 1.0f, cfg.size_bytes / bfloat16::SIZEOF, std::chrono::system_clock::now().time_since_epoch().count());

////////////////////////////////////////////////////////////////////////////
// Compile and Execute Application
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "gtest/gtest.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ clang-diagnostic-error ⚠️
gtest/gtest.h file not found

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SHUT THE **** UPPPPPPP

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this was the fallout from compile_commands.json not working with unity enabled. Haven't looked into how that's invoked.

#include "tt_metal/host_api.hpp"
#include "tt_metal/test_utils/env_vars.hpp"
Expand Down
34 changes: 17 additions & 17 deletions tests/tt_metal/tt_metal/unit_tests/compute/test_broadcast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ struct BroadcastConfig {
MathFidelity math_fidelity = MathFidelity::HiFi4;
};

void mask_src_b_for_broadcast(std::vector<tt::test_utils::df::bfloat16>& tile, const std::vector<uint32_t> &shape, BroadcastDim dim) {
void mask_src_b_for_broadcast(std::vector<bfloat16>& tile, const std::vector<uint32_t> &shape, BroadcastDim dim) {
int num_rows = shape.at(0);
int num_cols = shape.at(1);

Expand All @@ -83,14 +83,14 @@ void mask_src_b_for_broadcast(std::vector<tt::test_utils::df::bfloat16>& tile, c
}
}

std::vector<tt::test_utils::df::bfloat16> gold_broadcast(std::vector<tt::test_utils::df::bfloat16>& src_a, std::vector<tt::test_utils::df::bfloat16>& src_b, const std::vector<uint32_t> &shape, EltwiseOp op, BroadcastDim dim, MathFidelity math_fidelity = MathFidelity::HiFi4) {
std::vector<bfloat16> gold_broadcast(std::vector<bfloat16>& src_a, std::vector<bfloat16>& src_b, const std::vector<uint32_t> &shape, EltwiseOp op, BroadcastDim dim, MathFidelity math_fidelity = MathFidelity::HiFi4) {
int num_rows = shape.at(0);
int num_cols = shape.at(1);

uint16_t srca_fid_mask = 0xFFFF;
uint16_t srcb_fid_mask = 0xFFFF;

std::vector<tt::test_utils::df::bfloat16> golden(num_cols * num_rows);
std::vector<bfloat16> golden(num_cols * num_rows);
auto arch = get_arch_from_string(get_umd_arch_name());

switch (math_fidelity) {
Expand All @@ -103,7 +103,7 @@ std::vector<tt::test_utils::df::bfloat16> gold_broadcast(std::vector<tt::test_ut

for (int i = 0; i < num_rows; i++) {
for (int j = 0; j < num_cols; j++) {
tt::test_utils::df::bfloat16 broadcast_value;
bfloat16 broadcast_value;
switch (dim)
{
case BroadcastDim::ROW: { broadcast_value = src_b[j]; break; }
Expand All @@ -118,8 +118,8 @@ std::vector<tt::test_utils::df::bfloat16> gold_broadcast(std::vector<tt::test_ut
case EltwiseOp::SUB: { golden[i * num_cols + j] = src_a[i * num_cols + j].to_float() - broadcast_value.to_float(); break; }
case EltwiseOp::MUL: {
golden[i * num_cols + j] =
tt::test_utils::df::bfloat16(std::bit_cast<uint32_t>(src_a[i * num_cols + j].to_packed() & srca_fid_mask)).to_float() *
tt::test_utils::df::bfloat16(std::bit_cast<uint32_t>(broadcast_value.to_packed() & srcb_fid_mask)).to_float();
bfloat16(std::bit_cast<uint32_t>(src_a[i * num_cols + j].to_packed() & srca_fid_mask)).to_float() *
bfloat16(std::bit_cast<uint32_t>(broadcast_value.to_packed() & srcb_fid_mask)).to_float();
break;
}
default: { TT_THROW("Unsupported EltwiseOp={}", op); break; }
Expand All @@ -142,7 +142,7 @@ void run_single_core_broadcast(tt_metal::Device* device, const BroadcastConfig&
constexpr uint32_t tile_width = 32;
constexpr uint32_t tile_height = 32;

constexpr uint32_t single_tile_size = tile_width * tile_height * tt::test_utils::df::bfloat16::SIZEOF;
constexpr uint32_t single_tile_size = tile_width * tile_height * bfloat16::SIZEOF;

tt_metal::InterleavedBufferConfig dram_config{
.device=device,
Expand Down Expand Up @@ -244,25 +244,25 @@ void run_single_core_broadcast(tt_metal::Device* device, const BroadcastConfig&
(uint32_t)1,
});

std::vector<tt::test_utils::df::bfloat16> input0 = generate_uniform_random_vector<tt::test_utils::df::bfloat16>(
std::vector<bfloat16> input0 = generate_uniform_random_vector<bfloat16>(
-1.0f,
1.0f,
single_tile_size / tt::test_utils::df::bfloat16::SIZEOF,
single_tile_size / bfloat16::SIZEOF,
std::chrono::system_clock::now().time_since_epoch().count());

std::vector<tt::test_utils::df::bfloat16> input1 = generate_uniform_random_vector<tt::test_utils::df::bfloat16>(
std::vector<bfloat16> input1 = generate_uniform_random_vector<bfloat16>(
-1.0f,
1.0f,
single_tile_size / tt::test_utils::df::bfloat16::SIZEOF,
single_tile_size / bfloat16::SIZEOF,
std::chrono::system_clock::now().time_since_epoch().count());

mask_src_b_for_broadcast(input1, {tile_width, tile_height}, test_config.broadcast_dim);

std::vector<tt::test_utils::df::bfloat16> golden = gold_broadcast(input0, input1, {tile_width, tile_height}, test_config.eltwise_op, test_config.broadcast_dim, test_config.math_fidelity);
std::vector<bfloat16> golden = gold_broadcast(input0, input1, {tile_width, tile_height}, test_config.eltwise_op, test_config.broadcast_dim, test_config.math_fidelity);

auto packed_input0 = pack_vector<uint32_t, tt::test_utils::df::bfloat16>(input0);
auto packed_input1 = pack_vector<uint32_t, tt::test_utils::df::bfloat16>(input1);
auto packed_golden = pack_vector<uint32_t, tt::test_utils::df::bfloat16>(golden);
auto packed_input0 = pack_vector<uint32_t, bfloat16>(input0);
auto packed_input1 = pack_vector<uint32_t, bfloat16>(input1);
auto packed_golden = pack_vector<uint32_t, bfloat16>(golden);
unit_tests::compute::GoldenConfig config = {
.num_tiles_r_dim = tile_width/32,
.num_tiles_c_dim = tile_height/32
Expand All @@ -279,10 +279,10 @@ void run_single_core_broadcast(tt_metal::Device* device, const BroadcastConfig&
tt_metal::detail::ReadFromBuffer(dst_dram_buffer, dest_buffer_data);
auto dest_buffer_data_untilized = unit_tests::compute::gold_standard_untilize(dest_buffer_data, config);

bool result = is_close_packed_vectors<tt::test_utils::df::bfloat16, uint32_t>(
bool result = is_close_packed_vectors<bfloat16, uint32_t>(
dest_buffer_data_untilized,
packed_golden,
[&](const tt::test_utils::df::bfloat16& a, const tt::test_utils::df::bfloat16& b) {
[&](const bfloat16& a, const bfloat16& b) {
return is_close(a, b, 0.0155);
});
ASSERT_TRUE(result);
Expand Down
20 changes: 10 additions & 10 deletions tests/tt_metal/tt_metal/unit_tests/compute/test_cumsum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ struct CumsumConfig {
bool rowwise;
};

std::vector<tt::test_utils::df::bfloat16> gold_cumsum(std::vector<tt::test_utils::df::bfloat16>& src, const std::vector<uint32_t> &shape, bool rowwise) {
std::vector<bfloat16> gold_cumsum(std::vector<bfloat16>& src, const std::vector<uint32_t> &shape, bool rowwise) {
int N = shape.at(0);
int W = shape.at(1);
int H = shape.at(2);

std::vector<tt::test_utils::df::bfloat16> golden(N * W * H);
std::vector<bfloat16> golden(N * W * H);

int dim_a = rowwise ? H : W;
int dim_b = rowwise ? W : H;
Expand Down Expand Up @@ -57,7 +57,7 @@ void run_single_core_cumsum(tt_metal::Device* device, const CumsumConfig& test_c
constexpr uint32_t tile_width = 32;
constexpr uint32_t tile_height = 32;

constexpr uint32_t single_tile_size = tile_width * tile_height * tt::test_utils::df::bfloat16::SIZEOF;
constexpr uint32_t single_tile_size = tile_width * tile_height * bfloat16::SIZEOF;

uint32_t W = test_config.Wt * tile_width;
uint32_t H = test_config.Ht * tile_height;
Expand Down Expand Up @@ -147,16 +147,16 @@ void run_single_core_cumsum(tt_metal::Device* device, const CumsumConfig& test_c
(uint32_t)test_config.Ht * test_config.Wt // Used for transposing kernel
});

std::vector<tt::test_utils::df::bfloat16> input = generate_uniform_random_vector<tt::test_utils::df::bfloat16>(
std::vector<bfloat16> input = generate_uniform_random_vector<bfloat16>(
-1.0f,
1.0f,
dram_buffer_size / tt::test_utils::df::bfloat16::SIZEOF,
dram_buffer_size / bfloat16::SIZEOF,
std::chrono::system_clock::now().time_since_epoch().count());

std::vector<tt::test_utils::df::bfloat16> golden = gold_cumsum(input, {test_config.N, W, H}, test_config.rowwise);
auto golden_packed = pack_vector<uint32_t, tt::test_utils::df::bfloat16>(golden);
std::vector<bfloat16> golden = gold_cumsum(input, {test_config.N, W, H}, test_config.rowwise);
auto golden_packed = pack_vector<uint32_t, bfloat16>(golden);

auto input_packed = pack_vector<uint32_t, tt::test_utils::df::bfloat16>(input);
auto input_packed = pack_vector<uint32_t, bfloat16>(input);
auto input_packed_tilized = unit_tests::compute::gold_standard_tilize(input_packed, {test_config.N * test_config.Ht, test_config.Wt});

tt_metal::detail::WriteToBuffer(src_dram_buffer, input_packed_tilized);
Expand All @@ -169,10 +169,10 @@ void run_single_core_cumsum(tt_metal::Device* device, const CumsumConfig& test_c

log_info(tt::LogTest, "Running test for N = {}, Wt = {}, Ht = {}", test_config.N, test_config.Wt, test_config.Ht);

bool result = is_close_packed_vectors<tt::test_utils::df::bfloat16, uint32_t>(
bool result = is_close_packed_vectors<bfloat16, uint32_t>(
output_packed,
golden_packed,
[&](const tt::test_utils::df::bfloat16& a, const tt::test_utils::df::bfloat16& b) {
[&](const bfloat16& a, const bfloat16& b) {
return is_close(a, b, 0.01f);
});
ASSERT_TRUE(result);
Expand Down
Loading
Loading