Skip to content

Commit

Permalink
LLK Test Coverage Follow-up (#11715)
Browse files Browse the repository at this point in the history
* #0: Expanded Compute Kernel API Test Support:
1. test_copy_block_matmul_partials [new test] - tests
block tile copy and pack API
2. test_reconfig [new test] - tests reconfiguration
API for unpack, math and pack
3. test_matmul_X_tile [expanded] - tests matmul block
and related init API
4. test_reduce [expanded] - reduce_tile_math test coverage
  • Loading branch information
ncvetkovicTT authored Aug 23, 2024
1 parent fad4623 commit e3ee890
Show file tree
Hide file tree
Showing 13 changed files with 1,130 additions and 7 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include <cstdint>
#include "compute_kernel_api/eltwise_unary/sfpu_split_includes.h"
#include "compute_kernel_api/eltwise_binary.h"
#include "compute_kernel_api/tile_move_copy.h"
#include "compute_kernel_api/eltwise_unary/eltwise_unary.h"
#include "compute_kernel_api.h"

#define START_IN_TILE_ID (0)
#define START_DST_TILE_ID (0)
namespace NAMESPACE {
void MAIN {

constexpr uint32_t num_tiles = get_compile_time_arg_val(0);
constexpr uint32_t num_single_transfer = get_compile_time_arg_val(1);
constexpr uint32_t in_cb_id = get_compile_time_arg_val(2);
constexpr uint32_t out_cb_id = get_compile_time_arg_val(3);

constexpr uint32_t outer_loop = num_tiles / num_single_transfer;

unary_op_init_common(in_cb_id, out_cb_id);

// Run the outer loop
for(uint32_t b = 0; b < outer_loop; ++b) {
// Wait for num_single_transfer tiles to be available in in_cb
cb_wait_front(in_cb_id, num_single_transfer);
// Acquire DEST reg for MATH/PACK
acquire_dst(tt::DstMode::Half);
// Reserve out_cb space for num_single_transfer tiles
cb_reserve_back(out_cb_id, num_single_transfer);

// Copy num_single_transfer tiles from in_cb to DEST
copy_block_matmul_partials(in_cb_id, START_IN_TILE_ID, START_DST_TILE_ID, num_single_transfer);
// Pack num_single_transfer tiles to out_cb
matmul_pack_tile(START_DST_TILE_ID, out_cb_id, num_single_transfer);

// Release DEST reg marking compute/pack complete
release_dst(tt::DstMode::Half);
// Move rd ptr from in_cb by num_single_transfer places
cb_pop_front(in_cb_id, num_single_transfer);
// Move wr prt from out_cb by num_single_transfer places
cb_push_back(out_cb_id, num_single_transfer);
}
}
}
107 changes: 107 additions & 0 deletions tests/tt_metal/tt_metal/test_kernels/compute/matmul_block.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include <cstdint>
#include "compute_kernel_api/tile_move_copy.h"
#include "compute_kernel_api/matmul.h"

namespace NAMESPACE {
void MAIN {

uint32_t block_tile_dim = get_compile_time_arg_val(0);
uint32_t dst_tile_rows = get_compile_time_arg_val(1);
uint32_t dst_tile_cols = get_compile_time_arg_val(2);
uint32_t block_cnt = get_compile_time_arg_val(3);
uint32_t in0_block_tile_cnt = get_compile_time_arg_val(4);
uint32_t in1_block_tile_cnt = get_compile_time_arg_val(5);
uint32_t out_block_tile_cnt = get_compile_time_arg_val(6);

#if (TEST_INIT_SHORT == 1)
#if (WITH_DT == 1)
// Intentionally wrong init with different data formats
mm_block_init(
tt::CB::c_in0,
tt::CB::c_in2,
tt::CB::c_out0,
false,
dst_tile_cols - 1,
dst_tile_rows - 1,
block_tile_dim - 1
);
// Corrected init short with dt
mm_block_init_short_with_dt(
tt::CB::c_in0,
tt::CB::c_in1,
tt::CB::c_in2,
false,
dst_tile_cols,
dst_tile_rows,
block_tile_dim
);
#elif (WITH_DT == 0)
// Intentionally wrong init with same data formats
mm_block_init(
tt::CB::c_in1,
tt::CB::c_in0,
tt::CB::c_out0,
false,
dst_tile_cols - 1,
dst_tile_rows - 1,
block_tile_dim - 1
);
// Corrected init short
mm_block_init_short(
tt::CB::c_in0,
tt::CB::c_in1,
false,
dst_tile_cols,
dst_tile_rows,
block_tile_dim
);
#endif
#elif (TEST_INIT_SHORT == 0)
mm_block_init(
tt::CB::c_in0,
tt::CB::c_in1,
tt::CB::c_out0,
false,
dst_tile_cols,
dst_tile_rows,
block_tile_dim
);
#endif

acquire_dst(tt::DstMode::Full);
for(uint32_t b=0;b<block_cnt;++b)
{
cb_wait_front(tt::CB::c_in0, in0_block_tile_cnt);
cb_wait_front(tt::CB::c_in1, in1_block_tile_cnt);

matmul_block(
tt::CB::c_in0,
tt::CB::c_in1,
0,
0,
0,
false,
dst_tile_cols,
dst_tile_rows,
block_tile_dim
);

cb_pop_front(tt::CB::c_in0, in0_block_tile_cnt);
cb_pop_front(tt::CB::c_in1, in1_block_tile_cnt);
}

// Pack out
cb_reserve_back(tt::CB::c_out0, out_block_tile_cnt);
for(uint32_t i=0 ; i<out_block_tile_cnt;++i)
{
pack_tile(i, tt::CB::c_out0);
}
cb_push_back(tt::CB::c_out0, out_block_tile_cnt);

release_dst(tt::DstMode::Full);
}
}
108 changes: 108 additions & 0 deletions tests/tt_metal/tt_metal/test_kernels/compute/reconfig.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include "compute_kernel_api/eltwise_binary.h"
#include <cstdint>
#include "compute_kernel_api/eltwise_unary/sfpu_split_includes.h"
#include "compute_kernel_api/tile_move_copy.h"
#include "compute_kernel_api/pack.h"
#include "compute_kernel_api/unpack.h"

#define START_IN_TILE_ID (0)
#define START_DST_TILE_ID (0)

namespace NAMESPACE {
// Compute kernel for the reconfig test. Per loop iteration it:
//   1. copies ublock_size_tiles tiles from c_in2 into DEST,
//   2. runs add_tiles on c_in1 / c_in0 into the same DEST indices,
//   3. packs the DEST tiles to c_out0 and then again to c_out1.
// Between these steps it issues the unpack/pack reconfiguration calls under test.
//
// Runtime args:
//   0: num_tiles          - number of loop iterations
//   1: ublock_size_tiles  - tiles waited on / reserved / popped / pushed per iteration
//
// Build-time defines (an undefined macro evaluates to 0 in #if):
//   EXPLICIT_RECONFIG  - 1: reconfig calls take (old, new) operands; 0: only the new operand(s)
//   SPLIT_SRC_RECONFIG - 1: separate srca / srcb unpack reconfig calls; 0: one combined call
//   L1_ACC             - 1: pack_reconfig_l1_acc(true) is issued before the first pack
// If EXPLICIT_RECONFIG or SPLIT_SRC_RECONFIG is defined to a value other than 0 or 1,
// the corresponding reconfig calls are not compiled in at all.
//
// NOTE(review): every iteration handles ublock_size_tiles tiles, so num_tiles acts as a
// block count here rather than a tile count - confirm against the host-side runtime args.
void MAIN {
uint32_t num_tiles = get_arg_val<uint32_t>(0);
uint32_t ublock_size_tiles = get_arg_val<uint32_t>(1);

// Circular buffers used by the test and the data format each one is expected to hold
constexpr auto cb_in0 = tt::CB::c_in0; // Bfp8_b
constexpr auto cb_in1 = tt::CB::c_in1; // Bfp16_b
constexpr auto cb_in2 = tt::CB::c_in2; // Bfp16_b
constexpr auto cb_out0 = tt::CB::c_out0; // Bfp16_b
constexpr auto cb_out1 = tt::CB::c_out1; // Bfp8_b


// One-time init with c_in0 / c_in1 as inputs and c_out0 as output
binary_op_init_common(cb_in0, cb_in1, cb_out0);
add_tiles_init_nof();
for (uint32_t block = 0; block < num_tiles; ++block) {

// Wait for both add operands and reserve space in both outputs up front
cb_wait_front(cb_in0, ublock_size_tiles);
cb_wait_front(cb_in1, ublock_size_tiles);
cb_reserve_back(cb_out0, ublock_size_tiles);
cb_reserve_back(cb_out1, ublock_size_tiles);

acquire_dst(tt::DstMode::Half);

// ------------------------- Copy to DEST -----------------------------

// Tests both inits, 1st one inits UNPACK for Bfp8_b
// data inside CB_0, 2nd one inits it to Bfp16_b
// which is inside CB_2
copy_tile_init();
copy_tile_to_dst_init_short_with_dt(cb_in0, cb_in2);

// c_in2 is waited on and popped locally, around the copy that consumes it
cb_wait_front(cb_in2, ublock_size_tiles);
copy_block_matmul_partials(cb_in2, START_IN_TILE_ID, START_DST_TILE_ID, ublock_size_tiles);
cb_pop_front(cb_in2, ublock_size_tiles);

// -------------------- Addition with acc -----------------------------

// Init like CB_0 is in A and CB_1 is in B
// (presumably the trailing `true` enables accumulation into DEST - TODO confirm)
add_tiles_init(cb_in0, cb_in1, true);

// Reconfigure UNPACK for correct source formats, tests reconfig calls.
// After this, A is configured for c_in1 and B for c_in0, which is the
// operand order add_tiles is called with below.
#if (EXPLICIT_RECONFIG == 1)
#if (SPLIT_SRC_RECONFIG == 1)
// Indices for old_operand, new_operand
unpack_reconfig_data_format_srca(cb_in0, cb_in1);
unpack_reconfig_data_format_srcb(cb_in1, cb_in0);
#elif (SPLIT_SRC_RECONFIG == 0)
// Indices for old_A, new_A, old_B, new_B
unpack_reconfig_data_format(cb_in0, cb_in1, cb_in1, cb_in0);
#endif // SPLIT_SRC_RECONFIG
#elif (EXPLICIT_RECONFIG == 0)
#if (SPLIT_SRC_RECONFIG == 1)
// Indices for new_operand
unpack_reconfig_data_format_srca(cb_in1);
unpack_reconfig_data_format_srcb(cb_in0);
#elif (SPLIT_SRC_RECONFIG == 0)
// Indices for new_A, new_B
unpack_reconfig_data_format(cb_in1, cb_in0);
#endif // SPLIT_SRC_RECONFIG
#endif // EXPLICIT_RECONFIG

// Operands are swapped relative to add_tiles_init above (c_in1 first, c_in0 second);
// tile i of each input goes to DEST index i.
for (uint32_t i = 0; i < ublock_size_tiles; ++i) {
add_tiles(cb_in1, cb_in0, i, i, i);
}

// ----------------------- Pack to 2 outs -----------------------------

// Reconfig for L1 accumulation with old calc values
#if (L1_ACC == 1)
pack_reconfig_l1_acc(true);
#endif
// Configured already for CB_16, Bfp16_b
matmul_pack_tile(START_DST_TILE_ID, cb_out0, ublock_size_tiles);
// Reconfig for CB_17, Bfp8_b, then pack to CB_17
#if (EXPLICIT_RECONFIG == 1)
// Indices for old_output, new_output
pack_reconfig_data_format(cb_out0, cb_out1);
#elif (EXPLICIT_RECONFIG == 0)
// Indices for new_output
pack_reconfig_data_format(cb_out1);
#endif
// Not testing for L1 accumulation
// (issued unconditionally, regardless of the L1_ACC define)
pack_reconfig_l1_acc(false);

// Second pack of the same DEST tiles, this time into c_out1
matmul_pack_tile(START_DST_TILE_ID, cb_out1, ublock_size_tiles);
release_dst(tt::DstMode::Half);

// Free the consumed add operands and publish both output blocks
cb_pop_front(cb_in0, ublock_size_tiles);
cb_pop_front(cb_in1, ublock_size_tiles);
cb_push_back(cb_out0, ublock_size_tiles);
cb_push_back(cb_out1, ublock_size_tiles);
}
}
} // namespace NAMESPACE
6 changes: 6 additions & 0 deletions tests/tt_metal/tt_metal/test_kernels/compute/reduce_w.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,14 @@ void MAIN {
acquire_dst(tt::DstMode::Half);
for(uint32_t wt = 0; wt < Wt; ++wt) {
cb_wait_front(tt::CB::c_in0, onetile);
#if (MATH_ONLY == 1)
UNPACK(( llk_unpack_AB(tt::CB::c_in0, tt::CB::c_in2, 0, 0) ));
// REDUCE_OP is expected to come from add_define
reduce_tile_math(reduce_dst_idx);
#elif (MATH_ONLY == 0)
// REDUCE_OP is expected to come from add_define
reduce_tile(tt::CB::c_in0, tt::CB::c_in2, 0, 0, reduce_dst_idx);
#endif
cb_pop_front(tt::CB::c_in0, onetile);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include "dataflow_api.h"

/**
 * Reader kernel: moves data from one NoC location into circular buffer
 * cb_id_in0, one ublock per iteration.
 *
 * Runtime args:
 *   0: src_addr           - starting source address, advanced by one ublock per iteration
 *   1: src_noc_x          - NoC x coordinate of the source
 *   2: src_noc_y          - NoC y coordinate of the source
 *   3: num_tiles          - loop bound; the loop index advances by ublock_size_tiles
 *   4: cb_id_in0          - circular buffer written to
 *   5: ublock_size_tiles  - tiles transferred per iteration
 *   6: reader_only        - non-zero skips cb_reserve_back / cb_push_back, so the
 *                           data is still read to the CB write pointer but never published
 */
void kernel_main() {
    uint32_t src_addr = get_arg_val<uint32_t>(0);
    const uint32_t src_noc_x = get_arg_val<uint32_t>(1);
    const uint32_t src_noc_y = get_arg_val<uint32_t>(2);
    const uint32_t num_tiles = get_arg_val<uint32_t>(3);
    const uint32_t cb_id_in0 = get_arg_val<uint32_t>(4);
    const uint32_t ublock_size_tiles = get_arg_val<uint32_t>(5);
    // Explicit conversion instead of an implicit uint32_t -> bool narrowing.
    const bool reader_only = get_arg_val<uint32_t>(6) != 0;

    // A zero-sized ublock would never advance the loop index below and the
    // kernel would spin forever; there is nothing to transfer, so bail out.
    if (ublock_size_tiles == 0) {
        return;
    }

    const uint32_t ublock_size_bytes = get_tile_size(cb_id_in0) * ublock_size_tiles;

    for (uint32_t i = 0; i < num_tiles; i += ublock_size_tiles) {
        const uint64_t src_noc_addr = get_noc_addr(src_noc_x, src_noc_y, src_addr);
        if (!reader_only) {
            cb_reserve_back(cb_id_in0, ublock_size_tiles);
        }
        const uint32_t l1_write_addr = get_write_ptr(cb_id_in0);

        noc_async_read(src_noc_addr, l1_write_addr, ublock_size_bytes);

        // Wait for the read to land before handing the tiles to the consumer.
        noc_async_read_barrier();
        if (!reader_only) {
            cb_push_back(cb_id_in0, ublock_size_tiles);
        }
        src_addr += ublock_size_bytes;
    }
}
41 changes: 41 additions & 0 deletions tests/tt_metal/tt_metal/test_kernels/dataflow/writer_binary.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include "dataflow_api.h"

/**
 * Writer kernel with two outputs: drains circular buffers cb_id_out0 and
 * cb_id_out1 in lock-step, writing each one to its own NoC destination.
 *
 * Runtime args:
 *   0: dst0_addr          - starting destination address for output 0
 *   1: dst0_noc_x         - NoC x coordinate for output 0
 *   2: dst0_noc_y         - NoC y coordinate for output 0
 *   3: cb_id_out0         - circular buffer drained into destination 0
 *   4: dst1_addr          - starting destination address for output 1
 *   5: dst1_noc_x         - NoC x coordinate for output 1
 *   6: dst1_noc_y         - NoC y coordinate for output 1
 *   7: cb_id_out1         - circular buffer drained into destination 1
 *   8: num_tiles          - loop bound; the loop index advances by ublock_size_tiles
 *   9: ublock_size_tiles  - tiles transferred from each buffer per iteration
 *
 * The byte size of a ublock is computed separately per buffer from
 * get_tile_size, so the two destinations advance by independent strides.
 */
void kernel_main() {
    uint32_t dst0_addr = get_arg_val<uint32_t>(0);
    const uint32_t dst0_noc_x = get_arg_val<uint32_t>(1);
    const uint32_t dst0_noc_y = get_arg_val<uint32_t>(2);
    const uint32_t cb_id_out0 = get_arg_val<uint32_t>(3);
    uint32_t dst1_addr = get_arg_val<uint32_t>(4);
    const uint32_t dst1_noc_x = get_arg_val<uint32_t>(5);
    const uint32_t dst1_noc_y = get_arg_val<uint32_t>(6);
    const uint32_t cb_id_out1 = get_arg_val<uint32_t>(7);
    const uint32_t num_tiles = get_arg_val<uint32_t>(8);
    const uint32_t ublock_size_tiles = get_arg_val<uint32_t>(9);

    // A zero-sized ublock would never advance the loop index below and the
    // kernel would spin forever; there is nothing to transfer, so bail out.
    if (ublock_size_tiles == 0) {
        return;
    }

    const uint32_t ublock0_size_bytes = get_tile_size(cb_id_out0) * ublock_size_tiles;
    const uint32_t ublock1_size_bytes = get_tile_size(cb_id_out1) * ublock_size_tiles;

    for (uint32_t i = 0; i < num_tiles; i += ublock_size_tiles) {
        const uint64_t dst0_noc_addr = get_noc_addr(dst0_noc_x, dst0_noc_y, dst0_addr);
        const uint64_t dst1_noc_addr = get_noc_addr(dst1_noc_x, dst1_noc_y, dst1_addr);

        // Both buffers must have a full ublock before either write is issued.
        cb_wait_front(cb_id_out0, ublock_size_tiles);
        cb_wait_front(cb_id_out1, ublock_size_tiles);
        const uint32_t l1_read_addr0 = get_read_ptr(cb_id_out0);
        const uint32_t l1_read_addr1 = get_read_ptr(cb_id_out1);

        noc_async_write(l1_read_addr0, dst0_noc_addr, ublock0_size_bytes);
        noc_async_write(l1_read_addr1, dst1_noc_addr, ublock1_size_bytes);

        // One barrier covers both writes; the tiles are popped only afterwards.
        noc_async_write_barrier();

        cb_pop_front(cb_id_out0, ublock_size_tiles);
        cb_pop_front(cb_id_out1, ublock_size_tiles);
        dst0_addr += ublock0_size_bytes;
        dst1_addr += ublock1_size_bytes;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include "dataflow_api.h"

/**
 * Writer kernel: drains circular buffer cb_id_out0 to one NoC destination,
 * one ublock per iteration.
 *
 * Runtime args:
 *   0: dst_addr           - starting destination address, advanced by one ublock per iteration
 *   1: dst_noc_x          - NoC x coordinate of the destination
 *   2: dst_noc_y          - NoC y coordinate of the destination
 *   3: num_tiles          - loop bound; the loop index advances by ublock_size_tiles
 *   4: cb_id_out0         - circular buffer read from
 *   5: ublock_size_tiles  - tiles transferred per iteration
 *   6: writer_only        - non-zero skips cb_wait_front / cb_pop_front, so the
 *                           write is issued from the CB read pointer without
 *                           waiting for or consuming tiles
 */
void kernel_main() {
    uint32_t dst_addr = get_arg_val<uint32_t>(0);
    const uint32_t dst_noc_x = get_arg_val<uint32_t>(1);
    const uint32_t dst_noc_y = get_arg_val<uint32_t>(2);
    const uint32_t num_tiles = get_arg_val<uint32_t>(3);
    const uint32_t cb_id_out0 = get_arg_val<uint32_t>(4);
    const uint32_t ublock_size_tiles = get_arg_val<uint32_t>(5);
    // Explicit conversion instead of an implicit uint32_t -> bool narrowing.
    const bool writer_only = get_arg_val<uint32_t>(6) != 0;

    // A zero-sized ublock would never advance the loop index below and the
    // kernel would spin forever; there is nothing to transfer, so bail out.
    if (ublock_size_tiles == 0) {
        return;
    }

    const uint32_t ublock_size_bytes = get_tile_size(cb_id_out0) * ublock_size_tiles;

    for (uint32_t i = 0; i < num_tiles; i += ublock_size_tiles) {
        const uint64_t dst_noc_addr = get_noc_addr(dst_noc_x, dst_noc_y, dst_addr);
        if (!writer_only) {
            cb_wait_front(cb_id_out0, ublock_size_tiles);
        }
        const uint32_t l1_read_addr = get_read_ptr(cb_id_out0);

        noc_async_write(l1_read_addr, dst_noc_addr, ublock_size_bytes);

        // Wait for the write to complete before releasing the tiles.
        noc_async_write_barrier();
        if (!writer_only) {
            cb_pop_front(cb_id_out0, ublock_size_tiles);
        }
        dst_addr += ublock_size_bytes;
    }
}
2 changes: 2 additions & 0 deletions tests/tt_metal/tt_metal/unit_tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ set(UNIT_TESTS_SRC
${CMAKE_CURRENT_SOURCE_DIR}/compute/test_sfpu_compute.cpp
${CMAKE_CURRENT_SOURCE_DIR}/compute/test_dropout_sfpu_compute.cpp
${CMAKE_CURRENT_SOURCE_DIR}/compute/test_untilize_tilize.cpp
${CMAKE_CURRENT_SOURCE_DIR}/compute/test_copy_block_matmul_partials.cpp
${CMAKE_CURRENT_SOURCE_DIR}/compute/test_reconfig.cpp
${CMAKE_CURRENT_SOURCE_DIR}/compute/test_transpose.cpp
${CMAKE_CURRENT_SOURCE_DIR}/core_coord/test_CoreRange_adjacent.cpp
${CMAKE_CURRENT_SOURCE_DIR}/core_coord/test_CoreRange_contains.cpp
Expand Down
Loading

0 comments on commit e3ee890

Please sign in to comment.