Skip to content

Commit

Permalink
#11795: Implemented randomized FD tests with trace to test the ring b…
Browse files Browse the repository at this point in the history
…uffer (#14485)

* #0: Initial commit

* #0: Added trace tests

* #0: Run trace for several iterations
  • Loading branch information
sagarwalTT authored Oct 31, 2024
1 parent 017410d commit c0d46d5
Show file tree
Hide file tree
Showing 4 changed files with 302 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,7 @@ inline std::pair<std::shared_ptr<tt::tt_metal::Buffer>, std::vector<uint32_t>> E
EnqueueWriteBuffer(cq, *buffer, src, false);
return std::make_pair(std::move(buffer), src);
}

// Returns true when `device->get_active_ethernet_cores(true)` yields a non-empty
// collection, i.e. the device reports at least one active ethernet core.
// NOTE(review): the meaning of the `true` argument is not visible here — confirm
// against the Device API.
inline bool does_device_have_active_eth_cores(const Device *device) {
    const auto& active_eth_cores = device->get_active_ethernet_cores(true);
    const bool no_active_eth_cores = active_eth_cores.empty();
    return !no_active_eth_cores;
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,6 @@ struct DummyProgramMultiCBConfig {

namespace local_test_functions {

// Returns true when `device->get_active_ethernet_cores(true)` is non-empty,
// i.e. the device reports at least one active ethernet core.
bool does_device_have_active_eth_cores(const Device *device) {
    const auto& active_cores = device->get_active_ethernet_cores(true);
    if (active_cores.empty()) {
        return false;
    }
    return true;
}

void initialize_dummy_kernels(Program& program, const CoreRangeSet& cr_set) {
auto dummy_reader_kernel = CreateKernel(
program, "tt_metal/kernels/dataflow/blank.cpp", cr_set,
Expand Down Expand Up @@ -1404,8 +1400,8 @@ TEST_F(RandomProgramFixture, TestSimpleProgramsOnTensix) {
}

TEST_F(RandomProgramFixture, TestSimpleProgramsOnEth) {
if (!local_test_functions::does_device_have_active_eth_cores(device_)) {
GTEST_SKIP() << "Skipping test because device " << device_->id() << "does not have any active ethernet cores";
if (!does_device_have_active_eth_cores(device_)) {
GTEST_SKIP() << "Skipping test because device " << device_->id() << " does not have any active ethernet cores";
}

for (uint32_t i = 0; i < NUM_PROGRAMS; i++) {
Expand All @@ -1421,8 +1417,8 @@ TEST_F(RandomProgramFixture, TestSimpleProgramsOnEth) {
}

TEST_F(RandomProgramFixture, TestSimpleProgramsOnTensixAndEth) {
if (!local_test_functions::does_device_have_active_eth_cores(device_)) {
GTEST_SKIP() << "Skipping test because device " << device_->id() << "does not have any active ethernet cores";
if (!does_device_have_active_eth_cores(device_)) {
GTEST_SKIP() << "Skipping test because device " << device_->id() << " does not have any active ethernet cores";
}

for (uint32_t i = 0; i < NUM_PROGRAMS; i++) {
Expand Down Expand Up @@ -1460,8 +1456,8 @@ TEST_F(RandomProgramFixture, TestProgramsOnTensix) {
}

TEST_F(RandomProgramFixture, TestProgramsOnEth) {
if (!local_test_functions::does_device_have_active_eth_cores(device_)) {
GTEST_SKIP() << "Skipping test because device " << device_->id() << "does not have any active ethernet cores";
if (!does_device_have_active_eth_cores(device_)) {
GTEST_SKIP() << "Skipping test because device " << device_->id() << " does not have any active ethernet cores";
}

for (uint32_t i = 0; i < NUM_PROGRAMS; i++) {
Expand All @@ -1482,8 +1478,8 @@ TEST_F(RandomProgramFixture, TestProgramsOnEth) {
}

TEST_F(RandomProgramFixture, TestProgramsOnTensixAndEth) {
if (!local_test_functions::does_device_have_active_eth_cores(device_)) {
GTEST_SKIP() << "Skipping test because device " << device_->id() << "does not have any active ethernet cores";
if (!does_device_have_active_eth_cores(device_)) {
GTEST_SKIP() << "Skipping test because device " << device_->id() << " does not have any active ethernet cores";
}

for (uint32_t i = 0; i < NUM_PROGRAMS; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

#include <cstdint>
#include <memory>
#include <vector>

#include "command_queue_fixture.hpp"
#include "command_queue_test_utils.hpp"
#include "detail/tt_metal.hpp"
#include "tt_metal/common/env_lib.hpp"
#include "gtest/gtest.h"
Expand Down Expand Up @@ -367,3 +369,224 @@ TEST_F(SingleDeviceTraceFixture, EnqueueMultiProgramTraceBenchmark) {
}

} // end namespace basic_tests

// Creates NUM_PROGRAMS programs, gives each one a kernel on CoreType::WORKER via the
// fixture's create_kernel() (third argument `true` — presumably the "simple kernel"
// mode suggested by the test name; confirm against the fixture), and enqueues each
// program. Afterwards the fixture's trace_programs() is invoked, the command queue is
// finished, and the returned trace id is released.
TEST_F(RandomProgramTraceFixture, TensixTestSimpleProgramsTrace) {
    for (uint32_t program_idx = 0; program_idx < NUM_PROGRAMS; ++program_idx) {
        // Log only every tenth program.
        const bool is_log_checkpoint = (program_idx % 10) == 0;
        if (is_log_checkpoint) {
            log_info(tt::LogTest, "Creating Program {}", program_idx);
        }

        Program& program = this->programs[program_idx];
        program = CreateProgram();

        this->create_kernel(program, CoreType::WORKER, true);
        EnqueueProgram(this->device_->command_queue(), program, false);
    }

    const uint32_t captured_trace_id = this->trace_programs();

    Finish(this->device_->command_queue());
    ReleaseTrace(this->device_, captured_trace_id);
}

// Same flow as the Tensix simple-program trace test, but every kernel is created on
// CoreType::ETH. The test is skipped when the device reports no active ethernet cores.
TEST_F(RandomProgramTraceFixture, ActiveEthTestSimpleProgramsTrace) {
    const bool has_active_eth_cores = does_device_have_active_eth_cores(this->device_);
    if (!has_active_eth_cores) {
        GTEST_SKIP() << "Skipping test because device " << this->device_->id() << " does not have any active ethernet cores";
    }

    for (uint32_t program_idx = 0; program_idx < NUM_PROGRAMS; ++program_idx) {
        // Log only every tenth program.
        const bool is_log_checkpoint = (program_idx % 10) == 0;
        if (is_log_checkpoint) {
            log_info(tt::LogTest, "Creating Program {}", program_idx);
        }

        Program& program = this->programs[program_idx];
        program = CreateProgram();

        this->create_kernel(program, CoreType::ETH, true);
        EnqueueProgram(this->device_->command_queue(), program, false);
    }

    const uint32_t captured_trace_id = this->trace_programs();

    Finish(this->device_->command_queue());
    ReleaseTrace(this->device_, captured_trace_id);
}

// Creates NUM_PROGRAMS programs where each program randomly receives an ETH kernel,
// a WORKER kernel, or both (never neither), enqueues them, then runs the fixture's
// trace_programs() and releases the resulting trace. Skipped when the device reports
// no active ethernet cores.
TEST_F(RandomProgramTraceFixture, TensixActiveEthTestSimpleProgramsTrace) {
    const bool has_active_eth_cores = does_device_have_active_eth_cores(this->device_);
    if (!has_active_eth_cores) {
        GTEST_SKIP() << "Skipping test because device " << this->device_->id() << " does not have any active ethernet cores";
    }

    for (uint32_t program_idx = 0; program_idx < NUM_PROGRAMS; ++program_idx) {
        // Log only every tenth program.
        const bool is_log_checkpoint = (program_idx % 10) == 0;
        if (is_log_checkpoint) {
            log_info(tt::LogTest, "Creating Program {}", program_idx);
        }

        Program& program = this->programs[program_idx];
        program = CreateProgram();

        // First coin flip: optionally add an ethernet kernel.
        const bool add_eth_kernel = (rand() % 2 == 0);
        if (add_eth_kernel) {
            this->create_kernel(program, CoreType::ETH, true);
        }

        // Second coin flip is always drawn; a worker kernel is forced when no
        // ethernet kernel was added so that the program is never empty.
        const bool worker_coin_flip = (rand() % 2 == 0);
        if (worker_coin_flip || !add_eth_kernel) {
            this->create_kernel(program, CoreType::WORKER, true);
        }

        EnqueueProgram(this->device_->command_queue(), program, false);
    }

    const uint32_t captured_trace_id = this->trace_programs();

    Finish(this->device_->command_queue());
    ReleaseTrace(this->device_, captured_trace_id);
}

// Creates NUM_PROGRAMS programs, each with a kernel on CoreType::WORKER built with the
// fixture's default create_kernel() arguments, and enqueues each program. The fixture's
// trace_programs() is then invoked, the command queue is finished, and the returned
// trace id is released.
//
// This test previously stopped after Finish() and never called trace_programs(), so it
// did not exercise a trace at all despite its name; it now follows the same
// enqueue -> trace -> finish -> release sequence as the other tests in this fixture.
TEST_F(RandomProgramTraceFixture, TensixTestProgramsTrace) {
    for (uint32_t i = 0; i < NUM_PROGRAMS; i++) {
        // Log only every tenth program.
        if (i % 10 == 0) {
            log_info(tt::LogTest, "Creating Program {}", i);
        }
        this->programs[i] = CreateProgram();
        Program& program = this->programs[i];
        this->create_kernel(program, CoreType::WORKER);
        EnqueueProgram(this->device_->command_queue(), program, false);
    }

    const uint32_t trace_id = this->trace_programs();

    Finish(this->device_->command_queue());
    ReleaseTrace(this->device_, trace_id);
}

// Creates NUM_PROGRAMS programs, each with an ETH kernel whose size and runtime-arg
// limits are reduced, enqueues them, then runs the fixture's trace_programs() and
// releases the resulting trace. Skipped when the device reports no active ethernet cores.
TEST_F(RandomProgramTraceFixture, ActiveEthTestProgramsTrace) {
    const bool has_active_eth_cores = does_device_have_active_eth_cores(this->device_);
    if (!has_active_eth_cores) {
        GTEST_SKIP() << "Skipping test because device " << this->device_->id() << " does not have any active ethernet cores";
    }

    for (uint32_t program_idx = 0; program_idx < NUM_PROGRAMS; ++program_idx) {
        // Log only every tenth program.
        const bool is_log_checkpoint = (program_idx % 10) == 0;
        if (is_log_checkpoint) {
            log_info(tt::LogTest, "Creating Program {}", program_idx);
        }

        Program& program = this->programs[program_idx];
        program = CreateProgram();

        // Large eth kernels currently don't fit in the ring buffer, so the maximum
        // kernel size is halved and the maximum number of runtime args is quartered
        // to make sure the kernel fits.
        KernelProperties eth_kernel_properties;
        eth_kernel_properties.max_kernel_size_bytes = MAX_KERNEL_SIZE_BYTES / 2;
        eth_kernel_properties.max_num_rt_args = MAX_NUM_RUNTIME_ARGS / 4;

        this->create_kernel(program, CoreType::ETH, false, eth_kernel_properties);
        EnqueueProgram(this->device_->command_queue(), program, false);
    }

    const uint32_t captured_trace_id = this->trace_programs();

    Finish(this->device_->command_queue());
    ReleaseTrace(this->device_, captured_trace_id);
}

// Creates NUM_PROGRAMS programs where each program randomly receives an ETH kernel,
// a WORKER kernel, or both (never neither), each with reduced limits, enqueues them,
// then runs the fixture's trace_programs() and releases the resulting trace.
// Skipped when the device reports no active ethernet cores.
TEST_F(RandomProgramTraceFixture, TensixActiveEthTestProgramsTrace) {
    const bool has_active_eth_cores = does_device_have_active_eth_cores(this->device_);
    if (!has_active_eth_cores) {
        GTEST_SKIP() << "Skipping test because device " << this->device_->id() << " does not have any active ethernet cores";
    }

    for (uint32_t program_idx = 0; program_idx < NUM_PROGRAMS; ++program_idx) {
        // Log only every tenth program.
        const bool is_log_checkpoint = (program_idx % 10) == 0;
        if (is_log_checkpoint) {
            log_info(tt::LogTest, "Creating Program {}", program_idx);
        }

        Program& program = this->programs[program_idx];
        program = CreateProgram();

        // First coin flip: optionally add an ethernet kernel.
        const bool add_eth_kernel = (rand() % 2 == 0);
        if (add_eth_kernel) {
            // Large eth kernels currently don't fit in the ring buffer, so the maximum
            // kernel size, runtime-arg count and semaphore count are all reduced to
            // make sure the kernel fits.
            KernelProperties eth_kernel_properties;
            eth_kernel_properties.max_kernel_size_bytes = MAX_KERNEL_SIZE_BYTES / 2;
            eth_kernel_properties.max_num_rt_args = MAX_NUM_RUNTIME_ARGS / 4;
            eth_kernel_properties.max_num_sems = MAX_NUM_SEMS / 2;
            this->create_kernel(program, CoreType::ETH, false, eth_kernel_properties);
        }

        // Second coin flip is always drawn; a worker kernel is forced when no
        // ethernet kernel was added so that the program is never empty.
        const bool worker_coin_flip = (rand() % 2 == 0);
        if (worker_coin_flip || !add_eth_kernel) {
            KernelProperties worker_kernel_properties;
            worker_kernel_properties.max_num_sems = MAX_NUM_SEMS / 2;
            this->create_kernel(program, CoreType::WORKER, false, worker_kernel_properties);
        }

        EnqueueProgram(this->device_->command_queue(), program, false);
    }

    const uint32_t captured_trace_id = this->trace_programs();

    Finish(this->device_->command_queue());
    ReleaseTrace(this->device_, captured_trace_id);
}

// Creates NUM_PROGRAMS WORKER-core programs that alternate between the fixture's
// "large" kernel properties (even indices) and "small" kernel properties (odd indices),
// enqueues them, then runs the fixture's trace_programs() and releases the trace.
TEST_F(RandomProgramTraceFixture, TensixTestAlternatingLargeAndSmallProgramsTrace) {
    for (uint32_t program_idx = 0; program_idx < NUM_PROGRAMS; ++program_idx) {
        // Log only every tenth program.
        const bool is_log_checkpoint = (program_idx % 10) == 0;
        if (is_log_checkpoint) {
            log_info(tt::LogTest, "Creating Program {}", program_idx);
        }

        Program& program = this->programs[program_idx];
        program = CreateProgram();

        // Even-indexed programs are large, odd-indexed programs are small.
        const bool use_large_kernel = (program_idx % 2 == 0);
        KernelProperties kernel_properties =
            use_large_kernel ? this->get_large_kernel_properties() : this->get_small_kernel_properties();

        this->create_kernel(program, CoreType::WORKER, false, kernel_properties);
        EnqueueProgram(this->device_->command_queue(), program, false);
    }

    const uint32_t captured_trace_id = this->trace_programs();

    Finish(this->device_->command_queue());
    ReleaseTrace(this->device_, captured_trace_id);
}

// Creates NUM_PROGRAMS WORKER-core programs where only the first program uses the
// fixture's "large" kernel properties and every later one uses the "small" properties,
// enqueues them, then runs the fixture's trace_programs() and releases the trace.
TEST_F(RandomProgramTraceFixture, TensixTestLargeProgramFollowedBySmallProgramsTrace) {
    for (uint32_t program_idx = 0; program_idx < NUM_PROGRAMS; ++program_idx) {
        // Log only every tenth program.
        const bool is_log_checkpoint = (program_idx % 10) == 0;
        if (is_log_checkpoint) {
            log_info(tt::LogTest, "Creating Program {}", program_idx);
        }

        Program& program = this->programs[program_idx];
        program = CreateProgram();

        // Only the very first program is large.
        const bool use_large_kernel = (program_idx == 0);
        KernelProperties kernel_properties =
            use_large_kernel ? this->get_large_kernel_properties() : this->get_small_kernel_properties();

        this->create_kernel(program, CoreType::WORKER, false, kernel_properties);
        EnqueueProgram(this->device_->command_queue(), program, false);
    }

    const uint32_t captured_trace_id = this->trace_programs();

    Finish(this->device_->command_queue());
    ReleaseTrace(this->device_, captured_trace_id);
}

// Creates NUM_PROGRAMS WORKER-core programs where every sixth program (indices 0, 6,
// 12, ...) uses the fixture's "large" kernel properties and the five programs in
// between use the "small" properties, enqueues them, then runs the fixture's
// trace_programs() and releases the trace.
TEST_F(RandomProgramTraceFixture, TensixTestLargeProgramInBetweenFiveSmallProgramsTrace) {
    for (uint32_t program_idx = 0; program_idx < NUM_PROGRAMS; ++program_idx) {
        // Log only every tenth program.
        const bool is_log_checkpoint = (program_idx % 10) == 0;
        if (is_log_checkpoint) {
            log_info(tt::LogTest, "Creating Program {}", program_idx);
        }

        Program& program = this->programs[program_idx];
        program = CreateProgram();

        // One large program followed by five small ones, repeating.
        const bool use_large_kernel = (program_idx % 6 == 0);
        KernelProperties kernel_properties =
            use_large_kernel ? this->get_large_kernel_properties() : this->get_small_kernel_properties();

        this->create_kernel(program, CoreType::WORKER, false, kernel_properties);
        EnqueueProgram(this->device_->command_queue(), program, false);
    }

    const uint32_t captured_trace_id = this->trace_programs();

    Finish(this->device_->command_queue());
    ReleaseTrace(this->device_, captured_trace_id);
}
Loading

0 comments on commit c0d46d5

Please sign in to comment.