Skip to content

Commit

Permalink
#10866: Read profiler buffer with EnqueueReadBuffer in fast dispatch mode
Browse files Browse the repository at this point in the history
  • Loading branch information
mo-tenstorrent committed Aug 22, 2024
1 parent 9022fbb commit a1e228a
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 8 deletions.
8 changes: 4 additions & 4 deletions tests/tt_metal/tools/profiler/test_device_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,12 @@ def test_dispatch_cores():
ZONE_COUNT = 37
REF_COUNT_DICT = {
"grayskull": {
"Tensix CQ Dispatch": 11,
"Tensix CQ Prefetch": 14,
"Tensix CQ Dispatch": 16,
"Tensix CQ Prefetch": 21,
},
"wormhole_b0": {
"Tensix CQ Dispatch": 11,
"Tensix CQ Prefetch": 14,
"Tensix CQ Dispatch": 16,
"Tensix CQ Prefetch": 21,
},
}

Expand Down
27 changes: 25 additions & 2 deletions tt_metal/tools/profiler/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,8 @@ void DeviceProfiler::generateZoneSourceLocationsHashes()

void DeviceProfiler::dumpResults (
Device *device,
const vector<CoreCoord> &worker_cores){
const vector<CoreCoord> &worker_cores,
bool lastDump){
#if defined(TRACY_ENABLE)
ZoneScoped;

Expand All @@ -380,7 +381,29 @@ void DeviceProfiler::dumpResults (
{
std::vector<uint32_t> profile_buffer(output_dram_buffer->size()/sizeof(uint32_t), 0);

tt_metal::detail::ReadFromBuffer(output_dram_buffer, profile_buffer);
const auto USE_FAST_DISPATCH = std::getenv("TT_METAL_SLOW_DISPATCH_MODE") == nullptr;
if (USE_FAST_DISPATCH)
{
if (lastDump)
{
if (tt::llrt::OptionsG.get_profiler_do_dispatch_cores())
{
tt_metal::detail::ReadFromBuffer(output_dram_buffer, profile_buffer);
}
}
else
{
EnqueueReadBuffer(device->command_queue(),output_dram_buffer, profile_buffer, true);
}
}
else
{
if (!lastDump)
{
tt_metal::detail::ReadFromBuffer(output_dram_buffer, profile_buffer);
}
}


for (const auto &worker_core : worker_cores) {
readRiscProfilerResults(
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/tools/profiler/profiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ class DeviceProfiler {
void setOutputDir(const std::string& new_output_dir);

//Traverse all cores on the device and dump the device profile results
void dumpResults(Device *device, const vector<CoreCoord> &worker_cores);
void dumpResults(Device *device, const vector<CoreCoord> &worker_cores, bool lastDump);
};

} // namespace tt_metal
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/tools/profiler/tt_metal_profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ void DumpDeviceProfileResults(Device *device, std::vector<CoreCoord> &worker_cor
syncDeviceHost (device, SYNC_CORE, tt_metal_device_profiler_map.at(device_id).sync_program, false);
}
tt_metal_device_profiler_map.at(device_id).setDeviceArchitecture(device->arch());
tt_metal_device_profiler_map.at(device_id).dumpResults(device, worker_cores);
tt_metal_device_profiler_map.at(device_id).dumpResults(device, worker_cores, lastDump);
if (lastDump)
{
// Process is ending, no more device dumps are coming, reset your ref on the buffer so deallocate is the last
Expand Down

0 comments on commit a1e228a

Please sign in to comment.