From 53900bee4f2361528a2262228d01a30ad8413155 Mon Sep 17 00:00:00 2001 From: tinebp Date: Thu, 26 Dec 2024 10:01:36 -0800 Subject: [PATCH] bug fixes --- hw/rtl/VX_types.vh | 10 ++++------ hw/rtl/core/VX_csr_data.sv | 2 +- runtime/stub/utils.cpp | 23 +++++++++++------------ sim/simx/emulator.cpp | 5 +++-- sim/simx/mem_sim.cpp | 10 ++-------- sim/simx/mem_sim.h | 9 +++------ sim/simx/types.h | 2 -- 7 files changed, 24 insertions(+), 37 deletions(-) diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index fc11aba1a..455d42ce1 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -166,10 +166,8 @@ `define VX_CSR_MPM_MEM_WRITES_H 12'hB99 `define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency `define VX_CSR_MPM_MEM_LT_H 12'hB9A -`define VX_CSR_MPM_MEM_BANK_CNTR 12'hB1E // memory bank requests -`define VX_CSR_MPM_MEM_BANK_CNTR_H 12'hB9E -`define VX_CSR_MPM_MEM_BANK_TICK 12'hB1F // memory ticks -`define VX_CSR_MPM_MEM_BANK_TICK_H 12'hB9F +`define VX_CSR_MPM_MEM_BANK_ST 12'hB1E // bank conflicts +`define VX_CSR_MPM_MEM_BANK_ST_H 12'hB9E // PERF: lmem `define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads `define VX_CSR_MPM_LMEM_READS_H 12'hB9B @@ -178,8 +176,8 @@ `define VX_CSR_MPM_LMEM_BANK_ST 12'hB1D // bank conflicts `define VX_CSR_MPM_LMEM_BANK_ST_H 12'hB9D // PERF: coalescer -`define VX_CSR_MPM_COALESCE_MISS 12'hB1E // coalescer misses -`define VX_CSR_MPM_COALESCE_MISS_H 12'hB9E +`define VX_CSR_MPM_COALESCER_MISS 12'hB1F // coalescer misses +`define VX_CSR_MPM_COALESCER_MISS_H 12'hB9F // Machine Performance-monitoring memory counters (class 3) /////////////////// // diff --git a/hw/rtl/core/VX_csr_data.sv b/hw/rtl/core/VX_csr_data.sv index e1a6eb8c6..9ba72a353 100644 --- a/hw/rtl/core/VX_csr_data.sv +++ b/hw/rtl/core/VX_csr_data.sv @@ -272,7 +272,7 @@ import VX_fpu_pkg::*; `CSR_READ_64(`VX_CSR_MPM_MEM_WRITES, read_data_ro_w, sysmem_perf.mem.writes); `CSR_READ_64(`VX_CSR_MPM_MEM_LT, read_data_ro_w, sysmem_perf.mem.latency); // PERF: coalescer - `CSR_READ_64(`VX_CSR_MPM_COALESCE_MISS, read_data_ro_w, sysmem_perf.coalescer.misses); + `CSR_READ_64(`VX_CSR_MPM_COALESCER_MISS, read_data_ro_w, sysmem_perf.coalescer.misses); default:; endcase end diff --git a/runtime/stub/utils.cpp b/runtime/stub/utils.cpp index 3e363b70a..220f916ae 100644 --- a/runtime/stub/utils.cpp +++ b/runtime/stub/utils.cpp @@ -211,8 +211,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { uint64_t mem_reads = 0; uint64_t mem_writes = 0; uint64_t mem_lat = 0; - uint64_t mem_req_counter = 0; - uint64_t mem_ticks = 0; + uint64_t mem_bank_stalls = 0; uint64_t num_cores; CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), { @@ -480,7 +479,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { // PERF: coalescer uint64_t coalescer_misses; - CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_COALESCER_ST, core_id, &coalescer_misses), { + CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_COALESCER_MISS, core_id, &coalescer_misses), { return err; }); int coalescer_utilization = calcAvgPercent(dcache_requests_per_core - coalescer_misses, dcache_requests_per_core); @@ -551,10 +550,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), { return err; }); - CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_CNTR, core_id, &mem_req_counter), { - return err; - }); - CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_TICK, core_id, &mem_ticks), { + CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_ST, core_id, &mem_bank_stalls), { return err; }); } @@ -632,11 +628,14 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { fprintf(stream, "PERF: l3cache mshr stalls=%ld (utilization=%d%%)\n", l3cache_mshr_stalls, mshr_utilization); } - int mem_avg_lat = caclAverage(mem_lat, mem_reads); - int memory_bank_port_utilization = calcAvgPercent(mem_req_counter, (mem_ticks * num_mem_bank_ports)); - fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes); - fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat); - fprintf(stream, "PERF: memory bank port utilization=%d%%\n", memory_bank_port_utilization); + { + uint64_t mem_requests = mem_reads + mem_writes; + int mem_avg_lat = caclAverage(mem_lat, mem_reads); + int mem_bank_utilization = calcAvgPercent(mem_requests, mem_requests + mem_bank_stalls); + fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", mem_requests, mem_reads, mem_writes); + fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat); + fprintf(stream, "PERF: memory bank stalls=%ld (utilization=%d%%)\n", mem_bank_stalls, mem_bank_utilization); + } } break; default: break; diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 0b1a53a56..d05476798 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -599,8 +599,9 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { CSR_READ_64(VX_CSR_MPM_MEM_READS, proc_perf.mem_reads); CSR_READ_64(VX_CSR_MPM_MEM_WRITES, proc_perf.mem_writes); CSR_READ_64(VX_CSR_MPM_MEM_LT, proc_perf.mem_latency); - CSR_READ_64(VX_CSR_MPM_MEM_BANK_CNTR, proc_perf.memsim.counter); - CSR_READ_64(VX_CSR_MPM_MEM_BANK_TICK, proc_perf.memsim.ticks); + CSR_READ_64(VX_CSR_MPM_MEM_BANK_ST, proc_perf.memsim.bank_stalls); + + CSR_READ_64(VX_CSR_MPM_COALESCER_MISS, coalescer_misses); CSR_READ_64(VX_CSR_MPM_LMEM_READS, lmem_perf.reads); CSR_READ_64(VX_CSR_MPM_LMEM_WRITES, lmem_perf.writes); diff --git a/sim/simx/mem_sim.cpp b/sim/simx/mem_sim.cpp index 7ee7de9c1..7e3da729a 100644 --- a/sim/simx/mem_sim.cpp +++ b/sim/simx/mem_sim.cpp @@ -29,7 +29,7 @@ class MemSim::Impl { Config config_; MemCrossBar::Ptr mem_xbar_; DramSim dram_sim_; - PerfStats perf_stats_; + mutable PerfStats perf_stats_; struct DramCallbackArgs { MemSim::Impl* memsim; @@ -57,6 +57,7 @@ class MemSim::Impl { } const PerfStats& perf_stats() const { + perf_stats_.bank_stalls = mem_xbar_->req_collisions(); return perf_stats_; } @@ -66,7 +67,6 @@ class MemSim::Impl { void tick() { dram_sim_.tick(); - uint32_t counter = 0; for (uint32_t i = 0; i < config_.num_banks; ++i) { if (mem_xbar_->ReqOut.at(i).empty()) @@ -102,12 +102,6 @@ class MemSim::Impl { DT(3, simobject_->name() << "-mem-req[" << i << "]: " << mem_req); mem_xbar_->ReqOut.at(i).pop(); - counter++; - } - - perf_stats_.counter += counter; - if (counter > 0) { - ++perf_stats_.ticks; } } }; diff --git a/sim/simx/mem_sim.h b/sim/simx/mem_sim.h index 220d1eb4f..a5aa6ec33 100644 --- a/sim/simx/mem_sim.h +++ b/sim/simx/mem_sim.h @@ -26,17 +26,14 @@ class MemSim : public SimObject{ }; struct PerfStats { - uint64_t counter; - uint64_t ticks; + uint64_t bank_stalls; PerfStats() - : counter(0) - , ticks(0) + : bank_stalls(0) {} PerfStats& operator+=(const PerfStats& rhs) { - this->counter += rhs.counter; - this->ticks += rhs.ticks; + this->bank_stalls += rhs.bank_stalls; return *this; } }; diff --git a/sim/simx/types.h b/sim/simx/types.h index 34d258e9a..220d4b645 100644 --- a/sim/simx/types.h +++ b/sim/simx/types.h @@ -850,9 +850,7 @@ class TxCrossBar : public SimObject> { if (output_idx != -1) { auto& rsp_out = RspOut.at(output_idx); auto& rsp = rsp_out.front(); - uint32_t input_idx = 0; if (lg2_inputs_ != 0) { - input_idx = rsp.tag & (R-1); rsp.tag >>= lg2_inputs_; } DT(4, this->name() << "-rsp" << i << ": " << rsp);