Skip to content

Commit

Permalink
bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
tinebp committed Dec 26, 2024
1 parent 704f525 commit 53900be
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 37 deletions.
10 changes: 4 additions & 6 deletions hw/rtl/VX_types.vh
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,8 @@
`define VX_CSR_MPM_MEM_WRITES_H 12'hB99
`define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency
`define VX_CSR_MPM_MEM_LT_H 12'hB9A
`define VX_CSR_MPM_MEM_BANK_CNTR 12'hB1E // memory bank requests
`define VX_CSR_MPM_MEM_BANK_CNTR_H 12'hB9E
`define VX_CSR_MPM_MEM_BANK_TICK 12'hB1F // memory ticks
`define VX_CSR_MPM_MEM_BANK_TICK_H 12'hB9F
`define VX_CSR_MPM_MEM_BANK_ST 12'hB1E // bank conflicts
`define VX_CSR_MPM_MEM_BANK_ST_H 12'hB9E
// PERF: lmem
`define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads
`define VX_CSR_MPM_LMEM_READS_H 12'hB9B
Expand All @@ -178,8 +176,8 @@
`define VX_CSR_MPM_LMEM_BANK_ST 12'hB1D // bank conflicts
`define VX_CSR_MPM_LMEM_BANK_ST_H 12'hB9D
// PERF: coalescer
`define VX_CSR_MPM_COALESCE_MISS 12'hB1E // coalescer misses
`define VX_CSR_MPM_COALESCE_MISS_H 12'hB9E
`define VX_CSR_MPM_COALESCER_MISS 12'hB1F // coalescer misses
`define VX_CSR_MPM_COALESCER_MISS_H 12'hB9F

// Machine Performance-monitoring memory counters (class 3) ///////////////////
// <Add your own counters: use addresses hB03..hB1F, hB83..hB9F>
Expand Down
2 changes: 1 addition & 1 deletion hw/rtl/core/VX_csr_data.sv
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ import VX_fpu_pkg::*;
`CSR_READ_64(`VX_CSR_MPM_MEM_WRITES, read_data_ro_w, sysmem_perf.mem.writes);
`CSR_READ_64(`VX_CSR_MPM_MEM_LT, read_data_ro_w, sysmem_perf.mem.latency);
// PERF: coalescer
`CSR_READ_64(`VX_CSR_MPM_COALESCE_MISS, read_data_ro_w, sysmem_perf.coalescer.misses);
`CSR_READ_64(`VX_CSR_MPM_COALESCER_MISS, read_data_ro_w, sysmem_perf.coalescer.misses);
default:;
endcase
end
Expand Down
23 changes: 11 additions & 12 deletions runtime/stub/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
uint64_t mem_reads = 0;
uint64_t mem_writes = 0;
uint64_t mem_lat = 0;
uint64_t mem_req_counter = 0;
uint64_t mem_ticks = 0;
uint64_t mem_bank_stalls = 0;

uint64_t num_cores;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), {
Expand Down Expand Up @@ -480,7 +479,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {

// PERF: coalescer
uint64_t coalescer_misses;
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_COALESCER_ST, core_id, &coalescer_misses), {
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_COALESCER_MISS, core_id, &coalescer_misses), {
return err;
});
int coalescer_utilization = calcAvgPercent(dcache_requests_per_core - coalescer_misses, dcache_requests_per_core);
Expand Down Expand Up @@ -551,10 +550,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), {
return err;
});
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_CNTR, core_id, &mem_req_counter), {
return err;
});
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_TICK, core_id, &mem_ticks), {
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_ST, core_id, &mem_bank_stalls), {
return err;
});
}
Expand Down Expand Up @@ -632,11 +628,14 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
fprintf(stream, "PERF: l3cache mshr stalls=%ld (utilization=%d%%)\n", l3cache_mshr_stalls, mshr_utilization);
}

int mem_avg_lat = caclAverage(mem_lat, mem_reads);
int memory_bank_port_utilization = calcAvgPercent(mem_req_counter, (mem_ticks * num_mem_bank_ports));
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes);
fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat);
fprintf(stream, "PERF: memory bank port utilization=%d%%\n", memory_bank_port_utilization);
{
uint64_t mem_requests = mem_reads + mem_writes;
int mem_avg_lat = caclAverage(mem_lat, mem_reads);
int mem_bank_utilization = calcAvgPercent(mem_requests, mem_requests + mem_bank_stalls);
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", mem_requests, mem_reads, mem_writes);
fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat);
fprintf(stream, "PERF: memory bank stalls=%ld (utilization=%d%%)\n", mem_bank_stalls, mem_bank_utilization);
}
} break;
default:
break;
Expand Down
5 changes: 3 additions & 2 deletions sim/simx/emulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -599,8 +599,9 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
CSR_READ_64(VX_CSR_MPM_MEM_READS, proc_perf.mem_reads);
CSR_READ_64(VX_CSR_MPM_MEM_WRITES, proc_perf.mem_writes);
CSR_READ_64(VX_CSR_MPM_MEM_LT, proc_perf.mem_latency);
CSR_READ_64(VX_CSR_MPM_MEM_BANK_CNTR, proc_perf.memsim.counter);
CSR_READ_64(VX_CSR_MPM_MEM_BANK_TICK, proc_perf.memsim.ticks);
CSR_READ_64(VX_CSR_MPM_MEM_BANK_ST, proc_perf.memsim.bank_stalls);

CSR_READ_64(VX_CSR_MPM_COALESCER_MISS, coalescer_misses);

CSR_READ_64(VX_CSR_MPM_LMEM_READS, lmem_perf.reads);
CSR_READ_64(VX_CSR_MPM_LMEM_WRITES, lmem_perf.writes);
Expand Down
10 changes: 2 additions & 8 deletions sim/simx/mem_sim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class MemSim::Impl {
Config config_;
MemCrossBar::Ptr mem_xbar_;
DramSim dram_sim_;
PerfStats perf_stats_;
mutable PerfStats perf_stats_;

struct DramCallbackArgs {
MemSim::Impl* memsim;
Expand Down Expand Up @@ -57,6 +57,7 @@ class MemSim::Impl {
}

const PerfStats& perf_stats() const {
perf_stats_.bank_stalls = mem_xbar_->req_collisions();
return perf_stats_;
}

Expand All @@ -66,7 +67,6 @@ class MemSim::Impl {

void tick() {
dram_sim_.tick();
uint32_t counter = 0;

for (uint32_t i = 0; i < config_.num_banks; ++i) {
if (mem_xbar_->ReqOut.at(i).empty())
Expand Down Expand Up @@ -102,12 +102,6 @@ class MemSim::Impl {
DT(3, simobject_->name() << "-mem-req[" << i << "]: " << mem_req);

mem_xbar_->ReqOut.at(i).pop();
counter++;
}

perf_stats_.counter += counter;
if (counter > 0) {
++perf_stats_.ticks;
}
}
};
Expand Down
9 changes: 3 additions & 6 deletions sim/simx/mem_sim.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,14 @@ class MemSim : public SimObject<MemSim>{
};

struct PerfStats {
uint64_t counter;
uint64_t ticks;
uint64_t bank_stalls;

PerfStats()
: counter(0)
, ticks(0)
: bank_stalls(0)
{}

PerfStats& operator+=(const PerfStats& rhs) {
this->counter += rhs.counter;
this->ticks += rhs.ticks;
this->bank_stalls += rhs.bank_stalls;
return *this;
}
};
Expand Down
2 changes: 0 additions & 2 deletions sim/simx/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -850,9 +850,7 @@ class TxCrossBar : public SimObject<TxCrossBar<Req, Rsp>> {
if (output_idx != -1) {
auto& rsp_out = RspOut.at(output_idx);
auto& rsp = rsp_out.front();
uint32_t input_idx = 0;
if (lg2_inputs_ != 0) {
input_idx = rsp.tag & (R-1);
rsp.tag >>= lg2_inputs_;
}
DT(4, this->name() << "-rsp" << i << ": " << rsp);
Expand Down

0 comments on commit 53900be

Please sign in to comment.