diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 9c72907c5..274794b07 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -94,7 +94,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx; wire [NUM_BANKS-1:0] per_bank_req_ready; - wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_all; + wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_aos; wire [NUM_REQS-1:0] req_valid_in; wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in; @@ -111,7 +111,8 @@ module VX_local_mem import VX_gpu_pkg::*; #( req_bank_addr[i], mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, - mem_bus_if[i].req_data.tag}; + mem_bus_if[i].req_data.tag + }; assign mem_bus_if[i].req_ready = req_ready_in[i]; end @@ -135,7 +136,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .sel_in (req_bank_idx), .ready_in (req_ready_in), .valid_out (per_bank_req_valid), - .data_out (per_bank_req_data_all), + .data_out (per_bank_req_data_aos), .sel_out (per_bank_req_idx), .ready_out (per_bank_req_ready) ); @@ -146,7 +147,8 @@ module VX_local_mem import VX_gpu_pkg::*; #( per_bank_req_addr[i], per_bank_req_byteen[i], per_bank_req_data[i], - per_bank_req_tag[i]} = per_bank_req_data_all[i]; + per_bank_req_tag[i] + } = per_bank_req_data_aos[i]; end // banks access @@ -160,17 +162,18 @@ module VX_local_mem import VX_gpu_pkg::*; #( `RESET_RELAY (bank_reset, reset); for (genvar i = 0; i < NUM_BANKS; ++i) begin - wire bank_req_valid, bank_req_ready; + wire bank_rsp_valid, bank_rsp_ready; wire [WORD_WIDTH-1:0] bank_rsp_data; VX_sp_ram #( .DATAW (WORD_WIDTH), .SIZE (WORDS_PER_BANK), - .WRENW (WORD_SIZE) + .WRENW (WORD_SIZE), + .NO_RWCHECK (1) ) data_store ( .clk (clk), - .reset (1'b0), - .read (1'b1), + .reset (reset), + .read (per_bank_req_valid[i] && per_bank_req_ready[i] && ~per_bank_req_rw[i]), .write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]), .wren (per_bank_req_byteen[i]), .addr (per_bank_req_addr[i]), @@ -178,19 +181,31 @@ module VX_local_mem import VX_gpu_pkg::*; #( .rdata (bank_rsp_data) ); - // drop write response - assign bank_req_valid = per_bank_req_valid[i] && ~per_bank_req_rw[i]; - assign per_bank_req_ready[i] = bank_req_ready || per_bank_req_rw[i]; + // read-during-write hazard detection + reg [BANK_ADDR_WIDTH-1:0] last_wr_addr; + reg last_wr_valid; + always @(posedge clk) begin + if (reset) begin + last_wr_valid <= 0; + end else begin + last_wr_valid <= per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]; + end + last_wr_addr <= per_bank_req_addr[i]; + end + wire is_rdw_hazard = last_wr_valid && ~per_bank_req_rw[i] && (per_bank_req_addr[i] == last_wr_addr); + + // drop write response and stall on read-during-write hazard + assign bank_rsp_valid = per_bank_req_valid[i] && ~per_bank_req_rw[i] && ~is_rdw_hazard; + assign per_bank_req_ready[i] = (bank_rsp_ready || per_bank_req_rw[i]) && ~is_rdw_hazard; // register BRAM output - VX_elastic_buffer #( - .DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH), - .SIZE (1) + VX_pipe_buffer #( + .DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH) ) bank_buf ( .clk (clk), .reset (bank_reset), - .valid_in (bank_req_valid), - .ready_in (bank_req_ready), + .valid_in (bank_rsp_valid), + .ready_in (bank_rsp_ready), .data_in ({per_bank_req_idx[i], bank_rsp_data, per_bank_req_tag[i]}), .data_out ({per_bank_rsp_idx[i], per_bank_rsp_data[i], per_bank_rsp_tag[i]}), .valid_out (per_bank_rsp_valid[i]), @@ -200,10 +215,10 @@ module VX_local_mem import VX_gpu_pkg::*; #( // bank responses gather - wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data2; + wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_aos; for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign per_bank_rsp_data2[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]}; + assign per_bank_rsp_data_aos[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]}; end wire [NUM_REQS-1:0] rsp_valid_out; @@ -222,7 +237,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( `UNUSED_PIN (collisions), .sel_in (per_bank_rsp_idx), .valid_in (per_bank_rsp_valid), - .data_in (per_bank_rsp_data2), + .data_in (per_bank_rsp_data_aos), .ready_in (per_bank_rsp_ready), .valid_out (rsp_valid_out), .data_out (rsp_data_out),