From b6bd6467efe685c27b031e5478a21d5ec3050aed Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 19 Oct 2024 20:04:51 -0700 Subject: [PATCH] cache hit timing optimization --- hw/rtl/cache/VX_bank_flush.sv | 15 +- hw/rtl/cache/VX_cache_bank.sv | 177 +++++++++--------------- hw/rtl/cache/VX_cache_data.sv | 123 ++++++++--------- hw/rtl/cache/VX_cache_define.vh | 1 + hw/rtl/cache/VX_cache_repl.sv | 213 +++++++++++++---------------- hw/rtl/cache/VX_cache_tags.sv | 55 +++----- hw/rtl/libs/VX_dp_ram.sv | 21 ++- hw/rtl/libs/VX_fifo_queue.sv | 16 +-- hw/unittest/generic_queue/Makefile | 2 + 9 files changed, 265 insertions(+), 358 deletions(-) diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index ca28d749b..68eefd363 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -33,7 +33,7 @@ module VX_bank_flush #( output wire flush_init, output wire flush_valid, output wire [`CS_LINE_SEL_BITS-1:0] flush_line, - output wire [NUM_WAYS-1:0] flush_way, + output wire [`CS_WAY_SEL_WIDTH-1:0] flush_way, input wire flush_ready, input wire mshr_empty, input wire bank_empty @@ -113,17 +113,10 @@ module VX_bank_flush #( assign flush_valid = (state == STATE_FLUSH); assign flush_line = counter[`CS_LINE_SEL_BITS-1:0]; - if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way - VX_decoder #( - .N (`CS_WAY_SEL_BITS), - .D (NUM_WAYS) - ) ctr_decoder ( - .sel_in (counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), - .data_in (1'b1), - .data_out (flush_way) - ); + if (WRITEBACK && (NUM_WAYS > 1)) begin : g_flush_way + assign flush_way = counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]; end else begin : g_flush_way_all - assign flush_way = {NUM_WAYS{1'b1}}; + assign flush_way = '0; end endmodule diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 20c0c0612..574659d7e 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -150,19 +150,19 @@ module VX_cache_bank #( wire is_creq_st0, is_creq_st1; wire is_fill_st0, is_fill_st1; wire is_flush_st0, is_flush_st1; - wire [NUM_WAYS-1:0] flush_way_st0; - wire [NUM_WAYS-1:0] evict_way_st0, evict_way_st1; + wire [`CS_WAY_SEL_WIDTH-1:0] flush_way_st0, evict_way_st0; + wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1; - wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1; - wire [`CS_TAG_SEL_BITS-1:0] line_tag_st1; + wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel, line_idx_st0, line_idx_st1; + wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1; + wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0, evict_tag_st1; wire rw_sel, rw_st0, rw_st1; wire [WORD_SEL_WIDTH-1:0] word_idx_sel, word_idx_st0, word_idx_st1; wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1; wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1; wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1; wire [`CS_WORD_WIDTH-1:0] write_word_st0, write_word_st1; - wire [`CS_WORD_WIDTH-1:0] read_data_st1; wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1; wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0; @@ -170,18 +170,18 @@ module VX_cache_bank #( wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1; wire mshr_pending_st0, mshr_pending_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_previd_st0, mshr_previd_st1; + wire is_hit_st0, is_hit_st1; wire mshr_empty; wire flush_valid; wire init_valid; wire [`CS_LINE_SEL_BITS-1:0] flush_sel; - wire [NUM_WAYS-1:0] flush_way; + wire [`CS_WAY_SEL_WIDTH-1:0] flush_way; wire flush_ready; // ensure we have no pending memory request in the bank wire no_pending_req = ~valid_st0 && ~valid_st1 && mreq_queue_empty; - // flush unit VX_bank_flush #( .BANK_ID (BANK_ID), .CACHE_SIZE (CACHE_SIZE), @@ -203,9 +203,7 @@ module VX_cache_bank #( .bank_empty (no_pending_req) ); - logic rdw_hazard, post_hazard; - - wire pipe_stall = crsp_queue_stall || rdw_hazard; + wire pipe_stall = crsp_queue_stall; // inputs arbitration: // mshr replay has highest priority to maximize utilization since there is no miss. @@ -295,8 +293,6 @@ module VX_cache_bank #( assign req_uuid_sel = '0; end - wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; - wire is_init_sel = init_valid; wire is_creq_sel = creq_enable || replay_enable; wire is_fill_sel = fill_enable; @@ -304,7 +300,7 @@ module VX_cache_bank #( wire is_replay_sel = replay_enable; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_WAY_SEL_WIDTH + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), .RESETW (1) ) pipe_reg0 ( .clk (clk), @@ -334,22 +330,18 @@ module VX_cache_bank #( wire do_read_st1 = valid_st1 && is_read_st1; wire do_write_st1 = valid_st1 && is_write_st1; - wire do_fill_st1 = valid_st1 && is_fill_st1; - wire do_flush_st1 = valid_st1 && is_flush_st1 && WRITEBACK; - assign write_word_st0 = data_st0[`CS_WORD_WIDTH-1:0]; + assign line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; + assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0); - wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st1; - wire [NUM_WAYS-1:0] tag_matches_st1; - - wire is_hit_st1 = (| tag_matches_st1); + assign write_word_st0 = data_st0[`CS_WORD_WIDTH-1:0]; wire do_lookup_st0 = do_read_st0 || do_write_st0; - wire do_lookup_st1 = do_read_st1 || do_write_st1; - reg [NUM_WAYS-1:0] victim_way_st0; + wire [`CS_WAY_SEL_WIDTH-1:0] victim_way_st0; + wire [NUM_WAYS-1:0] tag_matches_st0; VX_cache_repl #( .CACHE_SIZE (CACHE_SIZE), @@ -363,10 +355,10 @@ module VX_cache_bank #( .stall (pipe_stall), .hit_valid (do_lookup_st1 && is_hit_st1 && ~pipe_stall), .hit_line (line_idx_st1), - .hit_way (tag_matches_st1), + .hit_way (way_idx_st1), .repl_valid (do_fill_st0 && ~pipe_stall), - .repl_line (line_idx_st0), .repl_line_n(line_idx_sel), + .repl_line (line_idx_st0), .repl_way (victim_way_st0) ); @@ -388,27 +380,29 @@ module VX_cache_bank #( .flush (do_flush_st0 && ~pipe_stall), .fill (do_fill_st0 && ~pipe_stall), .lookup (do_lookup_st0 && ~pipe_stall), - .line_addr (addr_st0), + .line_idx_n (line_idx_sel), + .line_idx (line_idx_st0), + .line_tag (line_tag_st0), .evict_way (evict_way_st0), // outputs - .tag_matches_r(tag_matches_st1), - .line_tag_r (line_tag_st1), - .evict_tag_r(evict_tag_st1), - .evict_way_r(evict_way_st1) + .tag_matches(tag_matches_st0), + .evict_tag (evict_tag_st0) ); + assign is_hit_st0 = (| tag_matches_st0); + wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; assign mshr_id_st0 = is_replay_st0 ? replay_id_st0 : mshr_alloc_id_st0; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, rw_st0, flags_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}), - .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, rw_st1, flags_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1}) + .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, is_hit_st0, rw_st0, flags_st0, evict_tag_st0, line_tag_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}), + .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, is_hit_st1, rw_st1, flags_st1, evict_tag_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1}) ); if (UUID_WIDTH != 0) begin : g_req_uuid_st1 @@ -422,58 +416,12 @@ module VX_cache_bank #( // ensure mshr replay always get a hit `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1 && ~is_hit_st1), ("%t: missed mshr replay", $time)) - if (WRITE_ENABLE) begin : g_rdw_hazard - // This implementation uses single-port BRAMs for the tags and data stores. - // Using different stages for read and write operations requires a pipeline stall in between due to address port sharing. - // Tags fill/flush can perform read and write in the same stage, since no dependency between. - // Data fill/flush can perform read and write in the same stage, since way_idx is available in st0. - // A data read should happen in st0 for its result to be available in st1. - // A data write should happen in st1 when the tag hit status is available. - // The r/w hazard is needed for consecutive writes since they both wonly write in st1. - // The r/w hazard is also not needed for next writethrough fill/flush to the same line. - // For reads or writeback fill/flush to the same line, we sill need the hazard - // because the data writeen in st1 cannot be read at the same time in st0 without extra forwarding logic. - wire is_write_sel = is_creq_sel && rw_sel; - wire is_same_line = (line_idx_sel == line_idx_st0); - always @(posedge clk) begin - if (reset) begin - post_hazard <= 0; - rdw_hazard <= 0; - end else begin - if (~crsp_queue_stall) begin - post_hazard <= rdw_hazard; - rdw_hazard <= do_write_st0 && valid_sel && ~(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel))); - end - end - end - end else begin : g_rdw_hazard_ro - assign rdw_hazard = 0; - assign post_hazard = 0; - end - assign write_word_st1 = data_st1[`CS_WORD_WIDTH-1:0]; `UNUSED_VAR (data_st1) - wire [`CS_LINE_WIDTH-1:0] evict_data_st1; + wire[`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] read_data_st1; wire [LINE_SIZE-1:0] evict_byteen_st1; - wire line_dirty_st1; - - wire data_write; - wire [`CS_LINE_SEL_BITS-1:0] data_line_idx; - - if (WRITE_ENABLE) begin : g_data_ctrl - // by default all data accesses happen in sto and use line_idx_st0. - // data writes should happen in st1 when the tag hit is available, - // and use line_idx_st1 to ensure the correct line is updated. - // if a rdw hazard is active due to conflict, ensure we don't write twice. - assign data_write = do_write_st1 && ~post_hazard && ~crsp_queue_stall; - assign data_line_idx = data_write ? line_idx_st1 : line_idx_st0; - end else begin : g_data_ctrl_ro - `UNUSED_VAR (post_hazard) - `UNUSED_VAR (do_write_st1) - assign data_write = 0; - assign data_line_idx = line_idx_st0; - end + wire evict_dirty_st1; VX_cache_data #( .CACHE_SIZE (CACHE_SIZE), @@ -493,18 +441,18 @@ module VX_cache_bank #( .fill (do_fill_st0 && ~pipe_stall), .flush (do_flush_st0 && ~pipe_stall), .read (do_read_st0 && ~pipe_stall), - .write (data_write), + .write (do_write_st0 && ~pipe_stall), .evict_way (evict_way_st0), - .tag_matches(tag_matches_st1), - .line_idx (data_line_idx), + .tag_matches(tag_matches_st0), + .line_idx (line_idx_st0), .fill_data (data_st0), - .write_word (write_word_st1), - .word_idx (word_idx_st1), - .write_byteen(byteen_st1), + .write_word (write_word_st0), + .word_idx (word_idx_st0), + .write_byteen(byteen_st0), // outputs + .way_idx (way_idx_st1), .read_data (read_data_st1), - .line_dirty (line_dirty_st1), - .evict_data (evict_data_st1), + .evict_dirty(evict_dirty_st1), .evict_byteen(evict_byteen_st1) ); @@ -600,7 +548,7 @@ module VX_cache_bank #( assign crsp_queue_valid = do_read_st1 && is_hit_st1; assign crsp_queue_idx = req_idx_st1; - assign crsp_queue_data = read_data_st1; + assign crsp_queue_data = read_data_st1[word_idx_st1]; assign crsp_queue_tag = tag_st1; VX_elastic_buffer #( @@ -610,7 +558,7 @@ module VX_cache_bank #( ) core_rsp_queue ( .clk (clk), .reset (reset), - .valid_in (crsp_queue_valid && ~rdw_hazard), + .valid_in (crsp_queue_valid), .ready_in (crsp_queue_ready), .data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}), .data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}), @@ -618,9 +566,7 @@ module VX_cache_bank #( .ready_out (core_rsp_ready) ); - // we use 'do_read_st1' instead 'crsp_queue_valid' - // to remove costly 'is_hit_st1' signal from critical paths. - assign crsp_queue_stall = do_read_st1 && ~crsp_queue_ready; + assign crsp_queue_stall = crsp_queue_valid && ~crsp_queue_ready; // schedule memory request @@ -634,7 +580,7 @@ module VX_cache_bank #( wire is_fill_or_flush_st1 = is_fill_st1 || (is_flush_st1 && WRITEBACK); wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1; - wire do_writeback_st1 = do_fill_or_flush_st1 && line_dirty_st1; + wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] evict_addr_st1 = {evict_tag_st1, line_idx_st1}; if (WRITE_ENABLE) begin : g_mreq_queue @@ -642,7 +588,7 @@ module VX_cache_bank #( if (DIRTY_BYTES) begin : g_dirty_bytes // ensure dirty bytes match the tag info wire has_dirty_bytes = (| evict_byteen_st1); - `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (line_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, line_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) + `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) end // issue a fill request on a read/write miss // issue a writeback on a dirty line eviction @@ -651,8 +597,10 @@ module VX_cache_bank #( && ~pipe_stall; assign mreq_queue_addr = is_fill_or_flush_st1 ? evict_addr_st1 : addr_st1; assign mreq_queue_rw = is_fill_or_flush_st1; - assign mreq_queue_data = evict_data_st1; + assign mreq_queue_data = read_data_st1; assign mreq_queue_byteen = is_fill_or_flush_st1 ? evict_byteen_st1 : '1; + `UNUSED_VAR (write_word_st1) + `UNUSED_VAR (byteen_st1) end else begin : g_wt wire [LINE_SIZE-1:0] line_byteen; VX_decoder #( @@ -675,7 +623,6 @@ module VX_cache_bank #( `UNUSED_VAR (is_fill_or_flush_st1) `UNUSED_VAR (do_writeback_st1) `UNUSED_VAR (evict_addr_st1) - `UNUSED_VAR (evict_data_st1) `UNUSED_VAR (evict_byteen_st1) end end else begin : g_mreq_queue_ro @@ -688,8 +635,9 @@ module VX_cache_bank #( assign mreq_queue_byteen = '1; `UNUSED_VAR (do_writeback_st1) `UNUSED_VAR (evict_addr_st1) - `UNUSED_VAR (evict_data_st1) `UNUSED_VAR (evict_byteen_st1) + `UNUSED_VAR (write_word_st1) + `UNUSED_VAR (byteen_st1) end if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid @@ -722,10 +670,6 @@ module VX_cache_bank #( assign mem_req_valid = ~mreq_queue_empty; - `UNUSED_VAR (do_fill_st1) - `UNUSED_VAR (do_flush_st1) - `UNUSED_VAR (evict_way_st1) - /////////////////////////////////////////////////////////////////////////////// `ifdef PERF_ENABLE @@ -740,8 +684,8 @@ module VX_cache_bank #( && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire); always @(posedge clk) begin if (input_stall || pipe_stall) begin - `TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID, - crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard)) + `TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID, + crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full)) end if (mem_rsp_fire) begin `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, @@ -764,32 +708,37 @@ module VX_cache_bank #( `TRACE(3, ("%t: %s tags-init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), line_idx_st0)) end if (do_fill_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end if (do_flush_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end if (do_lookup_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-Lookup: addr=0x%0h, rw=%b, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1)) + if (is_hit_st1) begin + `TRACE(3, ("%t: %s tags-hit: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, way_idx_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) + end else begin + `TRACE(3, ("%t: %s tags-miss: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, way_idx_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) + end end if (do_fill_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%b, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%0d, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, data_st0, req_uuid_st0)) end if (do_flush_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, read_data_st1, req_uuid_st1)) + `TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, crsp_queue_data, req_uuid_st1)) end if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1)) + `TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1)) end if (crsp_queue_fire) begin `TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 22326e63b..65cf9e026 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -41,38 +41,23 @@ module VX_cache_data #( input wire read, input wire write, input wire [`CS_LINE_SEL_BITS-1:0] line_idx, - input wire [NUM_WAYS-1:0] evict_way, + input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way, input wire [NUM_WAYS-1:0] tag_matches, input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data, input wire [`CS_WORD_WIDTH-1:0] write_word, input wire [WORD_SIZE-1:0] write_byteen, input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, // outputs - output wire [`CS_WORD_WIDTH-1:0] read_data, - output wire line_dirty, - output wire [`CS_LINE_WIDTH-1:0] evict_data, + output wire [`CS_WAY_SEL_WIDTH-1:0] way_idx, + output wire [`CS_LINE_WIDTH-1:0] read_data, + output wire evict_dirty, output wire [LINE_SIZE-1:0] evict_byteen ); `UNUSED_PARAM (WORD_SIZE) `UNUSED_VAR (stall) - localparam BYTEENW = (WRITE_ENABLE != 0) ? LINE_SIZE : 1; - - wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; - if (WRITEBACK != 0) begin : g_writeback localparam BYTEEN_DATAW = 1 + ((DIRTY_BYTES != 0) ? LINE_SIZE : 0); - wire [`LOG2UP(NUM_WAYS)-1:0] evict_way_idx, evict_way_idx_r; - - VX_onehot_encoder #( - .N (NUM_WAYS) - ) fill_way_enc ( - .data_in (evict_way), - .data_out (evict_way_idx), - `UNUSED_PIN (valid_out) - ); - - `BUFFER_EX(evict_way_idx_r, evict_way_idx, ~stall, 1); wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_rdata; wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wdata; @@ -80,7 +65,7 @@ module VX_cache_data #( for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata wire evict = fill || flush; - wire evict_way_en = (NUM_WAYS == 1) || evict_way[i]; + wire evict_way_en = (NUM_WAYS == 1) || (evict_way == i); wire dirty_data = write; // only asserted on writes wire dirty_wren = init || (evict && evict_way_en) || (write && tag_matches[i]); if (DIRTY_BYTES != 0) begin : g_dirty_bytes @@ -121,54 +106,47 @@ module VX_cache_data #( ); if (DIRTY_BYTES != 0) begin : g_line_dirty_and_byteen - assign {line_dirty, evict_byteen} = byteen_rdata[evict_way_idx_r]; + assign {evict_dirty, evict_byteen} = byteen_rdata[way_idx]; end else begin : g_line_dirty - assign line_dirty = byteen_rdata[evict_way_idx_r]; + assign evict_dirty = byteen_rdata[way_idx]; assign evict_byteen = '1; end - assign evict_data = line_rdata[evict_way_idx_r]; - end else begin : g_no_writeback `UNUSED_VAR (init) `UNUSED_VAR (flush) - assign line_dirty = 0; - assign evict_data = '0; + assign evict_dirty = 0; assign evict_byteen = '0; end - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store - wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; - wire [BYTEENW-1:0] line_wren; + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; - wire fill_way_en = (NUM_WAYS == 1) || evict_way[i]; + if (WRITE_ENABLE) begin : g_data_store + // create a single write-enable block ram to reduce area overhead + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] line_wren; + wire line_write; + wire line_read; - if (WRITE_ENABLE != 0) begin : g_wdata + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_wdata + wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i); wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; end - assign line_wdata = (fill && fill_way_en) ? fill_data : {`CS_WORDS_PER_LINE{write_word}}; - assign line_wren = {LINE_SIZE{fill && fill_way_en}} - | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); - - end else begin : g_ro_wdata - `UNUSED_VAR (write) - `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_word) - `UNUSED_VAR (word_idx) - assign line_wdata = fill_data; - assign line_wren = fill_way_en; + assign line_wdata[i] = fill ? fill_data : {`CS_WORDS_PER_LINE{write_word}}; + assign line_wren[i] = {LINE_SIZE{fill && fill_way_en}} + | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); end - wire line_write = fill || (write && WRITE_ENABLE); - wire line_read = read || ((fill || flush) && WRITEBACK); + assign line_write = fill || (write && WRITE_ENABLE); + assign line_read = read || ((fill || flush) && WRITEBACK); VX_sp_ram #( - .DATAW (`CS_LINE_WIDTH), + .DATAW (NUM_WAYS * `CS_LINE_WIDTH), .SIZE (`CS_LINES_PER_BANK), - .WRENW (BYTEENW), + .WRENW (NUM_WAYS * LINE_SIZE), .OUT_REG (1) ) data_store ( .clk (clk), @@ -178,35 +156,46 @@ module VX_cache_data #( .wren (line_wren), .addr (line_idx), .wdata (line_wdata), - .rdata (line_rdata[i]) + .rdata (line_rdata) ); + end else begin : g_data_store + `UNUSED_VAR (write) + `UNUSED_VAR (write_byteen) + `UNUSED_VAR (write_word) + `UNUSED_VAR (word_idx) + + // we don't merge the ways into a single block ram due to WREN overhead + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways + wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i); + VX_sp_ram #( + .DATAW (`CS_LINE_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .OUT_REG (1) + ) data_store ( + .clk (clk), + .reset (reset), + .read (read), + .write (fill && fill_way_en), + .wren (1'b1), + .addr (line_idx), + .wdata (fill_data), + .rdata (line_rdata[i]) + ); + end end - wire [`LOG2UP(NUM_WAYS)-1:0] hit_way_idx; + wire [`CS_WAY_SEL_WIDTH-1:0] hit_idx; + VX_onehot_encoder #( .N (NUM_WAYS) - ) hit_idx_enc ( + ) way_idx_enc ( .data_in (tag_matches), - .data_out (hit_way_idx), + .data_out (hit_idx), `UNUSED_PIN (valid_out) ); - if (`CS_WORDS_PER_LINE > 1) begin : g_read_data - // order the data layout to perform ways multiplexing last. - // this allows converting way index to binary in parallel with BRAM read and word indexing. - wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata; - VX_transpose #( - .DATAW (`CS_WORD_WIDTH), - .N (NUM_WAYS), - .M (`CS_WORDS_PER_LINE) - ) transpose ( - .data_in (line_rdata), - .data_out (transposed_rdata) - ); - assign read_data = transposed_rdata[word_idx][hit_way_idx]; - end else begin : g_read_data_1w - `UNUSED_VAR (word_idx) - assign read_data = line_rdata[hit_way_idx]; - end + `BUFFER_EX(way_idx, (read ? hit_idx : evict_way), ~stall, 1); + + assign read_data = line_rdata[way_idx]; endmodule diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index b75845eca..65b239900 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -22,6 +22,7 @@ `define CS_LINE_WIDTH (8 * LINE_SIZE) `define CS_BANK_SIZE (CACHE_SIZE / NUM_BANKS) `define CS_WAY_SEL_BITS `CLOG2(NUM_WAYS) +`define CS_WAY_SEL_WIDTH `UP(`CS_WAY_SEL_BITS) `define CS_LINES_PER_BANK (`CS_BANK_SIZE / (LINE_SIZE * NUM_WAYS)) `define CS_WORDS_PER_LINE (LINE_SIZE / WORD_SIZE) diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index dbd51afdd..24425328d 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -97,135 +97,114 @@ module VX_cache_repl #( input wire stall, input wire hit_valid, input wire [`CS_LINE_SEL_BITS-1:0] hit_line, - input wire [NUM_WAYS-1:0] hit_way, + input wire [`CS_WAY_SEL_WIDTH-1:0] hit_way, input wire repl_valid, input wire [`CS_LINE_SEL_BITS-1:0] repl_line_n, input wire [`CS_LINE_SEL_BITS-1:0] repl_line, - output wire [NUM_WAYS-1:0] repl_way + output wire [`CS_WAY_SEL_WIDTH-1:0] repl_way ); + localparam WAY_SEL_WIDTH = `CS_WAY_SEL_WIDTH; `UNUSED_VAR (stall) - localparam WAY_IDX_BITS = $clog2(NUM_WAYS); - localparam WAY_IDX_WIDTH = `UP(WAY_IDX_BITS); - - if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru - // Pseudo Least Recently Used replacement policy - localparam LRU_WIDTH = `UP(NUM_WAYS-1); - localparam FORCE_BRAM = (LRU_WIDTH * `CS_LINES_PER_BANK) >= 1024; - - wire [WAY_IDX_WIDTH-1:0] repl_way_idx; - wire [WAY_IDX_WIDTH-1:0] hit_way_idx; - wire [LRU_WIDTH-1:0] plru_rdata; - wire [LRU_WIDTH-1:0] plru_wdata; - wire [LRU_WIDTH-1:0] plru_wmask; - - VX_dp_ram #( - .DATAW (LRU_WIDTH), - .SIZE (`CS_LINES_PER_BANK), - .WRENW (LRU_WIDTH), - .OUT_REG (FORCE_BRAM) - ) plru_store ( - .clk (clk), - .reset (reset), - .read (FORCE_BRAM ? ~stall : repl_valid), - .write (hit_valid), - .wren (plru_wmask), - .waddr (hit_line), - .raddr (FORCE_BRAM ? repl_line_n : repl_line), - .wdata (plru_wdata), - .rdata (plru_rdata) - ); - - VX_onehot_encoder #( - .N (NUM_WAYS) - ) hit_way_enc ( - .data_in (hit_way), - .data_out (hit_way_idx), - `UNUSED_PIN (valid_out) - ); - - plru_decoder #( - .NUM_WAYS (NUM_WAYS) - ) plru_dec ( - .way_idx (hit_way_idx), - .lru_data (plru_wdata), - .lru_mask (plru_wmask) - ); - - plru_encoder #( - .NUM_WAYS (NUM_WAYS) - ) plru_enc ( - .lru_in (plru_rdata), - .way_idx (repl_way_idx) - ); - - VX_decoder #( - .N (WAY_IDX_BITS) - ) repl_way_dec ( - .sel_in (repl_way_idx), - .data_in (1'b1), - .data_out (repl_way) - ); - - end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic - // Cyclic replacement policy - localparam CTR_WIDTH = $clog2(NUM_WAYS); - localparam FORCE_BRAM = (CTR_WIDTH * `CS_LINES_PER_BANK) >= 1024; - - `UNUSED_VAR (hit_valid) - `UNUSED_VAR (hit_line) - `UNUSED_VAR (hit_way) - `UNUSED_VAR (repl_valid) - - wire [`UP(CTR_WIDTH)-1:0] ctr_rdata; - wire [`UP(CTR_WIDTH)-1:0] ctr_wdata = ctr_rdata + 1; - - VX_dp_ram #( - .DATAW (`UP(CTR_WIDTH)), - .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (FORCE_BRAM) - ) ctr_store ( - .clk (clk), - .reset (reset), - .read (FORCE_BRAM ? ~stall : repl_valid), - .write (repl_valid), - .wren (1'b1), - .raddr (FORCE_BRAM ? repl_line_n : repl_line), - .waddr (repl_line), - .wdata (ctr_wdata), - .rdata (ctr_rdata) - ); - - VX_decoder #( - .N (WAY_IDX_BITS) - ) ctr_decoder ( - .sel_in (ctr_rdata), - .data_in (1'b1), - .data_out (repl_way) - ); - end else begin : g_random - // Random replacement policy - `UNUSED_VAR (hit_valid) - `UNUSED_VAR (hit_line) - `UNUSED_VAR (hit_way) - `UNUSED_VAR (repl_valid) - `UNUSED_VAR (repl_line) - `UNUSED_VAR (repl_line_n) - if (NUM_WAYS > 1) begin : g_repl_way - reg [NUM_WAYS-1:0] victim_way; + if (NUM_WAYS > 1) begin : g_enable + if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru + // Pseudo Least Recently Used replacement policy + localparam LRU_WIDTH = `UP(NUM_WAYS-1); + localparam USE_BRAM = (LRU_WIDTH * `CS_LINES_PER_BANK) >= `MAX_LUTRAM; + + wire [LRU_WIDTH-1:0] plru_rdata; + wire [LRU_WIDTH-1:0] plru_wdata; + wire [LRU_WIDTH-1:0] plru_wmask; + + VX_dp_ram #( + .DATAW (LRU_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (LRU_WIDTH), + .OUT_REG (USE_BRAM) + ) plru_store ( + .clk (clk), + .reset (reset), + .read (USE_BRAM ? ~stall : repl_valid), + .write (hit_valid), + .wren (plru_wmask), + .waddr (hit_line), + .raddr (USE_BRAM ? repl_line_n : repl_line), + .wdata (plru_wdata), + .rdata (plru_rdata) + ); + + plru_decoder #( + .NUM_WAYS (NUM_WAYS) + ) plru_dec ( + .way_idx (hit_way), + .lru_data (plru_wdata), + .lru_mask (plru_wmask) + ); + + plru_encoder #( + .NUM_WAYS (NUM_WAYS) + ) plru_enc ( + .lru_in (plru_rdata), + .way_idx (repl_way) + ); + + end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic + // Cyclic replacement policy + localparam USE_BRAM = (WAY_SEL_WIDTH * `CS_LINES_PER_BANK) >= `MAX_LUTRAM; + + `UNUSED_VAR (hit_valid) + `UNUSED_VAR (hit_line) + `UNUSED_VAR (hit_way) + `UNUSED_VAR (repl_valid) + + wire [WAY_SEL_WIDTH-1:0] ctr_rdata; + wire [WAY_SEL_WIDTH-1:0] ctr_wdata = ctr_rdata + 1; + + VX_dp_ram #( + .DATAW (WAY_SEL_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .OUT_REG (USE_BRAM) + ) ctr_store ( + .clk (clk), + .reset (reset), + .read (USE_BRAM ? ~stall : repl_valid), + .write (repl_valid), + .wren (1'b1), + .raddr (USE_BRAM ? repl_line_n : repl_line), + .waddr (repl_line), + .wdata (ctr_wdata), + .rdata (ctr_rdata) + ); + + assign repl_way = ctr_rdata; + end else begin : g_random + // Random replacement policy + `UNUSED_VAR (hit_valid) + `UNUSED_VAR (hit_line) + `UNUSED_VAR (hit_way) + `UNUSED_VAR (repl_valid) + `UNUSED_VAR (repl_line) + `UNUSED_VAR (repl_line_n) + reg [WAY_SEL_WIDTH-1:0] victim_idx; always @(posedge clk) begin if (reset) begin - victim_way <= 1; + victim_idx <= 0; end else if (~stall) begin - victim_way <= {victim_way[NUM_WAYS-2:0], victim_way[NUM_WAYS-1]}; + victim_idx <= victim_idx + 1; end end - assign repl_way = victim_way; - end else begin : g_repl_way_1 - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - assign repl_way = 1'b1; + assign repl_way = victim_idx; end + end else begin : g_disable + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + `UNUSED_VAR (hit_valid) + `UNUSED_VAR (hit_line) + `UNUSED_VAR (hit_way) + `UNUSED_VAR (repl_valid) + `UNUSED_VAR (repl_line) + `UNUSED_VAR (repl_line_n) + assign repl_way = 1'b0; end endmodule diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 8793420e1..71f7809dc 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -36,50 +36,35 @@ module VX_cache_tags #( input wire flush, input wire fill, input wire lookup, - input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, - input wire [NUM_WAYS-1:0] evict_way, + input wire [`CS_LINE_SEL_BITS-1:0] line_idx_n, + input wire [`CS_LINE_SEL_BITS-1:0] line_idx, + input wire [`CS_TAG_SEL_BITS-1:0] line_tag, + input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way, // outputs - output wire [NUM_WAYS-1:0] tag_matches_r, - output wire [`CS_TAG_SEL_BITS-1:0] line_tag_r, - output wire [NUM_WAYS-1:0] evict_way_r, - output wire [`CS_TAG_SEL_BITS-1:0] evict_tag_r + output wire [NUM_WAYS-1:0] tag_matches, + output wire [`CS_TAG_SEL_BITS-1:0] evict_tag ); // valid, tag localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS; - wire [`CS_LINE_SEL_BITS-1:0] line_idx = line_addr[`CS_LINE_SEL_BITS-1:0]; - wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr); - wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag; wire [NUM_WAYS-1:0] read_valid; - - if (NUM_WAYS > 1) begin : g_evict_way - `BUFFER_EX(evict_way_r, evict_way, ~stall, 1); - end else begin : g_evict_way_0 - `UNUSED_VAR (evict_way) - assign evict_way_r = 1'b1; - end + `UNUSED_VAR (lookup) if (WRITEBACK) begin : g_evict_tag_wb - VX_onehot_mux #( - .DATAW (`CS_TAG_SEL_BITS), - .N (NUM_WAYS) - ) evict_tag_sel ( - .data_in (read_tag), - .sel_in (evict_way_r), - .data_out (evict_tag_r) - ); + assign evict_tag = read_tag[evict_way]; end else begin : g_evict_tag_wt - assign evict_tag_r = '0; + assign evict_tag = '0; end for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store - wire do_fill = fill && evict_way[i]; - wire do_flush = flush && (!WRITEBACK || evict_way[i]); // flush the whole line in writethrough mode + wire way_en = (NUM_WAYS == 1) || (evict_way == i); + wire do_fill = fill && way_en; + wire do_flush = flush && (!WRITEBACK || way_en); // flush the whole line in writethrough mode - wire line_read = lookup || (WRITEBACK && (fill || flush)); + //wire line_read = lookup || (WRITEBACK && (fill || flush)); wire line_write = init || do_fill || do_flush; wire line_valid = fill; @@ -89,26 +74,26 @@ module VX_cache_tags #( assign line_wdata = {line_valid, line_tag}; assign {read_valid[i], read_tag[i]} = line_rdata; - VX_sp_ram #( + VX_dp_ram #( .DATAW (TAG_WIDTH), .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (1) + .OUT_REG (1), + .WRITE_MODE ("W") ) tag_store ( .clk (clk), .reset (reset), - .read (line_read), + .read (~stall), .write (line_write), .wren (1'b1), - .addr (line_idx), + .waddr (line_idx), + .raddr (line_idx_n), .wdata (line_wdata), .rdata (line_rdata) ); end - `BUFFER_EX(line_tag_r, line_tag, ~stall, 1); - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_matches - assign tag_matches_r[i] = read_valid[i] && (line_tag_r == read_tag[i]); + assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]); end endmodule diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index b770cfa68..7616aa5b9 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -61,7 +61,7 @@ module VX_dp_ram #( `ifdef SYNTHESIS `ifdef QUARTUS - localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "MLAB, no_rw_check" : ""); + localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "MLAB, no_rw_check" : "auto"); localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "-name add_pass_through_logic_to_inferred_rams off" : ""; `define RAM_ARRAY (* ramstyle = RAM_STYLE_VALUE *) reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ @@ -70,9 +70,9 @@ module VX_dp_ram #( end \ end `define RAM_NO_RWCHECK (* altera_attribute = RAM_NO_RWCHECK_VALUE *) -`else - localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : ""); - localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "no" : ""; +`elif VIVADO + localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : "auto"); + localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "no" : "auto"; `define RAM_ARRAY (* ram_style = RAM_STYLE_VALUE *) reg [DATAW-1:0] ram [0:SIZE-1]; `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ if (wren[i]) begin \ @@ -80,6 +80,14 @@ module VX_dp_ram #( end \ end `define RAM_NO_RWCHECK (* rw_addr_collision = RAM_NO_RWCHECK_VALUE *) +`else + `define RAM_ARRAY reg [DATAW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end + `define RAM_NO_RWCHECK `endif if (OUT_REG) begin : g_out_reg reg [DATAW-1:0] rdata_r; @@ -122,7 +130,7 @@ module VX_dp_ram #( rdata_r <= ram[raddr]; end end - end end else if (WRITE_MODE == "U") begin : g_undefined + end else if (WRITE_MODE == "U") begin : g_undefined `RAM_NO_RWCHECK `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -138,7 +146,8 @@ module VX_dp_ram #( end else begin `STATIC_ASSERT(0, ("invalid write mode: %s", WRITE_MODE)) end - else begin : g_no_out_reg + assign rdata = rdata_r; + end else begin : g_no_out_reg `UNUSED_VAR (read) `RAM_NO_RWCHECK `RAM_ARRAY `RAM_INITIALIZATION diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 1410a0dd0..9323c4dc0 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -20,7 +20,7 @@ module VX_fifo_queue #( parameter ALM_FULL = (DEPTH - 1), parameter ALM_EMPTY = 1, parameter OUT_REG = 0, - parameter LUTRAM = ((DATAW * DEPTH) < `MAX_LUTRAM), + parameter LUTRAM = 0, parameter SIZEW = `CLOG2(DEPTH+1) ) ( input wire clk, @@ -42,9 +42,6 @@ module VX_fifo_queue #( `STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!")) `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) - `UNUSED_PARAM (OUT_REG) - `UNUSED_PARAM (LUTRAM) - VX_pending_size #( .SIZE (DEPTH), .ALM_EMPTY (ALM_EMPTY), @@ -62,6 +59,8 @@ module VX_fifo_queue #( ); if (DEPTH == 1) begin : g_depth_1 + `UNUSED_PARAM (OUT_REG) + `UNUSED_PARAM (LUTRAM) reg [DATAW-1:0] head_r; @@ -75,6 +74,7 @@ module VX_fifo_queue #( end else begin : g_depth_n + localparam USE_BRAM = !LUTRAM && ((DATAW * DEPTH) >= `MAX_LUTRAM); localparam ADDRW = `CLOG2(DEPTH); wire [DATAW-1:0] data_out_w; @@ -95,17 +95,17 @@ module VX_fifo_queue #( end end - wire [ADDRW-1:0] rd_ptr_w = LUTRAM ? rd_ptr_r : rd_ptr_n; + wire [ADDRW-1:0] rd_ptr_w = USE_BRAM ? rd_ptr_n : rd_ptr_r; wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); wire bypass = push && (empty || (going_empty && pop)); - wire read = ((OUT_REG != 0) || !LUTRAM) ? ~bypass : pop; + wire read = ((OUT_REG != 0) || USE_BRAM) ? ~bypass : pop; VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .LUTRAM (LUTRAM), - .OUT_REG(!LUTRAM), + .LUTRAM (!USE_BRAM), + .OUT_REG(USE_BRAM), .WRITE_MODE("W") ) dp_ram ( .clk (clk), diff --git a/hw/unittest/generic_queue/Makefile b/hw/unittest/generic_queue/Makefile index 0adf78fae..ad79c6f94 100644 --- a/hw/unittest/generic_queue/Makefile +++ b/hw/unittest/generic_queue/Makefile @@ -21,4 +21,6 @@ RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs TOP := VX_fifo_queue +PARAMS := -GDATAW=32 -GDEPTH=8 + include ../common.mk \ No newline at end of file