Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Develop #184

Merged
merged 6 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions hw/rtl/VX_define.vh
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,11 @@
`define PERF_CTR_BITS 44

`ifndef NDEBUG
`define UUID_ENABLE
`define UUID_WIDTH 44
`else
`ifdef SCOPE
`define UUID_ENABLE
`define UUID_WIDTH 44
`else
`define UUID_WIDTH 1
Expand Down
32 changes: 25 additions & 7 deletions hw/rtl/core/VX_issue_slice.sv
Original file line number Diff line number Diff line change
Expand Up @@ -91,29 +91,47 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
`ifdef SCOPE
`ifdef DBG_SCOPE_ISSUE
`SCOPE_IO_SWITCH (1);
wire decode_fire = decode_if.valid && decode_if.ready;
wire operands_fire = operands_if.valid && operands_if.ready;
`NEG_EDGE (reset_negedge, reset);
`SCOPE_TAP_EX (0, 2, 2, 2, (
`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS +
1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) +
`UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1
`SCOPE_TAP_EX (0, 2, 4, 3, (
`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS * 4 +
`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS + (3 * `XLEN) +
`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * `XLEN) + 1
), {
decode_if.valid,
decode_if.ready,
operands_if.valid,
operands_if.ready
}, {
decode_fire,
operands_fire,
writeback_if.valid // ack-free
}, {
decode_if.data.uuid,
decode_if.data.wid,
decode_if.data.tmask,
decode_if.data.PC,
decode_if.data.ex_type,
decode_if.data.op_type,
decode_if.data.wb,
decode_if.data.rd,
decode_if.data.rs1,
decode_if.data.rs2,
decode_if.data.rs3,
operands_if.data.uuid,
operands_if.data.wis,
operands_if.data.tmask,
operands_if.data.PC,
operands_if.data.ex_type,
operands_if.data.op_type,
operands_if.data.wb,
operands_if.data.rd,
operands_if.data.rs1_data,
operands_if.data.rs2_data,
operands_if.data.rs3_data,
operands_if.data.rs1_data[0],
operands_if.data.rs2_data[0],
operands_if.data.rs3_data[0],
writeback_if.data.uuid,
writeback_if.data.wis,
writeback_if.data.tmask,
writeback_if.data.rd,
writeback_if.data.data,
Expand Down
2 changes: 1 addition & 1 deletion hw/rtl/core/VX_lsu_slice.sv
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
wire lsu_mem_rsp_ready;

VX_mem_scheduler #(
.INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)),
.INSTANCE_ID ($sformatf("%s-memsched", INSTANCE_ID)),
.CORE_REQS (NUM_LANES),
.MEM_CHANNELS(NUM_LANES),
.WORD_SIZE (LSU_WORD_SIZE),
Expand Down
6 changes: 4 additions & 2 deletions hw/rtl/libs/VX_elastic_buffer.sv
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,10 @@ module VX_elastic_buffer #(
wire [DATAW-1:0] data_out_t;
wire ready_out_t;

wire valid_out_t = ~empty;

wire push = valid_in && ready_in;
wire pop = ~empty && ready_out_t;
wire pop = valid_out_t && ready_out_t;

VX_fifo_queue #(
.DATAW (DATAW),
Expand Down Expand Up @@ -127,7 +129,7 @@ module VX_elastic_buffer #(
) out_buf (
.clk (clk),
.reset (reset),
.valid_in (~empty),
.valid_in (valid_out_t),
.data_in (data_out_t),
.ready_in (ready_out_t),
.valid_out (valid_out),
Expand Down
24 changes: 13 additions & 11 deletions hw/rtl/libs/VX_mem_scheduler.sv
Original file line number Diff line number Diff line change
Expand Up @@ -459,26 +459,28 @@ module VX_mem_scheduler #(

end else begin : g_rsp_full

reg [(CORE_BATCHES * CORE_CHANNELS * WORD_WIDTH)-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
reg [CORE_BATCHES-1:0][CORE_CHANNELS-1:0][WORD_WIDTH-1:0] rsp_store_n;
wire [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n;
reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0];

always @(*) begin
rsp_store_n = rsp_store[ibuf_raddr];
for (integer i = 0; i < CORE_CHANNELS; ++i) begin
if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin
rsp_store_n[rsp_batch_idx][i] = mem_rsp_data_s[i];
for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store
for (genvar j = 0; j < CORE_BATCHES; ++j) begin : g_j
reg [WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
wire rsp_wren = mem_rsp_fire_s
&& (BATCH_SEL_WIDTH'(j) == rsp_batch_idx)
&& ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]);
always @(posedge clk) begin
if (rsp_wren) begin
rsp_store[ibuf_raddr] <= mem_rsp_data_s[i];
end
end
assign rsp_store_n[i][j] = rsp_wren ? mem_rsp_data_s[i] : rsp_store[ibuf_raddr];
end
end

always @(posedge clk) begin
if (ibuf_push) begin
rsp_orig_mask[ibuf_waddr] <= core_req_mask;
end
if (mem_rsp_valid_s) begin
rsp_store[ibuf_raddr] <= rsp_store_n;
end
end

assign crsp_valid = mem_rsp_valid_s && rsp_complete;
Expand All @@ -488,7 +490,7 @@ module VX_mem_scheduler #(
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data
localparam i = r / CORE_CHANNELS;
localparam j = r % CORE_CHANNELS;
assign crsp_data[r] = rsp_store_n[i][j];
assign crsp_data[r] = rsp_store_n[j][i];
end

assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;
Expand Down
22 changes: 13 additions & 9 deletions hw/rtl/libs/VX_stream_buffer.sv
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ module VX_stream_buffer #(
assign valid_out = valid_in;
assign data_out = data_in;

end else if (OUT_REG != 0) begin : g_with_reg
end else if (OUT_REG != 0) begin : g_out_reg

reg [DATAW-1:0] data_out_r;
reg [DATAW-1:0] buffer;
Expand Down Expand Up @@ -84,23 +84,27 @@ module VX_stream_buffer #(
assign valid_out = valid_out_r;
assign data_out = data_out_r;

end else begin : g_no_reg
end else begin : g_no_out_reg

reg [1:0][DATAW-1:0] shift_reg;
reg [1:0] fifo_state;
reg [1:0] fifo_state, fifo_state_n;

wire fire_in = valid_in && ready_in;
wire fire_in = valid_in && ready_in;
wire fire_out = valid_out && ready_out;

// Combinational next-state logic for the 2-bit fifo_state value.
// A push without a pop shifts a 1 in at bit 0; a pop without a push shifts
// the state right by one; a simultaneous push and pop, or neither, holds.
// Starting from the reset value 2'b00, these transitions only ever produce
// 00, 01 and 11.
always @(*) begin
case ({fire_in, fire_out})
2'b10: fifo_state_n = {fifo_state[0], 1'b1}; // push only: 00 -> 01, 01 -> 11
2'b01: fifo_state_n = {1'b0, fifo_state[1]}; // pop only:  11 -> 01, 01 -> 00
default: fifo_state_n = fifo_state; // push+pop or idle: hold
endcase
end

always @(posedge clk) begin
if (reset) begin
fifo_state <= 2'b00;
end else begin
case ({fire_in, fire_out})
2'b10: fifo_state <= {fifo_state[0], 1'b1}; // push only: 00 -> 01, 01 -> 11
2'b01: fifo_state <= {1'b0, fifo_state[1]}; // pop only:  11 -> 01, 01 -> 00
default: fifo_state <= fifo_state;
endcase
fifo_state <= fifo_state_n;
end
end

Expand Down
Loading