-- File: opa_decode.vhd
-- opa: Open Processor Architecture
-- Copyright (C) 2014-2016 Wesley W. Terpstra
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation, either version 3 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program. If not, see <http://www.gnu.org/licenses/>.
--
-- To apply the GPL to my VHDL, please follow these definitions:
-- Program - The entire collection of VHDL in this project and any
-- netlist or floorplan derived from it.
-- System Library - Any macro that translates directly to hardware
-- e.g. registers, IO pins, or memory blocks
--
-- My intent is that if you include OPA into your project, all of the HDL
-- and other design files that go into the same physical chip must also
-- be released under the GPL. If this does not cover your usage, then you
-- must consult me directly to receive the code under a different license.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.opa_pkg.all;
use work.opa_isa_base_pkg.all;
use work.opa_functions_pkg.all;
use work.opa_components_pkg.all;
use work.opa_isa_pkg.all;
-- opa_decode: instruction decode stage.
--
-- Takes one fetch line of instruction words from the icache, decodes every
-- slot with f_opa_isa_decode, appends the useful slots to an internal buffer
-- and hands c_renamers decoded ops at a time to the renamer and the register
-- file. While decoding it compares the fetch-time jump prediction with the
-- flow-control flags found in the decoded ops and, on a mismatch, reports a
-- corrected (static) prediction back through the predict_*_o ports.
--
-- All address-like ports carry PC bits (adr_wide-1 downto op_align), i.e. the
-- bits below the instruction alignment are not transported.
entity opa_decode is
generic(
g_isa : t_opa_isa;
g_config : t_opa_config;
g_target : t_opa_target);
port(
clk_i : in std_logic;
-- Active-low asynchronous reset (clears the fill counter, the aux counter
-- and the "ignore next strobe" flag).
rst_n_i : in std_logic;
-- Predicted jumps?
-- predict_jump_i has one bit per fetch slot; predict_hit_i is a single flag
-- that is replicated over all slots when checking ops marked 'take'.
predict_hit_i : in std_logic;
predict_jump_i : in std_logic_vector(f_opa_fetchers(g_config)-1 downto 0);
-- Push a return stack entry
-- predict_ret_o is the PC that follows the jumping slot (jump PC + 1 op).
predict_push_o : out std_logic;
predict_ret_o : out std_logic_vector(f_opa_adr_wide(g_config)-1 downto f_opa_op_align(g_isa));
-- Fixup PC to new target
-- Driven from the renamer's fault inputs while rename_fault_i='1', otherwise
-- from the static prediction computed in this stage.
predict_fault_o : out std_logic;
predict_return_o : out std_logic;
predict_jump_o : out std_logic_vector(f_opa_fetchers(g_config)-1 downto 0);
predict_source_o : out std_logic_vector(f_opa_adr_wide(g_config)-1 downto f_opa_op_align(g_isa));
predict_target_o : out std_logic_vector(f_opa_adr_wide(g_config)-1 downto f_opa_op_align(g_isa));
predict_return_i : in std_logic_vector(f_opa_adr_wide(g_config)-1 downto f_opa_op_align(g_isa));
-- Instructions delivered from icache
-- icache_pc_i is the PC of the delivered line, icache_pcn_i the PC fetch
-- intends to load next, icache_dat_i the raw instruction words.
icache_stb_i : in std_logic;
icache_stall_o : out std_logic;
icache_pc_i : in std_logic_vector(f_opa_adr_wide(g_config)-1 downto f_opa_op_align(g_isa));
icache_pcn_i : in std_logic_vector(f_opa_adr_wide(g_config)-1 downto f_opa_op_align(g_isa));
icache_dat_i : in std_logic_vector(f_opa_fetch_bits(g_isa,g_config)-1 downto 0);
-- Feed data to the renamer
-- One bit / one matrix row per renamer, taken from the oldest buffered ops.
rename_stb_o : out std_logic;
rename_stall_i : in std_logic;
rename_fast_o : out std_logic_vector(f_opa_renamers(g_config)-1 downto 0);
rename_slow_o : out std_logic_vector(f_opa_renamers(g_config)-1 downto 0);
rename_order_o : out std_logic_vector(f_opa_renamers(g_config)-1 downto 0);
rename_setx_o : out std_logic_vector(f_opa_renamers(g_config)-1 downto 0);
rename_geta_o : out std_logic_vector(f_opa_renamers(g_config)-1 downto 0);
rename_getb_o : out std_logic_vector(f_opa_renamers(g_config)-1 downto 0);
rename_aux_o : out std_logic_vector(f_opa_aux_wide(g_config)-1 downto 0);
rename_archx_o : out t_opa_matrix(f_opa_renamers(g_config)-1 downto 0, f_opa_arch_wide(g_isa)-1 downto 0);
rename_archa_o : out t_opa_matrix(f_opa_renamers(g_config)-1 downto 0, f_opa_arch_wide(g_isa)-1 downto 0);
rename_archb_o : out t_opa_matrix(f_opa_renamers(g_config)-1 downto 0, f_opa_arch_wide(g_isa)-1 downto 0);
-- Accept faults
-- A fault empties the buffer and is forwarded to the predictor unchanged.
rename_fault_i : in std_logic;
rename_pc_i : in std_logic_vector(f_opa_adr_wide(g_config)-1 downto f_opa_op_align(g_isa));
rename_pcf_i : in std_logic_vector(f_opa_fet_wide(g_config)-1 downto 0);
rename_pcn_i : in std_logic_vector(f_opa_adr_wide(g_config)-1 downto f_opa_op_align(g_isa));
-- Give the regfile the information EUs will need for these operations
-- regfile_pcn_o is the PC that follows the group currently presented.
regfile_stb_o : out std_logic;
regfile_aux_o : out std_logic_vector(f_opa_aux_wide(g_config)-1 downto 0);
regfile_arg_o : out t_opa_matrix(f_opa_renamers(g_config)-1 downto 0, f_opa_arg_wide(g_config)-1 downto 0);
regfile_imm_o : out t_opa_matrix(f_opa_renamers(g_config)-1 downto 0, f_opa_imm_wide(g_isa) -1 downto 0);
regfile_pc_o : out t_opa_matrix(f_opa_renamers(g_config)-1 downto 0, f_opa_adr_wide(g_config)-1 downto f_opa_op_align(g_isa));
regfile_pcf_o : out t_opa_matrix(f_opa_renamers(g_config)-1 downto 0, f_opa_fet_wide(g_config)-1 downto 0);
regfile_pcn_o : out std_logic_vector(f_opa_adr_wide(g_config)-1 downto f_opa_op_align(g_isa)));
end opa_decode;
architecture rtl of opa_decode is
-- Local shorthands for the ISA / configuration derived sizes.
constant c_big_endian:boolean := f_opa_big_endian(g_isa);
constant c_op_align : natural := f_opa_op_align(g_isa);
constant c_op_wide : natural := f_opa_op_wide (g_isa);
constant c_imm_wide : natural := f_opa_imm_wide(g_isa);
constant c_arch_wide: natural := f_opa_arch_wide(g_isa);
constant c_fetchers : natural := f_opa_fetchers(g_config);
constant c_renamers : natural := f_opa_renamers(g_config);
-- Depth of the op buffer. A fetch is only accepted while r_fill < 2*c_renamers
-- and adds at most c_fetchers ops, so the fill count never exceeds this value.
constant c_buffers : natural := c_fetchers + 2*c_renamers - 1;
constant c_num_aux : natural := f_opa_num_aux (g_config);
constant c_adr_wide : natural := f_opa_adr_wide(g_config);
constant c_fet_wide : natural := f_opa_fet_wide(g_config);
constant c_buf_wide : natural := f_opa_log2(c_buffers+1); -- [0, c_buffers] inclusive
constant c_aux_wide : natural := f_opa_aux_wide(g_config);
constant c_fetch_align : natural := f_opa_fetch_align(g_isa,g_config);
-- Number of immediate bits usable as a PC offset.
-- NOTE(review): presumably f_opa_choose(c, a, b) yields a when c is true, making
-- this min(c_imm_wide, c_adr_wide) -- confirm in opa_functions_pkg.
constant c_min_imm_pc : natural := f_opa_choose(c_imm_wide<c_adr_wide, c_imm_wide, c_adr_wide);
-- Array helpers: decoded ops, full PCs and fetch offsets, one element per slot
-- or per buffer entry depending on the signal.
type t_op_array is array(natural range <>) of t_opa_op;
type t_pc_array is array(natural range <>) of std_logic_vector(c_adr_wide-1 downto c_op_align);
type t_pcf_array is array(natural range <>) of std_logic_vector(c_fet_wide-1 downto 0);
-- Translate a fetch slot number into the position of its instruction word
-- inside icache_dat_i. Little-endian targets use the slot number directly;
-- big-endian targets count the words from the opposite end of the line.
function f_flip(x : natural) return natural is
  variable v_word : natural := x;
begin
  if c_big_endian then
    v_word := (c_fetchers-1) - x;
  end if;
  return v_word;
end f_flip;
-- Naming convention used below: s_* are combinatorial, r_* are registered.

-- Per fetch-slot view of the line currently offered by the icache.
signal s_pc_off : unsigned(c_fet_wide-1 downto 0);
signal s_ops_in : t_op_array(c_fetchers-1 downto 0);
signal s_pc_in : t_pc_array(c_fetchers-1 downto 0);
signal s_immb_in : t_pc_array(c_fetchers-1 downto 0);
signal s_pred_in : t_pc_array(c_fetchers-1 downto 0);
-- Slots that must be ignored: before the fetch PC / after a predicted jump.
signal s_mask_skip : std_logic_vector(c_fetchers-1 downto 0);
signal s_mask_tail : std_logic_vector(c_fetchers-1 downto 0);
-- Flow-control flags copied out of the decoded ops, one bit per slot.
signal s_jump : std_logic_vector(c_fetchers-1 downto 0);
signal s_take : std_logic_vector(c_fetchers-1 downto 0);
signal s_force : std_logic_vector(c_fetchers-1 downto 0);
signal s_push : std_logic_vector(c_fetchers-1 downto 0);
signal s_pop : std_logic_vector(c_fetchers-1 downto 0);
-- Prediction check and the static replacement prediction.
signal s_hit : std_logic_vector(c_fetchers-1 downto 0);
signal s_bad_jump : std_logic_vector(c_fetchers-1 downto 0);
signal s_use_static : std_logic;
signal r_use_static : std_logic := '0';
signal s_static_jumps : std_logic_vector(c_fetchers-1 downto 0);
signal s_static_jump : std_logic_vector(c_fetchers-1 downto 0);
signal s_static_targets: t_opa_matrix(c_fetchers-1 downto 0, c_adr_wide-1 downto c_op_align);
signal s_static_target : std_logic_vector(c_adr_wide-1 downto c_op_align);
-- Fault information from the renamer, reshaped for the predictor.
signal s_rename_jump : std_logic_vector(c_fetchers-1 downto 0);
signal s_rename_source : std_logic_vector(c_adr_wide-1 downto c_op_align);
-- The jump / next PC that is finally assumed for the accepted line.
signal s_jump_taken : std_logic_vector(c_fetchers-1 downto 0);
signal s_ret_taken : std_logic;
signal s_pcn_taken : std_logic_vector(c_adr_wide-1 downto c_op_align);
signal r_pcn_taken : std_logic_vector(c_adr_wide-1 downto c_op_align);
signal s_jal_pc : std_logic_vector(c_adr_wide-1 downto c_op_align);
-- Op buffer (next state / registered state) with the PC and fetch offset of
-- every buffered op.
signal s_ops : t_op_array (c_buffers-1 downto 0);
signal r_ops : t_op_array (c_buffers-1 downto 0);
signal s_pc : t_pc_array (c_buffers-1 downto 0);
signal r_pc : t_pc_array (c_buffers-1 downto 0);
signal s_pcf : t_pcf_array(c_buffers-1 downto 0);
signal r_pcf : t_pcf_array(c_buffers-1 downto 0);
-- Handshake / bookkeeping.
signal s_stb : std_logic;
signal s_stall : std_logic;
signal s_pcn_reg : std_logic;
signal s_progress : std_logic;
signal s_accept : std_logic;
signal s_ops_sub : unsigned(c_fet_wide-1 downto 0);
-- Number of valid entries in r_ops, and the running aux counter.
signal r_fill : unsigned(c_buf_wide-1 downto 0) := (others => '0');
signal r_aux : unsigned(c_aux_wide-1 downto 0) := (others => '0');
begin
-- Sanity checker: on every rising clock edge, abort (severity failure) if any
-- signal that is used as a condition in this architecture fails f_opa_safe.
-- The process drives no signals; it only reports.
-- NOTE(review): f_opa_safe presumably returns '1' when its argument contains
-- no metavalues ('U', 'X', ...) -- confirm in opa_functions_pkg.
check : process(clk_i) is
begin
if rising_edge(clk_i) then
-- control inputs (safe for when/if)
assert (f_opa_safe(predict_hit_i) = '1') report "decode: predict_hit_i has metavalue" severity failure;
assert (f_opa_safe(predict_jump_i) = '1') report "decode: predict_jump_i has metavalue" severity failure;
assert (f_opa_safe(icache_stb_i) = '1') report "decode: icache_stb_i has metavalue" severity failure;
assert (f_opa_safe(rename_stall_i) = '1') report "decode: rename_stall_i has metavalue" severity failure;
assert (f_opa_safe(rename_fault_i) = '1') report "decode: rename_fault_i has metavalue" severity failure;
-- combinatorial control (safe for when/if)
assert (f_opa_safe(s_stall) = '1') report "decode: s_stall has metavalue" severity failure;
assert (f_opa_safe(s_stb) = '1') report "decode: s_stb has metavalue" severity failure;
assert (f_opa_safe(s_pcn_reg) = '1') report "decode: s_pcn_reg has metavalue" severity failure;
assert (f_opa_safe(s_progress) = '1') report "decode: s_progress has metavalue" severity failure;
assert (f_opa_safe(s_accept) = '1') report "decode: s_accept has metavalue" severity failure;
-- registered control
assert (f_opa_safe(r_use_static) = '1') report "decode: r_use_static has metavalue" severity failure;
assert (f_opa_safe(r_fill) = '1') report "decode: r_fill has metavalue" severity failure;
assert (f_opa_safe(r_aux) = '1') report "decode: r_aux has metavalue" severity failure;
end if;
end process;
-- Decode the flow control information from the instructions
-- s_pc_off is the part of the fetch PC between op alignment and fetch
-- alignment, i.e. the slot number the fetch PC points at inside the line.
-- s_ops_sub is the amount subtracted from (r_fill + c_fetchers) when a line is
-- accepted: the slots in front of the fetch PC plus a term derived from the
-- position of the taken jump.
-- NOTE(review): presumably f_opa_1hot_dec turns the reversed one-hot jump
-- vector into the count of slots behind the jump (0 when no jump) -- confirm.
off1p : if c_fetchers > 1 generate
s_pc_off <= unsigned(icache_pc_i(c_fetch_align-1 downto c_op_align));
s_ops_sub <= unsigned(f_opa_1hot_dec(f_opa_reverse(s_jump_taken))) + s_pc_off;
end generate;
-- With a single fetcher every line holds exactly one op: nothing to skip.
off1 : if c_fetchers = 1 generate
s_pc_off <= "0";
s_ops_sub <= "0";
end generate;
-- Slot 0 can never follow a jump in the same line.
s_mask_tail(0) <= '0';
decode : for i in 0 to c_fetchers-1 generate
-- Decode the instruction word belonging to slot i (word order depends on
-- endianness, see f_flip).
s_ops_in(i) <= f_opa_isa_decode(g_isa, g_config, icache_dat_i((f_flip(i)+1)*c_op_wide-1 downto f_flip(i)*c_op_wide));
-- PC of slot i: the line address, extended by the slot number when a line
-- holds more than one op.
fet1 : if c_fetchers = 1 generate
s_pc_in(i) <= icache_pc_i(c_adr_wide-1 downto c_fetch_align);
end generate;
fet1p : if c_fetchers > 1 generate
s_pc_in(i) <= icache_pc_i(c_adr_wide-1 downto c_fetch_align) & std_logic_vector(to_unsigned(i, c_fet_wide));
end generate;
-- Extend the op's immb field to PC width by replicating bit c_min_imm_pc-1
-- into all higher bits, then add it to the slot PC to obtain the statically
-- computed target of slot i.
s_immb_in(i)(c_min_imm_pc-2 downto c_op_align) <= s_ops_in(i).immb(c_min_imm_pc-2 downto c_op_align);
s_immb_in(i)(c_adr_wide-1 downto c_min_imm_pc-1) <= (others => s_ops_in(i).immb(c_min_imm_pc-1));
s_pred_in(i) <= std_logic_vector(unsigned(s_pc_in(i)) + unsigned(s_immb_in(i)));
s_mask_skip(i) <= f_opa_lt(i, s_pc_off); -- Unused ops before loaded PC
tail : if i > 0 generate
s_mask_tail(i) <= s_mask_tail(i-1) or predict_jump_i(i-1); -- Ops following a taken jump
end generate;
-- Copy the per-op flow-control flags into per-slot bit vectors.
s_jump(i) <= s_ops_in(i).jump;
s_take(i) <= s_ops_in(i).take;
s_force(i) <= s_ops_in(i).force;
s_pop(i) <= s_ops_in(i).pop;
s_push(i) <= s_ops_in(i).push;
end generate;
-- Decide if we want to accept the fetch prediction
-- A slot disagrees with the fetch prediction when
--   * it was predicted to jump but the decoded op is not a jump, or
--   * the op is flagged 'force' but no jump was predicted for it, or
--   * the op is flagged 'take' and predict_hit_i is low.
-- Slots in front of the fetch PC and slots behind a predicted jump are
-- ignored. Any remaining disagreement switches to the static prediction.
s_hit <= (others => predict_hit_i);
s_bad_jump <= ((not s_jump and predict_jump_i) or
(s_force and not predict_jump_i) or
(s_take and not s_hit))
and not s_mask_skip and not s_mask_tail;
s_use_static <= f_opa_or(s_bad_jump);
-- What is our prediction?
-- Candidates are all 'take' slots at or after the fetch PC.
-- NOTE(review): f_opa_pick_small presumably keeps only the lowest set bit, and
-- f_opa_product(transpose(targets), jump) presumably selects the target row of
-- that slot -- confirm in opa_functions_pkg.
s_static_jumps<= s_take and not s_mask_skip; -- need to assign valid range before picking
s_static_jump <= f_opa_pick_small(s_static_jumps);
-- Repack the per-slot targets into a matrix so one row can be selected.
targets : for d in 0 to c_fetchers-1 generate
bits : for b in c_op_align to c_adr_wide-1 generate
s_static_targets(d,b) <= s_pred_in(d)(b);
end generate;
end generate;
s_static_target <= f_opa_product(f_opa_transpose(s_static_targets), s_static_jump);
-- NOTE(review): f_opa_mux(sel, a, b) is read here as "a when sel='1' else b".
s_jump_taken <= f_opa_mux(s_use_static, s_static_jump, predict_jump_i);
-- The statically chosen jump is flagged 'pop' (a return).
s_ret_taken <= f_opa_or(s_pop and s_static_jump);
-- pcn MUST be what gets loaded next, b/c instructions compare against it.
-- if issue faults, all this gets blown away, so that doesn't matter
-- if there is no fault, the usual prediction goes through the pipeline
-- if decode faults, then we need to pick whatever the predictor picks!
-- the predictor will always go where we tell it, except for a return.
s_pcn_taken <=
f_opa_mux(s_use_static,
f_opa_mux(s_ret_taken, predict_return_i, s_static_target),
icache_pcn_i);
-- Decode renamer's fault information
-- s_rename_source: line address from rename_pc_i; when a line holds several
-- ops, the bits below the fetch alignment come from rename_pcf_i.
-- s_rename_jump: one bit per slot, set for the slot whose number equals the
-- low bits of rename_pc_i (the faulting op's own slot).
s_rename_source(c_adr_wide-1 downto c_fetch_align) <= rename_pc_i(c_adr_wide-1 downto c_fetch_align);
src_fet1p : if c_fetchers > 1 generate
s_rename_source(c_fetch_align-1 downto c_op_align) <= rename_pcf_i;
jumps : for i in 0 to c_fetchers-1 generate
s_rename_jump(i) <= f_opa_eq(unsigned(rename_pc_i(c_fetch_align-1 downto c_op_align)), i);
end generate;
end generate;
-- Single fetcher: the only slot is always the faulting one.
src_fet1 : if c_fetchers = 1 generate
s_rename_jump <= "1";
end generate;
-- Feed back information to fetch
-- A fault is raised either by the renamer or by this stage when an accepted
-- line needs the static prediction. The renamer has priority on the
-- jump/source/target outputs; a return is only signalled for decode's own
-- prediction.
predict_fault_o <= (s_use_static and s_accept) or rename_fault_i;
predict_return_o <= s_accept and not rename_fault_i and s_ret_taken;
predict_jump_o <= s_rename_jump when rename_fault_i='1' else s_static_jump;
predict_source_o <= s_rename_source when rename_fault_i='1' else icache_pc_i;
predict_target_o <= rename_pcn_i when rename_fault_i='1' else s_static_target;
-- Do we need to push the PC?
-- s_jal_pc is the line address combined with the slot number of the taken
-- jump; predict_ret_o is the op immediately after it.
-- NOTE(review): for c_fetchers = 1 the low bits of s_jal_pc are not assigned
-- here; presumably that slice is a null range in that configuration -- confirm.
s_jal_pc(c_adr_wide -1 downto c_fetch_align) <= icache_pc_i(c_adr_wide-1 downto c_fetch_align);
subpc : if c_fetchers > 1 generate
s_jal_pc(c_fetch_align-1 downto c_op_align) <= f_opa_1hot_dec(s_jump_taken);
end generate;
-- Push only when the taken jump is flagged 'push' and the line is accepted.
predict_push_o <= f_opa_or(s_push and s_jump_taken) and s_accept;
predict_ret_o <= std_logic_vector(1 + unsigned(s_jal_pc));
-- Flow control from fetch and to rename
-- s_stall    : buffer already holds two full rename groups; refuse new lines.
-- s_stb      : at least one full rename group is buffered and is offered.
-- s_pcn_reg  : the offered group is everything in the buffer, so the PC that
--              follows it is the latched next-PC rather than a buffered op's PC.
-- s_progress : the offered group is consumed this cycle.
-- s_accept   : the icache line is taken into the buffer this cycle; suppressed
--              while r_use_static is set (the strobe right after a correction).
s_stall <= '1' when r_fill >= 2*c_renamers else '0';
s_stb <= '1' when r_fill >= c_renamers else '0';
s_pcn_reg <= '1' when r_fill = c_renamers else '0';
s_progress <= s_stb and not rename_stall_i;
s_accept <= icache_stb_i and not r_use_static and not s_stall;
-- Select the new buffer fill state
-- Entries below r_fill keep their registered contents; every entry at or above
-- r_fill is loaded with a slot of the incoming line. The slot for entry i is
-- s_pc_off - r_fill + (i mod c_fetchers), evaluated in c_fet_wide-bit
-- arithmetic, so entry r_fill receives the slot the fetch PC points at.
-- NOTE(review): this relies on c_fetchers = 2**c_fet_wide -- confirm.
buf1p : if c_fetchers > 1 generate
index : block is
type t_idx_array is array(natural range <>) of unsigned(c_fet_wide-1 downto 0);
signal s_idx_base : unsigned(c_fet_wide-1 downto 0);
signal s_idx : t_idx_array(c_buffers-1 downto 0);
begin
s_idx_base <= s_pc_off - r_fill(s_idx_base'range);
ops : for i in 0 to c_buffers-1 generate
s_idx(i) <= s_idx_base + to_unsigned(i mod c_fetchers, c_fet_wide);
-- Guard the to_integer conversions: an index that fails f_opa_safe yields an
-- undefined op / all-'X' PC instead of being converted.
s_ops(i) <= r_ops(i) when i < r_fill else s_ops_in(to_integer(s_idx(i))) when f_opa_safe(s_idx(i))='1' else c_opa_op_undef;
s_pc (i) <= r_pc (i) when i < r_fill else s_pc_in (to_integer(s_idx(i))) when f_opa_safe(s_idx(i))='1' else (others => 'X');
-- Every newly loaded entry records the fetch offset of the incoming line.
s_pcf(i) <= r_pcf(i) when i < r_fill else icache_pc_i(c_fetch_align-1 downto c_op_align);
end generate;
end block;
end generate;
-- Single fetcher: there is only slot 0 to load and the fetch offset is constant.
buf1 : if c_fetchers = 1 generate
ops : for i in 0 to c_buffers-1 generate
s_ops(i) <= r_ops(i) when i < r_fill else s_ops_in(0);
s_pc (i) <= r_pc (i) when i < r_fill else s_pc_in (0);
s_pcf(i) <= "0";
end generate;
end generate;
-- Bookkeeping registers of the op buffer.
--   r_fill       : number of valid buffer entries.
--   r_use_static : set for the icache strobe that follows a correction, so
--                  that s_accept ignores that strobe.
-- Reset clears both; a rename fault empties the buffer and sets r_use_static.
fill : process(clk_i, rst_n_i) is
  variable v_fill : unsigned(r_fill'range);
begin
  if rst_n_i = '0' then
    r_fill       <= (others => '0');
    r_use_static <= '0';
  elsif rising_edge(clk_i) then
    if rename_fault_i = '1' then
      r_fill       <= (others => '0');
      r_use_static <= '1';
    else
      -- On a static prediction, we ignore the next valid icache strobe
      if (icache_stb_i and not s_stall) = '1' then
        if r_use_static = '1' then
          r_use_static <= '0';
        else
          r_use_static <= s_use_static;
        end if;
      end if;

      -- Start from the current count, add the ops of an accepted line, then
      -- remove the group handed to the renamer.
      v_fill := r_fill;
      if s_accept = '1' then
        v_fill := (v_fill + c_fetchers) - s_ops_sub;
      end if;
      if s_progress = '1' then
        v_fill := v_fill - c_renamers;
      end if;
      r_fill <= v_fill;
    end if;
  end if;
end process;
-- Aux counter: advances by one for every group handed to the renamer and
-- wraps back to zero after reaching c_num_aux-1. Cleared by reset.
aux : process(clk_i, rst_n_i) is
  variable v_aux : unsigned(r_aux'range);
begin
  if rst_n_i = '0' then
    r_aux <= (others => '0');
  elsif rising_edge(clk_i) then
    if s_progress = '1' then
      -- Default to the incremented value; override on wrap-around.
      v_aux := r_aux + 1;
      if r_aux = c_num_aux-1 then
        v_aux := (others => '0');
      end if;
      r_aux <= v_aux;
    end if;
  end if;
end process;
-- Op buffer registers. When a group is consumed the next-state contents are
-- shifted down by c_renamers entries (the top c_renamers registers keep their
-- old value); otherwise the next-state contents are stored in place.
main : process(clk_i) is
begin
  if rising_edge(clk_i) then
    if s_progress = '1' then
      for dst in 0 to c_buffers-c_renamers-1 loop
        r_ops(dst) <= s_ops(dst + c_renamers);
        r_pcf(dst) <= s_pcf(dst + c_renamers);
        r_pc (dst) <= s_pc (dst + c_renamers);
      end loop;
    else
      for idx in 0 to c_buffers-1 loop
        r_ops(idx) <= s_ops(idx);
        r_pcf(idx) <= s_pcf(idx);
        r_pc (idx) <= s_pc (idx);
      end loop;
    end if;
  end if;
end process;
-- Remember the next PC chosen for the most recently accepted line. It is used
-- for regfile_pcn_o when the offered group is the last thing in the buffer.
-- The register has no reset; it holds its value while no line is accepted.
latch_pcn : process(clk_i) is
begin
if rising_edge(clk_i) then
if s_accept = '1' then
r_pcn_taken <= s_pcn_taken;
end if;
end if;
end process;
-- Output wiring.
-- The stall towards the icache is dropped while a rename fault is signalled.
icache_stall_o <= s_stall and not rename_fault_i;
-- Renamer interface: the oldest c_renamers buffer entries, field by field.
rename_stb_o <= s_stb;
rename_aux_o <= std_logic_vector(r_aux);
ops_out : for d in 0 to c_renamers-1 generate
rename_fast_o (d) <= r_ops(d).fast;
-- 'slow' is simply the complement of the op's 'fast' flag.
rename_slow_o (d) <= not r_ops(d).fast;
rename_order_o(d) <= r_ops(d).order;
rename_setx_o (d) <= r_ops(d).setx;
rename_geta_o (d) <= r_ops(d).geta;
rename_getb_o (d) <= r_ops(d).getb;
bits : for b in 0 to c_arch_wide-1 generate
rename_archx_o(d,b) <= r_ops(d).archx(b);
rename_archa_o(d,b) <= r_ops(d).archa(b);
rename_archb_o(d,b) <= r_ops(d).archb(b);
end generate;
end generate;
-- Register file interface: same strobe and aux value as the renamer, plus the
-- argument vector, immediate, PC and fetch offset of each op in the group.
regfile_stb_o <= s_stb;
regfile_aux_o <= std_logic_vector(r_aux);
rf_out : for d in 0 to c_renamers-1 generate
-- NOTE(review): c_arg_wide is not declared in this architecture; presumably it
-- comes from one of the used packages and equals f_opa_arg_wide(g_config),
-- the width of the regfile_arg_o port -- confirm.
arg : for b in 0 to c_arg_wide-1 generate
regfile_arg_o(d,b) <= f_opa_vec_from_arg(r_ops(d).arg)(b);
end generate;
imm : for b in 0 to c_imm_wide-1 generate
regfile_imm_o(d,b) <= r_ops(d).imm(b);
end generate;
pc : for b in c_op_align to c_adr_wide-1 generate
regfile_pc_o(d,b) <= r_pc(d)(b);
end generate;
pcf : for b in 0 to c_fet_wide-1 generate
regfile_pcf_o(d,b) <= r_pcf(d)(b);
end generate;
end generate;
-- PC following the offered group: the PC of the next buffered op, or the
-- latched next-PC when the group is the only content of the buffer.
pcn : for b in c_op_align to c_adr_wide-1 generate
regfile_pcn_o(b) <= r_pcn_taken(b) when s_pcn_reg='1' else r_pc(c_renamers)(b);
end generate;
end rtl;