Skip to content

Commit

Permalink
minor updates
Browse files Browse the repository at this point in the history
  • Loading branch information
tinebp committed Jan 11, 2025
1 parent 083cf04 commit 347889c
Show file tree
Hide file tree
Showing 11 changed files with 153 additions and 105 deletions.
6 changes: 3 additions & 3 deletions hw/rtl/VX_platform.vh
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@

`ifdef QUARTUS
`define MAX_FANOUT 8
`define MAX_LUTRAM 1024
`define FORCE_BRAM(d,w) (d >= 16 || w >= 128 || (d * w) >= 256)
`define USE_BLOCK_BRAM (* ramstyle = "block" *)
`define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *)
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
Expand All @@ -168,7 +168,7 @@
`define STRING string
`elsif VIVADO
`define MAX_FANOUT 8
`define MAX_LUTRAM 1024
`define FORCE_BRAM(d,w) (d >= 16 || w >= 128 || (d * w) >= 256)
`define USE_BLOCK_BRAM (* ram_style = "block" *)
`define USE_FAST_BRAM (* ram_style = "distributed" *)
`define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *)
Expand All @@ -179,7 +179,7 @@
`define STRING
`else
`define MAX_FANOUT 8
`define MAX_LUTRAM 1024
`define FORCE_BRAM(d,w) (d >= 16 || w >= 128 || (d * w) >= 256)
`define USE_BLOCK_BRAM
`define USE_FAST_BRAM
`define NO_RW_RAM_CHECK
Expand Down
17 changes: 8 additions & 9 deletions hw/rtl/cache/VX_cache.sv
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,9 @@ module VX_cache import VX_gpu_pkg::*; #(
localparam MEM_ARB_SEL_BITS = `CLOG2(`CDIV(NUM_BANKS, MEM_PORTS));
localparam MEM_ARB_SEL_WIDTH = `UP(MEM_ARB_SEL_BITS);

localparam CORE_RSP_REG_DISABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
localparam MEM_REQ_REG_DISABLE = (NUM_BANKS != 1);

localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 2 : 0;
localparam REQ_XBAR_BUF = (NUM_REQS > 2) ? 2 : 0;
localparam CORE_RSP_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1);

`ifdef PERF_ENABLE
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
Expand All @@ -133,7 +132,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.NUM_BANKS (NUM_BANKS),
.UUID_WIDTH(UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // request xbar latency
) flush_unit (
.clk (clk),
.reset (reset),
Expand Down Expand Up @@ -387,8 +386,8 @@ module VX_cache import VX_gpu_pkg::*; #(
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.FLAGS_WIDTH (FLAGS_WIDTH),
.CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : 1),
.MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : 1)
.CORE_OUT_REG (CORE_RSP_BUF_ENABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)),
.MEM_OUT_REG (MEM_REQ_BUF_ENABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF))
) bank (
.clk (clk),
.reset (reset),
Expand Down Expand Up @@ -481,7 +480,7 @@ module VX_cache import VX_gpu_pkg::*; #(
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_buf
VX_elastic_buffer #(
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
.SIZE (CORE_RSP_REG_DISABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
.SIZE (CORE_RSP_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
) core_rsp_buf (
.clk (clk),
Expand Down Expand Up @@ -578,7 +577,7 @@ module VX_cache import VX_gpu_pkg::*; #(

VX_elastic_buffer #(
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
.SIZE (MEM_REQ_REG_DISABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
.SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
) mem_req_buf (
.clk (clk),
Expand Down
2 changes: 1 addition & 1 deletion hw/rtl/cache/VX_cache_top.sv
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready;
end

VX_cache #(
VX_cache_wrap #(
.INSTANCE_ID (INSTANCE_ID),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
Expand Down
3 changes: 2 additions & 1 deletion hw/syn/altera/dut/unittest/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu
ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src
endif
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE)
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE)
RTL_INCLUDE = -I..
3 changes: 2 additions & 1 deletion hw/syn/altera/opae/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ DEVICE_FAMILY ?= arria10

PREFIX ?= build$(XLEN)
TARGET ?= fpga
NUM_CORES ?= 1

SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/opae

Expand Down Expand Up @@ -44,6 +43,7 @@ ifeq ($(DEVICE_FAMILY), arria10)
CONFIGS += -DALTERA_A10
endif

ifdef NUM_CORES
# cluster configuration
CONFIGS_1c := -DNUM_CLUSTERS=1 -DNUM_CORES=1
CONFIGS_2c := -DNUM_CLUSTERS=1 -DNUM_CORES=2
Expand All @@ -53,6 +53,7 @@ CONFIGS_16c := -DNUM_CLUSTERS=1 -DNUM_CORES=16
CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16
CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16
CONFIGS += $(CONFIGS_$(NUM_CORES)c)
endif

# include sources
RTL_PKGS = $(AFU_DIR)/local_mem_cfg_pkg.sv $(AFU_DIR)/ccip/ccip_if_pkg.sv
Expand Down
12 changes: 8 additions & 4 deletions hw/syn/xilinx/README
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,18 @@ TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make chipscope
# analyze build report
vitis_analyzer build_xilinx_u50_gen3x16_xdma_5_202210_1_hw_4c/bin/vortex_afu.xclbin.link_summary

# resuming build for routing
# resuming builds
TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.synth" make > build.log 2>&1 &
TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.impl" make > build.log 2>&1 &
TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.impl.opt_design" make > build.log 2>&1 &
TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.impl.place_design" make > build.log 2>&1 &
TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.impl.phys_opt_design" make > build.log 2>&1 &
TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.impl.route_design" make > build.log 2>&1 &

# running test
FPGA_BIN_DIR=<bin_dir> TARGET=hw_emu ./ci/blackbox.sh --driver=xrt --app=demo
FPGA_BIN_DIR=<bin_dir> TARGET=hw ./ci/blackbox.sh --driver=xrt --app=demo
FPGA_BIN_DIR=<bin_dir> TARGET=hw_emu ./ci/blackbox.sh --driver=xrt --app=demo
FPGA_BIN_DIR=<bin_dir> XRT_DEVICE_INDEX=1 TARGET=hw ./ci/blackbox.sh --driver=xrt --app=demo
FPGA_BIN_DIR=<bin_dir> TARGET=hw ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n1024"
FPGA_BIN_DIR=<bin_dir> XRT_DEVICE_INDEX=1 TARGET=hw ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n1024"

# build report logs
<build_dir>/bin/vortex_afu.xclbin.info
Expand Down
9 changes: 7 additions & 2 deletions hw/syn/xilinx/dut/common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,15 @@ else
endif

clean:
ifndef RESUME
rm -rf project_1
rm -rf .Xil
rm -f *.rpt
rm -f vivado*.log
rm -f vivado*.jou
rm -f *.log
rm -f *.jou
rm -f *.dcp
else
@echo "RESUME is defined, skipping clean."
endif

.PHONY: all gen-sources build clean
197 changes: 117 additions & 80 deletions hw/syn/xilinx/dut/project.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# Start time
set start_time [clock seconds]

if { $::argc != 4 } {
puts "ERROR: Program \"$::argv0\" requires 4 arguments!\n"
puts "Usage: $::argv0 <top_module> <device_part> <vcs_file> <xdc_file>\n"
Expand Down Expand Up @@ -46,95 +43,135 @@ if {[info exists ::env(MAX_JOBS)]} {
set num_jobs 0
}

# create fpu ip
if {[info exists ::env(FPU_IP)]} {
set ip_dir $::env(FPU_IP)
set argv [list $ip_dir $device_part]
set argc 2
source ${script_dir}/xilinx_ip_gen.tcl
# run_setup: build the Vivado project from scratch.
# Steps, in order: optionally generate the FPU IP (when the FPU_IP environment
# variable is set), parse the VCS file list, create the project, read the XDC
# constraints, add the design sources and Verilog defines, add the FPU IP .xci
# files, configure the synth_1 run, register the impl_1 pre-opt hook, and
# refresh the compile order of sources_1.
# Takes no arguments; all inputs come from the globals declared below.
proc run_setup {} {
global project_name
global top_module device_part vcs_file xdc_file
global script_dir source_dir
global num_jobs
global argv argc ;# Using global system variables: argv and argc

# create fpu ip
# argv/argc are overwritten before sourcing xilinx_ip_gen.tcl; presumably that
# script reads them as its <ip_dir> <device_part> arguments -- TODO confirm.
if {[info exists ::env(FPU_IP)]} {
set ip_dir $::env(FPU_IP)
set argv [list $ip_dir $device_part]
set argc 2
source ${script_dir}/xilinx_ip_gen.tcl
}

# Load the parse_vcs_list helper and split its result by index:
# element 0 -> sources, element 1 -> includes, element 2 -> defines.
source "${script_dir}/parse_vcs_list.tcl"
set vlist [parse_vcs_list "${vcs_file}"]

set vsources_list [lindex $vlist 0]
set vincludes_list [lindex $vlist 1]
set vdefines_list [lindex $vlist 2]

# Debug output, disabled by default.
# NOTE: vincludes_list is only referenced by the commented-out puts below.
#puts $vsources_list
#puts $vincludes_list
#puts $vdefines_list
# Create project
# project_name is passed as both the project name and the project directory.
create_project $project_name $project_name -force -part $device_part

# Add constraints file
read_xdc $xdc_file

# Add the design sources
add_files -norecurse -verbose $vsources_list

# process defines
set_property verilog_define ${vdefines_list} [current_fileset]

# add fpu ip
if {[info exists ::env(FPU_IP)]} {
set ip_dir $::env(FPU_IP)
add_files -norecurse -verbose ${ip_dir}/xil_fma/xil_fma.xci
add_files -norecurse -verbose ${ip_dir}/xil_fdiv/xil_fdiv.xci
add_files -norecurse -verbose ${ip_dir}/xil_fsqrt/xil_fsqrt.xci
}

# Synthesis
# Set the top module and pass extra synth_design options to the synth_1 run
# (out-of-context mode, "rebuilt" hierarchy flattening).
set_property top $top_module [current_fileset]
set_property \
-name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} \
-value {-mode out_of_context -flatten_hierarchy "rebuilt"} \
-objects [get_runs synth_1]

# register compilation hooks
# Only the pre-opt_design hook on impl_1 is active; the others are kept as
# commented-out templates.
#set_property STEPS.SYNTH_DESIGN.TCL.PRE ${source_dir}/pre_synth_hook.tcl [get_runs synth_1]
#set_property STEPS.SYNTH_DESIGN.TCL.POST ${source_dir}/post_synth_hook.tcl [get_runs synth_1]
set_property STEPS.OPT_DESIGN.TCL.PRE ${script_dir}/xilinx_async_bram_patch.tcl [get_runs impl_1]
#set_property STEPS.OPT_DESIGN.TCL.POST ${source_dir}/post_opt_hook.tcl [get_runs impl_1]
#set_property STEPS.ROUTE_DESIGN.TCL.PRE ${source_dir}/pre_route_hook.tcl [get_runs impl_1]
#set_property STEPS.ROUTE_DESIGN.TCL.POST ${source_dir}/post_route_hook.tcl [get_runs impl_1]

update_compile_order -fileset sources_1
}

source "${script_dir}/parse_vcs_list.tcl"
set vlist [parse_vcs_list "${vcs_file}"]

set vsources_list [lindex $vlist 0]
set vincludes_list [lindex $vlist 1]
set vdefines_list [lindex $vlist 2]

#puts $vsources_list
#puts $vincludes_list
#puts $vdefines_list

# Create project
create_project $project_name $project_name -force -part $device_part
# run_synthesis: launch the synth_1 run, wait for it to finish, open the
# resulting design, then write a hierarchical utilization report
# (post_synth_util.rpt) and a checkpoint (post_synth.dcp).
# Reads the global num_jobs; a value of 0 means "do not pass -jobs".
proc run_synthesis {} {
    global num_jobs

    # Assemble the launch command once and append -jobs only when requested.
    set launch_cmd [list launch_runs synth_1 -verbose]
    if {$num_jobs != 0} {
        lappend launch_cmd -jobs $num_jobs
    }
    {*}$launch_cmd

    # Block until the run completes, then load its results.
    wait_on_run synth_1
    open_run synth_1

    report_utilization -file post_synth_util.rpt -hierarchical -hierarchical_percentages
    write_checkpoint -force post_synth.dcp
}

# Add constraints file
read_xdc $xdc_file
# run_implementation: launch the impl_1 run, wait for it to finish, open the
# resulting design, then write a hierarchical utilization report
# (post_impl_util.rpt) and a checkpoint (post_impl.dcp).
# Reads the global num_jobs; a value of 0 means "do not pass -jobs".
# NOTE(review): the resume path calls this right after open_checkpoint without
# run_setup -- confirm that the impl_1 run exists in that situation.
proc run_implementation {} {
    global num_jobs

    # Assemble the launch command once and append -jobs only when requested.
    set launch_cmd [list launch_runs impl_1 -verbose]
    if {$num_jobs != 0} {
        lappend launch_cmd -jobs $num_jobs
    }
    {*}$launch_cmd

    # Block until the run completes, then load its results.
    wait_on_run impl_1
    open_run impl_1

    report_utilization -file post_impl_util.rpt -hierarchical -hierarchical_percentages
    write_checkpoint -force post_impl.dcp
}

# Add the design sources
add_files -norecurse -verbose $vsources_list
proc run_report {} {
# Generate place, route, and timing summary reports
report_place_status -file place.rpt
report_route_status -file route.rpt
report_timing_summary -file timing.rpt

# process defines
set_property verilog_define ${vdefines_list} [current_fileset]
# Generate timing report
report_timing -nworst 100 -delay_type max -sort_by group -file timing.rpt

# add fpu ip
if {[info exists ::env(FPU_IP)]} {
set ip_dir $::env(FPU_IP)
add_files -norecurse -verbose ${ip_dir}/xil_fma/xil_fma.xci
add_files -norecurse -verbose ${ip_dir}/xil_fdiv/xil_fdiv.xci
add_files -norecurse -verbose ${ip_dir}/xil_fsqrt/xil_fsqrt.xci
# Generate power and drc reports
report_power -file power.rpt
report_drc -file drc.rpt
}

update_compile_order -fileset sources_1

# Synthesis
set_property top $top_module [current_fileset]
###############################################################################

set_property \
-name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} \
-value {-mode out_of_context -flatten_hierarchy "rebuilt"} \
-objects [get_runs synth_1]

# register compilation hooks
#set_property STEPS.SYNTH_DESIGN.TCL.PRE ${source_dir}/pre_synth_hook.tcl [get_runs synth_1]
#set_property STEPS.SYNTH_DESIGN.TCL.POST ${source_dir}/post_synth_hook.tcl [get_runs synth_1]
set_property STEPS.OPT_DESIGN.TCL.PRE ${script_dir}/xilinx_async_bram_patch.tcl [get_runs impl_1]
#set_property STEPS.OPT_DESIGN.TCL.POST ${source_dir}/post_opt_hook.tcl [get_runs impl_1]
#set_property STEPS.ROUTE_DESIGN.TCL.PRE ${source_dir}/pre_route_hook.tcl [get_runs impl_1]
#set_property STEPS.ROUTE_DESIGN.TCL.POST ${source_dir}/post_route_hook.tcl [get_runs impl_1]
# Start time
set start_time [clock seconds]

if {$num_jobs != 0} {
launch_runs synth_1 -verbose -jobs $num_jobs
} else {
launch_runs synth_1 -verbose
}
wait_on_run synth_1
open_run synth_1
write_checkpoint -force post_synth.dcp
report_utilization -file post_synth_util.rpt -hierarchical -hierarchical_percentages

# Implementation
if {$num_jobs != 0} {
launch_runs impl_1 -verbose -jobs $num_jobs
set checkpoint_synth "post_synth.dcp"
set checkpoint_impl "post_impl.dcp"

if { [file exists $checkpoint_impl] } {
puts "Resuming from post-implementation checkpoint: $checkpoint_impl"
open_checkpoint $checkpoint_impl
run_report
} elseif { [file exists $checkpoint_synth] } {
puts "Resuming from post-synthesis checkpoint: $checkpoint_synth"
open_checkpoint $checkpoint_synth
run_implementation
run_report
} else {
launch_runs impl_1 -verbose
# Execute full pipeline
run_setup
run_synthesis
run_implementation
run_report
}
wait_on_run impl_1
open_run impl_1
write_checkpoint -force post_impl.dcp
report_utilization -file post_impl_util.rpt -hierarchical -hierarchical_percentages

# Generate place, route, and timing summary reports
report_place_status -file place.rpt
report_route_status -file route.rpt
report_timing_summary -file timing.rpt

# Generate timing report
report_timing -nworst 10 -delay_type max -sort_by group -file timing.rpt

# Generate power and drc reports
report_power -file power.rpt
report_drc -file drc.rpt

# End time and calculation
set elapsed_time [expr {[clock seconds] - $start_time}]
Expand Down
2 changes: 1 addition & 1 deletion hw/syn/xilinx/sandbox/project.tcl.in
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ if { [file exists post_impl.dcp] } {
run_implementation
run_report
} else {
# execute full pipeline
# Execute full pipeline
run_setup
run_synthesis
run_implementation
Expand Down
Loading

0 comments on commit 347889c

Please sign in to comment.