Skip to content

Commit

Permalink
Faster PC Histogram (#644)
Browse files Browse the repository at this point in the history
* macro for declaring dpi functions in profiler

* vanilla_core_trace.def

* adds pc histogram collector

* adds PC_HIST argument to make

* comment note

* refactor histogram structure

* fixes instance name setting

* moves histogram code out of profiler

* fast-functional pc histogram

* cleanup

* scoreboard tracker module

* parameter for classifying long ops

* cleanup in profiler

* simv-pc-histogram

* detailed stall breakdown

* remove pc hist from trace

* bugfix for clearing scoreboard

* counts icache miss in id bubble tracking

* track stall_icache_store

* prioritize stall_all stalls

* revert

* revert

* remove unused file

* revert

* revert

* cleanup

* remove unused ifdef

* revert

* revert

* revert

* revert

* cleanup

* fixup

* stall_ifetch_wait reports pc in mem stage

* scoreboard tracker pkg

* remove debug

* [spmd] stall_remote_ld_wb test

* [testbenches] verilator workaround

* [vanilla_scoreboard_tracker] enum to struct

* [vanilla_core_pc_histogram] omit wb stalls

* [scoreboard_tracker] fixup

* restores wb stalls

* [vanilla_pc_histogram] removes macros, writes out instead

* [scoreboard_tracker] fix

* [pc_histogram] style fix

* [pc_histogram] BSG_ABSTRACT_MODULE

* [pc_histogram] bug fix
  • Loading branch information
mrutt92 authored Apr 29, 2022
1 parent 8c1066c commit 841b543
Show file tree
Hide file tree
Showing 17 changed files with 1,170 additions and 322 deletions.
4 changes: 4 additions & 0 deletions machines/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
simv-pc-histogram
simv-pc-histogram.daidir
simv-debug
simv-debug.daidir
simv-profile
Expand All @@ -6,11 +8,13 @@ simv-coverage
simv-coverage.daidir
vc_hdrs.h
stack.info.*
*/build-pc-histogram
*/build-profile
*/build-debug
*/build-coverage
*/build
*/csrc
*/csrc-pc-histogram
*/csrc-debug
*/csrc-profile
*/csrc-coverage
Expand Down
11 changes: 11 additions & 0 deletions machines/Makefile.vcs
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,14 @@ ASCII_TO_ROM_PY = $(BASEJUMP_STL_DIR)/bsg_mem/bsg_ascii_to_rom.py
$(VCS_INCLUDES) $(VCS_DEFINES) $(CSOURCES) $(VCS_SOURCES) $*/bsg_tag_boot_rom.v \
-l $*/build-coverage.log -Mdir=$*/csrc-coverage


# Build simv-pc-histogram: a VCS simulator compiled with
# +define+BSG_ENABLE_PC_HISTOGRAM and without the debug option, for faster
# simulation.
#
# The compile log and the VCS -Mdir work directory carry a "-pc-histogram"
# suffix, in the same way the coverage build uses build-coverage.log and
# csrc-coverage. Writing to the unsuffixed build.log / csrc would make this
# build share (and clobber) the files of another simulator built in the same
# machine directory.
%/simv-pc-histogram : %/Makefile.machine.include $(VSOURCES) $(CSOURCES) $(VINCLUDES) $(VHEADERS)
	$(call set_vcs_machine_variables,$<)
	python $(POD_TRACE_GEN_PY) $(BSG_MACHINE_PODS_X) $(BSG_MACHINE_PODS_Y) > $*/pod_trace.tr
	python $(ASCII_TO_ROM_PY) $*/pod_trace.tr bsg_tag_boot_rom > $*/bsg_tag_boot_rom.v
	$(eval VCS_FLAGS += +define+BSG_ENABLE_PC_HISTOGRAM)
	$(VCS) $(VCS_FLAGS) $(VCS_CFLAGS) -o $@ \
		$(VCS_INCLUDES) $(VCS_DEFINES) $(CSOURCES) $(VCS_SOURCES) $*/bsg_tag_boot_rom.v \
		-l $*/build-pc-histogram.log -Mdir=$*/csrc-pc-histogram
10 changes: 10 additions & 0 deletions machines/Makefile.verilator
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,13 @@ ASCII_TO_ROM_PY = $(BASEJUMP_STL_DIR)/bsg_mem/bsg_ascii_to_rom.py
%/simsc-coverage : %/Makefile.machine.include $(VSOURCES) $(CSOURCES) $(VINCLUDES) $(VHEADERS)
$(info Coverage is not currently supported for Verilator)

# Build simsc-pc-histogram: a Verilator simulator compiled with
# +define+BSG_ENABLE_PC_HISTOGRAM and without the debug option, for faster
# simulation.
#
# Verilator is asked to name the executable $(@F) inside $(@D)/obj_dir, so the
# symlink created at $@ must point at obj_dir/$(@F); pointing it at
# obj_dir/simsc would link to a different simulator (or to nothing).
# ln -sf replaces a link left over from an earlier build instead of failing
# with "File exists" when the rule is re-run.
%/simsc-pc-histogram : %/Makefile.machine.include $(VSOURCES) $(CSOURCES) $(VINCLUDES) $(VHEADERS)
	$(call set_verilator_machine_variables,$<)
	python $(POD_TRACE_GEN_PY) $(BSG_MACHINE_PODS_X) $(BSG_MACHINE_PODS_Y) > $*/pod_trace.tr
	python $(ASCII_TO_ROM_PY) $*/pod_trace.tr bsg_tag_boot_rom > $*/bsg_tag_boot_rom.v
	$(eval VERILATOR_FLAGS += +define+BSG_ENABLE_PC_HISTOGRAM)
	$(VERILATOR) $(VERILATOR_FLAGS) $(VERILATOR_CFLAGS) -Mdir $(@D)/obj_dir -o $(@F) \
		$(VERILATOR_INCLUDES) $(VERILATOR_DEFINES) $(CSOURCES) $(VERILATOR_SOURCES) $*/bsg_tag_boot_rom.v
	ln -sf $(abspath $(@D))/obj_dir/$(@F) $@
6 changes: 6 additions & 0 deletions machines/sim_filelist.mk
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ VINCLUDES += $(BSG_MANYCORE_DIR)/testbenches/common/v
VHEADERS += $(BSG_MANYCORE_DIR)/testbenches/common/v/bsg_manycore_mem_cfg_pkg.v
VHEADERS += $(BSG_MANYCORE_DIR)/testbenches/common/v/bsg_manycore_network_cfg_pkg.v
VHEADERS += $(BSG_MANYCORE_DIR)/testbenches/common/v/bsg_manycore_profile_pkg.v
VHEADERS += $(BSG_MANYCORE_DIR)/testbenches/common/v/vanilla_exe_bubble_classifier_pkg.v
VHEADERS += $(BSG_MANYCORE_DIR)/testbenches/common/v/vanilla_scoreboard_tracker_pkg.v
VHEADERS += $(BASEJUMP_STL_DIR)/bsg_test/bsg_dramsim3_pkg.v

VSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/bsg_nonsynth_manycore_axi_mem.v
Expand Down Expand Up @@ -59,10 +61,14 @@ VSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/remote_load_trace.v
CSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/remote_load_profiler.cpp
VSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/instr_trace.v
VSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/vanilla_core_profiler.v
VSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/vanilla_exe_bubble_classifier.v
VSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/vanilla_scoreboard_tracker.v
CSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/vanilla_core_profiler.cpp
CSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/vanilla_core_pc_histogram.cpp
VSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/vcache_profiler.v
CSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/vcache_profiler.cpp
VSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/vcache_non_blocking_profiler.v
VSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/vanilla_core_pc_histogram.v
VSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/infinite_mem_profiler.v
VSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/bsg_nonsynth_manycore_tag_master.v
VSOURCES += $(BSG_MANYCORE_DIR)/testbenches/common/v/bsg_nonsynth_manycore_io_complex.v
Expand Down
5 changes: 5 additions & 0 deletions software/mk/Makefile.verilog
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Simulation knobs. Each is assigned with ?=, so a value already set on the
# command line, in the environment, or by an earlier assignment takes
# precedence over the default given here.
MAX_CYCLES ?= 100000000
# Simulator-flavor selectors. The BSG_SIM_EXE selection further down in this
# file compares each one against the literal 0, so any other value turns the
# flavor on:
#   WAVE    -> $(BSG_SIM_BASE)-debug
#   PC_HIST -> $(BSG_SIM_BASE)-pc-histogram (only consulted when WAVE is 0)
#   TRACE   -> $(BSG_SIM_BASE)-profile      (only consulted when WAVE and PC_HIST are 0)
# NOTE(review): COVERAGE is tested innermost (all of the above 0); presumably
# it selects a -coverage executable -- confirm against the full conditional.
WAVE ?= 0
TRACE ?= 0
PC_HIST ?= 0
COVERAGE ?= 0
# NOTE(review): presumably the base name of the program to build and run
# (e.g. main -> main.riscv / main.run) -- confirm against the tail rules.
PROG_NAME ?= main

Expand Down Expand Up @@ -47,6 +48,7 @@ else ifeq ($(BSG_PLATFORM),verilator)
endif

ifeq ($(WAVE),0)
ifeq ($(PC_HIST),0)
ifeq ($(TRACE),0)
ifeq ($(COVERAGE),0)
BSG_SIM_EXE = $(BSG_MACHINE_PATH)/$(BSG_SIM_BASE)
Expand All @@ -56,6 +58,9 @@ endif
else
BSG_SIM_EXE = $(BSG_MACHINE_PATH)/$(BSG_SIM_BASE)-profile
endif
else
BSG_SIM_EXE = $(BSG_MACHINE_PATH)/$(BSG_SIM_BASE)-pc-histogram
endif
else
BSG_SIM_EXE = $(BSG_MACHINE_PATH)/$(BSG_SIM_BASE)-debug
endif
Expand Down
15 changes: 15 additions & 0 deletions software/spmd/stall_remote_ld_wb/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# SPMD test: stall_remote_ld_wb.
# NOTE(review): bsg_tiles_X / bsg_tiles_Y are presumably the tile-group
# dimensions consumed by the included makefiles -- a single tile here.
bsg_tiles_X = 1
bsg_tiles_Y = 1

# `all` names a command-style goal, not a file; declaring it phony keeps a
# stray file called "all" from masking it. It stays the first ordinary target
# in this file, so it remains the default goal.
.PHONY: all
all: main.run

include ../Makefile.include

# Assigned after the include above, so this value replaces any
# RISCV_LINK_OPTS that the include may have set. Only main.o is linked,
# with no standard library or startup files.
RISCV_LINK_OPTS = -march=rv32imaf -nostdlib -nostartfiles

# $(LINK_SCRIPT) is listed as a prerequisite so that a change to it relinks
# the program; only main.o is passed to the linker as an input object.
main.riscv: $(LINK_SCRIPT) main.o
	$(RISCV_LINK) main.o -o $@ $(RISCV_LINK_OPTS)


include ../../mk/Makefile.tail_rules

67 changes: 67 additions & 0 deletions software/spmd/stall_remote_ld_wb/main.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// stall_remote_ld_wb: issues a long run of back-to-back loads through a pointer
// into the .dram section, follows them with a div, and finally checks that the
// first value loaded is still equal to the expected constant.
// NOTE(review): judging by the directory name, the load burst is meant to
// provoke the remote-load write-back stall in the core -- confirm against the
// core's stall definitions.
#include "bsg_manycore_arch.h"
#include "bsg_manycore_asm.h"

//.data
// float_dmem_arr: .space 64 // array of 16 floats
//.section .dram, "aw"
// float_dram_arr: .space 64

// A single 32-bit word holding -1 (all ones), placed in the .dram section.
// Only this one word is defined; the loads below at non-zero offsets read
// whatever follows it in .dram, and their results are never compared.
.section .dram, "aw"
dram_word: .word -1

.text
// NOTE(review): macro from the included headers; presumably puts the register
// file into a known state before the test starts -- confirm.
bsg_asm_init_regfile

// test1
test1:
// x1 = address of dram_word (base pointer for every load below)
la x1, dram_word
// x2 = -1, the value dram_word is expected to hold
addi x2, x0, -1
// prime
// Touches offsets 0 and 64 before the main burst and checks the word at
// offset 0 against x2.
// NOTE(review): presumably this brings both 64-byte-separated regions in ahead
// of the burst -- confirm the intent.
lw x3, 0(x1)
lw x4, 64(x1)
bne x2, x3, fail
// do many remote loads
// first block
// Offsets 0..60, one destination register per load (x4..x19).
lw x4, 0(x1)
lw x5, 4(x1)
lw x6, 8(x1)
lw x7, 12(x1)
lw x8, 16(x1)
lw x9, 20(x1)
lw x10, 24(x1)
lw x11, 28(x1)
lw x12, 32(x1)
lw x13, 36(x1)
lw x14, 40(x1)
lw x15, 44(x1)
lw x16, 48(x1)
lw x17, 52(x1)
lw x18, 56(x1)
lw x19, 60(x1)
// second block
// Offsets 64..108, destinations x20..x31.
lw x20, 64(x1)
lw x21, 68(x1)
lw x22, 72(x1)
lw x23, 76(x1)
lw x24, 80(x1)
lw x25, 84(x1)
lw x26, 88(x1)
lw x27, 92(x1)
lw x28, 96(x1)
lw x29, 100(x1)
lw x30, 104(x1)
lw x31, 108(x1)
// x1 = x3 / x2. This overwrites the base pointer, so x1 no longer addresses
// dram_word from here on.
// NOTE(review): presumably chosen as a multi-cycle operation issued right
// behind the outstanding loads -- confirm.
div x1, x3, x2
// lw x4, 112(x1)
// lw x4, 116(x1)
// lw x4, 120(x1)
// lw x4, 124(x1)
// Final check: x3 (loaded during priming) must still equal x2 (-1).
bne x3, x2, fail
pass:
// NOTE(review): macro from the included headers; presumably reports a
// successful finish to the I/O location given by its arguments -- confirm.
bsg_asm_finish(IO_X_INDEX, 0)
pass_loop:
// x0 == x0 is always true: spin here forever.
beq x0, x0, pass_loop
fail:
// NOTE(review): macro from the included headers; presumably reports a failure
// to the I/O location given by its arguments -- confirm.
bsg_asm_fail(IO_X_INDEX, 0)
fail_loop:
// x0 == x0 is always true: spin here forever.
beq x0, x0, fail_loop
20 changes: 20 additions & 0 deletions testbenches/common/v/bsg_nonsynth_manycore_testbench.v
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ module bsg_nonsynth_manycore_testbench

, parameter enable_vcore_pc_coverage_p=0

, parameter enable_vanilla_core_pc_histogram_p=0

, parameter cache_bank_addr_width_lp = `BSG_SAFE_CLOG2(bsg_dram_size_p/(2*num_tiles_x_p*num_vcache_rows_p)*4) // byte addr
, parameter link_sif_width_lp =
`bsg_manycore_link_sif_width(addr_width_p,data_width_p,x_cord_width_p,y_cord_width_p)
Expand Down Expand Up @@ -913,6 +915,24 @@ if (enable_vanilla_core_trace_p) begin
end
`endif

//////////////////
// PC Histogram //
//////////////////
// When enable_vanilla_core_pc_histogram_p is nonzero, binds a
// vanilla_core_pc_histogram instance named vcore_pc_hist into vanilla_core,
// connecting its ports by name with (.*).
//
// The whole generate-if, including its closing "end", sits inside the ifndef
// guard. Defining VERILATOR_WORKAROUND_DISABLE_PC_HISTOGRAM therefore removes
// a balanced begin/end pair instead of leaving a dangling "end" behind.
`ifndef VERILATOR_WORKAROUND_DISABLE_PC_HISTOGRAM
  if (enable_vanilla_core_pc_histogram_p) begin
    bind vanilla_core vanilla_core_pc_histogram
      #(.x_cord_width_p(x_cord_width_p)
        ,.y_cord_width_p(y_cord_width_p)
        ,.data_width_p(data_width_p)
        ,.icache_tag_width_p(icache_tag_width_p)
        ,.icache_entries_p(icache_entries_p)
        ,.origin_x_cord_p(`BSG_MACHINE_ORIGIN_X_CORD)
        ,.origin_y_cord_p(`BSG_MACHINE_ORIGIN_Y_CORD)
      )
      vcore_pc_hist
      (.*);
  end // if (enable_vanilla_core_pc_histogram_p)
`endif

endmodule

Expand Down
16 changes: 9 additions & 7 deletions testbenches/common/v/profiler.vh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ import "DPI-C" context function \
import "DPI-C" context function \
void profiler_name``_unlock();

// Declares the full set of per-profiler functions for the profiler called
// profiler_name by expanding the seven individual declaration macros in turn:
// init, exit, is_init, is_exit, trace_fd, lock and unlock.
// NOTE(review): each of those macros is presumably an import "DPI-C" context
// function declaration whose name is prefixed with profiler_name -- confirm
// against their definitions earlier in this header.
// Intended for modules that need the declarations without the initial/final
// blocks that come with the complete profiler definition.
`define DECLARE_PROFILER_DPI_FUNCTIONS(profiler_name) \
`DECLARE_PROFILER_INIT_FUNC(profiler_name) \
`DECLARE_PROFILER_EXIT_FUNC(profiler_name) \
`DECLARE_PROFILER_IS_INIT_FUNC(profiler_name) \
`DECLARE_PROFILER_IS_EXIT_FUNC(profiler_name) \
`DECLARE_PROFILER_TRACE_FD_FUNC(profiler_name) \
`DECLARE_PROFILER_LOCK_FUNC(profiler_name) \
`DECLARE_PROFILER_UNLOCK_FUNC(profiler_name)

`define DEFINE_PROFILER_INITIAL_BLOCK(profiler_name, trace_file_name, trace_file_header) \
int init_trace_fd; \
Expand All @@ -55,13 +63,7 @@ end
end

// Defines a complete profiler inside the enclosing module:
//   1. the per-profiler function declarations, through
//      DECLARE_PROFILER_DPI_FUNCTIONS (init, exit, is_init, is_exit,
//      trace_fd, lock, unlock);
//   2. the initial block, which takes the trace file name and header;
//   3. the final block.
// Arguments:
//   profiler_name     - prefix used for the generated function names
//   trace_file_name   - passed through to the initial block
//   trace_file_header - passed through to the initial block
// The seven declarations are expanded exactly once, via the grouped macro;
// also listing them individually here would declare every function twice in
// the same scope.
`define DEFINE_PROFILER(profiler_name, trace_file_name, trace_file_header) \
  `DECLARE_PROFILER_DPI_FUNCTIONS(profiler_name) \
  `DEFINE_PROFILER_INITIAL_BLOCK(profiler_name, trace_file_name, trace_file_header) \
  `DEFINE_PROFILER_FINAL_BLOCK(profiler_name)

Expand Down
3 changes: 3 additions & 0 deletions testbenches/common/v/spmd_testbench.v
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ module spmd_testbench
`endif
`ifdef BSG_ENABLE_VANILLA_CORE_TRACE
,.enable_vanilla_core_trace_p(1)
`endif
`ifdef BSG_ENABLE_PC_HISTOGRAM
,.enable_vanilla_core_pc_histogram_p(1)
`endif
// DR: If the instance name is changed, the bind statements in the
// file where this module is defined, and header strings in the
Expand Down
Loading

0 comments on commit 841b543

Please sign in to comment.