From a8685087106662b95bef9ada2df8486b4b590dc0 Mon Sep 17 00:00:00 2001 From: Guokai Chen Date: Tue, 7 Feb 2023 16:29:39 +0800 Subject: [PATCH] difftest: lightqs runahead and oraclebp support for xs --- config/config.h | 2 +- src/test/csrc/common/branch.cpp | 262 ++++++++++++++++++++++++++++ src/test/csrc/difftest/difftest.cpp | 45 +++++ src/test/csrc/difftest/difftest.h | 1 + src/test/csrc/difftest/refproxy.cpp | 68 ++++++++ src/test/csrc/difftest/refproxy.h | 19 ++ src/test/csrc/verilator/emu.cpp | 21 ++- src/test/csrc/verilator/emu.h | 4 + src/test/vsrc/common/branch.v | 146 ++++++++++++++++ verilator.mk | 4 + 10 files changed, 567 insertions(+), 5 deletions(-) create mode 100644 src/test/csrc/common/branch.cpp create mode 100644 src/test/vsrc/common/branch.v diff --git a/config/config.h b/config/config.h index 354f76547..57a62ea8d 100644 --- a/config/config.h +++ b/config/config.h @@ -33,7 +33,7 @@ // first valid instruction's address, difftest starts from this instruction #ifndef FIRST_INST_ADDRESS -#define FIRST_INST_ADDRESS 0x10000000UL +#define FIRST_INST_ADDRESS 0x80000000UL #endif // sdcard image to be used in simulation diff --git a/src/test/csrc/common/branch.cpp b/src/test/csrc/common/branch.cpp new file mode 100644 index 000000000..bce8a3847 --- /dev/null +++ b/src/test/csrc/common/branch.cpp @@ -0,0 +1,262 @@ +#include "common.h" +#include +#include +#include +#include +#include + +#define RAMSIZE (128 * 1024 * 1024) +#define PRED_WIDTH 16 +#define MAX_BR_NUM 20000000 + +using namespace std; +int reset =1; + +#define BRANCH_TYPE_BR 0 +#define BRANCH_TYPE_JR 1 + +typedef struct branch_record { + uint64_t pc; + uint64_t target; + int taken; + int type; +} BR; + +BR *record; +BR *record_orig; +void* (*ahead_isa_query_br_log)() = NULL; + +inline void output_branch_record(BR rec, int idx) { + printf("br[%d]: pc(0x%lx), target(0x%lx), type(%d), taken(%d)\n", idx, rec.pc, rec.target, rec.type, rec.taken); +} + +inline void print_record() { + int idx = 0; + BR *rec = record; + while (rec->pc != 0) { + output_branch_record(*rec, idx); + idx++; + rec++; + } +} + +string Trim(string& str) +{ + str.erase(0,str.find_first_not_of(" \t\r\n")); + str.erase(str.find_last_not_of(" \t\r\n") + 1); + return str; +} + + +// reads branch records into global arrays +void init_branch_record(const char *branch, const uint64_t rate) { + + #ifndef LIGHTQS + if (branch == NULL) { + printf("Branch trace file not provided, oracle branch should not work!\n"); + return ; + } + // assert(branch != NULL); + record_orig = (BR *)(malloc(MAX_BR_NUM * sizeof(BR))); + // initiate the whole buffer to zero + memset((void *) record_orig, 0, MAX_BR_NUM * sizeof(BR)); + + ifstream fin(branch); + printf("Use %s as the branch golden trace\n",branch); + string line; + int idx = 0; + while (getline(fin, line)) { + istringstream sin(line); //将整行字符串line读入到字符串流istringstream中 + vector fields; //声明一个字符串向量 + string field; + while (getline(sin, field, ',')) { fields.push_back(field); } + string pc = Trim(fields[0]); + string taken = Trim(fields[1]); + string type = Trim(fields[2]); + string target = Trim(fields[3]); + stringstream ss, ss1; + ss << std::hex << pc; + ss >> record_orig[idx].pc; + ss1 << std::hex << target; + ss1 >> record_orig[idx].target; + record_orig[idx].taken = taken[0] - '0'; + record_orig[idx].type = type[0] - '0'; + idx++; + } + #endif // LIGHTQS + #ifdef LIGHTQS + record = (BR *)ahead_isa_query_br_log(); + #endif // LIGHTQS +#ifndef LIGHTQS + int miss_rate = rate; + // default set to zero + if (miss_rate < 0) miss_rate = 0; + int num_reverted = 0; + if (miss_rate > 0) { + srand( (unsigned)time( NULL ) ); + for (int i = 0; i < idx; i++) { + // revert as a rate of miss_rate + if (rand() % 100 < miss_rate) { + record[i].taken = ~record[i].taken; + num_reverted++; + } + } + } + printf("Branch miss rate is set to %d%%, totally %d predictions are reverted\n", miss_rate, num_reverted); +#endif // LIGHTQS + reset = 0; +} + +void free_branch_record(){ + // free(record); + // do not free array from so +} + +// int main() { +// init_branch_record(""); +// return 0; +// } + +// TODO: read branch record arrays using idx, and give pc for validation +// read rNum records from array starting at rIdx +// the management of rIdx is left to BPU +// we don't need rNum because we always want to read records in order +extern "C" void branch_prediction_helper( + uint64_t rIdx, + uint64_t *target1, uint64_t *target2, + uint64_t *target3, uint64_t *target4, + uint64_t *target5, uint64_t *target6, + uint64_t *target7, uint64_t *target8, + uint64_t *target9, uint64_t *target10, + uint64_t *target11, uint64_t *target12, + uint64_t *target13, uint64_t *target14, + uint64_t *target15, uint64_t *target16, + uint64_t *pc1, uint64_t *pc2, + uint64_t *pc3, uint64_t *pc4, + uint64_t *pc5, uint64_t *pc6, + uint64_t *pc7, uint64_t *pc8, + uint64_t *pc9, uint64_t *pc10, + uint64_t *pc11, uint64_t *pc12, + uint64_t *pc13, uint64_t *pc14, + uint64_t *pc15, uint64_t *pc16, + uint8_t *taken1, uint8_t *taken2, + uint8_t *taken3, uint8_t *taken4, + uint8_t *taken5, uint8_t *taken6, + uint8_t *taken7, uint8_t *taken8, + uint8_t *taken9, uint8_t *taken10, + uint8_t *taken11, uint8_t *taken12, + uint8_t *taken13, uint8_t *taken14, + uint8_t *taken15, uint8_t *taken16, + uint8_t *type1, uint8_t *type2, + uint8_t *type3, uint8_t *type4, + uint8_t *type5, uint8_t *type6, + uint8_t *type7, uint8_t *type8, + uint8_t *type9, uint8_t *type10, + uint8_t *type11, uint8_t *type12, + uint8_t *type13, uint8_t *type14, + uint8_t *type15, uint8_t *type16, + uint64_t redirectIdx, + uint64_t *redirectpc1, uint64_t *redirectpc2, uint64_t *redirectpc3, uint64_t *redirectpc4, uint64_t *redirectpc5, uint64_t *redirectpc6, uint64_t *redirectpc7, uint64_t *redirectpc8, + uint64_t *redirectpc9, uint64_t *redirectpc10, uint64_t *redirectpc11, uint64_t *redirectpc12, uint64_t *redirectpc13, uint64_t *redirectpc14, uint64_t *redirectpc15, uint64_t *redirectpc16 + ) { + static uint64_t last = 0, pre_last = 0; + if (rIdx - pre_last == 30) { + // consecutive miss + printf("Miss rIdx = %llu last = %llu\n", rIdx, last); + // exit(-1); + } + pre_last = last; + last = rIdx; + + if (reset) return; + + if (rIdx >= MAX_BR_NUM) { + printf("ERROR: branch record idx = %ld out of bound!\n", rIdx); + return; + } + if (redirectIdx >= MAX_BR_NUM) { + printf("Error: branch redirect record idx = %ld out of bound!\n", redirectIdx); + return; + } + //printf("-- rIdx :%ld pc: %lx taken:%d\n",rIdx,record[rIdx].pc,record[rIdx].taken); + *taken1 = record[rIdx].taken; + *taken2 = record[rIdx + 1].taken; + *taken3 = record[rIdx + 2].taken; + *taken4 = record[rIdx + 3].taken; + *taken5 = record[rIdx + 4].taken; + *taken6 = record[rIdx + 5].taken; + *taken7 = record[rIdx + 6].taken; + *taken8 = record[rIdx + 7].taken; + *taken9 = record[rIdx + 8].taken; + *taken10 = record[rIdx + 9].taken; + *taken11 = record[rIdx + 10].taken; + *taken12 = record[rIdx + 11].taken; + *taken13 = record[rIdx + 12].taken; + *taken14 = record[rIdx + 13].taken; + *taken15 = record[rIdx + 14].taken; + *taken16 = record[rIdx + 15].taken; + *pc1 = record[rIdx].pc; + *pc2 = record[rIdx + 1].pc; + *pc3 = record[rIdx + 2].pc; + *pc4 = record[rIdx + 3].pc; + *pc5 = record[rIdx + 4].pc; + *pc6 = record[rIdx + 5].pc; + *pc7 = record[rIdx + 6].pc; + *pc8 = record[rIdx + 7].pc; + *pc9 = record[rIdx + 8].pc; + *pc10 = record[rIdx + 9].pc; + *pc11 = record[rIdx + 10].pc; + *pc12 = record[rIdx + 11].pc; + *pc13 = record[rIdx + 12].pc; + *pc14 = record[rIdx + 13].pc; + *pc15 = record[rIdx + 14].pc; + *pc16 = record[rIdx + 15].pc; + *target1 = record[rIdx].target; + *target2 = record[rIdx + 1].target; + *target3 = record[rIdx + 2].target; + *target4 = record[rIdx + 3].target; + *target5 = record[rIdx + 4].target; + *target6 = record[rIdx + 5].target; + *target7 = record[rIdx + 6].target; + *target8 = record[rIdx + 7].target; + *target9 = record[rIdx + 8].target; + *target10 = record[rIdx + 9].target; + *target11 = record[rIdx + 10].target; + *target12 = record[rIdx + 11].target; + *target13 = record[rIdx + 12].target; + *target14 = record[rIdx + 13].target; + *target15 = record[rIdx + 14].target; + *target16 = record[rIdx + 15].target; + *type1 = record[rIdx].type; + *type2 = record[rIdx + 1].type; + *type3 = record[rIdx + 2].type; + *type4 = record[rIdx + 3].type; + *type5 = record[rIdx + 4].type; + *type6 = record[rIdx + 5].type; + *type7 = record[rIdx + 6].type; + *type8 = record[rIdx + 7].type; + *type9 = record[rIdx + 8].type; + *type10 = record[rIdx + 9].type; + *type11 = record[rIdx + 10].type; + *type12 = record[rIdx + 11].type; + *type13 = record[rIdx + 12].type; + *type14 = record[rIdx + 13].type; + *type15 = record[rIdx + 14].type; + *type16 = record[rIdx + 15].type; + *redirectpc1 = record[redirectIdx].pc; + *redirectpc2 = record[redirectIdx + 1].pc; + *redirectpc3 = record[redirectIdx + 2].pc; + *redirectpc4 = record[redirectIdx + 3].pc; + *redirectpc5 = record[redirectIdx + 4].pc; + *redirectpc6 = record[redirectIdx + 5].pc; + *redirectpc7 = record[redirectIdx + 6].pc; + *redirectpc8 = record[redirectIdx + 7].pc; + *redirectpc9 = record[redirectIdx + 8].pc; + *redirectpc10 = record[redirectIdx + 9].pc; + *redirectpc11 = record[redirectIdx + 10].pc; + *redirectpc12 = record[redirectIdx + 11].pc; + *redirectpc13 = record[redirectIdx + 12].pc; + *redirectpc14 = record[redirectIdx + 13].pc; + *redirectpc15 = record[redirectIdx + 14].pc; + *redirectpc16 = record[redirectIdx + 15].pc; +} diff --git a/src/test/csrc/difftest/difftest.cpp b/src/test/csrc/difftest/difftest.cpp index 287fc596c..4cef1cf39 100644 --- a/src/test/csrc/difftest/difftest.cpp +++ b/src/test/csrc/difftest/difftest.cpp @@ -161,9 +161,11 @@ int Difftest::step() { do_instr_commit(i); dut.commit[i].valid = 0; num_commit++; + ++total_commit; // TODO: let do_instr_commit return number of instructions in this uop if (dut.commit[i].fused) { num_commit++; + ++total_commit; } } } @@ -205,6 +207,9 @@ int Difftest::step() { void Difftest::do_interrupt() { state->record_abnormal_inst(dut.event.exceptionPC, dut.event.exceptionInst, RET_INT, dut.event.interrupt); proxy->raise_intr(dut.event.interrupt | (1ULL << 63)); + #ifdef LIGHTQS + proxy->ahead_raise_intr(dut.event.interrupt | (1ULL << 64), total_commit); + #endif // LIGHTQS progress = true; } @@ -213,12 +218,24 @@ void Difftest::do_exception() { if (dut.event.exception == 12 || dut.event.exception == 13 || dut.event.exception == 15) { // printf("exception cause: %d\n", dut.event.exception); struct ExecutionGuide guide; + #ifdef LIGHTQS + struct ExecutionGuide ahead_guide; + #endif // LIGHTQS guide.force_raise_exception = true; guide.exception_num = dut.event.exception; guide.mtval = dut.csr.mtval; guide.stval = dut.csr.stval; guide.force_set_jump_target = false; proxy->guided_exec(&guide); + + #ifdef LIGHTQS + ahead_guide.force_raise_exception = true; + ahead_guide.exception_num = dut.event.exception; + ahead_guide.mtval = dut.csr.mtval; + ahead_guide.stval = dut.csr.stval; + ahead_guide.force_set_jump_target = false; + proxy->ahead_guided_exec(&ahead_guide, total_commit); + #endif // LIGHTQS } else { #ifdef DEBUG_MODE_DIFF if(DEBUG_MEM_REGION(true, dut.event.exceptionPC)){ @@ -227,6 +244,9 @@ void Difftest::do_exception() { } #endif proxy->exec(1); + #ifdef LIGHTQS + proxy->ahead_exec(1); + #endif // LIGHTQS } progress = true; } @@ -262,6 +282,9 @@ void Difftest::do_instr_commit(int i) { struct SyncState sync; sync.lrscValid = dut.lrsc.success; proxy->uarchstatus_cpy((uint64_t*)&sync, DUT_TO_REF); // sync lr/sc microarchitectural regs + #ifdef LIGHTQS + proxy->ahead_uarchstatus_cpy((uint64_t*)&sync, DUT_TO_REF, total_commit); + #endif // LIGHTQS // clear SC instruction valid bit dut.lrsc.valid = 0; } @@ -279,6 +302,12 @@ void Difftest::do_instr_commit(int i) { ref_regs_ptr[dut.commit[i].wdest] = get_commit_data(i); // printf("Debug Mode? %x is ls? %x\n", DEBUG_MEM_REGION(dut.commit[i].valid, dut.commit[i].pc), IS_LOAD_STORE(dut.commit[i].inst)); // printf("skip %x %x %x %x %x\n", dut.commit[i].pc, dut.commit[i].inst, get_commit_data(i), dut.commit[i].wpdest, dut.commit[i].wdest); + #ifdef LIGHTQS_DEBUG + printf("total commit %d, skip pc == %lx\n", total_commit, dut.commit[i].pc); + #endif // LIGHTQS_DEBUG + #ifdef LIGHTQS + proxy->ahead_regcpy(ref_regs_ptr, DIFFTEST_TO_REF, true, total_commit); + #endif // LIGHTQS } proxy->regcpy(ref_regs_ptr, DIFFTEST_TO_REF); return; @@ -286,9 +315,16 @@ void Difftest::do_instr_commit(int i) { // single step exec proxy->exec(1); + #ifdef LIGHTQS + proxy->ahead_exec(1); + #endif // LIGHTQS + // printf("intentionally ahead exec\n"); // when there's a fused instruction, let proxy execute one more instruction. if (dut.commit[i].fused) { proxy->exec(1); + #ifdef LIGHTQS + proxy->ahead_exec(1); + #endif // LIGHTQS } // Handle load instruction carefully for SMP @@ -384,6 +420,15 @@ void Difftest::do_first_instr_commit() { // If this is main sim thread, simulator has its own initial config // If this process is checkpoint wakeuped, simulator's config has already been updated, // do not override it. + + + // run ahead part + #ifdef LIGHTQS + proxy->ahead_load_flash_bin(get_flash_path(), get_flash_size()); + proxy->ahead_memcpy(0x80000000, get_img_start(), get_img_size(), DIFFTEST_TO_REF); + proxy->ahead_regcpy(dut_regs_ptr, DIFFTEST_TO_REF, false, 0); + proxy->ahead_runahead_init(); + #endif // LIGHTQS } } diff --git a/src/test/csrc/difftest/difftest.h b/src/test/csrc/difftest/difftest.h index c1e77cceb..73e2a1b58 100644 --- a/src/test/csrc/difftest/difftest.h +++ b/src/test/csrc/difftest/difftest.h @@ -310,6 +310,7 @@ class Difftest { // Initialize difftest environments Difftest(int coreid); DIFF_PROXY *proxy = NULL; + uint64_t total_commit = 0; uint32_t num_commit = 0; // # of commits if made progress bool has_commit = false; // Trigger a difftest checking procdure diff --git a/src/test/csrc/difftest/refproxy.cpp b/src/test/csrc/difftest/refproxy.cpp index c30af752b..eea912c2e 100644 --- a/src/test/csrc/difftest/refproxy.cpp +++ b/src/test/csrc/difftest/refproxy.cpp @@ -20,6 +20,8 @@ uint8_t* goldenMem = NULL; const char *difftest_ref_so = NULL; +const char *ahead_ref_so = NULL; +extern void* (*ahead_isa_query_br_log)(); #define check_and_assert(func) \ do { \ @@ -122,6 +124,72 @@ NemuProxy::NemuProxy(int coreid, size_t ram_size = 0) { free((void *)difftest_ref_so); difftest_ref_so = nullptr; } + #ifdef LIGHTQS + const char *ahead_home = getenv(AHEAD_ENV_VARIABLE); + if (ahead_home == NULL) { + printf("FATAL: $(" AHEAD_ENV_VARIABLE ") is not defined!\n"); + printf("Runahead cannot load, exiting\n"); + exit(1); + } + const char *ahead_so = "/" NEMU_SO_FILENAME; + char *ahead_buf = (char *)malloc(strlen(ahead_home) + strlen(ahead_so) + 1); + strcpy(ahead_buf, ahead_home); + strcat(ahead_buf, ahead_so); + ahead_ref_so = ahead_buf; + void *ahead_handle = dlopen(ahead_ref_so, RTLD_LAZY | RTLD_DEEPBIND); + if(!ahead_handle){ + printf("%s\n", dlerror()); + assert(0); + } + + this->ahead_memcpy = (void (*)(paddr_t, void *, size_t, bool))dlsym(ahead_handle, "difftest_memcpy"); + check_and_assert(this->ahead_memcpy); + + ahead_regcpy = (void (*)(void *, bool, bool, uint64_t))dlsym(ahead_handle, "difftest_regcpy"); + check_and_assert(ahead_regcpy); + + ahead_csrcpy = (void (*)(void *, bool))dlsym(ahead_handle, "difftest_csrcpy"); + check_and_assert(ahead_csrcpy); + + ahead_uarchstatus_cpy = (void (*)(void *, bool, uint64_t))dlsym(ahead_handle, "difftest_uarchstatus_cpy"); + check_and_assert(ahead_uarchstatus_cpy); + + ahead_exec = (void (*)(uint64_t))dlsym(ahead_handle, "difftest_exec"); + check_and_assert(ahead_exec); + + ahead_guided_exec = (vaddr_t (*)(void *, uint64_t))dlsym(ahead_handle, "difftest_guided_exec"); + check_and_assert(ahead_guided_exec); + + ahead_update_config = (vaddr_t (*)(void *))dlsym(ahead_handle, "update_dynamic_config"); + check_and_assert(ahead_update_config); + + ahead_store_commit = (int (*)(uint64_t*, uint64_t*, uint8_t*))dlsym(ahead_handle, "difftest_store_commit"); + check_and_assert(ahead_store_commit); + + ahead_raise_intr = (void (*)(uint64_t, uint64_t))dlsym(ahead_handle, "difftest_raise_intr"); + check_and_assert(ahead_raise_intr); + + ahead_isa_reg_display = (void (*)(void))dlsym(ahead_handle, "isa_reg_display"); + check_and_assert(ahead_isa_reg_display); + + ahead_load_flash_bin = (void (*)(void *ahead_flash_bin, size_t size))dlsym(ahead_handle, "difftest_load_flash"); + check_and_assert(ahead_load_flash_bin); + + auto ahead_nemu_difftest_set_mhartid = (void (*)(int))dlsym(ahead_handle, "difftest_set_mhartid"); + if (NUM_CORES > 1) { + check_and_assert(ahead_nemu_difftest_set_mhartid); + ahead_nemu_difftest_set_mhartid(coreid); + } + + auto ahead_nemu_init = (void (*)(void))dlsym(ahead_handle, "difftest_init"); + check_and_assert(ahead_nemu_init); + + ahead_runahead_init = (void (*)(void))dlsym(ahead_handle, "difftest_runahead_init"); + + ahead_isa_query_br_log = (void *(*)(void))dlsym(ahead_handle, "difftest_query_br_log"); + + ahead_nemu_init(); + #endif // LIGHTQS } void ref_misc_put_gmaddr(uint8_t* ptr) { diff --git a/src/test/csrc/difftest/refproxy.h b/src/test/csrc/difftest/refproxy.h index 0aa9e80a5..1386269da 100644 --- a/src/test/csrc/difftest/refproxy.h +++ b/src/test/csrc/difftest/refproxy.h @@ -39,10 +39,29 @@ class RefProxy { void (*debug_mem_sync)(paddr_t addr, void *bytes, size_t size) = NULL; void (*load_flash_bin)(void *flash_bin, size_t size) = NULL; void (*set_ramsize)(size_t size) = NULL; + +#ifdef LIGHTQS + // run ahead ones + void (*ahead_memcpy)(paddr_t nemu_addr, void *dut_buf, size_t n, bool direction) = NULL; + void (*ahead_regcpy)(void *dut, bool direction, bool restore, uint64_t restore_count) = NULL; + void (*ahead_csrcpy)(void *dut, bool direction) = NULL; + void (*ahead_uarchstatus_cpy)(void *dut, bool direction, uint64_t restore_count) = NULL; + int (*ahead_store_commit)(uint64_t *saddr, uint64_t *sdata, uint8_t *smask) = NULL; + void (*ahead_exec)(uint64_t n) = NULL; + vaddr_t (*ahead_guided_exec)(void *disambiguate_para, uint64_t restore_count) = NULL; + vaddr_t (*ahead_update_config)(void *config) = NULL; + void (*ahead_raise_intr)(uint64_t no, uint64_t restore_count) = NULL; + void (*ahead_isa_reg_display)() = NULL; + void (*ahead_query)(void *result_buffer, uint64_t type) = NULL; + void (*ahead_debug_mem_sync)(paddr_t addr, void *bytes, size_t size) = NULL; + void (*ahead_load_flash_bin)(void *flash_bin, size_t size) = NULL; + void (*ahead_runahead_init)(void) = NULL; +#endif // LIGHTQS }; extern const char *difftest_ref_so; #define NEMU_ENV_VARIABLE "NEMU_HOME" +#define AHEAD_ENV_VARIABLE "AHEAD_HOME" #define NEMU_SO_FILENAME "build/riscv64-nemu-interpreter-so" class NemuProxy : public RefProxy { public: diff --git a/src/test/csrc/verilator/emu.cpp b/src/test/csrc/verilator/emu.cpp index 20b1417f0..1597c2039 100644 --- a/src/test/csrc/verilator/emu.cpp +++ b/src/test/csrc/verilator/emu.cpp @@ -74,6 +74,8 @@ static inline void print_help(const char *file) { printf(" --enable-fork enable folking child processes to debug\n"); printf(" --no-diff disable differential testing\n"); printf(" --diff=PATH set the path of REF for differential testing\n"); + printf(" -r --branch-record PATH load branch record from PATH\n"); + printf(" --miss-rate RATE revert branch direction in trace with given miss rate\n"); printf(" --enable-jtag enable remote bitbang server\n"); printf(" -h, --help print program help info\n"); printf("\n"); @@ -96,7 +98,9 @@ inline EmuArgs parse_args(int argc, const char *argv[]) { { "ram-size", 1, NULL, 0 }, { "sim-run-ahead", 0, NULL, 0 }, { "dump-db", 0, NULL, 0 }, - { "ref-trace", 0, NULL, 0 }, +#ifdef DEBUG_TILELINK + { "dump-tl", 0, NULL, 0 }, +#endif { "seed", 1, NULL, 's' }, { "max-cycles", 1, NULL, 'C' }, { "max-instr", 1, NULL, 'I' }, @@ -109,13 +113,14 @@ inline EmuArgs parse_args(int argc, const char *argv[]) { { "log-begin", 1, NULL, 'b' }, { "log-end", 1, NULL, 'e' }, { "flash", 1, NULL, 'F' }, + { "branch-record", 1, NULL, 'r' }, { "help", 0, NULL, 'h' }, { 0, 0, NULL, 0 } }; int o; while ( (o = getopt_long(argc, const_cast(argv), - "-s:C:I:T:W:hi:m:b:e:F:", long_options, &long_index)) != -1) { + "-s:C:I:T:W:hi:m:b:e:F:r:", long_options, &long_index)) != -1) { switch (o) { case 0: switch (long_index) { @@ -170,9 +175,10 @@ inline EmuArgs parse_args(int argc, const char *argv[]) { case 'W': args.warmup_instr = atoll_strict(optarg, "warmup-instr"); break; case 'D': args.stat_cycles = atoll_strict(optarg, "stat-cycles"); break; case 'i': args.image = optarg; break; - case 'b': args.log_begin = atoll_strict(optarg, "log-begin"); break; - case 'e': args.log_end = atoll_strict(optarg, "log-end"); break; + case 'b': args.log_begin = atoll(optarg); break; + case 'e': args.log_end = atoll(optarg); break; case 'F': args.flash_bin = optarg; break; + case 'r': args.branch_record = optarg; break; } } @@ -267,6 +273,9 @@ Emulator::~Emulator() { } flash_finish(); + extern void free_branch_record(); + free_branch_record(); + #ifdef VM_SAVABLE if (args.enable_snapshot && trapCode != STATE_GOODTRAP && trapCode != STATE_LIMIT_EXCEEDED) { printf("Saving snapshots to file system. Please wait.\n"); @@ -347,6 +356,10 @@ uint64_t Emulator::execute(uint64_t max_cycle, uint64_t max_instr) { runahead_init(); } + // init branch record for oracle bp + extern void init_branch_record(const char *br, const uint64_t rate); + init_branch_record(args.branch_record, args.branch_miss_rate); + #ifdef DEBUG_REFILL difftest[0]->save_track_instr(args.track_instr); #endif diff --git a/src/test/csrc/verilator/emu.h b/src/test/csrc/verilator/emu.h index eacdc28d6..51db1091e 100644 --- a/src/test/csrc/verilator/emu.h +++ b/src/test/csrc/verilator/emu.h @@ -49,6 +49,8 @@ struct EmuArgs { const char *wave_path; const char *ram_size; const char *flash_bin; + const char *branch_record; + uint64_t branch_miss_rate; bool enable_waveform; bool enable_snapshot; bool force_dump_result; @@ -75,6 +77,8 @@ struct EmuArgs { ram_size = NULL; image = NULL; flash_bin = NULL; + branch_record = NULL; + branch_miss_rate = 0; enable_waveform = false; enable_snapshot = true; force_dump_result = false; diff --git a/src/test/vsrc/common/branch.v b/src/test/vsrc/common/branch.v new file mode 100644 index 000000000..3dbc46fe8 --- /dev/null +++ b/src/test/vsrc/common/branch.v @@ -0,0 +1,146 @@ +import "DPI-C" function void branch_prediction_helper +( + input longint rIdx, + output longint target1, + output longint target2, + output longint target3, + output longint target4, + output longint target5, + output longint target6, + output longint target7, + output longint target8, + output longint target9, + output longint target10, + output longint target11, + output longint target12, + output longint target13, + output longint target14, + output longint target15, + output longint target16, + output longint pc1, + output longint pc2, + output longint pc3, + output longint pc4, + output longint pc5, + output longint pc6, + output longint pc7, + output longint pc8, + output longint pc9, + output longint pc10, + output longint pc11, + output longint pc12, + output longint pc13, + output longint pc14, + output longint pc15, + output longint pc16, + output bit taken1, + output bit taken2, + output bit taken3, + output bit taken4, + output bit taken5, + output bit taken6, + output bit taken7, + output bit taken8, + output bit taken9, + output bit taken10, + output bit taken11, + output bit taken12, + output bit taken13, + output bit taken14, + output bit taken15, + output bit taken16, + output bit type1, + output bit type2, + output bit type3, + output bit type4, + output bit type5, + output bit type6, + output bit type7, + output bit type8, + output bit type9, + output bit type10, + output bit type11, + output bit type12, + output bit type13, + output bit type14, + output bit type15, + output bit type16, + + input longint redirectIdx, + output longint redirectpc1, + output longint redirectpc2, + output longint redirectpc3, + output longint redirectpc4, + output longint redirectpc5, + output longint redirectpc6, + output longint redirectpc7, + output longint redirectpc8, + output longint redirectpc9, + output longint redirectpc10, + output longint redirectpc11, + output longint redirectpc12, + output longint redirectpc13, + output longint redirectpc14, + output longint redirectpc15, + output longint redirectpc16 +); + +module BranchPredictionHelper( + input [63:0] rIdx, + output [64*16-1:0] pc, + output [15:0] taken, + output [64*16-1:0] target, + output [15:0] brtype, + input [63:0] redirectIdx, + output [64*16-1:0] redirectpc +); + + always @(*) begin + branch_prediction_helper( + rIdx, + target[63:0], target[64*2-1:64*1], + target[64*3-1:64*2], target[64*4-1:64*3], + target[64*5-1:64*4], target[64*6-1:64*5], + target[64*7-1:64*6], target[64*8-1:64*7], + target[64*9-1:64*8], target[64*10-1:64*9], + target[64*11-1:64*10], target[64*12-1:64*11], + target[64*13-1:64*12], target[64*14-1:64*13], + target[64*15-1:64*14], target[64*16-1:64*15], + pc[63:0], pc[64*2-1:64*1], + pc[64*3-1:64*2], pc[64*4-1:64*3], + pc[64*5-1:64*4], pc[64*6-1:64*5], + pc[64*7-1:64*6], pc[64*8-1:64*7], + pc[64*9-1:64*8], pc[64*10-1:64*9], + pc[64*11-1:64*10], pc[64*12-1:64*11], + pc[64*13-1:64*12], pc[64*14-1:64*13], + pc[64*15-1:64*14], pc[64*16-1:64*15], + taken[0], taken[1], + taken[2], taken[3], + taken[4], taken[5], + taken[6], taken[7], + taken[8], taken[9], + taken[10], taken[11], + taken[12], taken[13], + taken[14], taken[15], + brtype[0], brtype[1], + brtype[2], brtype[3], + brtype[4], brtype[5], + brtype[6], brtype[7], + brtype[8], brtype[9], + brtype[10], brtype[11], + brtype[12], brtype[13], + brtype[14], brtype[15], + redirectIdx, + redirectpc[63:0], redirectpc[64*2-1:64*1], + redirectpc[64*3-1:64*2], redirectpc[64*4-1:64*3], + redirectpc[64*5-1:64*4], redirectpc[64*6-1:64*5], + redirectpc[64*7-1:64*6], redirectpc[64*8-1:64*7], + redirectpc[64*9-1:64*8], redirectpc[64*10-1:64*9], + redirectpc[64*11-1:64*10], redirectpc[64*12-1:64*11], + redirectpc[64*13-1:64*12], redirectpc[64*14-1:64*13], + redirectpc[64*15-1:64*14], redirectpc[64*16-1:64*15] + ); + end + +endmodule + diff --git a/verilator.mk b/verilator.mk index 9f9fb7f4e..a42febd31 100644 --- a/verilator.mk +++ b/verilator.mk @@ -37,6 +37,10 @@ else CONSTANTIN_SRC = endif +ifeq ($(WITH_LIGHTQS), 1) +EMU_CXXFLAGS +=-DLIGHTQS +endif + EMU_LDFLAGS += -lpthread -lSDL2 -ldl -lz -lsqlite3 EMU_CXX_EXTRA_FLAGS ?=