Merge pull request #19240 from hrydgard/ir-arena
Store IR instructions in a bump-allocated vector instead of loose allocations
hrydgard authored Jun 7, 2024
2 parents 3bbcde3 + a4c4fb4 commit 55fecce
Showing 15 changed files with 183 additions and 100 deletions.
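
The heart of the change: instead of each IRBlock owning a separately allocated instruction array, all IR instructions now live in one shared, append-only arena, and each block records only an offset and count into it. A minimal standalone sketch of the pattern, under illustrative names (Inst and BlockArena are stand-ins, not PPSSPP's actual types):

#include <cstdint>
#include <vector>

// Illustrative stand-ins for PPSSPP's IRInst/IRBlock, not the real types.
struct Inst { uint32_t encoded; };

class BlockArena {
public:
    // Appends a block's instructions to the shared arena and returns the
    // new block number, or -1 once the 24-bit offset space is exhausted.
    int Allocate(const std::vector<Inst> &insts) {
        const size_t kMaxArenaSize = 0x1000000 - 1;  // offsets must fit in 24 bits
        size_t offset = arena_.size();
        if (offset >= kMaxArenaSize)
            return -1;
        arena_.insert(arena_.end(), insts.begin(), insts.end());
        blocks_.push_back(Block{(uint32_t)offset, (uint16_t)insts.size()});
        return (int)blocks_.size() - 1;
    }

    // Blocks own no storage; instruction pointers are resolved on demand,
    // much like GetBlockInstructionPtr() in the diff below.
    const Inst *InstructionPtr(int blockNum) const {
        return arena_.data() + blocks_[blockNum].offset;
    }

private:
    struct Block { uint32_t offset; uint16_t count; };
    std::vector<Inst> arena_;    // one contiguous, bump-allocated store
    std::vector<Block> blocks_;  // per-block (offset, count) records
};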
14 changes: 9 additions & 5 deletions Core/MIPS/ARM64/Arm64IRJit.cpp
@@ -65,10 +65,11 @@ static void NoBlockExits() {
_assert_msg_(false, "Never exited block, invalid IR?");
}

bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
bool Arm64JitBackend::CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) {
if (GetSpaceLeft() < 0x800)
return false;

IRBlock *block = irBlockCache->GetBlock(block_num);
BeginWrite(std::min(GetSpaceLeft(), (size_t)block->GetNumInstructions() * 32));

u32 startPC = block->GetOriginalStart();
@@ -92,12 +93,13 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
compilingBlockNum_ = block_num;
lastConstPC_ = 0;

regs_.Start(block);
regs_.Start(irBlockCache, block_num);

std::vector<const u8 *> addresses;
addresses.reserve(block->GetNumInstructions());
const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
for (int i = 0; i < block->GetNumInstructions(); ++i) {
const IRInst &inst = block->GetInstructions()[i];
const IRInst &inst = instructions[i];
regs_.SetIRIndex(i);
addresses.push_back(GetCodePtr());

@@ -156,10 +158,11 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
addressesLookup[addresses[i]] = i;

INFO_LOG(JIT, "=============== ARM64 (%08x, %d bytes) ===============", startPC, len);
const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
for (const u8 *p = blockStart; p < GetCodePointer(); ) {
auto it = addressesLookup.find(p);
if (it != addressesLookup.end()) {
const IRInst &inst = block->GetInstructions()[it->second];
const IRInst &inst = instructions[it->second];

char temp[512];
DisassembleIR(temp, sizeof(temp), inst);
@@ -319,7 +322,8 @@ void Arm64JitBackend::ClearAllBlocks() {
EraseAllLinks(-1);
}

void Arm64JitBackend::InvalidateBlock(IRBlock *block, int block_num) {
void Arm64JitBackend::InvalidateBlock(IRBlockCache *irBlockCache, int block_num) {
IRBlock *block = irBlockCache->GetBlock(block_num);
int offset = block->GetTargetOffset();
u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + offset;

4 changes: 2 additions & 2 deletions Core/MIPS/ARM64/Arm64IRJit.h
@@ -40,9 +40,9 @@ class Arm64JitBackend : public Arm64Gen::ARM64CodeBlock, public IRNativeBackend
bool DescribeCodePtr(const u8 *ptr, std::string &name) const override;

void GenerateFixedCode(MIPSState *mipsState) override;
bool CompileBlock(IRBlock *block, int block_num, bool preload) override;
bool CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) override;
void ClearAllBlocks() override;
void InvalidateBlock(IRBlock *block, int block_num) override;
void InvalidateBlock(IRBlockCache *irBlockCache, int block_num) override;

void UpdateFCR31(MIPSState *mipsState) override;

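
The signature changes above — backends now take an IRBlockCache* and a block number instead of a raw IRBlock* — presumably follow from the new storage: blocks live inside a growable vector, so a pointer held across an allocation can dangle. A small hedged illustration of that hazard (illustrative types only, not PPSSPP code):

#include <cstdint>
#include <vector>

struct Block { uint32_t start; };

int main() {
    std::vector<Block> blocks;
    blocks.push_back(Block{0x08804000});
    Block *raw = &blocks[0];            // risky to hold across allocations
    int block_num = 0;                  // safe to hold: an index, not a pointer
    blocks.resize(1000);                // may reallocate; 'raw' can now dangle
    Block *fresh = &blocks[block_num];  // re-resolve by index at the point of use
    (void)raw;
    (void)fresh;
    return 0;
}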
2 changes: 2 additions & 0 deletions Core/MIPS/IR/IRInst.cpp
@@ -184,6 +184,8 @@ static const IRMeta irMeta[] = {
const IRMeta *metaIndex[256];

void InitIR() {
if (metaIndex[0])
return;
for (size_t i = 0; i < ARRAY_SIZE(irMeta); i++) {
metaIndex[(int)irMeta[i].op] = &irMeta[i];
}
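
The IRInst.cpp change makes InitIR() idempotent, so each IRJit instance can call it safely (IRJit's constructor does, per the next file). A minimal sketch of the same one-time-init guard under illustrative names; like the real table, it assumes some entry populates slot 0:

#include <cstddef>

struct Meta { unsigned op; };
static const Meta kMeta[] = { {0}, {1}, {2} };
static const Meta *kIndex[256];

void InitTables() {
    if (kIndex[0])  // slot 0 doubles as the "already initialized" flag
        return;
    for (size_t i = 0; i < sizeof(kMeta) / sizeof(kMeta[0]); i++)
        kIndex[kMeta[i].op] = &kMeta[i];
}

int main() {
    InitTables();
    InitTables();  // second call is a harmless no-op
    return 0;
}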
119 changes: 88 additions & 31 deletions Core/MIPS/IR/IRJit.cpp
@@ -46,7 +46,6 @@ namespace MIPSComp {

IRJit::IRJit(MIPSState *mipsState) : frontend_(mipsState->HasDefaultPrefix()), mips_(mipsState) {
// u32 size = 128 * 1024;
// blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline");
InitIR();

jo.optimizeForInterpreter = true;
@@ -91,7 +90,7 @@ void IRJit::InvalidateCacheAt(u32 em_address, int length) {
std::vector<int> numbers = blocks_.FindInvalidatedBlockNumbers(em_address, length);
for (int block_num : numbers) {
auto block = blocks_.GetBlock(block_num);
int cookie = block->GetTargetOffset() < 0 ? block_num : block->GetTargetOffset();
int cookie = block->GetTargetOffset() < 0 ? block->GetInstructionOffset() : block->GetTargetOffset();
block->Destroy(cookie);
}
}
@@ -103,13 +102,13 @@ void IRJit::Compile(u32 em_address) {
// Look to see if we've preloaded this block.
int block_num = blocks_.FindPreloadBlock(em_address);
if (block_num != -1) {
IRBlock *b = blocks_.GetBlock(block_num);
IRBlock *block = blocks_.GetBlock(block_num);
// Okay, let's link and finalize the block now.
int cookie = b->GetTargetOffset() < 0 ? block_num : b->GetTargetOffset();
b->Finalize(cookie);
if (b->IsValid()) {
int cookie = block->GetTargetOffset() < 0 ? block->GetInstructionOffset() : block->GetTargetOffset();
block->Finalize(cookie);
if (block->IsValid()) {
// Success, we're done.
FinalizeTargetBlock(b, block_num);
FinalizeTargetBlock(&blocks_, block_num);
return;
}
}
@@ -139,27 +138,25 @@ bool IRJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32
return preload;
}

int block_num = blocks_.AllocateBlock(em_address);
int block_num = blocks_.AllocateBlock(em_address, mipsBytes, instructions);
if ((block_num & ~MIPS_EMUHACK_VALUE_MASK) != 0) {
WARN_LOG(JIT, "Failed to allocate block for %08x (%d instructions)", em_address, (int)instructions.size());
// Out of block numbers. Caller will handle.
return false;
}

IRBlock *b = blocks_.GetBlock(block_num);
b->SetInstructions(instructions);
b->SetOriginalAddrSize(em_address, mipsBytes);
if (preload) {
// Hash, then only update page stats, don't link yet.
// TODO: Should we always hash? Then we can reuse blocks.
b->UpdateHash();
}
if (!CompileTargetBlock(b, block_num, preload))
if (!CompileTargetBlock(&blocks_, block_num, preload))
return false;
// Overwrites the first instruction, and also updates stats.
blocks_.FinalizeBlock(block_num, preload);
if (!preload)
FinalizeTargetBlock(b, block_num);

FinalizeTargetBlock(&blocks_, block_num);
return true;
}

@@ -257,20 +254,21 @@ void IRJit::RunLoopUntil(u64 globalticks) {
u32 inst = Memory::ReadUnchecked_U32(mips->pc);
u32 opcode = inst & 0xFF000000;
if (opcode == MIPS_EMUHACK_OPCODE) {
IRBlock *block = blocks_.GetBlockUnchecked(inst & 0xFFFFFF);

u32 offset = inst & 0x00FFFFFF; // Alternatively, inst - opcode
#ifdef IR_PROFILING
{
TimeSpan span;
mips->pc = IRInterpret(mips, block->GetInstructions());
block->profileStats_.executions += 1;
block->profileStats_.totalNanos += span.ElapsedNanos();
}
IRBlock *block = blocks_.GetBlock(blocks_.GetBlockNumFromOffset(offset));
TimeSpan span;
mips->pc = IRInterpret(mips, blocks_.GetArenaPtr() + offset);
int64_t elapsedNanos = span.ElapsedNanos();
block->profileStats_.executions += 1;
block->profileStats_.totalNanos += elapsedNanos;
#else
mips->pc = IRInterpret(mips, block->GetInstructions());
mips->pc = IRInterpret(mips, blocks_.GetArenaPtr() + offset);
#endif
// Note: this will "jump to zero" on a badly constructed block missing exits.
if (!Memory::IsValid4AlignedAddress(mips->pc)) {
int blockNum = blocks_.GetBlockNumFromOffset(offset);
IRBlock *block = blocks_.GetBlockUnchecked(blockNum);
Core_ExecException(mips->pc, block->GetOriginalStart(), ExecExceptionType::JUMP);
break;
}
@@ -300,11 +298,68 @@ void IRJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {

void IRBlockCache::Clear() {
for (int i = 0; i < (int)blocks_.size(); ++i) {
int cookie = blocks_[i].GetTargetOffset() < 0 ? i : blocks_[i].GetTargetOffset();
int cookie = blocks_[i].GetTargetOffset() < 0 ? blocks_[i].GetInstructionOffset() : blocks_[i].GetTargetOffset();
blocks_[i].Destroy(cookie);
}
blocks_.clear();
byPage_.clear();
arena_.clear();
arena_.shrink_to_fit();
}

IRBlockCache::IRBlockCache() {
// For whatever reason, reserving up front makes things slower. Probably just a CPU cache alignment fluke.
// arena_.reserve(1024 * 1024 * 2);
}

int IRBlockCache::AllocateBlock(int emAddr, u32 origSize, const std::vector<IRInst> &inst) {
// We have 24 bits to represent offsets with.
const u32 MAX_ARENA_SIZE = 0x1000000 - 1;
int offset = (int)arena_.size();
if (offset >= MAX_ARENA_SIZE) {
WARN_LOG(JIT, "Filled JIT arena, restarting");
return -1;
}
for (int i = 0; i < (int)inst.size(); i++) {
arena_.push_back(inst[i]);
}
blocks_.push_back(IRBlock(emAddr, origSize, offset, (u16)inst.size()));
return (int)blocks_.size() - 1;
}

int IRBlockCache::GetBlockNumFromOffset(int offset) const {
// Block offsets are always in rising order (we don't go back and replace them when invalidated). So we can binary search.
int low = 0;
int high = (int)blocks_.size() - 1;
int found = -1;
while (low <= high) {
int mid = low + (high - low) / 2;
const int blockOffset = blocks_[mid].GetInstructionOffset();
if (blockOffset == offset) {
found = mid;
break;
}
if (blockOffset < offset) {
low = mid + 1;
} else {
high = mid - 1;
}
}

#ifndef _DEBUG
return found;
#else
// In debug builds, cross-check the result against a linear scan.
// TODO: Optimize if we need to call this often.
for (int i = 0; i < (int)blocks_.size(); i++) {
if (blocks_[i].GetInstructionOffset() == offset) {
_dbg_assert_(i == found);
return i;
}
}
#endif
_dbg_assert_(found == -1);
return -1;
}

std::vector<int> IRBlockCache::FindInvalidatedBlockNumbers(u32 address, u32 length) {
Expand All @@ -331,7 +386,7 @@ std::vector<int> IRBlockCache::FindInvalidatedBlockNumbers(u32 address, u32 leng

void IRBlockCache::FinalizeBlock(int i, bool preload) {
if (!preload) {
int cookie = blocks_[i].GetTargetOffset() < 0 ? i : blocks_[i].GetTargetOffset();
int cookie = blocks_[i].GetTargetOffset() < 0 ? blocks_[i].GetInstructionOffset() : blocks_[i].GetTargetOffset();
blocks_[i].Finalize(cookie);
}

Expand Down Expand Up @@ -372,16 +427,18 @@ int IRBlockCache::FindPreloadBlock(u32 em_address) {
int IRBlockCache::FindByCookie(int cookie) {
if (blocks_.empty())
return -1;

// TODO: Maybe a flag to determine target offset mode?
if (blocks_[0].GetTargetOffset() < 0)
return cookie;
return GetBlockNumFromOffset(cookie);

// TODO: Now that we are using offsets in pure IR mode too, we can probably unify
// the two paradigms. Or actually no, we still need two offsets.
for (int i = 0; i < GetNumBlocks(); ++i) {
int offset = blocks_[i].GetTargetOffset();
if (offset == cookie)
return i;
}

return -1;
}

@@ -391,7 +448,7 @@ std::vector<u32> IRBlockCache::SaveAndClearEmuHackOps() {

for (int number = 0; number < (int)blocks_.size(); ++number) {
IRBlock &b = blocks_[number];
int cookie = b.GetTargetOffset() < 0 ? number : b.GetTargetOffset();
int cookie = b.GetTargetOffset() < 0 ? b.GetInstructionOffset() : b.GetTargetOffset();
if (b.IsValid() && b.RestoreOriginalFirstOp(cookie)) {
result[number] = number;
} else {
@@ -412,7 +469,7 @@ void IRBlockCache::RestoreSavedEmuHackOps(const std::vector<u32> &saved) {
IRBlock &b = blocks_[number];
// Only if we restored it, write it back.
if (b.IsValid() && saved[number] != 0 && b.HasOriginalFirstOp()) {
int cookie = b.GetTargetOffset() < 0 ? number : b.GetTargetOffset();
int cookie = b.GetTargetOffset() < 0 ? b.GetInstructionOffset() : b.GetTargetOffset();
b.Finalize(cookie);
}
}
@@ -434,8 +491,9 @@ JitBlockDebugInfo IRBlockCache::GetBlockDebugInfo(int blockNum) const {
}

debugInfo.irDisasm.reserve(ir.GetNumInstructions());
const IRInst *instructions = GetBlockInstructionPtr(ir);
for (int i = 0; i < ir.GetNumInstructions(); i++) {
IRInst inst = ir.GetInstructions()[i];
IRInst inst = instructions[i];
char buffer[256];
DisassembleIR(buffer, sizeof(buffer), inst);
debugInfo.irDisasm.push_back(buffer);
@@ -448,10 +506,9 @@ void IRBlockCache::ComputeStats(BlockCacheStats &bcStats) const {
double maxBloat = 0.0;
double minBloat = 1000000000.0;
for (const auto &b : blocks_) {
double codeSize = (double)b.GetNumInstructions() * sizeof(IRInst);
double codeSize = (double)b.GetNumInstructions() * 4; // Count bloat in instructions, not bytes (the old factor was sizeof(IRInst)).
if (codeSize == 0)
continue;

u32 origAddr, mipsBytes;
b.GetRange(origAddr, mipsBytes);
double origSize = (double)mipsBytes;
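
Tying the pieces together: emuhack ops now embed a 24-bit arena offset, and GetBlockNumFromOffset() above recovers the owning block by binary search, which works because offsets are appended in strictly rising order and never replaced in place. A standalone sketch of that lookup, mirroring the diff's logic with illustrative types:

#include <cassert>
#include <cstdint>
#include <vector>

struct Block { uint32_t instructionOffset; };

// Offsets only ever grow, so binary search over the block list
// recovers a block number from an instruction offset.
int BlockNumFromOffset(const std::vector<Block> &blocks, uint32_t offset) {
    int low = 0, high = (int)blocks.size() - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;
        uint32_t blockOffset = blocks[mid].instructionOffset;
        if (blockOffset == offset)
            return mid;
        if (blockOffset < offset)
            low = mid + 1;
        else
            high = mid - 1;
    }
    return -1;  // no block starts exactly at this offset
}

int main() {
    std::vector<Block> blocks{{0}, {12}, {40}};
    assert(BlockNumFromOffset(blocks, 12) == 1);
    assert(BlockNumFromOffset(blocks, 13) == -1);
    return 0;
}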
