Skip to content

Commit

Permalink
PPU Analyzer: Analyze whole segment 0 when patches are applied
Browse files Browse the repository at this point in the history
Improve greedy instruction search.
  • Loading branch information
elad335 committed Aug 25, 2023
1 parent f579a52 commit 2f8a67c
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 49 deletions.
2 changes: 2 additions & 0 deletions rpcs3/Emu/Cell/Modules/cellPad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ void pad_info::save(utils::serial& ar)
USING_SERIALIZATION_VERSION(sys_io);

ar(max_connect, port_setting);

sys_io_serialize(ar);
}

extern void send_sys_io_connect_event(u32 index, u32 state);
Expand Down
124 changes: 97 additions & 27 deletions rpcs3/Emu/Cell/PPUAnalyser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1390,9 +1390,9 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
block.second = _ptr.addr() - block.first;
break;
}
else if (type == ppu_itype::TW || type == ppu_itype::TWI || type == ppu_itype::TD || type == ppu_itype::TDI)
else if (type & ppu_itype::trap)
{
if (op.opcode != ppu_instructions::TRAP())
if (op.bo != 31)
{
add_block(_ptr.addr());
}
Expand Down Expand Up @@ -1618,6 +1618,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
end = 0;
}

u32 per_instruction_bytes = 0;

for (auto&& [_, func] : as_rvalue(fmap))
{
if (func.attr & ppu_attr::no_size && entry)
Expand All @@ -1636,6 +1638,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
block.attr = ppu_attr::no_size;
}

per_instruction_bytes += utils::sub_saturate<u32>(lim, func.addr);
continue;
}

Expand Down Expand Up @@ -1716,11 +1719,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
u32 exp = start;
u32 lim = end;

// Start with full scan (disabled for PRX for now)
if (entry)
{
block_queue.emplace_back(exp, lim);
}
// Start with full scan
block_queue.emplace_back(exp, lim);

// Add entries from patches (on per-instruction basis)
for (u32 addr : applied)
Expand Down Expand Up @@ -1754,14 +1754,17 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
{
u32 i_pos = exp;

u32 block_edges[16];
u32 edge_count = 0;

bool is_good = true;
bool is_fallback = true;

for (; i_pos < lim; i_pos += 4)
{
const u32 opc = get_ref<u32>(i_pos);
const ppu_opcode_t op{get_ref<u32>(i_pos)};

switch (auto type = s_ppu_itype.decode(opc))
switch (auto type = s_ppu_itype.decode(op.opcode))
{
case ppu_itype::UNK:
case ppu_itype::ECIWX:
Expand All @@ -1771,10 +1774,20 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
is_good = false;
break;
}
case ppu_itype::TD:
case ppu_itype::TDI:
case ppu_itype::TW:
case ppu_itype::TWI:
{
if (op.ra == 1u || op.ra == 13u || op.ra == 2u)
{
// Non-user registers, checking them against a constant value makes no sense
is_good = false;
break;
}

[[fallthrough]];
}
case ppu_itype::TD:
case ppu_itype::TW:
case ppu_itype::B:
case ppu_itype::BC:
{
Expand All @@ -1785,14 +1798,14 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b

if (type == ppu_itype::B || type == ppu_itype::BC)
{
if (entry == 0 && ppu_opcode_t{opc}.aa)
if (entry == 0 && op.aa)
{
// Ignore absolute branches in PIC (PRX)
is_good = false;
break;
}

const u32 target = (opc & 2 ? 0 : i_pos) + (type == ppu_itype::B ? +ppu_opcode_t{opc}.bt24 : +ppu_opcode_t{opc}.bt14);
const u32 target = (op.aa ? 0 : i_pos) + (type == ppu_itype::B ? +op.bt24 : +op.bt14);

if (target < segs[0].addr || target >= segs[0].addr + segs[0].size)
{
Expand All @@ -1801,9 +1814,43 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
break;
}

const ppu_opcode_t test_op{get_ref<u32>(target)};
const auto type0 = s_ppu_itype.decode(test_op.opcode);

if (type0 == ppu_itype::UNK)
{
is_good = false;
break;
}

// Test another instruction just in case (testing more is unlikely to improve results by much)
if (!(type0 & ppu_itype::branch))
{
if (target + 4 >= segs[0].addr + segs[0].size)
{
is_good = false;
break;
}

const auto type1 = s_ppu_itype.decode(get_ref<u32>(target + 4));

if (type1 == ppu_itype::UNK)
{
is_good = false;
break;
}
}
else if (u32 target0 = (test_op.aa ? 0 : target) + (type == ppu_itype::B ? +test_op.bt24 : +test_op.bt14);
target0 < segs[0].addr || target0 >= segs[0].addr + segs[0].size)
{
// Sanity check
is_good = false;
break;
}

if (target != i_pos && !fmap.contains(target))
{
if (block_set.count(target) == 0)
if (block_set.count(target) == 0 && std::count(block_edges, block_edges + edge_count, target) == 0)
{
ppu_log.trace("Block target found: 0x%x (i_pos=0x%x)", target, i_pos);
block_queue.emplace_back(target, 0);
Expand All @@ -1818,27 +1865,38 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
case ppu_itype::BCLR:
case ppu_itype::SC:
{
if (type == ppu_itype::SC && opc != ppu_instructions::SC(0))
if (type == ppu_itype::SC && op.opcode != ppu_instructions::SC(0))
{
// Strict garbage filter
is_good = false;
break;
}

if (type == ppu_itype::BCCTR && opc & 0xe000)
if (type == ppu_itype::BCCTR && op.opcode & 0xe000)
{
// Garbage filter
is_good = false;
break;
}

if (type == ppu_itype::BCLR && opc & 0xe000)
if (type == ppu_itype::BCLR && op.opcode & 0xe000)
{
// Garbage filter
is_good = false;
break;
}

if ((type & ppu_itype::branch && op.lk) || type & ppu_itype::trap || type == ppu_itype::BC)
{
// if farther instructions are valid: register all blocks
// Otherwise, register none (all or nothing)
if (edge_count < std::size(block_edges))
{
block_edges[edge_count++] = i_pos + 4;
continue;
}
}

// Good block terminator found, add single block
break;
}
Expand Down Expand Up @@ -1869,17 +1927,23 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b

if (is_good)
{
auto& block = fmap[exp];

if (!block.addr)
for (u32 it = 0, prev_addr = exp; it <= edge_count; it++)
{
block.addr = exp;
block.size = i_pos - exp;
ppu_log.trace("Block __0x%x added (size=0x%x)", block.addr, block.size);
const u32 block_end = it < edge_count ? block_edges[it] : i_pos;
const u32 block_begin = std::exchange(prev_addr, block_end);

auto& block = fmap[block_begin];

if (get_limit(exp) == end)
if (!block.addr)
{
block.attr += ppu_attr::no_size;
block.addr = block_begin;
block.size = block_end - block_begin;
ppu_log.trace("Block __0x%x added (size=0x%x)", block.addr, block.size);

if (get_limit(block_begin) == end)
{
block.attr += ppu_attr::no_size;
}
}
}
}
Expand All @@ -1902,9 +1966,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
// Convert map to vector (destructive)
for (auto&& [_, block] : as_rvalue(std::move(fmap)))
{
if (block.attr & ppu_attr::no_size && block.size > 4 && entry)
if (block.attr & ppu_attr::no_size && block.size > 4)
{
// Disabled for PRX for now
ppu_log.warning("Block 0x%x will be compiled on per-instruction basis (size=0x%x)", block.addr, block.size);

for (u32 addr = block.addr; addr < block.addr + block.size; addr += 4)
Expand All @@ -1916,12 +1979,19 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
i.attr = ppu_attr::no_size;
}

per_instruction_bytes += block.size;
continue;
}

funcs.emplace_back(std::move(block));
}

if (per_instruction_bytes)
{
const bool error = per_instruction_bytes >= 200 && per_instruction_bytes / 4 >= utils::aligned_div<u32>(funcs.size(), 128);
(error ? ppu_log.error : ppu_log.notice)("%d instructions will be compiled on per-instruction basis in total", per_instruction_bytes / 4);
}

ppu_log.notice("Block analysis: %zu blocks (%zu enqueued)", funcs.size(), block_queue.size());
return true;
}
Expand Down
15 changes: 11 additions & 4 deletions rpcs3/Emu/Cell/PPUAnalyser.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ struct ppu_pattern_matrix
struct ppu_itype
{
static constexpr struct branch_tag{} branch{}; // Branch Instructions
static constexpr struct trap_tag{} trap{}; // Branch Instructions

enum type
{
Expand Down Expand Up @@ -425,8 +426,6 @@ struct ppu_itype
VUPKLSB,
VUPKLSH,
VXOR,
TDI,
TWI,
MULLI,
SUBFIC,
CMPLI,
Expand Down Expand Up @@ -461,7 +460,6 @@ struct ppu_itype
RLDCL,
RLDCR,
CMP,
TW,
LVSL,
LVEBX,
SUBFC,
Expand All @@ -488,7 +486,6 @@ struct ppu_itype
LWZUX,
CNTLZD,
ANDC,
TD,
LVEWX,
MULHD,
MULHW,
Expand Down Expand Up @@ -784,6 +781,11 @@ struct ppu_itype
BC,
BCLR,
BCCTR, // branch_tag last

TD, // trap_tag first
TW,
TDI,
TWI, // trap_tag last
};

// Enable address-of operator for ppu_decoder<>
Expand All @@ -796,6 +798,11 @@ struct ppu_itype
{
return value >= B && value <= BCCTR;
}

friend constexpr bool operator &(type value, trap_tag)
{
return value >= TD && value <= TWI;
}
};

struct ppu_iname
Expand Down
Loading

0 comments on commit 2f8a67c

Please sign in to comment.