From 84067b7d97c3b04826701d82192c516dcc299a3b Mon Sep 17 00:00:00 2001 From: rafradek Date: Sun, 9 Jun 2024 00:46:52 +0200 Subject: [PATCH] More fixes for 64 bit --- AMBuilder | 1 + src/abi.h | 13 +- src/mem/detour.cpp | 296 +---------------------------- src/mem/func_copy.cpp | 278 +++++++++++++++++++++++++++ src/mem/func_copy.h | 24 +++ src/mem/patch.cpp | 11 ++ src/mem/patch.h | 19 +- src/mod/perf/hltv_optimize.cpp | 15 +- src/mod/perf/sendprop_optimize.cpp | 17 +- src/stub/server.cpp | 6 +- src/stub/server.h | 17 +- 11 files changed, 374 insertions(+), 323 deletions(-) create mode 100644 src/mem/func_copy.cpp create mode 100644 src/mem/func_copy.h diff --git a/AMBuilder b/AMBuilder index 2060d1c4..275e4028 100644 --- a/AMBuilder +++ b/AMBuilder @@ -42,6 +42,7 @@ sourceFiles += [ 'src/mem/alloc.cpp', 'src/mem/detour.cpp', + 'src/mem/func_copy.cpp', 'src/mem/extract.cpp', 'src/mem/hook.cpp', 'src/mem/patch.cpp', diff --git a/src/abi.h b/src/abi.h index d3ebf31f..361a13df 100644 --- a/src/abi.h +++ b/src/abi.h @@ -124,6 +124,11 @@ template using MemberPtrTypeConst = R template using MemberPtrTypeVa = RET (C::*)(PARAMS..., ...); template using MemberPtrTypeVaConst = RET (C::*)(PARAMS..., ...) const; +#if defined __GNUC__ && !defined __clang__ && !defined PLATFORM_64BITS +template using MemberPtrTypeRegcall = RET (C::*)(PARAMS...) __gcc_regcall; +template using MemberPtrTypeConstRegcall = RET (C::*)(PARAMS...) const __gcc_regcall; +#endif + #if defined __clang__ #error TODO @@ -183,7 +188,13 @@ inline void *GetAddrOfMemberFunc(MemberPtrTypeVaConst ptr) { return GetAddrOfMemberFunc(reinterpret_cast>(ptr)); } - +#if defined __GNUC__ && !defined __clang__ && !defined PLATFORM_64BITS +template +inline void *GetAddrOfMemberFunc(MemberPtrTypeRegcall ptr) +{ + return GetAddrOfMemberFunc(reinterpret_cast>(ptr)); +} +#endif template int GetVIdxOfMemberFunc(MemberPtrType ptr) diff --git a/src/mem/detour.cpp b/src/mem/detour.cpp index 6c8106ee..a9f09b5c 100644 --- a/src/mem/detour.cpp +++ b/src/mem/detour.cpp @@ -4,6 +4,7 @@ #include "mem/protect.h" #include "mem/opcode.h" #include "mem/wrapper.h" +#include "mem/func_copy.h" #include "util/backtrace.h" #include "util/demangle.h" #include "util/misc.h" @@ -14,8 +15,8 @@ #include -#if !(defined(__i386) || defined(_M_IX86) || defined(__x86_64__) ) -#error Architecture must be IA32 +#if !(PLATFORM_64BITS) +#error Architecture must be IA32/64 #endif @@ -29,243 +30,6 @@ #include "util/trace.h" -/* get number of instruction operands */ -static unsigned int UD86_num_operands(struct ud *ud) -{ - for (unsigned int i = 0; i < 4; ++i) { - if (ud_insn_opr(ud, i) == nullptr) return i; - } - return 4; -} - -CON_COMMAND(sig_test_udis, "") { - std::vector bytes; - int value; - - for (int i = 1; i < args.ArgC(); i++) { - value = strtol( args[i], nullptr, 16); - bytes.push_back(value); - } - - if (bytes.empty()) return; - - ud_t ud; - ud_init(&ud); -#ifdef PLATFORM_64BITS - ud_set_mode(&ud, 64); -#else - ud_set_mode(&ud, 32); -#endif - ud_set_pc(&ud, (uint64_t)bytes.data()); - ud_set_input_buffer(&ud, bytes.data(), 0x100); - - int len = ud_decode(&ud); - if (len == 0) { - Msg("Error decoding\n"); - } - - auto mnemonic = ud_insn_mnemonic(&ud); - Msg("Instr %s %d | length %d offset %d ptr %p opr count %d\n", ud_insn_asm(&ud), ud_insn_mnemonic(&ud), ud_insn_len(&ud), ud_insn_off(&ud), ud_insn_ptr(&ud), UD86_num_operands(&ud)); - for (int i = 0; i < UD86_num_operands(&ud); i++) { - const auto *op0 = ud_insn_opr(&ud, i); - Msg("op %d type %d size %d lval %llx base %d index %d access %d\n", i, op0->type, op0->size, op0->lval.uqword, op0->base, op0->index, op0->access); - } -} - -/* fix [rip + disp32] operands*/ -static bool UD86_insn_fix_disp32(struct ud *ud, const uint8_t *func = nullptr, uint8_t *dest = nullptr) -{ - auto mnemonic = ud_insn_mnemonic(ud); - if (ud_insn_opr(ud, 0) == nullptr) return false; - - //Msg("Instr %s %d | length %d offset %d ptr %p opr count %d\n", ud_insn_asm(ud), ud_insn_mnemonic(ud), ud_insn_len(ud), ud_insn_off(ud), ud_insn_ptr(ud), UD86_num_operands(ud)); - //if (ud_insn_len(ud) != 5) return false; - //if (UD86_num_operands(ud) != 1) return false; - - int32_t writeOffset = -1; - uint64_t dispValue = -1ULL; - int memArgs = 0; - int immArgSize = 0; - - for (unsigned int i = 0; i < UD86_num_operands(ud); i++) { - auto op = ud_insn_opr(ud, i); - if (op->type == UD_OP_MEM && op->base == UD_R_RIP) { - dispValue = op->lval.uqword; - memArgs++; - } - if (op->type == UD_OP_IMM) { - immArgSize = op->size / 8; - } - } - if (immArgSize > 0) { - Msg("Instr %s %d | length %d offset %d ptr %p opr count %d\n", ud_insn_asm(ud), ud_insn_mnemonic(ud), ud_insn_len(ud), ud_insn_off(ud), ud_insn_ptr(ud), UD86_num_operands(ud)); - for (int i = 0; i < UD86_num_operands(ud); i++) { - const auto *op0 = ud_insn_opr(ud, i); - Msg("op %d type %d size %d lval %llx base %d index %d access %d\n", i, op0->type, op0->size, op0->lval.uqword, op0->base, op0->index, op0->access); - } - } - writeOffset = ud_insn_len(ud) - 4 - immArgSize; - - if (memArgs != 1 || writeOffset == -1 || dispValue == -1ULL || (dispValue & 0xFFFFFFFF) != dispValue) return false; - - - if (dest == nullptr || func == nullptr) return true; - - int32_t diff = (intptr_t)(dest) - (intptr_t)func; - - memcpy(dest, (uint8_t *)ud_insn_off(ud), ud_insn_len(ud)); - - *(int32_t *)(dest + writeOffset) -= diff; - Msg("Prev bytes: %d Post bytes: %d offset %d\n", *(int32_t *)(ud_insn_off(ud)+writeOffset) + ud_insn_off(ud), *(int32_t *)(dest+writeOffset) + dest, diff); - - return true; -} - -/* fix instruction: '(jmp|call) ' */ -static bool UD86_insn_fix_jmpcall_rel_imm32(struct ud *ud, size_t &newLength, const uint8_t *func = nullptr, uint8_t *dest = nullptr) -{ - auto mnemonic = ud_insn_mnemonic(ud); - // if (mnemonic != UD_Ijmp && mnemonic != UD_Icall ) return false; - - // if (ud_insn_len(ud) != 5) return false; - if (UD86_num_operands(ud) != 1) return false; - - newLength = ud_insn_len(ud); - - const auto *op0 = ud_insn_opr(ud, 0); - if (op0->type != UD_OP_JIMM) return false; - - if (func == nullptr || dest == nullptr) return true; - - memcpy(dest, (uint8_t *)ud_insn_off(ud), ud_insn_len(ud)); - - if (op0->size != 32) { - // Convert jumps from 8 bit to 32 bit address - if (mnemonic == UD_Ijmp) { - dest[0] = OPCODE_JMP_REL_IMM32; - dest[2] = 0; - dest[3] = 0; - dest[4] = 0; - - newLength = 5; - } - else { - dest[5] = 0; - dest[4] = 0; - dest[3] = 0; - dest[2] = dest[1]; - dest[1] = (dest[0] & 0x0F) | 0x80; - dest[0] = OPCODE_JCC_REL_IMM32; - newLength = 6; - } - } - int32_t writeOffset = newLength - 4; - - int32_t diff = (intptr_t)(dest + (newLength - ud_insn_len(ud))) - (intptr_t)func; - - *(int32_t *)(dest + writeOffset) -= diff; - - return true; -} - -/* detect instruction: 'call ' */ -static bool UD86_insn_is_call_rel_imm32(struct ud *ud, const uint8_t **call_target = nullptr) -{ - auto mnemonic = ud_insn_mnemonic(ud); - if (mnemonic != UD_Icall) return false; - - if (ud_insn_len(ud) != 5) return false; - if (UD86_num_operands(ud) != 1) return false; - - const auto *op0 = ud_insn_opr(ud, 0); - if (op0->type != UD_OP_JIMM) return false; - if (op0->size != 32) return false; - - /* optional parameter: write out the call destination address */ - if (call_target != nullptr) { - *call_target = (const uint8_t *)(ud_insn_off(ud) + ud_insn_len(ud) + op0->lval.sdword); - } - - return true; -} - -/* detect instruction: 'mov e[acdb]x,[esp]' */ -static bool UD86_insn_is_mov_r32_rtnval(struct ud *ud, Reg *dest_reg = nullptr) -{ - auto mnemonic = ud_insn_mnemonic(ud); - if (mnemonic != UD_Imov) return false; - - if (ud_insn_len(ud) != 3) return false; - if (UD86_num_operands(ud) != 2) return false; - - const auto *op0 = ud_insn_opr(ud, 0); - if (op0->type != UD_OP_REG) return false; - if (op0->size != 32) return false; - - Reg reg; - switch (op0->base) { - case UD_R_EAX: reg = REG_AX; break; - case UD_R_ECX: reg = REG_CX; break; - case UD_R_EDX: reg = REG_DX; break; - case UD_R_EBX: reg = REG_BX; break; - default: return false; - } - - const auto *op1 = ud_insn_opr(ud, 1); - if (op1->type != UD_OP_MEM) return false; - if (op1->size != 32) return false; - if (op1->base != UD_R_ESP) return false; - if (op1->index != UD_NONE) return false; - if (op1->scale != UD_NONE) return false; - if (op1->offset != 0) return false; - - /* optional parameter: write out the first operand base register */ - if (dest_reg != nullptr) { - *dest_reg = reg; - } - - return true; -} - -/* detect instruction: 'ret' */ -static bool UD86_insn_is_ret(struct ud *ud) -{ - auto mnemonic = ud_insn_mnemonic(ud); - if (mnemonic != UD_Iret) return false; - - if (ud_insn_len(ud) != 1) return false; - if (UD86_num_operands(ud) != 0) return false; - - return true; -} - - -/* detect whether an instruction is a call to __i686.get_pc_thunk.(ax|cx|dx|bx) */ -static bool UD86_insn_is_call_to_get_pc_thunk(struct ud *ud, Reg *dest_reg = nullptr) -{ - const uint8_t *call_target; - if (!UD86_insn_is_call_rel_imm32(ud, &call_target)) return false; - - ud_t ux; - ud_init(&ux); -#ifdef PLATFORM_64BITS - ud_set_mode(&ux, 64); -#else - ud_set_mode(&ux, 32); -#endif - ud_set_pc(&ux, (uint64_t)call_target); - ud_set_input_buffer(&ux, call_target, 0x100); - - if (ud_decode(&ux) == 0) return false; - if (!UD86_insn_is_mov_r32_rtnval(&ux, dest_reg)) return false; - - if (ud_decode(&ux) == 0) return false; - if (!UD86_insn_is_ret(&ux)) return false; - - return true; -} - - /* analogous to asm.c copy_bytes() when dest == nullptr */ static size_t Trampoline_CalcNumBytesToCopy(size_t len_min, const uint8_t *func) { @@ -291,52 +55,6 @@ static size_t Trampoline_CalcNumBytesToCopy(size_t len_min, const uint8_t *func) return len_actual; } -/* analogous to asm.c copy_bytes() when dest != nullptr */ -static size_t Trampoline_CopyAndFixUpFuncBytes(size_t len_min, const uint8_t *func, uint8_t *trampoline) -{ - uint8_t *dest = trampoline; - - ud_t ud; - ud_init(&ud); -#ifdef PLATFORM_64BITS - ud_set_mode(&ud, 64); -#else - ud_set_mode(&ud, 32); -#endif - ud_set_pc(&ud, (uint64_t)func); - ud_set_input_buffer(&ud, func, 0x100); - - size_t len_actual = 0; - while (len_actual < len_min) { - size_t len_decoded = ud_decode(&ud); - assert(len_decoded != 0); - - // They typically determine end of function - if (ud_insn_mnemonic(&ud) == UD_Inop || ud_insn_mnemonic(&ud) == UD_Iint3) break; - - /* detect calls to __i686.get_pc_thunk.(ax|cx|dx|bx); - * convert them into direct-register-load operations */ - Reg reg; - if (UD86_insn_is_call_to_get_pc_thunk(&ud, ®)) { - uint32_t pc_value = (ud_insn_off(&ud) + ud_insn_len(&ud) + (trampoline - func)); - MovRegImm32(dest, reg, pc_value).Write(); - } else { - /* fixup jmp and call relative offsets */ - if (UD86_insn_fix_jmpcall_rel_imm32(&ud, len_decoded, func + len_actual, dest)) { - } - else if (UD86_insn_fix_disp32(&ud, func + len_actual, dest)) { - - } else { - memcpy(dest, (uint8_t *)ud_insn_off(&ud), len_decoded); - } - } - - len_actual += len_decoded; - dest += len_decoded; - } - return len_actual; -} - static bool Jump_ShouldUseRelativeJump(intptr_t from, intptr_t target) { #ifndef PLATFORM_64BITS @@ -349,7 +67,6 @@ static bool Jump_ShouldUseRelativeJump(intptr_t from, intptr_t target) static size_t Jump_CalculateSize(intptr_t from, intptr_t target) { auto size = Jump_ShouldUseRelativeJump(from, target) ? JmpRelImm32::Size() : JmpIndirectMem32::Size() + sizeof(intptr_t); - Msg("calculated size %d\n", size); return size; } @@ -361,7 +78,6 @@ static void Jump_WriteJump(uint8_t *from, uintptr_t target, size_t padSize) jmp.WritePadded(padSize); else jmp.Write(); - Msg("WriteRelative\n"); } else { auto pointerAddress = (uintptr_t)from + JmpIndirectMem32::Size(); @@ -371,7 +87,6 @@ static void Jump_WriteJump(uint8_t *from, uintptr_t target, size_t padSize) else jmp.Write(); - Msg("WriteAbsolute %x\n", (uint8_t) ModRM{ RM_DISP32, OP_FF_JMP_RM32, MOD_INDIRECT }); *(uintptr_t *)pointerAddress = target; } } @@ -896,14 +611,13 @@ void CDetouredFunc::CreateTrampoline() size_t len_trampoline; { MemProtModifier_RX_RWX(this->m_pTrampoline, len_trampoline_alloc); - len_trampoline = Trampoline_CopyAndFixUpFuncBytes(len_prologue, this->m_pFunc, this->m_pTrampoline); - Msg("len trampoline %zu alloc %zu jumps %zu prologue %zu\n", len_trampoline, len_trampoline_alloc, jumpInTrampolineSize, len_prologue); + len_trampoline = CopyAndFixUpFuncBytes(len_prologue, this->m_pFunc, this->m_pTrampoline); + Msg("trampoline addr %p len trampoline %zu alloc %zu jumps %zu prologue %zu\n", this->m_pTrampoline, len_trampoline, len_trampoline_alloc, jumpInTrampolineSize, len_prologue); TRACE_MSG("len_trampoline = %zu\n", len_trampoline); assert(len_trampoline >= len_prologue && len_trampoline + jumpInTrampolineSize <= len_trampoline_alloc); Jump_WriteJump(this->m_pTrampoline + len_trampoline, (uintptr_t)this->m_pFunc + len_prologue, 0); } - assert(this->m_TrampolineCheck.empty()); this->m_TrampolineCheck.resize(len_trampoline + jumpInTrampolineSize); memcpy(this->m_TrampolineCheck.data(), this->m_pTrampoline, len_trampoline + jumpInTrampolineSize); diff --git a/src/mem/func_copy.cpp b/src/mem/func_copy.cpp new file mode 100644 index 00000000..a47f617a --- /dev/null +++ b/src/mem/func_copy.cpp @@ -0,0 +1,278 @@ +#include "mem/alloc.h" +#include "mem/protect.h" +#include "mem/opcode.h" +#include "mem/wrapper.h" +#include "mem/func_copy.h" + +#include + +/* get number of instruction operands */ +static unsigned int UD86_num_operands(struct ud *ud) +{ + for (unsigned int i = 0; i < 4; ++i) { + if (ud_insn_opr(ud, i) == nullptr) return i; + } + return 4; +} + +/* get number of instruction operands */ +static void UD86_init_buffer(struct ud *ud, const uint8_t *input, size_t size) +{ + ud_init(ud); +#ifdef PLATFORM_64BITS + ud_set_mode(ud, 64); +#else + ud_set_mode(ud, 32); +#endif + ud_set_pc(ud, (uint64_t)input); + ud_set_input_buffer(ud, input, size); +} + +/* fix [rip + disp32] operands*/ +static bool UD86_insn_fix_disp32(struct ud *ud, const uint8_t *func = nullptr, const uint8_t *dest = nullptr, uint8_t *buffer = nullptr) +{ + auto mnemonic = ud_insn_mnemonic(ud); + if (ud_insn_opr(ud, 0) == nullptr) return false; + + //Msg("Instr %s %d | length %d offset %d ptr %p opr count %d\n", ud_insn_asm(ud), ud_insn_mnemonic(ud), ud_insn_len(ud), ud_insn_off(ud), ud_insn_ptr(ud), UD86_num_operands(ud)); + //if (ud_insn_len(ud) != 5) return false; + //if (UD86_num_operands(ud) != 1) return false; + + int32_t writeOffset = -1; + uint64_t dispValue = -1ULL; + int memArgs = 0; + int immArgSize = 0; + + for (unsigned int i = 0; i < UD86_num_operands(ud); i++) { + auto op = ud_insn_opr(ud, i); + if (op->type == UD_OP_MEM && op->base == UD_R_RIP) { + dispValue = op->lval.uqword; + memArgs++; + } + if (op->type == UD_OP_IMM) { + immArgSize = op->size / 8; + } + } + writeOffset = ud_insn_len(ud) - 4 - immArgSize; + + if (memArgs != 1 || writeOffset == -1 || dispValue == -1ULL || (dispValue & 0xFFFFFFFF) != dispValue) return false; + + + if (dest == nullptr || func == nullptr || buffer == nullptr) return true; + + int32_t diff = (intptr_t)(dest) - (intptr_t)func; + + memcpy(buffer, (uint8_t *)ud_insn_off(ud), ud_insn_len(ud)); + + *(int32_t *)(buffer + writeOffset) -= diff; + + return true; +} + +/* fix instruction: '(jmp|call) ' */ +static bool UD86_insn_fix_jmpcall_rel_imm32(struct ud *ud, size_t &newLength, const uint8_t *func = nullptr, const uint8_t *dest = nullptr, uint8_t *buffer = nullptr, int32_t shortJumpMax = 0, int32_t shortJumpMin = 0) +{ + auto mnemonic = ud_insn_mnemonic(ud); + // if (mnemonic != UD_Ijmp && mnemonic != UD_Icall ) return false; + + // if (ud_insn_len(ud) != 5) return false; + if (UD86_num_operands(ud) != 1) return false; + + newLength = ud_insn_len(ud); + + const auto *op0 = ud_insn_opr(ud, 0); + if (op0->type != UD_OP_JIMM) return false; + + int32_t jumpTarget = op0->size == 8 ? op0->lval.sbyte : op0->lval.sdword; + if (jumpTarget >= shortJumpMin && jumpTarget <= shortJumpMax) { + return false; + } + + if (buffer != nullptr) + memcpy(buffer, (uint8_t *)ud_insn_off(ud), ud_insn_len(ud)); + + if (op0->size != 32) { + // Convert jumps from 8 bit to 32 bit address + if (mnemonic == UD_Ijmp) { + if (buffer != nullptr) { + buffer[0] = OPCODE_JMP_REL_IMM32; + buffer[2] = 0; + buffer[3] = 0; + buffer[4] = 0; + } + + newLength = 5; + } + else { + if (buffer != nullptr) { + buffer[5] = 0; + buffer[4] = 0; + buffer[3] = 0; + buffer[2] = buffer[1]; + buffer[1] = (buffer[0] & 0x0F) | 0x80; + buffer[0] = OPCODE_JCC_REL_IMM32; + } + newLength = 6; + } + } + int32_t writeOffset = newLength - 4; + + if (func == nullptr || dest == nullptr || buffer == nullptr) return true; + + int32_t diff = (intptr_t)(dest + (newLength - ud_insn_len(ud))) - (intptr_t)func; + + *(int32_t *)(buffer + writeOffset) -= diff; + + return true; +} + +/* detect instruction: 'call ' */ +static bool UD86_insn_is_call_rel_imm32(struct ud *ud, const uint8_t **call_target = nullptr) +{ + auto mnemonic = ud_insn_mnemonic(ud); + if (mnemonic != UD_Icall) return false; + + if (ud_insn_len(ud) != 5) return false; + if (UD86_num_operands(ud) != 1) return false; + + const auto *op0 = ud_insn_opr(ud, 0); + if (op0->type != UD_OP_JIMM) return false; + if (op0->size != 32) return false; + + /* optional parameter: write out the call destination address */ + if (call_target != nullptr) { + *call_target = (const uint8_t *)(ud_insn_off(ud) + ud_insn_len(ud) + op0->lval.sdword); + } + + return true; +} + +/* detect instruction: 'mov e[acdb]x,[esp]' */ +static bool UD86_insn_is_mov_r32_rtnval(struct ud *ud, Reg *dest_reg = nullptr) +{ + auto mnemonic = ud_insn_mnemonic(ud); + if (mnemonic != UD_Imov) return false; + + if (ud_insn_len(ud) != 3) return false; + if (UD86_num_operands(ud) != 2) return false; + + const auto *op0 = ud_insn_opr(ud, 0); + if (op0->type != UD_OP_REG) return false; + if (op0->size != 32) return false; + + Reg reg; + switch (op0->base) { + case UD_R_EAX: reg = REG_AX; break; + case UD_R_ECX: reg = REG_CX; break; + case UD_R_EDX: reg = REG_DX; break; + case UD_R_EBX: reg = REG_BX; break; + default: return false; + } + + const auto *op1 = ud_insn_opr(ud, 1); + if (op1->type != UD_OP_MEM) return false; + if (op1->size != 32) return false; + if (op1->base != UD_R_ESP) return false; + if (op1->index != UD_NONE) return false; + if (op1->scale != UD_NONE) return false; + if (op1->offset != 0) return false; + + /* optional parameter: write out the first operand base register */ + if (dest_reg != nullptr) { + *dest_reg = reg; + } + + return true; +} + +/* detect instruction: 'ret' */ +static bool UD86_insn_is_ret(struct ud *ud) +{ + auto mnemonic = ud_insn_mnemonic(ud); + if (mnemonic != UD_Iret) return false; + + if (ud_insn_len(ud) != 1) return false; + if (UD86_num_operands(ud) != 0) return false; + + return true; +} + + +/* detect whether an instruction is a call to __i686.get_pc_thunk.(ax|cx|dx|bx) */ +static bool UD86_insn_is_call_to_get_pc_thunk(struct ud *ud, Reg *dest_reg = nullptr) +{ + const uint8_t *call_target; + if (!UD86_insn_is_call_rel_imm32(ud, &call_target)) return false; + + ud_t ux; + UD86_init_buffer(&ux, call_target, 0x100); + + if (ud_decode(&ux) == 0) return false; + if (!UD86_insn_is_mov_r32_rtnval(&ux, dest_reg)) return false; + + if (ud_decode(&ux) == 0) return false; + if (!UD86_insn_is_ret(&ux)) return false; + + return true; +} + +/* analogous to asm.c copy_bytes() when dest != nullptr */ +size_t CopyAndFixUpFuncBytes(size_t len_min, size_t len_max, const uint8_t *source, const uint8_t *destination_address, uint8_t *buffer, bool stop_at_nop) +{ + auto dest = destination_address; + + ud_t ud; + UD86_init_buffer(&ud, source, len_max); + + size_t len_calc = 0; + size_t len_min_calc = len_min; + while (len_calc < len_min_calc) { + size_t len_decoded = ud_decode(&ud); + size_t orig_len_decoded = len_decoded; + + if (len_decoded == 0) break; + + // They typically determine end of function + if (stop_at_nop && (ud_insn_mnemonic(&ud) == UD_Inop || ud_insn_mnemonic(&ud) == UD_Iint3)) break; + + UD86_insn_fix_jmpcall_rel_imm32(&ud, len_decoded, source + len_calc, dest, nullptr, len_min_calc - len_calc - len_decoded, -len_calc - len_decoded); + // In case a fixup increased instruction size + len_min_calc += len_decoded - orig_len_decoded; + len_calc += len_decoded; + dest += len_decoded; + } + + if (len_calc > len_max) return 0; + + if (buffer == nullptr) return len_calc; + + UD86_init_buffer(&ud, source, len_max); + + dest = destination_address; + size_t len_actual = 0; + while (len_actual < len_min) { + size_t len_decoded = ud_decode(&ud); + size_t orig_len_decoded = len_decoded; + + if (len_decoded == 0) break; + + // They typically determine end of function + if (stop_at_nop && (ud_insn_mnemonic(&ud) == UD_Inop || ud_insn_mnemonic(&ud) == UD_Iint3)) break; + + // fixup jmp and call relative offsets + if (UD86_insn_fix_jmpcall_rel_imm32(&ud, len_decoded, source + len_actual, dest, buffer, len_min - len_actual - len_decoded, -len_actual - len_decoded)) { + } + // fixup [rip+disp32] relative offsets + else if (UD86_insn_fix_disp32(&ud, source + len_actual, dest, buffer)){ + } + else { + memcpy(buffer, (uint8_t *)ud_insn_off(&ud), len_decoded); + } + len_min += len_decoded - orig_len_decoded; + len_actual += len_decoded; + dest += len_decoded; + buffer += len_decoded; + } + assert(len_calc == len_actual); + return len_actual; +} \ No newline at end of file diff --git a/src/mem/func_copy.h b/src/mem/func_copy.h new file mode 100644 index 00000000..d512cb14 --- /dev/null +++ b/src/mem/func_copy.h @@ -0,0 +1,24 @@ +#ifndef _INCLUDE_SIGSEGV_MEM_FUNC_COPY_H_ +#define _INCLUDE_SIGSEGV_MEM_FUNC_COPY_H_ + +// Copy function opcodes from source to destination, fixing up relative addresses. +// @param len_min Target amount of bytes to copy. Ideally this many bytes will be copied +// @param len_max Maximum amount of bytes to copy. Nothing will be copied if the copying operation would result in more bytes being written +// @param source Address of the function to copy +// @param destination_address Destination address of the copied function +// @param buffer Buffer to write the copied function to. If `buffer != destination_address`, the copied function need to be moved to `destination_address` manually before calling it. If buffer is null, nothing will be written +// @param stop_at_nop if the copying should stop after reading nop and int3 bytes. Use `false` if copying entire function +// @return amount of bytes copied +size_t CopyAndFixUpFuncBytes(size_t len_min, size_t len_max, const uint8_t *source, const uint8_t *destination_address, uint8_t *buffer, bool stop_at_nop = true); + +// Copy function opcodes from source to destination, fixing up relative addresses. +// @param len_min Minimum amount of bytes to copy. Ideally this many bytes will be copied +// @param source Address of the function to copy +// @param destination_address Destination address to where the function should be copied +// @param stop_at_nop if the copying should stop after reading nop and int3 bytes. Use `false` if copying entire function +// @return amount of bytes copied +inline size_t CopyAndFixUpFuncBytes(size_t len_min, const uint8_t *source, uint8_t *destination_address, bool stop_at_nop = true) { + return CopyAndFixUpFuncBytes(len_min, SIZE_MAX, source, destination_address, destination_address, stop_at_nop); +} + +#endif \ No newline at end of file diff --git a/src/mem/patch.cpp b/src/mem/patch.cpp index 44648611..72669a66 100644 --- a/src/mem/patch.cpp +++ b/src/mem/patch.cpp @@ -1,3 +1,4 @@ +#include "mem/func_copy.h" #include "mem/patch.h" #include "mem/scan.h" #include "mem/protect.h" @@ -190,3 +191,13 @@ void *CPatch::GetActualLocation() const assert(this->m_bFoundOffset); return (void *)((uintptr_t)this->m_pFuncAddr + this->m_iFuncOffActual); } + +bool CFuncReplace::GetPatchInfo(ByteBuf& buf, ByteBuf& mask) const +{ + size_t sizeCopied = CopyAndFixUpFuncBytes(this->m_zFuncSize, this->GetLength(), (uint8_t *) m_pFunc, (uint8_t *) this->GetFuncAddr(), buf.MPtr(), false); + if (sizeCopied == 0) { + ConColorMsg(Color(255, 60, 60), "Func Replace: \"%s\": copied function size larger than buffer\n", this->GetFuncName()); + } + mask.SetRange(0, sizeCopied, 0xff); + return true; +} \ No newline at end of file diff --git a/src/mem/patch.h b/src/mem/patch.h index fd8459f3..078b9f63 100644 --- a/src/mem/patch.h +++ b/src/mem/patch.h @@ -112,7 +112,7 @@ class CVerify : public CPatch class CFuncReplace : public CPatch { public: - CFuncReplace(size_t size, void *func, const char *func_name) : CPatch(size), m_szFuncName(func_name), m_pFunc(func) {} + CFuncReplace(size_t size, void *func, const char *func_name) : CPatch(size + 32), m_zFuncSize(size), m_szFuncName(func_name), m_pFunc(func) {} virtual const char *GetFuncName() const override { return m_szFuncName.c_str(); } virtual uint32_t GetFuncOffMin() const override { return 0x0000; } @@ -123,18 +123,12 @@ class CFuncReplace : public CPatch auto data = (uint8_t *) m_pFunc; buf.CopyFrom(data); - mask.SetAll(0x00); - + return true; } - virtual bool GetPatchInfo(ByteBuf& buf, ByteBuf& mask) const override - { - mask.SetAll(0xFF); - - return true; - } + virtual bool GetPatchInfo(ByteBuf& buf, ByteBuf& mask) const override; virtual bool AdjustPatchInfo(ByteBuf& buf) const override { @@ -142,6 +136,7 @@ class CFuncReplace : public CPatch } private: + size_t m_zFuncSize; std::string m_szFuncName; void *m_pFunc; }; @@ -166,14 +161,12 @@ class CFuncReplace : public CPatch // Replace original function code with provided function. Remember that: // 1. It must be placed outside of namespace -// 2. The original function code must be larger than the replacement. There is no protection against patching smaller original functions! -// 3. Non-virtual non-inline non-address calls to our functions are not allowed. You can make an address call by using auto ourfunc1 = &OurFunc; and calling ourfunc1 +// 2. The original function code + 16 byte alignment must be larger than the replacement. There is no protection against patching smaller original functions! #define REPLACE_FUNC_STATIC(ret, name, ...) REPLACE_FUNC_STATIC_ATTRIBUTES(,ret,name,__VA_ARGS__) // Replace original function code with provided function. Remember that: // 1. It must be placed outside of namespace -// 2. The original function code must be larger than the replacement. There is no protection against patching smaller original functions! -// 3. Non-virtual non-inline non-address calls to our functions are not allowed. You can make an address call by using auto ourfunc1 = &OurFunc; and calling ourfunc1 +// 2. The original function code + 16 byte alignment must be larger than the replacement. There is no protection against patching smaller original functions! #define REPLACE_FUNC_MEMBER(ret, name, ...) REPLACE_FUNC_MEMBER_ATTRIBUTES(,ret,name,__VA_ARGS__) // Same as REPLACE_FUNC_STATIC but the function is space optimized instead diff --git a/src/mod/perf/hltv_optimize.cpp b/src/mod/perf/hltv_optimize.cpp index 1c2a7dcf..052115a9 100644 --- a/src/mod/perf/hltv_optimize.cpp +++ b/src/mod/perf/hltv_optimize.cpp @@ -85,12 +85,13 @@ namespace Mod::Perf::HLTV_Optimize if (hasplayer && hltvclient != nullptr) { if (hltvServerEmpty) { int tickcount = 32.0f / (snapshotrate.GetFloat() * gpGlobals->interval_per_tick); - int framec = hltvserver->CountClientFrames() - 2; - for (int i = 0; i < framec; i++) - hltvserver->RemoveOldestFrame(); + int framec = rtti_scast(hltvserver)->CountClientFrames() - 2; + for (int i = 0; i < framec; i++) { + rtti_scast(hltvserver)->RemoveOldestFrame(); + } //DevMsg("SendNow %d\n", gpGlobals->tickcount % tickcount == 0/*reinterpret_cast(hltvclient)->ShouldSendMessages()*/); - if (gpGlobals->tickcount % tickcount != 0) - return; + //if (gpGlobals->tickcount % tickcount != 0) + // return; } } @@ -105,9 +106,9 @@ namespace Mod::Perf::HLTV_Optimize static ConVarRef delay("tv_delay"); int tickcount = 1.0f / (snapshotrate.GetFloat() * gpGlobals->interval_per_tick); if (delay.GetFloat() <= 0) { - int framec = hltvserver->CountClientFrames() - 2; + int framec = rtti_scast(hltvserver)->CountClientFrames() - 2; for (int i = 0; i < framec; i++) - hltvserver->RemoveOldestFrame(); + rtti_scast(hltvserver)->RemoveOldestFrame(); } //DevMsg("SendNow %d\n", gpGlobals->tickcount % tickcount == 0/*reinterpret_cast(hltvclient)->ShouldSendMessages()*/); // if (gpGlobals->tickcount % tickcount != 0) diff --git a/src/mod/perf/sendprop_optimize.cpp b/src/mod/perf/sendprop_optimize.cpp index 54aa5db8..25dda269 100644 --- a/src/mod/perf/sendprop_optimize.cpp +++ b/src/mod/perf/sendprop_optimize.cpp @@ -154,7 +154,11 @@ struct carea_t class CCollisionBSPData { public: +#ifdef PLATFORM_64BITS + uint8_t pad[0x2C0]; +#else uint8_t pad[0x224]; +#endif int numareas; carea_t *map_areas; }; @@ -195,16 +199,21 @@ REPLACE_FUNC_MEMBER(void, CBaseEntity_SetTransmit, CCheckTransmitInfo *pInfo, bo pInfo->m_pTransmitEdict->Set(index); if (transmitAlways) { - (*ourfunc1)(netProp, bAlways); + SetTransmitAlways(netProp, bAlways); } if (parentIndex >= MAX_EDICTS || pInfo->m_pTransmitEdict->Get(parentIndex)) return; - (*ourfunc2)(parentIndex, pInfo, bAlways); + CBaseEntity *parent = (CBaseEntity *)(g_pWorldEdict+parentIndex)->GetUnknown(); + + // Force our aiment and move parent to be sent. + if (parent != nullptr) { + parent->SetTransmit(pInfo, bAlways); + } } IChangeInfoAccessor *world_accessor = nullptr; CEdictChangeInfo *world_change_info = nullptr; -CSharedEdictChangeInfo *g_SharedEdictChangeInfo; +CSharedEdictChangeInfo *g_SharedEdictChangeInfo = nullptr; REPLACE_FUNC_MEMBER(IChangeInfoAccessor *, CBaseEdict_GetChangeAccessor) { @@ -2027,8 +2036,10 @@ namespace Mod::Perf::SendProp_Optimize virtual void LevelInitPostEntity() override { world_edict = INDEXENT(0); + auto world_accessor2 = engine->GetChangeAccessor(world_edict); world_accessor = static_cast(engine)->GetChangeAccessorStatic(world_edict); world_change_info = &g_SharedEdictChangeInfo->m_ChangeInfos[0]; + Msg("Set world change info %p %p %p %p\n", world_change_info, world_accessor, world_accessor2, world_edict); ConVarRef sv_parallel_packentities("sv_parallel_packentities"); ConVarRef sv_parallel_sendsnapshot("sv_parallel_sendsnapshot"); diff --git a/src/stub/server.cpp b/src/stub/server.cpp index 9a0d26a8..d1f52944 100644 --- a/src/stub/server.cpp +++ b/src/stub/server.cpp @@ -4,10 +4,10 @@ GlobalThunk hltv("hltv"); GlobalThunk g_HostState("g_HostState"); -MemberFuncThunk CHLTVServer::ft_StartMaster("CHLTVServer::StartMaster"); -MemberFuncThunk CHLTVServer::ft_CountClientFrames("CClientFrameManager::CountClientFrames"); -MemberFuncThunk CHLTVServer::ft_RemoveOldestFrame("CClientFrameManager::RemoveOldestFrame"); +MemberFuncThunk CClientFrameManager::ft_CountClientFrames("CClientFrameManager::CountClientFrames"); +MemberFuncThunk CClientFrameManager::ft_RemoveOldestFrame("CClientFrameManager::RemoveOldestFrame"); +MemberFuncThunk CHLTVServer::ft_StartMaster("CHLTVServer::StartMaster"); MemberFuncThunk CBaseServer::ft_CreateFakeClient("CBaseServer::CreateFakeClient"); diff --git a/src/stub/server.h b/src/stub/server.h index 3b620779..ac79ec17 100644 --- a/src/stub/server.h +++ b/src/stub/server.h @@ -47,11 +47,11 @@ class CBaseClient : public IGameEventListener2, public IClient, public IClientMe INetChannel *m_NetChannel; int m_nSignonState; + int pad; int m_nDeltaTick; int m_nStringTableAckTick; int m_nSignonTick; void * m_pLastSnapshot; //0x0dc - int pad; CFrameSnapshot *m_pBaseline; //0x0e0 int m_nBaselineUpdateTick; // 0x0e4 @@ -89,17 +89,24 @@ class CGameServer : public CBaseServer int GetNumEdicts() {return *(int *)((uintptr_t)(this) + 0x1E4);} }; -class CHLTVServer : public IGameEventListener2, public CBaseServer +class CClientFrameManager { public: - void StartMaster(CBaseClient *client) { ft_StartMaster(this, client); } int CountClientFrames() { return ft_CountClientFrames(this); } void RemoveOldestFrame() { ft_RemoveOldestFrame(this); } + +private: + static MemberFuncThunk ft_CountClientFrames; + static MemberFuncThunk ft_RemoveOldestFrame; +}; + +class CHLTVServer : public IGameEventListener2, public CBaseServer +{ +public: + void StartMaster(CBaseClient *client) { ft_StartMaster(this, client); } private: static MemberFuncThunk ft_StartMaster; - static MemberFuncThunk ft_CountClientFrames; - static MemberFuncThunk ft_RemoveOldestFrame; }; class CGameClient : public CBaseClient