diff --git a/src/engine/BytecodeIterator.v3 b/src/engine/BytecodeIterator.v3 index bc1ccaff..a35e7e04 100644 --- a/src/engine/BytecodeIterator.v3 +++ b/src/engine/BytecodeIterator.v3 @@ -50,7 +50,7 @@ class BytecodeIterator { // Read the first byte of the code var b = codeptr.read1(); - if (b == InternalOpcode.PROBE.code) { // probe is inserted here + if (b == InternalOpcode.PROBE.code || b == InternalOpcode.WHAMM_PROBE.code) { // probe is inserted here b = origptr.reset(func.orig_bytecode, pc, func.orig_bytecode.length).read1(); } // Query opcode attributes array @@ -101,9 +101,10 @@ class BytecodeIterator { // Read the first byte of the code var b = codeptr.read1(); - if (b == InternalOpcode.PROBE.code) { // probe is inserted here + if (b == InternalOpcode.PROBE.code || b == InternalOpcode.WHAMM_PROBE.code) { // probe is inserted here v.visitProbe(); b = origptr.reset(func.orig_bytecode, pc, func.orig_bytecode.length).read1(); + codeptr.reset(func.cur_bytecode, pc + 1, func.cur_bytecode.length); } // Query opcode attributes array var opcode: Opcode; diff --git a/src/engine/CodePtr.v3 b/src/engine/CodePtr.v3 index f0e617d2..e8bd9b6e 100644 --- a/src/engine/CodePtr.v3 +++ b/src/engine/CodePtr.v3 @@ -14,7 +14,7 @@ class CodePtr extends DataReader { } def read_opcode_but_skip_probe(func: FuncDecl) -> Opcode { var pc = pos, b = read1(); - if (b == InternalOpcode.PROBE.code) b = func.orig_bytecode[pc]; + if (b == InternalOpcode.PROBE.code || b == InternalOpcode.WHAMM_PROBE.code) b = func.orig_bytecode[pc]; var op = Opcodes.opcode_by_prefix[b]; if (op != Opcode.INVALID) return op; return if(Opcodes.isPrefix(b), Opcodes.find(b, read_uleb32())); diff --git a/src/engine/Engine.v3 b/src/engine/Engine.v3 index 0f643d25..9248b59c 100644 --- a/src/engine/Engine.v3 +++ b/src/engine/Engine.v3 @@ -25,7 +25,7 @@ class Engine { if (data == null) return FileLoadResult.FileNotFound(path); var limits = Limits.new().set(extensions); var bp = BinParser.new(extensions, limits, path); - bp.tiering = tiering_override; + bp.tiering = if(tiering_override != null, tiering_override, Execute.tiering); var r = bp.push(data, 0, data.length).finish(); match (r) { Ok(module) => diff --git a/src/engine/Instrumentation.v3 b/src/engine/Instrumentation.v3 index bbf9f83e..a3130ddd 100644 --- a/src/engine/Instrumentation.v3 +++ b/src/engine/Instrumentation.v3 @@ -38,11 +38,15 @@ component Instrumentation { match (probe) { l: ProbeList => { l.add(p); + func.activateProbingAt(offset, InternalOpcode.PROBE.code); Execute.tiering.onFuncProbeInsertN(module, func, offset, p); } null => { map[offset] = p; - func.activateProbingAt(offset); + func.activateProbingAt(offset, + if (WhammProbe.?(p) && FastIntTuning.enableWhammProbeTrampoline, + InternalOpcode.WHAMM_PROBE.code, + InternalOpcode.PROBE.code)); Execute.tiering.onFuncProbeInsert1(module, func, offset, p); } _ => { @@ -50,6 +54,7 @@ component Instrumentation { list.add(probe); list.add(p); map[offset] = list; + func.activateProbingAt(offset, InternalOpcode.PROBE.code); Execute.tiering.onFuncProbeInsert1(module, func, offset, p); } } diff --git a/src/engine/Module.v3 b/src/engine/Module.v3 index f756184c..79f09915 100644 --- a/src/engine/Module.v3 +++ b/src/engine/Module.v3 @@ -131,14 +131,15 @@ class FuncDecl(sig_index: int) extends Decl { def setOrigCode(code: Array) -> this { cur_bytecode = orig_bytecode = code; var tc: TargetCode; + var tr: TargetCode; target_code = tc; // reset target code as well sidetable = Sidetables.NO_SIDETABLE; } - def activateProbingAt(pc: int) { + def 
activateProbingAt(pc: int, probe_byte: byte) { if (pc == 0) return void(entry_probed = true); // special case for function entry // "orig" will become a copy of the original code, to allow in-place modification of old code if (cur_bytecode == orig_bytecode) orig_bytecode = Arrays.dup(orig_bytecode); - cur_bytecode[pc] = InternalOpcode.PROBE.code; + cur_bytecode[pc] = probe_byte; } def deactiveProbingAt(pc: int) { if (pc == 0) return; diff --git a/src/engine/Opcodes.v3 b/src/engine/Opcodes.v3 index 3cf7678e..ea03c328 100644 --- a/src/engine/Opcodes.v3 +++ b/src/engine/Opcodes.v3 @@ -677,6 +677,7 @@ component ImmSigs { // Internal opcodes used by the interpreter. enum InternalOpcode(code: u8, mnemonic: string) { PROBE(0x1E, ""), // Used to overwrite a bytecode where a probe has been inserted + WHAMM_PROBE(0x1D, ""), //PROBE_COUNTER //PROBE_COUNTER_n //PROBE_TOS_i @@ -1031,7 +1032,7 @@ class InstrTracer { op = Opcodes.find(b, b2); if (op == Opcode.INVALID) out.put2("%x %x ", b, b2); else out.puts(op.mnemonic); - } else if (b == InternalOpcode.PROBE.code) { + } else if (b == InternalOpcode.PROBE.code || b == InternalOpcode.WHAMM_PROBE.code) { out.put1("", b); return; } else { diff --git a/src/engine/Tuning.v3 b/src/engine/Tuning.v3 index 9fb101d2..352ba246 100644 --- a/src/engine/Tuning.v3 +++ b/src/engine/Tuning.v3 @@ -34,6 +34,7 @@ component FastIntTuning { def fourByteSidetable = true; // sidetable entries are 4-bytes def entryTierUpDecrement = 1; // "cost" of entering a function in the interpreter def loopTierUpDecrement = 1; // "cost" of looping in the interpreter + def enableWhammProbeTrampoline = true; } // Tuning settings for the single-pass compiler that have no effect on correctness. @@ -53,4 +54,5 @@ component SpcTuning { var inlineSmallFunc = true; // inline small functions, currently only applicable for whamm probes def probeCallFreesRegs = true; // probe calls frees registers in abstract state def runtimeCallFreesRegs = true; // runtime calls frees registers in abstract state + def intrinsifyMemoryProbes = true; } diff --git a/src/engine/Value.v3 b/src/engine/Value.v3 index db36823b..88bf85b5 100644 --- a/src/engine/Value.v3 +++ b/src/engine/Value.v3 @@ -10,6 +10,34 @@ type Value { case F32(bits: u32); case F64(bits: u64); case V128(low: u64, high: u64); + + def equal(that: Value) -> bool { + if (this == that) return true; + if (Value.Ref.?(this) == Value.Ref.?(that)) { + return Value.Ref.!(this).val == Value.Ref.!(that).val; + } + if (Value.I31.?(this) == Value.I31.?(that)) { + return Value.I31.!(this).val == Value.I31.!(that).val; + } + if (Value.I32.?(this) == Value.I32.?(that)) { + return Value.I32.!(this).val == Value.I32.!(that).val; + } + if (Value.I64.?(this) == Value.I64.?(that)) { + return Value.I64.!(this).val == Value.I64.!(that).val; + } + if (Value.F32.?(this) == Value.F32.?(that)) { + return Value.F32.!(this).bits == Value.F32.!(that).bits; + } + if (Value.F64.?(this) == Value.F64.?(that)) { + return Value.F64.!(this).bits == Value.F64.!(that).bits; + } + if (Value.V128.?(this) == Value.V128.?(that)) { + var a = Value.V128.!(this); + var b = Value.V128.!(that); + return a.low == b.low && a.high == b.high; + } + return false; + } } // Categorization of values into storage kinds. 
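// --- Reviewer sketch (not part of the patch) ----------------------------------
// The probe-activation changes above all follow one pattern: the byte at the
// probed pc in cur_bytecode is overwritten with an internal opcode (PROBE, or
// WHAMM_PROBE when the trampoline fast path is enabled), and every decoder
// recovers the real opcode from orig_bytecode. A minimal sketch of that round
// trip, assuming a FuncDecl whose bytecode arrays are populated (the helper
// name is hypothetical):
def sketch_probeRoundTrip(func: FuncDecl, pc: int) -> byte {
	// keep a pristine copy of the original code before the first in-place modification
	if (func.cur_bytecode == func.orig_bytecode) func.orig_bytecode = Arrays.dup(func.orig_bytecode);
	func.cur_bytecode[pc] = InternalOpcode.WHAMM_PROBE.code; // activate probing at pc
	var b = func.cur_bytecode[pc];
	if (b == InternalOpcode.PROBE.code || b == InternalOpcode.WHAMM_PROBE.code) {
		b = func.orig_bytecode[pc]; // readers skip the probe byte and fetch the original opcode
	}
	return b;
}
// -------------------------------------------------------------------------------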
diff --git a/src/engine/compiler/MacroAssembler.v3 b/src/engine/compiler/MacroAssembler.v3 index cf238b81..61e40436 100644 --- a/src/engine/compiler/MacroAssembler.v3 +++ b/src/engine/compiler/MacroAssembler.v3 @@ -165,6 +165,7 @@ class MacroAssembler(valuerep: Tagging, regConfig: RegConfig) { } // Architecture-specific load and store routines for Wasm load/store. + // if `intrinsify_probe` is true, writes the effective address to scratch register. def emit_loadbsx_r_r_r_i(kind: ValueKind, dst: Reg, base: Reg, index: Reg, offset: u32); def emit_loadbzx_r_r_r_i(kind: ValueKind, dst: Reg, base: Reg, index: Reg, offset: u32); def emit_loadwsx_r_r_r_i(kind: ValueKind, dst: Reg, base: Reg, index: Reg, offset: u32); @@ -173,9 +174,9 @@ class MacroAssembler(valuerep: Tagging, regConfig: RegConfig) { def emit_loaddzx_r_r_r_i(kind: ValueKind, dst: Reg, base: Reg, index: Reg, offset: u32); def emit_load_r_r_r_i(kind: ValueKind, dst: Reg, base: Reg, index: Reg, offset: u32); - def emit_storeb_r_r_r_i(kind: ValueKind, val: Reg, base: Reg, index: Reg, offset: u32); - def emit_storew_r_r_r_i(kind: ValueKind, val: Reg, base: Reg, index: Reg, offset: u32); - def emit_store_r_r_r_i(kind: ValueKind, val: Reg, base: Reg, index: Reg, offset: u32); + def emit_storeb_r_r_r_i(kind: ValueKind, val: Reg, base: Reg, mirror_base: Reg, index: Reg, offset: u32); + def emit_storew_r_r_r_i(kind: ValueKind, val: Reg, base: Reg, mirror_base: Reg, index: Reg, offset: u32); + def emit_store_r_r_r_i(kind: ValueKind, val: Reg, base: Reg, mirror_base: Reg, index: Reg, offset: u32); def emit_mov_r_r(kind: ValueKind, reg: Reg, reg2: Reg); def emit_mov_r_m(kind: ValueKind, reg: Reg, addr: MasmAddr); @@ -246,6 +247,8 @@ class MacroAssembler(valuerep: Tagging, regConfig: RegConfig) { def emit_call_runtime_getFrameAccessorMetaRef(); def emit_increment_CountProbe(tmp: Reg, probe: CountProbe, increment: u64); def emit_call_OperandProbe_i_v_fire(probe: OperandProbe_i_v, value_reg: Reg); + def emit_call_MemoryReadProbe_fire(probe: MemoryReadProbe); + def emit_call_MemoryWriteProbe_fire(probe: MemoryWriteProbe); def emit_debugger_breakpoint(); diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index 9144a70f..79da7aee 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -9,8 +9,10 @@ class SpcExecEnv { var vfp_slot: MasmAddr; var pc_slot: MasmAddr; var instance_slot: MasmAddr; + var inlined_instance_slot: MasmAddr; var wasm_func_slot: MasmAddr; var mem0_base_slot: MasmAddr; + var inlined_mem0_base_slot: MasmAddr; var accessor_slot: MasmAddr; // Register information. @@ -88,7 +90,6 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl def state = SpcState.new(regAlloc); // Other state def trap_labels = Vector<(TrapReason, MasmLabel)>.new(); - var start_pos = 0; var module: Module; var func: FuncDecl; var sig: SigDecl; @@ -103,9 +104,18 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl var last_probe = 0; var skip_to_end: bool; + // when function is inlined, we continue using caller's abstract state, and + // push callee's params/locals as needed, thus we need to track the base sp of the locals + // in the current context. + var local_base_sp: u31 = 0; + var is_inlined = false; // tracks the last masm writer offset to generate instruction trace for each bytecode. 
var codegen_offset: u64 = 0; + var intrinsified_read_probe: MemoryReadProbe = null; + var intrinsified_write_probe: MemoryWriteProbe = null; + var inline_count = 0; + new() { masm.unimplemented = unsupported; masm.newTrapLabel = newTrapLabel; // trap labels are per-pc @@ -162,6 +172,12 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl last_probe = 0; masm.source_loc = it.pc; it.dispatch(this); + if (Trace.compiler && Trace.asm) { + OUT.puts("JIT code: "); + masm.printCodeBytes(OUT, before_code_bytes, masm.curCodeBytes()); + before_code_bytes = masm.curCodeBytes(); + OUT.ln(); + } unrefRegs(); if (Debug.compiler) checkRegAlloc(); it.next(); @@ -319,6 +335,14 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Check for intrinsified probes. match (probe) { // TODO: emit code for multiple intrinsified probes. null => ; + x: MemoryReadProbe => if (SpcTuning.intrinsifyMemoryProbes && x.size <= 8) { + intrinsified_read_probe = x; + return; + } + x: MemoryWriteProbe => if (SpcTuning.intrinsifyMemoryProbes && x.size <= 8) { + intrinsified_write_probe = x; + return; + } x: CountProbe => if (SpcTuning.intrinsifyCountProbe) { // TODO: check for subclass override var tmp = allocTmp(ValueKind.REF); masm.emit_increment_CountProbe(tmp, x, 1); @@ -355,11 +379,9 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl masm.emit_debugger_breakpoint(); return; } - x: WhammProbe => { - if (SpcTuning.intrinsifyWhammProbe && WasmFunction.?(x.func)) { - emitWhammProbe(x); - return; - } + x: WhammProbe => if (SpcTuning.intrinsifyWhammProbe && WasmFunction.?(x.func)) { + emitWhammProbe(x); + return; } } // spill everything @@ -380,75 +402,229 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // saves the overhead of using a runtime call by directly invoking the wasm function associated with the whamm probe def emitWhammProbe(probe: WhammProbe) { - // spill entire value stack. - state.emitSaveAll(resolver, probeSpillMode); // set up args and push to frame slots. 
var whamm_sig = probe.sig; var offsets = masm.getOffsets(); + var inline_config = InlineConfig(false, false, false); + var new_local_base_sp = 0; + var orig_sp = state.sp; var callee_func = WasmFunction.!(probe.func); + + if (SpcTuning.inlineSmallFunc && inline_count < 100) { + inline_count++; + // TODO: can reuse when implementing inlining for SPC + inline_config = InlineConfig(probe.spc_swap_membase, probe.spc_swap_instance, probe.spc_inline_func); + if (!probe.inline_heuristic_checked) { + inline_config = funcCanInline(callee_func.decl); + probe.inline_heuristic_checked = true; + probe.spc_swap_instance = inline_config.swap_instance; + probe.spc_swap_membase = inline_config.swap_membase; + probe.spc_inline_func = inline_config.can_inline; + } + + if (inline_config.swap_instance) { // push whamm instance onto abstract stack directly + var whamm_instance_addr = Pointer.atObject(callee_func.instance) - Pointer.NULL; + masm.emit_mov_m_l(frame.inlined_instance_slot, whamm_instance_addr); + } + + // overwrite mem0_base with whamm instance's memory base, restore from frame slot later + if (inline_config.swap_membase) { + var memobj_addr = Pointer.atObject(callee_func.instance.memories[0]) - Pointer.NULL; + masm.emit_mov_r_l(regs.mem0_base, i64.view(memobj_addr)); + masm.emit_read_v3_mem_base(regs.mem0_base, regs.mem0_base); + masm.emit_mov_m_r(ValueKind.REF, frame.inlined_mem0_base_slot, regs.mem0_base); + } + } + + if (!inline_config.can_inline) { + state.emitSaveAll(resolver, probeSpillMode); + } else { + new_local_base_sp = int.view(state.sp); + } + for (i < whamm_sig.length) { var slot_tag_addr = masm.tagAddr(state.sp + u32.view(i)); var slot_addr = masm.slotAddr(state.sp + u32.view(i)); var kind: byte; match(whamm_sig[i]) { FrameAccessor => { + if (inline_config.can_inline) state.emitSaveAll(resolver, probeSpillMode); // spill entire value stack. 
masm.emit_call_runtime_getFrameAccessorMetaRef(); - masm.emit_mov_m_r(ValueKind.REF, slot_addr, xenv.runtime_ret0); + emit_reload_regs(); + if (inline_config.can_inline && !probeSpillMode.free_regs) state.emitRestoreAll(resolver); + + // move result to mem slot or reg, depending on inlining + if (inline_config.can_inline) { + var reg = allocRegTos(ValueKind.REF); + masm.emit_mov_r_r(ValueKind.REF, reg, xenv.runtime_ret0); + state.push(KIND_REF | IN_REG, reg, 0); + } else { + masm.emit_mov_m_r(ValueKind.REF, slot_addr, xenv.runtime_ret0); + } kind = ValueKind.REF.code; } Val(val) => { var is_v128 = false; var low: u64, high: u64; match (val) { - I31(v) => { low = v; kind = ValueKind.REF.code; } - I32(v) => { low = v; kind = ValueKind.I32.code; } - I64(v) => { low = v; kind = ValueKind.I64.code; } - F32(v) => { low = v; kind = ValueKind.F32.code; } - F64(v) => { low = v; kind = ValueKind.F64.code; } + I31(v) => { + if (inline_config.can_inline) { + var reg = allocRegTos(ValueKind.REF); + masm.emit_mov_r_i(reg, i32.view(v)); + state.push(KIND_REF | IN_REG, reg, 0); + } + low = v; + kind = ValueKind.REF.code; + } + I32(v) => { + low = v; + if (inline_config.can_inline) state.push(KIND_I32 | IS_CONST, NO_REG, i32.view(v)); + kind = ValueKind.I32.code; + } + I64(v) => { + low = v; + if (inline_config.can_inline) { + var reg = allocRegTos(ValueKind.I64); + masm.emit_mov_r_l(reg, i64.view(v)); + state.push(KIND_I64 | IN_REG, reg, 0); + } + kind = ValueKind.I64.code; + } + F32(v) => { + low = v; + if (inline_config.can_inline) { + var reg = allocRegTos(ValueKind.F32); + masm.emit_mov_r_f32(reg, v); + state.push(KIND_F32 | IN_REG, reg, 0); + } + kind = ValueKind.F32.code; + } + F64(v) => { + low = v; + if (inline_config.can_inline) { + var reg = allocRegTos(ValueKind.F64); + masm.emit_mov_r_d64(reg, v); + state.push(KIND_F64 | IN_REG, reg, 0); + } + kind = ValueKind.F64.code; + } V128(l, h) => { low = l; high = h; is_v128 = true; + if (inline_config.can_inline) { + var reg = allocRegTos(ValueKind.V128); + masm.emit_mov_r_q(reg, low, high); + state.push(KIND_V128 | IN_REG, reg, 0); + } kind = ValueKind.V128.code; } - Ref(val) => { low = u64.view(Pointer.atObject(val) - Pointer.NULL); kind = ValueKind.REF.code; } + Ref(v) => { + low = u64.view(Pointer.atObject(v) - Pointer.NULL); + if (inline_config.can_inline) { + var reg = allocRegTos(ValueKind.REF); + masm.emit_mov_r_l(reg, i64.view(low)); + state.push(KIND_REF | IN_REG, reg, 0); + } + kind = ValueKind.REF.code; + } } - masm.emit_mov_m_d(slot_addr, low); - if (is_v128) { - masm.emit_mov_m_d(slot_addr.plus(8), high); + if (!inline_config.can_inline) { + masm.emit_mov_m_d(slot_addr, low); + if (is_v128) { + masm.emit_mov_m_d(slot_addr.plus(8), high); + } } } Operand(_, i) => { - var slot = state.sp + u32.view(i) - 1; - kind = state.state[slot].kind().code; - masm.emit_mov_m_m(ValueKind.REF, slot_addr, masm.slotAddr(slot)); + var index = orig_sp + u32.view(i) - 1; + if (inline_config.can_inline) { + visit_LOCAL_GET(u31.view(index)); + } else { + masm.emit_mov_m_m(ValueKind.REF, slot_addr, masm.slotAddr(index)); + } + kind = state.state[index].kind().code; } Local(_, i) => { - var slot = u32.view(i); - kind = state.state[slot].kind().code; - masm.emit_mov_m_m(ValueKind.REF, slot_addr, masm.slotAddr(slot)); + if (inline_config.can_inline) { + visit_LOCAL_GET(u31.view(i)); + } else { + masm.emit_mov_m_m(ValueKind.REF, slot_addr, masm.slotAddr(u32.view(i))); + } + kind = state.state[u31.view(i)].kind().code; } } - masm.emit_mov_m_i(slot_tag_addr, kind); + 
if (!inline_config.can_inline) { + masm.emit_mov_m_i(slot_tag_addr, kind); + } } var whamm_instance = callee_func.instance; var func_id = callee_func.decl.func_index; + var whamm_module = whamm_instance.module; + var whamm_func_decl = callee_func.decl; + if (inline_config.can_inline) { + var orig_decl = it.func; + var orig_pc = it.pc; + var orig_module = module; + var orig_sig = sig; + + // prepare spc for inlining + this.local_base_sp = u31.view(new_local_base_sp); + this.module = whamm_module; + this.func = whamm_func_decl; + this.sig = whamm_func_decl.sig; + + // inline codegen + it.reset(this.func); + it.dispatchLocalDecls(this); + this.is_inlined = true; + if (Trace.compiler) Trace.OUT.puts("Start compiling inlined whamm probe").ln(); + while (it.more() && success) { + if (Trace.compiler) traceOpcodeAndStack(false); + last_probe = 0; + masm.source_loc = it.pc; + it.dispatch(this); + if (Trace.compiler && Trace.asm) { + OUT.puts("JIT code: "); + masm.printCodeBytes(OUT, codegen_offset, masm.curCodeBytes()); + codegen_offset = masm.curCodeBytes(); + OUT.ln(); + } + unrefRegs(); + if (Debug.compiler) checkRegAlloc(); + it.next(); + } + if (Trace.compiler) Trace.OUT.puts("Finished compiling inlined whamm probe").ln(); + + // restore spc after inlining + it.reset(orig_decl).at(orig_pc); + this.local_base_sp = 0; + this.is_inlined = false; + this.module = orig_module; + this.func = orig_decl; + this.sig = orig_sig; + if (inline_config.swap_membase) { + masm.emit_mov_r_m(ValueKind.REF, regs.mem0_base, frame.mem0_base_slot); + } - var vsp_reg = allocTmpFixed(ValueKind.REF, regs.vsp); - var func_reg = allocTmpFixed(ValueKind.REF, regs.func_arg); - var tmp = allocTmp(ValueKind.REF); - - // Load the target code/entrypoint. - masm.emit_mov_r_l(func_reg, Pointer.atObject(whamm_instance.functions[func_id]) - Pointer.NULL); - masm.emit_mov_r_m(ValueKind.REF, tmp, MasmAddr(func_reg, offsets.WasmFunction_decl)); - masm.emit_mov_r_m(ValueKind.REF, tmp, MasmAddr(tmp, offsets.FuncDecl_target_code)); - // adjust vsp_reg to compute the "true" VSP, accounting for args to WhammProbe's WasmFunction - emit_compute_vsp(vsp_reg, state.sp + u32.view(whamm_sig.length)); - // Call to the entrypoint. - masm.emit_call_r(tmp); - emit_unwind_check(); - emit_reload_regs(); - if (!probeSpillMode.free_regs) state.emitRestoreAll(resolver); + // clear callee params/locals from abstract state + dropN(state.sp - orig_sp); + } else { + var vsp_reg = allocTmpFixed(ValueKind.REF, regs.vsp); + var func_reg = allocTmpFixed(ValueKind.REF, regs.func_arg); + var tmp = allocTmp(ValueKind.REF); + + // Load the target code/entrypoint. + masm.emit_mov_r_l(func_reg, Pointer.atObject(whamm_instance.functions[func_id]) - Pointer.NULL); + masm.emit_mov_r_m(ValueKind.REF, tmp, MasmAddr(func_reg, offsets.WasmFunction_decl)); + masm.emit_mov_r_m(ValueKind.REF, tmp, MasmAddr(tmp, offsets.FuncDecl_target_code)); + // adjust vsp_reg to compute the "true" VSP, accounting for args to WhammProbe's WasmFunction + emit_compute_vsp(vsp_reg, state.sp + u32.view(whamm_sig.length)); + // Call to the entrypoint. 
+ masm.emit_call_r(tmp); + emit_reload_regs(); + if (!probeSpillMode.free_regs) state.emitRestoreAll(resolver); + } } def visit_CRASH_EXEC() { @@ -512,35 +688,37 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl setUnreachable(); } def visit_END() { - var ctl_top = state.ctl_stack.peek(); - if (ctl_top.opcode == Opcode.LOOP.code) { - state.ctl_stack.pop(); - if (!ctl_top.reachable) setUnreachable(); - } else if (ctl_top.opcode == Opcode.IF.code) { - // simulate empty if-true block - state.emitFallthru(resolver); - masm.emit_br(ctl_top.label); - masm.bindLabel(ctl_top.else_label); - state.doElse(); - ctl_top.opcode = Opcode.ELSE.code; - state.emitFallthru(resolver); - masm.bindLabel(ctl_top.label); - state.resetToMerge(ctl_top); - state.ctl_stack.pop(); - } else if (ctl_top.opcode == Opcode.BLOCK.code || ctl_top.opcode == Opcode.ELSE.code) { - state.emitFallthru(resolver); - masm.bindLabel(ctl_top.label); - state.resetToMerge(ctl_top); - state.ctl_stack.pop(); - } else if (ctl_top.opcode == Opcode.RETURN.code) { - state.emitFallthru(resolver); - masm.bindLabel(ctl_top.label); - state.resetToMerge(ctl_top); + if (!this.is_inlined) { + var ctl_top = state.ctl_stack.peek(); + if (ctl_top.opcode == Opcode.LOOP.code) { + state.ctl_stack.pop(); + if (!ctl_top.reachable) setUnreachable(); + } else if (ctl_top.opcode == Opcode.IF.code) { + // simulate empty if-true block + state.emitFallthru(resolver); + masm.emit_br(ctl_top.label); + masm.bindLabel(ctl_top.else_label); + state.doElse(); + ctl_top.opcode = Opcode.ELSE.code; + state.emitFallthru(resolver); + masm.bindLabel(ctl_top.label); + state.resetToMerge(ctl_top); + state.ctl_stack.pop(); + } else if (ctl_top.opcode == Opcode.BLOCK.code || ctl_top.opcode == Opcode.ELSE.code) { + state.emitFallthru(resolver); + masm.bindLabel(ctl_top.label); + state.resetToMerge(ctl_top); + state.ctl_stack.pop(); + } else if (ctl_top.opcode == Opcode.RETURN.code) { + state.emitFallthru(resolver); + masm.bindLabel(ctl_top.label); + state.resetToMerge(ctl_top); + emitProbe(); + if (ctl_top.merge_count > 1) emitReturn(ctl_top); + state.ctl_stack.pop(); + } emitProbe(); - if (ctl_top.merge_count > 1) emitReturn(ctl_top); - state.ctl_stack.pop(); } - emitProbe(); } def visit_BR(depth: u31) { var target = state.getControl(depth); @@ -595,7 +773,6 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Compute the value stack pointer. emit_compute_vsp(vsp_reg, state.sp); - if (func.imp != null) { // A call to imported function must first check for WasmFunction. 
masm.emit_br_r(func_reg, MasmBrCond.IS_WASM_FUNC, wasmcall_label); @@ -816,6 +993,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl dropN(valcount); } def visit_LOCAL_GET(index: u31) { + index = index + local_base_sp; var lv = state.get(index); if (lv.inReg()) { regAlloc.assign(lv.reg, int.!(state.sp)); @@ -833,6 +1011,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl } } def visit_LOCAL_SET(index: u31) { + index = index + local_base_sp; var lv = state.get(index); var sv = state.pop(); if (sv.inReg()) { @@ -857,6 +1036,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl } } def visit_LOCAL_TEE(index: u31) { + index = index + local_base_sp; var lv = state.get(index); regAlloc.unassign(lv.reg, index); // unref existing register var sv = state.peek(); @@ -947,7 +1127,6 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl if (i32.view(val) == val) { state.push(KIND_I64 | IS_CONST, NO_REG, i32.view(val)); } else { - var tos = state.sp; var reg = allocRegTos(ValueKind.I64); masm.emit_mov_r_l(reg, val); state.push(KIND_I64 | IN_REG, reg, 0); @@ -1304,11 +1483,19 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // XXX: recompute VFP from VSP - #slots? masm.emit_mov_r_m(ValueKind.REF, regs.vfp, frame.vfp_slot); if (module.memories.length > 0) { - masm.emit_mov_r_m(ValueKind.REF, regs.mem0_base, frame.mem0_base_slot); + if (is_inlined) { + masm.emit_mov_r_m(ValueKind.REF, regs.mem0_base, frame.inlined_mem0_base_slot); + } else { + masm.emit_mov_r_m(ValueKind.REF, regs.mem0_base, frame.mem0_base_slot); + } } } def emit_load_instance(reg: Reg) { - masm.emit_mov_r_m(ValueKind.REF, reg, frame.instance_slot); + if (is_inlined) { // inline compilation + masm.emit_mov_r_m(ValueKind.REF, reg, frame.inlined_instance_slot); + } else { + masm.emit_mov_r_m(ValueKind.REF, reg, frame.instance_slot); + } } def emitLoad(kind: ValueKind, imm: MemArg, meth: (ValueKind, Reg, Reg, Reg, u32) -> ()) { var base_reg = regs.mem0_base; @@ -1336,8 +1523,29 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl var nflags = IN_REG | SpcConsts.kindToFlags(kind); if (kind == ValueKind.I32) nflags |= (iv.flags & TAG_STORED); // tag may already be stored for index state.push(nflags, dest, 0); + + if (intrinsified_read_probe != null) { + // spill everything + state.emitSaveAll(resolver, probeSpillMode); + + // load RT args (addr, val) and call RT + if (index_reg.index == 0) { // fixed addr + index_reg = allocTmp(ValueKind.I32); + masm.emit_mov_r_i(index_reg, i32.!(offset)); + } else { + masm.emit_addw_r_i(index_reg, i32.!(offset)); + } + var arg1 = masm.getV3ParamReg(ValueKind.REF, 1); + var arg2 = masm.getV3ParamReg(ValueKind.REF, 2); + masm.emit_mov_r_r(ValueKind.REF, arg1, index_reg); + masm.emit_mov_r_m(ValueKind.REF, arg2, masm.slotAddr(state.sp - 1)); + masm.emit_call_MemoryReadProbe_fire(intrinsified_read_probe); + emit_reload_regs(); + if (!probeSpillMode.free_regs) state.emitRestoreAll(resolver); + intrinsified_read_probe = null; + } } - def emitStore(kind: ValueKind, imm: MemArg, meth: (ValueKind, Reg, Reg, Reg, u32) -> ()) { + def emitStore(kind: ValueKind, imm: MemArg, meth: (ValueKind, Reg, Reg, Reg, Reg, u32) -> ()) { var base_reg = regs.mem0_base; if (imm.memory_index != 0) { // XXX: cache the base register for memories > 0 @@ -1360,7 +1568,49 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: 
RegAl } else { index_reg = popReg().reg; } - meth(kind, sv.reg, base_reg, index_reg, u32.!(offset)); // TODO: memory64 + var mirror_base: Reg; + if (intrinsified_write_probe != null && intrinsified_write_probe.writeMirror != null) { // setup write mirror base + var mirror_mem_addr = Pointer.atObject(intrinsified_write_probe.writeMirror) - Pointer.NULL; + mirror_base = allocTmp(ValueKind.REF); + masm.emit_mov_r_l(mirror_base, mirror_mem_addr); + masm.emit_read_v3_mem_base(mirror_base, mirror_base); + } + meth(kind, sv.reg, base_reg, mirror_base, index_reg, u32.!(offset)); // TODO: memory64 + if (intrinsified_write_probe != null && intrinsified_write_probe.has_fire_probe) { + // temporarily push values back in order to preserve the register values across RT call + state.push((byte.view(sv.kind().tag) << 4) | IN_REG, sv.reg, 0); + // spill everything + state.emitSaveAll(resolver, probeSpillMode); + // load RT args (addr, val) and call RT + if (index_reg.index == 0) { // fixed addr + index_reg = allocTmp(ValueKind.I32); + masm.emit_mov_r_i(index_reg, i32.!(offset)); + } else { + masm.emit_addw_r_i(index_reg, i32.!(offset)); + } + var arg1 = masm.getV3ParamReg(ValueKind.REF, 1); + var arg2 = masm.getV3ParamReg(ValueKind.REF, 2); + masm.emit_mov_r_r(ValueKind.REF, arg1, index_reg); + masm.emit_mov_r_m(ValueKind.REF, arg2, masm.slotAddr(state.sp - 1)); + masm.emit_call_MemoryWriteProbe_fire(intrinsified_write_probe); + emit_reload_regs(); + if (!probeSpillMode.free_regs) state.emitRestoreAll(resolver); + pop(); + } + intrinsified_write_probe = null; + } + + def emitIntrinsifiedMemoryReadProbe() { + // spill everything + state.emitSaveAll(resolver, probeSpillMode); + // compute VSP for potential frame access + emit_compute_vsp(regs.vsp, state.sp); + emit_spill_vsp(regs.vsp); + masm.emit_store_curstack_vsp(regs.vsp); + + + emit_reload_regs(); + if (!probeSpillMode.free_regs) state.emitRestoreAll(resolver); } //==================================================================== @@ -1538,9 +1788,8 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl } def traceOpcode(orig: bool) { OUT.flush(); - var pc = it.pc - start_pos; instrTracer.instr_width = Opcodes.longestName + 1; - instrTracer.putPcAndInstr(OUT, module, func, pc, orig); + instrTracer.putPcAndInstr(OUT, module, func, it.pc, orig); if (Trace.asm) { OUT.puts("JIT code: "); masm.printCodeBytes(OUT, codegen_offset, masm.curCodeBytes()); @@ -2191,4 +2440,32 @@ class MoveNode { var src: MoveNode; // source of the value for this node var dstList: MoveNode; // head of destination list var dstNext: MoveNode; // next in a list of successors -} \ No newline at end of file +} + +// checks function bytecode to see if it can be inlined based on +// simple heuristics: length <= 50 and straightline code. 
+def funcCanInline(decl: FuncDecl) -> InlineConfig { + var default = InlineConfig(false, false, false); + if (decl.orig_bytecode.length > 50 || decl.sig.params.length > 10) return default; + var bi = BytecodeIterator.new().reset(decl); + var swap_instance = false; + var swap_membase = false; + while (bi.more()) { + var op = bi.current(); + match (op) { + IF, BR, BR_IF, BR_TABLE, BR_ON_NULL, BR_ON_NON_NULL, BR_ON_CAST, BR_ON_CAST_FAIL, RETURN => return default; + THROW, CALL, CALL_INDIRECT, MEMORY_INIT, MEMORY_SIZE, MEMORY_GROW, MEMORY_COPY, MEMORY_FILL, REF_FUNC, DATA_DROP, + ELEM_DROP, TABLE_INIT, TABLE_SIZE, TABLE_COPY, TABLE_GROW, GLOBAL_SET, GLOBAL_GET, TABLE_SET, TABLE_GET => swap_instance = true; + I32_STORE, I64_STORE, F32_STORE, F64_STORE, I32_STORE8, I32_STORE16, I64_STORE8, I64_STORE16, I64_STORE32, + V128_STORE, I32_LOAD, I64_LOAD, F32_LOAD, F64_LOAD, I32_LOAD8_S, I32_LOAD8_U, I32_LOAD16_S, I32_LOAD16_U, + I64_LOAD8_S, I64_LOAD8_U, I64_LOAD16_S, I64_LOAD16_U, I64_LOAD32_S, I64_LOAD32_U, V128_LOAD => { + swap_membase = true; + } + _ => ; + } + bi.next(); + } + return InlineConfig(swap_membase, swap_instance, true); +} + +type InlineConfig(swap_membase: bool, swap_instance: bool, can_inline: bool); diff --git a/src/engine/v3/V3Interpreter.v3 b/src/engine/v3/V3Interpreter.v3 index 4b45c9c4..bc3ded55 100644 --- a/src/engine/v3/V3Interpreter.v3 +++ b/src/engine/v3/V3Interpreter.v3 @@ -206,7 +206,7 @@ class V3Interpreter extends WasmStack { // Read the opcode. var b = codeptr.peek1(); var opcode: Opcode; - if (b == InternalOpcode.PROBE.code) { + if (b == InternalOpcode.PROBE.code || b == InternalOpcode.WHAMM_PROBE.code) { // First local probes. var throwable = Instrumentation.fireLocalProbes(DynamicLoc(func, pc, TargetFrame(frame))); if (throwable != null) { @@ -1599,7 +1599,7 @@ class V3Interpreter extends WasmStack { var module = if(frame.func.instance != null, frame.func.instance.module); var opcode = codeptr.data[codeptr.pos]; if (instrTracer == null) instrTracer = InstrTracer.new(); - if (opcode == InternalOpcode.PROBE.code) { + if (opcode == InternalOpcode.PROBE.code || opcode == InternalOpcode.WHAMM_PROBE.code) { OUT.puts(" "); var prev = (codeptr.data, codeptr.pos, codeptr.limit); codeptr.reset(frame.func.decl.orig_bytecode, prev.1, prev.2); diff --git a/src/engine/x86-64/V3Offsets.v3 b/src/engine/x86-64/V3Offsets.v3 index 966074c2..bb7d848f 100644 --- a/src/engine/x86-64/V3Offsets.v3 +++ b/src/engine/x86-64/V3Offsets.v3 @@ -17,6 +17,7 @@ class V3Offsets { private def acc = X86_64FrameAccessor.new(vs, Pointer.NULL, decl); private def ha = HeapArray.new(null, []); private def cnt = CountProbe.new(); + private def whamm_Probe = WhammProbe.new(null, []); def Function_sig = int.view(Pointer.atField(wf.sig) - Pointer.atObject(wf)); def WasmFunction_instance = int.view(Pointer.atField(wf.instance) - Pointer.atObject(wf)); @@ -29,6 +30,7 @@ class V3Offsets { def FuncDecl_target_code = int.view(Pointer.atField(decl.target_code) - Pointer.atObject(decl)); def FuncDecl_tierup_trigger = int.view(Pointer.atField(decl.tierup_trigger) - Pointer.atObject(decl)); def FuncDecl_entry_probed = int.view(Pointer.atField(decl.entry_probed) - Pointer.atObject(decl)); + def FuncDecl_func_index = int.view(Pointer.atField(decl.func_index) - Pointer.atObject(decl)); def SigDecl_params = int.view(Pointer.atField(sig.params) - Pointer.atObject(sig)); def SigDecl_results = int.view(Pointer.atField(sig.results) - Pointer.atObject(sig)); @@ -39,6 +41,7 @@ class V3Offsets { def Instance_sig_ids = 
int.view(Pointer.atField(i.sig_ids) - Pointer.atObject(i)); def Instance_dropped_elems = int.view(Pointer.atField(i.dropped_elems) - Pointer.atObject(i)); def Instance_dropped_data = int.view(Pointer.atField(i.dropped_data) - Pointer.atObject(i)); + def Instance_module = int.view(Pointer.atField(i.module) - Pointer.atObject(i)); def Table_funcs = int.view(Pointer.atField(t.funcs) - Pointer.atObject(t)); def Table_elems = int.view(Pointer.atField(t.elems) - Pointer.atObject(t)); @@ -66,7 +69,10 @@ class V3Offsets { def HeapArray_vals = int.view(Pointer.atField(ha.vals) - Pointer.atObject(ha)); def CountProbe_count = int.view(Pointer.atField(cnt.count) - Pointer.atObject(cnt)); + def WhammProbe_trampoline = int.view(Pointer.atField(whamm_Probe.trampoline) - Pointer.atObject(whamm_Probe)); + def WhammProbe_func = int.view(Pointer.atField(whamm_Probe.func) - Pointer.atObject(whamm_Probe)); + def Module_probes = int.view(Pointer.atField(module.probes) - Pointer.atObject(module)); // Try to future-proof for compressed references someday and use REF_SIZE everywhere def REF_SIZE = byte.!(Pointer.atElement(mems, 1) - Pointer.atElement(mems, 0)); def INT_SIZE: byte = 4; diff --git a/src/engine/x86-64/X86_64Interpreter.v3 b/src/engine/x86-64/X86_64Interpreter.v3 index a5690a83..1f03770b 100644 --- a/src/engine/x86-64/X86_64Interpreter.v3 +++ b/src/engine/x86-64/X86_64Interpreter.v3 @@ -2172,6 +2172,37 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) { asm.sub_r_i(origIp, 1); genDispatch0(origIp.indirect(), dispatchTables[0].1, false); } + // specialized handler for whamm probes + if (FastIntTuning.enableWhammProbeTrampoline) { + writeDispatchEntry(dispatchTables[0].1, InternalOpcode.WHAMM_PROBE.code, w.atEnd().pos); { + // call runtime to get the WhammProbe object + computeCurIpFromIp(-1); + computePcFromCurIp(); + saveCallerIVars(); + asm.movq_r_m(r_tmp0, m_wasm_func); + callRuntime(refRuntimeCall(RT.runtime_GET_LOCAL_PROBE), [r_tmp0, r_curpc], false); + + // jump to the trampoline + asm.movq_r_m(r_vsp, m_vsp); + masm.emit_mov_r_m(ValueKind.REF, xenv.func_arg, MasmAddr(xenv.runtime_ret0, masm.offsets.WhammProbe_func)); + masm.emit_mov_r_m(ValueKind.REF, xenv.runtime_ret0, MasmAddr(xenv.runtime_ret0, masm.offsets.WhammProbe_trampoline)); + masm.emit_jump_r(xenv.runtime_ret0); + + // reentry point from the trampoline + ic.header.whammReentryOffset = w.atEnd().pos; + restoreCallerIVars(); + // Compute a pointer to the original code at this pc offset + var pc = r_tmp1; // = IP - CODE + asm.movq_r_r(pc, r_ip); + asm.sub_r_m(pc, m_code); + var origIp = r_tmp0; // FUNC_DECL.orig_bytecode + pc - 1 + asm.movq_r_m(origIp, r_func_decl.plus(offsets.FuncDecl_orig_bytecode)); + + asm.add_r_r(origIp, pc); + asm.sub_r_i(origIp, 1); + genDispatch0(origIp.indirect(), dispatchTables[0].1, false); + } + } } def genGlobalProbeSupport() { var offset = w.atEnd().pos; diff --git a/src/engine/x86-64/X86_64MacroAssembler.v3 b/src/engine/x86-64/X86_64MacroAssembler.v3 index eeaaa376..4591c61f 100644 --- a/src/engine/x86-64/X86_64MacroAssembler.v3 +++ b/src/engine/x86-64/X86_64MacroAssembler.v3 @@ -152,11 +152,11 @@ class X86_64MacroAssembler extends MacroAssembler { var b = G(base), t = handle_large_offset(index, offset); recordCurSourceLoc(); match (kind) { - I32 => asm.movd_r_m(G(dst), X86_64Addr.new(b, t.0, 1, t.1)); - REF, I64 => asm.movq_r_m(G(dst), X86_64Addr.new(b, t.0, 1, t.1)); - F32 => asm.movss_s_m(X(dst), X86_64Addr.new(b, t.0, 1, t.1)); - F64 => asm.movsd_s_m(X(dst), X86_64Addr.new(b, t.0, 
1, t.1)); - V128 => asm.movdqu_s_m(X(dst), X86_64Addr.new(b, t.0, 1, t.1)); + I32 => asm.movd_r_m(G(dst), X86_64Addr.new(G(base), t.0, 1, t.1)); + REF, I64 => asm.movq_r_m(G(dst), X86_64Addr.new(G(base), t.0, 1, t.1)); + F32 => asm.movss_s_m(X(dst), X86_64Addr.new(G(base), t.0, 1, t.1)); + F64 => asm.movsd_s_m(X(dst), X86_64Addr.new(G(base), t.0, 1, t.1)); + V128 => asm.movdqu_s_m(X(dst), X86_64Addr.new(G(base), t.0, 1, t.1)); } } def emit_v128_load_lane_r_m(dst: Reg, src: X86_64Addr, asm_mov_r_m: (X86_64Gpr, X86_64Addr) -> T) { @@ -171,25 +171,41 @@ class X86_64MacroAssembler extends MacroAssembler { var t = handle_large_offset(index, offset); return X86_64Addr.new(G(base), t.0, 1, t.1); } - def emit_storeb_r_r_r_i(kind: ValueKind, val: Reg, base: Reg, index: Reg, offset: u32) { + def emit_storeb_r_r_r_i(kind: ValueKind, val: Reg, base: Reg, mirror_base: Reg, index: Reg, offset: u32) { var t = handle_large_offset(index, offset); recordCurSourceLoc(); + if (mirror_base.index != 0) { + asm.q.movb_m_r(X86_64Addr.new(G(mirror_base), t.0, 1, t.1), G(val)); + } asm.q.movb_m_r(X86_64Addr.new(G(base), t.0, 1, t.1), G(val)); } - def emit_storew_r_r_r_i(kind: ValueKind, val: Reg, base: Reg, index: Reg, offset: u32) { + def emit_storew_r_r_r_i(kind: ValueKind, val: Reg, base: Reg, mirror_base: Reg, index: Reg, offset: u32) { var t = handle_large_offset(index, offset); recordCurSourceLoc(); + if (mirror_base.index != 0) { + asm.q.movw_m_r(X86_64Addr.new(G(mirror_base), t.0, 1, t.1), G(val)); + } asm.q.movw_m_r(X86_64Addr.new(G(base), t.0, 1, t.1), G(val)); } - def emit_store_r_r_r_i(kind: ValueKind, val: Reg, base: Reg, index: Reg, offset: u32) { + def emit_store_r_r_r_i(kind: ValueKind, val: Reg, base: Reg, mirror_base: Reg, index: Reg, offset: u32) { var b = G(base), t = handle_large_offset(index, offset); recordCurSourceLoc(); + var addr = X86_64Addr.new(G(base), t.0, 1, t.1); + if (mirror_base.index != 0) { + match (kind) { + I32 => asm.movd_m_r(X86_64Addr.new(G(mirror_base), t.0, 1, t.1), G(val)); + REF, I64 => asm.movq_m_r(X86_64Addr.new(G(mirror_base), t.0, 1, t.1), G(val)); + F32 => asm.movss_m_s(X86_64Addr.new(G(mirror_base), t.0, 1, t.1), X(val)); + F64 => asm.movsd_m_s(X86_64Addr.new(G(mirror_base), t.0, 1, t.1), X(val)); + V128 => asm.movdqu_m_s(X86_64Addr.new(G(mirror_base), t.0, 1, t.1), X(val)); + } + } match (kind) { - I32 => asm.movd_m_r(X86_64Addr.new(b, t.0, 1, t.1), G(val)); - REF, I64 => asm.movq_m_r(X86_64Addr.new(b, t.0, 1, t.1), G(val)); - F32 => asm.movss_m_s(X86_64Addr.new(b, t.0, 1, t.1), X(val)); - F64 => asm.movsd_m_s(X86_64Addr.new(b, t.0, 1, t.1), X(val)); - V128 => asm.movdqu_m_s(X86_64Addr.new(b, t.0, 1, t.1), X(val)); + I32 => asm.movd_m_r(X86_64Addr.new(G(base), t.0, 1, t.1), G(val)); + REF, I64 => asm.movq_m_r(X86_64Addr.new(G(base), t.0, 1, t.1), G(val)); + F32 => asm.movss_m_s(X86_64Addr.new(G(base), t.0, 1, t.1), X(val)); + F64 => asm.movsd_m_s(X86_64Addr.new(G(base), t.0, 1, t.1), X(val)); + V128 => asm.movdqu_m_s(X86_64Addr.new(G(base), t.0, 1, t.1), X(val)); } } @@ -723,6 +739,22 @@ class X86_64MacroAssembler extends MacroAssembler { asm.icall_r(scratch); recordRetSourceLoc(); } + def emit_call_MemoryReadProbe_fire(probe: MemoryReadProbe) { + var codePtr = CiRuntime.unpackClosure(probe.fire_probe).0; + var refOffset = asm.movq_r_p(Target.V3_PARAM_GPRS[0], Pointer.atObject(probe) - Pointer.NULL); + addEmbeddedRefOffset(refOffset); + asm.movq_r_l(scratch, codePtr - Pointer.NULL); // XXX: make direct call to runtime if within 2GB + asm.icall_r(scratch); + 
recordRetSourceLoc(); + } + def emit_call_MemoryWriteProbe_fire(probe: MemoryWriteProbe) { + var codePtr = CiRuntime.unpackClosure(probe.fire_probe).0; + var refOffset = asm.movq_r_p(Target.V3_PARAM_GPRS[0], Pointer.atObject(probe) - Pointer.NULL); + addEmbeddedRefOffset(refOffset); + asm.movq_r_l(scratch, codePtr - Pointer.NULL); // XXX: make direct call to runtime if within 2GB + asm.icall_r(scratch); + recordRetSourceLoc(); + } def emit_call_HostCallStub() { var ic = X86_64PreGenStubs.getInterpreterCode(); asm.movq_r_l(scratch, (ic.start + ic.header.hostCallStubOffset) - Pointer.NULL); diff --git a/src/engine/x86-64/X86_64MasmRegs.v3 b/src/engine/x86-64/X86_64MasmRegs.v3 index ce5b2717..f4eb4e82 100644 --- a/src/engine/x86-64/X86_64MasmRegs.v3 +++ b/src/engine/x86-64/X86_64MasmRegs.v3 @@ -145,18 +145,17 @@ component X86_64MasmRegs { def m = MasmAddr(xspc.sp, _); - xint.accessor_slot = xspc.accessor_slot = m(X86_64InterpreterFrame.accessor.offset); - xint.instance_slot = xspc.instance_slot = m(X86_64InterpreterFrame.instance.offset); - xint.mem0_base_slot = xspc.mem0_base_slot = m(X86_64InterpreterFrame.mem0_base.offset); - xint.pc_slot = xspc.pc_slot = m(X86_64InterpreterFrame.curpc.offset); - xint.vfp_slot = xspc.vfp_slot = m(X86_64InterpreterFrame.vfp.offset); - xint.vsp_slot = xspc.vsp_slot = m(X86_64InterpreterFrame.vsp.offset); - xint.wasm_func_slot = xspc.wasm_func_slot = m(X86_64InterpreterFrame.wasm_func.offset); - - xint.func_decl_slot = m(X86_64InterpreterFrame.func_decl.offset); - xint.ip_slot = m(X86_64InterpreterFrame.ip.offset); - xint.stp_slot = m(X86_64InterpreterFrame.stp.offset); - xint.wasm_func_slot = m(X86_64InterpreterFrame.wasm_func.offset); + xint.accessor_slot = xspc.accessor_slot = m(X86_64InterpreterFrame.accessor.offset); + xint.instance_slot = xspc.instance_slot = m(X86_64InterpreterFrame.instance.offset); + xint.mem0_base_slot = xspc.mem0_base_slot = m(X86_64InterpreterFrame.mem0_base.offset); + xint.pc_slot = xspc.pc_slot = m(X86_64InterpreterFrame.curpc.offset); + xint.vfp_slot = xspc.vfp_slot = m(X86_64InterpreterFrame.vfp.offset); + xint.vsp_slot = xspc.vsp_slot = m(X86_64InterpreterFrame.vsp.offset); + xint.wasm_func_slot = xspc.wasm_func_slot = m(X86_64InterpreterFrame.wasm_func.offset); + xint.ip_slot = xspc.inlined_mem0_base_slot = m(X86_64InterpreterFrame.ip.offset); + xint.stp_slot = xspc.inlined_instance_slot = m(X86_64InterpreterFrame.stp.offset); + + xint.func_decl_slot = m(X86_64InterpreterFrame.func_decl.offset); xint.code_slot = m(X86_64InterpreterFrame.code.offset); xint.eip_slot = m(X86_64InterpreterFrame.eip.offset); diff --git a/src/engine/x86-64/X86_64PreGenStubs.v3 b/src/engine/x86-64/X86_64PreGenStubs.v3 index 047a4278..58312545 100644 --- a/src/engine/x86-64/X86_64PreGenStubs.v3 +++ b/src/engine/x86-64/X86_64PreGenStubs.v3 @@ -29,6 +29,7 @@ layout X86_64PreGenHeader { +40 oobMemoryHandlerOffset: i32; // handler for signals caused by OOB memory access +44 divZeroHandlerOffset: i32; // handler for signals caused by divide by zero +48 stackOverflowHandlerOffset: i32; // handler for signals caused by (value- or call-) stack overflow + +52 whammReentryOffset: i32; +56 hostCallStubOffset: i32; // host call stub that calls runtime +60 hostCallStubEnd: i32; // host call stub that calls runtime +64 codeEnd: i32; // end of all executable code diff --git a/src/engine/x86-64/X86_64Runtime.v3 b/src/engine/x86-64/X86_64Runtime.v3 index 8fb0db24..bb32e82e 100644 --- a/src/engine/x86-64/X86_64Runtime.v3 +++ b/src/engine/x86-64/X86_64Runtime.v3 @@ 
-106,6 +106,9 @@ component X86_64Runtime { if (ret != null) return stack.throw(ret); return ret; } + def runtime_GET_LOCAL_PROBE(func: WasmFunction, pc: int) -> Probe { + return func.instance.module.probes[func.decl.func_index][pc]; + } def runtime_getFrameAccessorMetaRef() -> FrameAccessorRef { var rsp = CiRuntime.callerSp(); var frame = TargetFrame(rsp); diff --git a/src/engine/x86-64/X86_64Target.v3 b/src/engine/x86-64/X86_64Target.v3 index 4a6794f2..fd2fc4d0 100644 --- a/src/engine/x86-64/X86_64Target.v3 +++ b/src/engine/x86-64/X86_64Target.v3 @@ -158,6 +158,10 @@ class X86_64ExecutionStrategy extends ExecutionStrategy { // One tier: fast-int, modules require no pre-processing. class X86_64InterpreterOnlyStrategy extends X86_64ExecutionStrategy { + def call(func: Function, args: Range) -> Result { + return X86_64StackManager.runOnFreshStack(func, args); + } + def onFuncValidationFinish(module: Module, func: FuncDecl, err: ErrorGen) { if (err != null && !err.ok()) return; Target.setUnconditionalInterpreterEntryIfMultiTier(func); @@ -165,6 +169,11 @@ class X86_64InterpreterOnlyStrategy extends X86_64ExecutionStrategy { def onNewFunction(wf: WasmFunction, err: ErrorGen) { Target.setUnconditionalInterpreterEntryIfMultiTier(wf.decl); } + + def onFuncProbeInsert1(module: Module, func: FuncDecl, offset: int, p: Probe) { + if (FastIntTuning.enableWhammProbeTrampoline && WhammProbe.?(p)) + X86_64WhammTrampoline.genSingleProbe(WhammProbe.!(p), X86_64PreGenStubs.getInterpreterCode()); + } } // Base class of all strategies that use SPC. diff --git a/src/engine/x86-64/X86_64WhammProbeTrampoline.v3 b/src/engine/x86-64/X86_64WhammProbeTrampoline.v3 new file mode 100644 index 00000000..e921cae3 --- /dev/null +++ b/src/engine/x86-64/X86_64WhammProbeTrampoline.v3 @@ -0,0 +1,124 @@ +def env = X86_64MasmRegs.INT_EXEC_ENV; +def PAGE_SIZE_i: int = 4096; +def default_target_code: X86_64SpcModuleCode; +var trampoline_code: X86_64SpcModuleCode; +var trampoline_entries: List<(Array, Pointer)>; + +component X86_64WhammTrampoline { + def genSingleProbe(probe: WhammProbe, ic: X86_64InterpreterCode) { + if (trampoline_code == default_target_code) allocateCodeForTrampoline(); + var whamm_sig = probe.sig; + var entry_ptr = getEntry(whamm_sig); + if (entry_ptr == Pointer.NULL) { + var w = DataWriter.new(); + var masm = X86_64MacroAssembler.new(w, X86_64MasmRegs.CONFIG); + var valuerep = masm.valuerep; + var offsets = masm.getOffsets(); + for (i < whamm_sig.length) { + var slot_tag_addr = MasmAddr(env.vsp, i * valuerep.slot_size); + var slot_addr = MasmAddr(env.vsp, i * valuerep.slot_size + valuerep.tag_size); + match(whamm_sig[i]) { + FrameAccessor => { + masm.emit_call_runtime_getFrameAccessorMetaRef(); + masm.emit_mov_m_r(ValueKind.REF, slot_addr, env.runtime_ret0); + masm.emit_mov_m_i(slot_tag_addr, ValueKind.REF.code); + } + Val(val) => { + var kind: byte; + var is_v128 = false; + var low: u64, high: u64; + match (val) { + I31(v) => { low = v; kind = ValueKind.REF.code; } + I32(v) => { low = v; kind = ValueKind.I32.code; } + I64(v) => { low = v; kind = ValueKind.I64.code; } + F32(v) => { low = v; kind = ValueKind.F32.code; } + F64(v) => { low = v; kind = ValueKind.F64.code; } + V128(l, h) => { + low = l; + high = h; + is_v128 = true; + kind = ValueKind.V128.code; + } + Ref(val) => { low = u64.view(Pointer.atObject(val) - Pointer.NULL); kind = ValueKind.REF.code; } + } + masm.emit_mov_m_d(slot_addr, low); + if (is_v128) { + masm.emit_mov_m_d(slot_addr.plus(8), high); + } + masm.emit_mov_m_i(slot_tag_addr, kind); + 
} + Operand(_, i) => { + var src_addr = MasmAddr(env.vsp, (i - 1) * valuerep.slot_size + valuerep.tag_size); + var src_tag_addr = MasmAddr(env.vsp, (i - 1) * valuerep.slot_size); + masm.emit_mov_m_m(ValueKind.REF, slot_addr, src_addr); + masm.emit_mov_m_m(ValueKind.REF, slot_tag_addr, src_tag_addr); + } + Local(_, i) => { + var src_addr = MasmAddr(env.vfp, i * valuerep.slot_size + valuerep.tag_size); + var src_tag_addr = MasmAddr(env.vfp, i * valuerep.slot_size); + masm.emit_mov_m_m(ValueKind.REF, slot_addr, src_addr); + masm.emit_mov_m_m(ValueKind.REF, slot_tag_addr, src_tag_addr); + } + } + } + // update vsp and call the probe function within interpreter + masm.emit_addw_r_i(env.vsp, whamm_sig.length * valuerep.slot_size); + masm.asm.movq_r_l(masm.scratch, (ic.start + ic.header.intIntEntryOffset) - Pointer.NULL); + masm.asm.icall_r(masm.scratch); + + // jump back to whamm probe handler + masm.emit_mov_r_l(env.tmp0, (ic.start + ic.header.whammReentryOffset) - Pointer.NULL); + masm.emit_jump_r(env.tmp0); + var addr = setTrampolineCode(masm); + trampoline_entries = List<(Array, Pointer)>.new((whamm_sig, addr), trampoline_entries); + entry_ptr = addr; + } + probe.trampoline = TargetCode(entry_ptr); + } +} + +def getEntry(sig: Array) -> Pointer { + for (entry = trampoline_entries; entry != null; entry = entry.tail) { + if (sigArraysMatch(entry.head.0, sig)) { + return entry.head.1; + } + } + return Pointer.NULL; +} + +def sigArraysMatch(x: Array, y: Array) -> bool { + if (x == y) return true; + if (x.length != y.length) return false; + for (i < x.length) if (!x[i].equal(y[i])) return false; + return true; +} + +def allocateCodeForTrampoline() { + // Allocate 10 pages to ensure that we have enough space for all trampoline code. + var code_size = PAGE_SIZE_i * 10; + // Allocate a read/write/execute mapping for code. + var mapping = Mmap.reserve(u64.!(code_size), Mmap.PROT_WRITE | Mmap.PROT_READ | Mmap.PROT_EXEC); + var code = X86_64SpcModuleCode.new(mapping); + RiRuntime.registerUserCode(code); + code.keepAlive(); + if (Trace.compiler) Trace.OUT.put2("Reserved 0x%x ... 0x%x for WhammProbe trampoline jit code", + (mapping.range.start - Pointer.NULL), (mapping.range.end - Pointer.NULL)).ln(); + trampoline_code = code; +} + +def setTrampolineCode(masm: X86_64MacroAssembler) -> Pointer { + var addr = trampoline_code.appendCode(masm); + var end = addr + masm.w.atEnd().pos; + if (Trace.compiler) { + Trace.OUT.put1("Single WhammProbe trampoline code: break *0x%x", addr - Pointer.NULL) + .put2(" disass 0x%x, 0x%x", addr - Pointer.NULL, end - Pointer.NULL).ln(); + var cur_byte = addr; + Trace.OUT.puts("JIT code: "); + while (cur_byte < end) { + Trace.OUT.put1("%x ", cur_byte.load()); + cur_byte++; + } + Trace.OUT.ln(); + } + return addr; +} \ No newline at end of file diff --git a/src/monitors/MemAccessMonitor.v3 b/src/monitors/MemAccessMonitor.v3 new file mode 100644 index 00000000..a2b38a02 --- /dev/null +++ b/src/monitors/MemAccessMonitor.v3 @@ -0,0 +1,71 @@ +// Copyright 2024 Wizard Authors. All rights reserved. +// See LICENSE for details of Apache 2.0 license. 
+ +def monitor_ = MonitorRegistry.add( + "mem-access", "tracks memory access pattern.", + MemAccessMonitor.new()); + + +class MemAccessMonitor extends Monitor { + def bi = BytecodeIterator.new(); + var block_access: HashMap; + var mod: Module; + + def onParse(module: Module, err: ErrorGen) { + mod = module; + block_access = HashMap.new(int.!, int.==); + MemAccessBytecodeInstrumenter.new(block_access, module).run(); + } +} + +private class MemAccessBytecodeInstrumenter extends BytecodeInstrumenter { + def var block_access: HashMap; + + new(block_access: HashMap, module: Module) super(module) { + this.block_access = block_access; + } + + def visitLoad(op: Opcode, imm: MemArg, size: u8) { + if (op == Opcode.I32_LOAD || op == Opcode.I64_LOAD) { + var bi = this.bi; + insertProbeHere(AccessReadProbe.new(this.block_access, imm, size)); + } + } + def visitStore(op: Opcode, imm: MemArg, size: u8) { + if (op == Opcode.I32_STORE || op == Opcode.I64_STORE) { + var bi = this.bi; + insertProbeHere(AccessWriteProbe.new(this.block_access, imm, size)); + } + } +} + +private class AccessWriteProbe extends MemoryWriteProbe { + def var block_access: HashMap; + + new(block_access: HashMap, imm: MemArg, size: u8) { + this.imm = imm; + this.size = size; + this.block_access = block_access; + this.has_fire_probe = true; + } + + def fire_probe(addr: u64, val: u64, unused_val: u64) { + var block = int.!(addr / 10000); + block_access[block]++; + } +} + +private class AccessReadProbe extends MemoryReadProbe { + def var block_access: HashMap; + + new(block_access: HashMap, imm: MemArg, size: u8) { + this.imm = imm; + this.size = size; + this.block_access = block_access; + } + + def fire_probe(addr: u64, val: u64, unused_val: u64) { + var block = int.!(addr / 10000); + block_access[block]++; + } +} diff --git a/src/monitors/R3Monitor.v3 b/src/monitors/R3Monitor.v3 index 2daa4ec0..96e4bd78 100644 --- a/src/monitors/R3Monitor.v3 +++ b/src/monitors/R3Monitor.v3 @@ -26,8 +26,6 @@ class R3Monitor extends Monitor { var mm = ModuleInstrumenter.new(module); R3MonitorBytecodeInstrumenter.new(handler, module).runMatching(filterReplayFunctions); mm.forEachFuncMatching(filterReplayFunctions, instrumentFunctionEnter); - mm.beforeMemReadMatching(filterReplayFunctions, handler.onMemoryRead); - mm.beforeMemWriteMatching(filterReplayFunctions, handler.onMemoryWrite); mm.beforeMemGrowMatching(filterReplayFunctions, handler.onMemoryGrow); } @@ -62,6 +60,14 @@ private class R3MonitorBytecodeInstrumenter extends BytecodeInstrumenter { handler = handler; } + def visitLoad(op: Opcode, imm: MemArg, size: u8) { + checkCallReturnAndInsertProbe(R3MemoryReadProbe.new(handler, imm, size)); + } + + def visitStore(op: Opcode, imm: MemArg, size: u8) { + checkCallReturnAndInsertProbe(R3MemoryWriteProbe.new(handler, imm, size)); + } + def visitOp(op: Opcode) { if (last_op_is_call) { insertProbeHere(ReturnProbe.new(handler)); @@ -613,6 +619,54 @@ private class MemoryFillProbe(handler: EventHandler, mem_index: u31) extends Pro } } +private class R3MemoryWriteProbe extends MemoryWriteProbe { + def var handler: EventHandler; + + new(handler: EventHandler, imm: MemArg, size: u8) { + this.imm = imm; + this.size = size; + this.handler = handler; + this.writeMirror = handler.shadowMems[imm.memory_index]; + this.has_fire_probe = false; + } +} + +private class R3MemoryReadProbe extends MemoryReadProbe { + def var handler: EventHandler; + + new(handler: EventHandler, imm: MemArg, size: u8) { + this.imm = imm; + this.size = size; + this.handler = handler; + } + + 
def fire_probe(addr: u64, val_lower: u64, val_upper: u64) { + var size = u32.view(size); + var shadow_mem = handler.shadowMems[imm.memory_index]; + var shadow_ptr = shadow_mem.range_ol_64(addr, size).result; + var shadow_data_lo: u64, shadow_data_hi: u64; + match (size) { + 1 => shadow_data_lo = DataReaders.read_range_u8(shadow_ptr); + 2 => shadow_data_lo = DataReaders.read_range_u16(shadow_ptr); + 4 => shadow_data_lo = DataReaders.read_range_u32(shadow_ptr); + 8 => shadow_data_lo = DataReaders.read_range_u64(shadow_ptr); + 16 => { + var shadow_result = DataReaders.read_range_u128(shadow_ptr); + shadow_data_lo = shadow_result.0; + shadow_data_hi = shadow_result.1; + } + _ => ; + } + var val_range = Array.new(16); + DataWriters.write_range_u64(val_range, val_lower); + DataWriters.write_range_u64(val_range[8 ... 16], val_upper); + if (shadow_data_lo != val_lower || (size == 16 && (shadow_data_hi != val_upper))) { + handler.trace.put(WasmEvent.Load(imm.memory_index, addr, Arrays.range(val_range, 0, int.view(size)))); + shadow_mem.copyIn(u32.view(addr), val_range, 0, size); + } + } +} + private type CallStackEntry(func_id: int, kind: CallKind); private enum CallKind {INT, EXT} diff --git a/src/util/BasicTracing.v3 b/src/util/BasicTracing.v3 index e20bf859..21a87a8d 100644 --- a/src/util/BasicTracing.v3 +++ b/src/util/BasicTracing.v3 @@ -52,7 +52,7 @@ class TraceInstrProbe extends Probe { var opcode = codeptr.data[codeptr.pos]; OUT.mark(); - if (opcode == InternalOpcode.PROBE.code) { + if (opcode == InternalOpcode.PROBE.code || opcode == InternalOpcode.WHAMM_PROBE.code) { OUT.puts(" "); var prev = (codeptr.data, codeptr.pos, codeptr.limit); codeptr.reset(func.decl.orig_bytecode, prev.1, prev.2); diff --git a/src/util/ProbeUtil.v3 b/src/util/ProbeUtil.v3 index a9877f26..d9668f8e 100644 --- a/src/util/ProbeUtil.v3 +++ b/src/util/ProbeUtil.v3 @@ -103,3 +103,87 @@ class ExternalDebuggerBreakpointProbe extends Probe { return Resumption.Continue; // TODO: currently only has effect in SPC code } } + +// A probe that instruments memory read, intrinsified by SPC. +class MemoryReadProbe extends Probe { + var imm: MemArg; + var size: u8; + // fallback invocation when not intrinsified. + def fire(dynamicLoc: DynamicLoc) -> Resumption { + var accessor = dynamicLoc.frame.getFrameAccessor(); + var address: u64 = Values.unbox_u(accessor.getTopOfStack()); + address += imm.offset; + var memory = dynamicLoc.func.instance.memories[imm.memory_index]; + var size = u32.view(size); + var mem_ptr_trap = memory.range_ol_64(address, size); + if (mem_ptr_trap.trapped()) + return Resumption.Trap(mem_ptr_trap.reason, true); + var low: u64, high: u64; + var mem_ptr = mem_ptr_trap.result; + match (size) { + 1 => { + low = DataReaders.read_range_u8(mem_ptr); + } + 2 => { + low = DataReaders.read_range_u16(mem_ptr); + } + 4 => { + low = DataReaders.read_range_u32(mem_ptr); + } + 8 => { + low = DataReaders.read_range_u64(mem_ptr); + } + 16 => { + var data_result = DataReaders.read_range_u128(mem_ptr); + low = data_result.0; + high = data_result.1; + } + _ => ; + } + fire_probe(address, low, high); + return Resumption.Continue; + } + def fire_probe(addr: u64, val_lower: u64, val_upper: u64) {} +} + +// A probe that instruments memory write, intrinsified by SPC. +// if writeMirror is set, any probed writes to the main memory will also be performed on this mirror. +class MemoryWriteProbe extends Probe { + var imm: MemArg, size: u8, writeMirror: Memory, has_fire_probe: bool; + + // fallback invocation when not intrinsified. 
+ def fire(dynamicLoc: DynamicLoc) -> Resumption { + var accessor = dynamicLoc.frame.getFrameAccessor(); + var address: u64 = Values.unbox_u(accessor.getOperand(-1)); + address += imm.offset; + var size = u32.view(size); + var newval_lower: u64, newval_upper: u64; + match (accessor.getTopOfStack()) { + I32(v) => newval_lower = v; + I64(v) => newval_lower = v; + F32(v) => newval_lower = v; + F64(v) => newval_lower = v; + V128(low, high) => { + newval_lower = low; + newval_upper = high; + } + _ => ; + } + // apply write mirroring + if (writeMirror != null) { + var mirror_ptr = writeMirror.range_ol_64(address, size); + if (mirror_ptr.ok()) { + match (size) { + 1 => DataWriters.write_range_u8(mirror_ptr.result, u8.view(newval_lower)); + 2 => DataWriters.write_range_u16(mirror_ptr.result, u16.view(newval_lower)); + 4 => DataWriters.write_range_u32(mirror_ptr.result, u32.view(newval_lower)); + 8 => DataWriters.write_range_u64(mirror_ptr.result, newval_lower); + 16 => DataWriters.write_range_u128(mirror_ptr.result, (newval_lower, newval_upper)); + } + } + } + fire_probe(address, newval_lower, newval_upper); + return Resumption.Continue; + } + def fire_probe(addr: u64, newval_lower: u64, newval_upper: u64) {} +} \ No newline at end of file diff --git a/src/util/Whamm.v3 b/src/util/Whamm.v3 index 3f4f2d7e..9600a8f3 100644 --- a/src/util/Whamm.v3 +++ b/src/util/Whamm.v3 @@ -41,6 +41,25 @@ type WhammArg { case Val(v: Value); case Operand(t: ValueType, i: int); case Local(t: ValueType, i: int); + + def equal(that: WhammArg) -> bool { + if (this == that) return true; + if (WhammArg.FrameAccessor.?(this) == WhammArg.FrameAccessor.?(that)) return true; + if (WhammArg.Val.?(this) == WhammArg.Val.?(that)) { + return WhammArg.Val.!(this).v.equal(WhammArg.Val.!(that).v); + } + if (WhammArg.Operand.?(this) == WhammArg.Operand.?(that)) { + var a = WhammArg.Operand.!(this); + var b = WhammArg.Operand.!(that); + return ValueTypes.kind(a.t).code == ValueTypes.kind(b.t).code && a.i == b.i; + } + if (WhammArg.Local.?(this) == WhammArg.Local.?(that)) { + var a = WhammArg.Local.!(this); + var b = WhammArg.Local.!(that); + return ValueTypes.kind(a.t).code == ValueTypes.kind(b.t).code && a.i == b.i; + } + return false; + } } class WhammPredicate(call: WhammParam.Call) { } @@ -113,6 +132,7 @@ component Whamm { // A probe that adapts a Wasm function to be called by the engine-internal probing mechanism. class WhammProbe(func: Function, sig: Array) extends Probe { + var trampoline: TargetCode; // properties set by the spc to make inlining optimization decisions. var inline_heuristic_checked = false; var spc_inline_func = false;