diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index df9692e35c6d78..65e623a4a03104 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c @@ -2884,6 +2884,22 @@ init_arglist (InterpFrame *frame, MonoMethodSignature *sig, stackval *sp, char * * this/static * ret/void * 16 arguments -> 64 functions. */ +#if HOST_BROWSER +/* + * For the jiterpreter, we want to record a hit count for interp_entry wrappers that can + * be jitted, but not for ones that can't. As a result we need to put this in its own + * macro instead of in INTERP_ENTRY_BASE, so that the generic wrappers don't have to + * call it on every invocation. + * Once this gets called a few hundred times, the wrapper will be jitted so we'll stop + * paying the cost of the hit counter and the entry will become faster. + */ +#define INTERP_ENTRY_UPDATE_HIT_COUNT(_method) \ + do { if (mono_opt_jiterpreter_interp_entry_enabled) \ + mono_interp_record_interp_entry (_method); } while (0) +#else +#define INTERP_ENTRY_UPDATE_HIT_COUNT(_method) +#endif + #define INTERP_ENTRY_BASE(_method, _this_arg, _res) \ InterpEntryData data; \ (data).rmethod = (_method); \ @@ -2891,30 +2907,34 @@ init_arglist (InterpFrame *frame, MonoMethodSignature *sig, stackval *sp, char * (data).this_arg = (_this_arg); \ (data).many_args = NULL; +#define INTERP_ENTRY_BASE_WITH_HIT_COUNT(_method, _this_arg, _res) \ + INTERP_ENTRY_BASE (_method, _this_arg, _res) \ + INTERP_ENTRY_UPDATE_HIT_COUNT (_method); + #define INTERP_ENTRY0(_this_arg, _res, _method) { \ - INTERP_ENTRY_BASE (_method, _this_arg, _res); \ + INTERP_ENTRY_BASE_WITH_HIT_COUNT (_method, _this_arg, _res); \ interp_entry (&data); \ } #define INTERP_ENTRY1(_this_arg, _res, _method) { \ - INTERP_ENTRY_BASE (_method, _this_arg, _res); \ + INTERP_ENTRY_BASE_WITH_HIT_COUNT (_method, _this_arg, _res); \ (data).args [0] = arg1; \ interp_entry (&data); \ } #define INTERP_ENTRY2(_this_arg, _res, _method) { \ - INTERP_ENTRY_BASE (_method, 
_this_arg, _res); \ + INTERP_ENTRY_BASE_WITH_HIT_COUNT (_method, _this_arg, _res); \ (data).args [0] = arg1; \ (data).args [1] = arg2; \ interp_entry (&data); \ } #define INTERP_ENTRY3(_this_arg, _res, _method) { \ - INTERP_ENTRY_BASE (_method, _this_arg, _res); \ + INTERP_ENTRY_BASE_WITH_HIT_COUNT (_method, _this_arg, _res); \ (data).args [0] = arg1; \ (data).args [1] = arg2; \ (data).args [2] = arg3; \ interp_entry (&data); \ } #define INTERP_ENTRY4(_this_arg, _res, _method) { \ - INTERP_ENTRY_BASE (_method, _this_arg, _res); \ + INTERP_ENTRY_BASE_WITH_HIT_COUNT (_method, _this_arg, _res); \ (data).args [0] = arg1; \ (data).args [1] = arg2; \ (data).args [2] = arg3; \ @@ -2922,7 +2942,7 @@ init_arglist (InterpFrame *frame, MonoMethodSignature *sig, stackval *sp, char * interp_entry (&data); \ } #define INTERP_ENTRY5(_this_arg, _res, _method) { \ - INTERP_ENTRY_BASE (_method, _this_arg, _res); \ + INTERP_ENTRY_BASE_WITH_HIT_COUNT (_method, _this_arg, _res); \ (data).args [0] = arg1; \ (data).args [1] = arg2; \ (data).args [2] = arg3; \ @@ -2931,7 +2951,7 @@ init_arglist (InterpFrame *frame, MonoMethodSignature *sig, stackval *sp, char * interp_entry (&data); \ } #define INTERP_ENTRY6(_this_arg, _res, _method) { \ - INTERP_ENTRY_BASE (_method, _this_arg, _res); \ + INTERP_ENTRY_BASE_WITH_HIT_COUNT (_method, _this_arg, _res); \ (data).args [0] = arg1; \ (data).args [1] = arg2; \ (data).args [2] = arg3; \ @@ -2941,7 +2961,7 @@ init_arglist (InterpFrame *frame, MonoMethodSignature *sig, stackval *sp, char * interp_entry (&data); \ } #define INTERP_ENTRY7(_this_arg, _res, _method) { \ - INTERP_ENTRY_BASE (_method, _this_arg, _res); \ + INTERP_ENTRY_BASE_WITH_HIT_COUNT (_method, _this_arg, _res); \ (data).args [0] = arg1; \ (data).args [1] = arg2; \ (data).args [2] = arg3; \ @@ -2952,7 +2972,7 @@ init_arglist (InterpFrame *frame, MonoMethodSignature *sig, stackval *sp, char * interp_entry (&data); \ } #define INTERP_ENTRY8(_this_arg, _res, _method) { \ - 
INTERP_ENTRY_BASE (_method, _this_arg, _res); \ + INTERP_ENTRY_BASE_WITH_HIT_COUNT (_method, _this_arg, _res); \ (data).args [0] = arg1; \ (data).args [1] = arg2; \ (data).args [2] = arg3; \ diff --git a/src/mono/mono/mini/interp/jiterpreter.h b/src/mono/mono/mini/interp/jiterpreter.h index 6856b6d58a4be0..689e2c4238d52c 100644 --- a/src/mono/mono/mini/interp/jiterpreter.h +++ b/src/mono/mono/mini/interp/jiterpreter.h @@ -41,6 +41,9 @@ jiterp_insert_entry_points (void *td); void mono_jiterp_register_jit_call_thunk (void *cinfo, WasmJitCallThunk thunk); +extern void +mono_interp_record_interp_entry (void *fn_ptr); + // jiterpreter-interp-entry.ts // HACK: Pass void* so that this header can include safely in files without definition for InterpMethod extern gpointer diff --git a/src/mono/mono/utils/options-def.h b/src/mono/mono/utils/options-def.h index bf9cc2c890a5fb..5f8ca2ac67f4f4 100644 --- a/src/mono/mono/utils/options-def.h +++ b/src/mono/mono/utils/options-def.h @@ -103,14 +103,21 @@ DEFINE_BOOL(jiterpreter_estimate_heat, "jiterpreter-estimate-heat", FALSE, "Main DEFINE_BOOL(jiterpreter_count_bailouts, "jiterpreter-count-bailouts", FALSE, "Maintain accurate count of all trace bailouts based on cause") // Dump the wasm blob for all compiled traces DEFINE_BOOL(jiterpreter_dump_traces, "jiterpreter-dump-traces", FALSE, "Dump the wasm blob for all compiled traces to the console") +// Use runtime imports for pointer constants +// Currently reduces performance significantly :( +DEFINE_BOOL(jiterpreter_use_constants, "jiterpreter-use-constants", FALSE, "Use runtime imports for pointer constants") // any trace that doesn't have at least this many meaningful (non-nop) opcodes in it will be rejected DEFINE_INT(jiterpreter_minimum_trace_length, "jiterpreter-minimum-trace-length", 8, "Reject traces shorter than this number of meaningful opcodes") // once a trace entry point is inserted, we only actually JIT code for it once it's been hit this many times 
-DEFINE_INT(jiterpreter_minimum_trace_hit_count, "jiterpreter-minimum-trace-hit-count", 10000, "JIT trace entry points once they are hit this many times") +DEFINE_INT(jiterpreter_minimum_trace_hit_count, "jiterpreter-minimum-trace-hit-count", 5000, "JIT trace entry points once they are hit this many times") // After a do_jit_call call site is hit this many times, we will queue it to be jitted -DEFINE_INT(jiterpreter_jit_call_trampoline_hit_count, "jiterpreter-jit-call-hit-count", 3000, "Queue specialized do_jit_call trampoline for JIT after this many hits") +DEFINE_INT(jiterpreter_jit_call_trampoline_hit_count, "jiterpreter-jit-call-hit-count", 2000, "Queue specialized do_jit_call trampoline for JIT after this many hits") // After a do_jit_call call site is hit this many times without being jitted, we will flush the JIT queue -DEFINE_INT(jiterpreter_jit_call_queue_flush_threshold, "jiterpreter-jit-call-queue-flush-threshold", 10000, "Flush the do_jit_call JIT queue after an unJITted call site has this many hits") +DEFINE_INT(jiterpreter_jit_call_queue_flush_threshold, "jiterpreter-jit-call-queue-flush-threshold", 8000, "Flush the do_jit_call JIT queue after an unJITted call site has this many hits") +// After a generic interp_entry wrapper is hit this many times, we will queue it to be jitted +DEFINE_INT(jiterpreter_interp_entry_trampoline_hit_count, "jiterpreter-interp-entry-hit-count", 250, "Queue specialized interp_entry wrapper for JIT after this many hits") +// After a generic interp_entry wrapper is hit this many times without being jitted, we will flush the JIT queue +DEFINE_INT(jiterpreter_interp_entry_queue_flush_threshold, "jiterpreter-interp-entry-queue-flush-threshold", 1000, "Flush the interp_entry JIT queue after an unJITted call site has this many hits") #endif // HOST_BROWSER /* Cleanup */ diff --git a/src/mono/wasm/runtime/es6/dotnet.es6.lib.js b/src/mono/wasm/runtime/es6/dotnet.es6.lib.js index 929887d1b06860..fb184f0533ff08 100644 --- 
a/src/mono/wasm/runtime/es6/dotnet.es6.lib.js +++ b/src/mono/wasm/runtime/es6/dotnet.es6.lib.js @@ -80,6 +80,7 @@ const linked_functions = [ // jiterpreter.c / interp.c / transform.c "mono_interp_tier_prepare_jiterpreter", + "mono_interp_record_interp_entry", "mono_interp_jit_wasm_entry_trampoline", "mono_interp_jit_wasm_jit_call_trampoline", "mono_interp_invoke_wasm_jit_call_trampoline", diff --git a/src/mono/wasm/runtime/exports-linker.ts b/src/mono/wasm/runtime/exports-linker.ts index 4f3b2ce45f6e18..8329035ae71bfa 100644 --- a/src/mono/wasm/runtime/exports-linker.ts +++ b/src/mono/wasm/runtime/exports-linker.ts @@ -8,7 +8,7 @@ import { mono_wasm_load_icu_data, mono_wasm_get_icudt_name } from "./icu"; import { mono_wasm_bind_cs_function } from "./invoke-cs"; import { mono_wasm_bind_js_function, mono_wasm_invoke_bound_function, mono_wasm_invoke_import } from "./invoke-js"; import { mono_interp_tier_prepare_jiterpreter } from "./jiterpreter"; -import { mono_interp_jit_wasm_entry_trampoline } from "./jiterpreter-interp-entry"; +import { mono_interp_jit_wasm_entry_trampoline, mono_interp_record_interp_entry } from "./jiterpreter-interp-entry"; import { mono_interp_jit_wasm_jit_call_trampoline, mono_interp_invoke_wasm_jit_call_trampoline, mono_interp_flush_jitcall_queue, mono_jiterp_do_jit_call_indirect } from "./jiterpreter-jit-call"; import { mono_wasm_typed_array_from_ref } from "./net6-legacy/buffers"; import { @@ -57,6 +57,7 @@ export function export_linker(): any { // interp.c and jiterpreter.c mono_interp_tier_prepare_jiterpreter, + mono_interp_record_interp_entry, mono_interp_jit_wasm_entry_trampoline, mono_interp_jit_wasm_jit_call_trampoline, mono_interp_invoke_wasm_jit_call_trampoline, diff --git a/src/mono/wasm/runtime/jiterpreter-interp-entry.ts b/src/mono/wasm/runtime/jiterpreter-interp-entry.ts index a715dd352ecadd..c481045734e986 100644 --- a/src/mono/wasm/runtime/jiterpreter-interp-entry.ts +++ b/src/mono/wasm/runtime/jiterpreter-interp-entry.ts @@ 
-12,7 +12,8 @@ import cwraps from "./cwraps"; import { WasmValtype, WasmBuilder, addWasmFunctionPointer, _now, elapsedTimes, counters, getRawCwrap, importDef, - getWasmFunctionTable, recordFailure + getWasmFunctionTable, recordFailure, getOptions, + JiterpreterOptions, shortNameBase } from "./jiterpreter-support"; // Controls miscellaneous diagnostic output. @@ -45,6 +46,7 @@ let trampImports : Array<[string, string, Function]> | undefined; let fnTable : WebAssembly.Table; let jitQueueTimeout = 0; const jitQueue : TrampolineInfo[] = []; +const infoTable : { [ptr: number] : TrampolineInfo } = {}; /* const enum WasmReftype { @@ -81,6 +83,7 @@ class TrampolineInfo { defaultImplementation: number; result: number; + hitCount: number; constructor ( imethod: number, method: MonoMethod, argumentCount: number, pParamTypes: NativePointer, @@ -112,9 +115,37 @@ class TrampolineInfo { subName = `${this.imethod.toString(16)}_${subName}`; } this.traceName = subName; + this.hitCount = 0; } } +let mostRecentOptions : JiterpreterOptions | undefined = undefined; + +export function mono_interp_record_interp_entry (imethod: number) { + // clear the unbox bit + imethod = imethod & ~0x1; + + const info = infoTable[imethod]; + // This shouldn't happen but it's not worth crashing over + if (!info) + return; + + if (!mostRecentOptions) + mostRecentOptions = getOptions(); + + info.hitCount++; + if (info.hitCount === mostRecentOptions!.interpEntryFlushThreshold) + flush_wasm_entry_trampoline_jit_queue(); + else if (info.hitCount !== mostRecentOptions!.interpEntryHitCount) + return; + + jitQueue.push(info); + if (jitQueue.length >= maxJitQueueLength) + flush_wasm_entry_trampoline_jit_queue(); + else + ensure_jit_is_scheduled(); +} + // returns function pointer export function mono_interp_jit_wasm_entry_trampoline ( imethod: number, method: MonoMethod, argumentCount: number, pParamTypes: NativePointer, @@ -133,19 +164,15 @@ export function mono_interp_jit_wasm_entry_trampoline ( if (!fnTable) 
fnTable = getWasmFunctionTable(); - jitQueue.push(info); - // We start by creating a function pointer for this interp_entry trampoline, but instead of // compiling it right away, we make it point to the default implementation for that signature // This gives us time to wait before jitting it so we can jit multiple trampolines at once. + // Some entry wrappers are also only called a few dozen times, so it's valuable to wait + // until a wrapper is called a lot before wasting time/memory jitting it. const defaultImplementationFn = fnTable.get(defaultImplementation); info.result = addWasmFunctionPointer(defaultImplementationFn); - if (jitQueue.length >= maxJitQueueLength) - flush_wasm_entry_trampoline_jit_queue(); - else - ensure_jit_is_scheduled(); - + infoTable[imethod] = info; return info.result; } @@ -172,11 +199,14 @@ function flush_wasm_entry_trampoline_jit_queue () { if (jitQueue.length <= 0) return; + // If the function signature contains types that need stackval_from_data, that'll use + // some constant slots, so make some extra space + const constantSlots = (4 * jitQueue.length) + 1; let builder = trampBuilder; if (!builder) - trampBuilder = builder = new WasmBuilder(); + trampBuilder = builder = new WasmBuilder(constantSlots); else - builder.clear(); + builder.clear(constantSlots); const started = _now(); let compileStarted = 0; let rejected = true, threw = false; @@ -239,7 +269,7 @@ function flush_wasm_entry_trampoline_jit_queue () { // Emit function imports for (let i = 0; i < trampImports.length; i++) { mono_assert(trampImports[i], () => `trace #${i} missing`); - const wasmName = compress ? i.toString(16) : undefined; + const wasmName = compress ? 
i.toString(shortNameBase) : undefined; builder.defineImportedFunction("i", trampImports[i][0], trampImports[i][1], wasmName); } @@ -275,6 +305,7 @@ function flush_wasm_entry_trampoline_jit_queue () { builder.beginFunction(info.traceName, { "sp_args": WasmValtype.i32, "need_unbox": WasmValtype.i32, + "scratchBuffer": WasmValtype.i32, }); const ok = generate_wasm_body(builder, info); @@ -293,16 +324,17 @@ function flush_wasm_entry_trampoline_jit_queue () { const traceModule = new WebAssembly.Module(buffer); const imports : any = { - h: (Module).asm.memory }; // Place our function imports into the import dictionary for (let i = 0; i < trampImports.length; i++) { - const wasmName = compress ? i.toString(16) : trampImports[i][0]; + const wasmName = compress ? i.toString(shortNameBase) : trampImports[i][0]; imports[wasmName] = trampImports[i][2]; } const traceInstance = new WebAssembly.Instance(traceModule, { - i: imports + i: imports, + c: builder.getConstants(), + m: { h: (Module).asm.memory }, }); // Now that we've jitted the trampolines, go through and fix up the function pointers @@ -432,7 +464,7 @@ function append_stackval_from_data ( default: { // Call stackval_from_data to copy the value and get its size - builder.i32_const(type); + builder.ptr_const(type); // result builder.local("sp_args"); // value @@ -454,6 +486,12 @@ function generate_wasm_body ( ) : boolean { // FIXME: This is not thread-safe, but the alternative of alloca makes the trampoline // more expensive + // The solution is likely to put the address of the scratch buffer in a global that we provide + // at module instantiation time, so each thread can malloc its own copy of the buffer + // and then pass it in when instantiating instead of compiling the constant into the module + // FIXME: Pre-allocate these buffers and their constant slots at the start before we + // generate function bodies, so that even if we run out of constant slots for MonoType we + // will always have put the buffers in a constant slot. 
This will be necessary for thread safety const scratchBuffer = Module._malloc(sizeOfJiterpEntryData); _zero_region(scratchBuffer, sizeOfJiterpEntryData); @@ -479,7 +517,8 @@ function generate_wasm_body ( } // Populate the scratch buffer containing call data - builder.i32_const(scratchBuffer); + builder.ptr_const(scratchBuffer); + builder.local("scratchBuffer", WasmOpcode.tee_local); builder.local("rmethod"); // Clear the unbox-this-reference flag if present (see above) so that rmethod is a valid ptr @@ -492,7 +531,7 @@ function generate_wasm_body ( // prologue takes data->rmethod and initializes data->context, then returns a value for sp_args // prologue also performs thread attach - builder.i32_const(scratchBuffer); + builder.local("scratchBuffer"); // prologue takes this_arg so it can handle delegates if (info.hasThisReference) builder.local("this_arg"); @@ -530,7 +569,7 @@ function generate_wasm_body ( append_stackval_from_data(builder, type, `arg${i}`); } - builder.i32_const(scratchBuffer); + builder.local("scratchBuffer"); builder.local("sp_args"); if (info.hasReturnValue) builder.local("res"); diff --git a/src/mono/wasm/runtime/jiterpreter-jit-call.ts b/src/mono/wasm/runtime/jiterpreter-jit-call.ts index 64eb7f6d2b514f..9581dde517394e 100644 --- a/src/mono/wasm/runtime/jiterpreter-jit-call.ts +++ b/src/mono/wasm/runtime/jiterpreter-jit-call.ts @@ -10,7 +10,8 @@ import { import { WasmOpcode } from "./jiterpreter-opcodes"; import { WasmValtype, WasmBuilder, addWasmFunctionPointer as addWasmFunctionPointer, - _now, elapsedTimes, counters, getWasmFunctionTable, applyOptions, recordFailure + _now, elapsedTimes, counters, getWasmFunctionTable, applyOptions, + recordFailure, shortNameBase } from "./jiterpreter-support"; import cwraps from "./cwraps"; @@ -243,9 +244,9 @@ export function mono_interp_flush_jitcall_queue () : void { let builder = trampBuilder; if (!builder) - trampBuilder = builder = new WasmBuilder(); + trampBuilder = builder = new WasmBuilder(0); else 
- builder.clear(); + builder.clear(0); if (builder.options.enableWasmEh) { if (!getIsWasmEhSupported()) { @@ -302,7 +303,7 @@ export function mono_interp_flush_jitcall_queue () : void { const compress = true; // Emit function imports for (let i = 0; i < trampImports.length; i++) { - const wasmName = compress ? i.toString(16) : undefined; + const wasmName = compress ? i.toString(shortNameBase) : undefined; builder.defineImportedFunction("i", trampImports[i][0], trampImports[i][1], wasmName); } builder.generateImportSection(); @@ -352,16 +353,17 @@ export function mono_interp_flush_jitcall_queue () : void { const traceModule = new WebAssembly.Module(buffer); const imports : any = { - h: (Module).asm.memory }; // Place our function imports into the import dictionary for (let i = 0; i < trampImports.length; i++) { - const wasmName = compress ? i.toString(16) : trampImports[i][0]; + const wasmName = compress ? i.toString(shortNameBase) : trampImports[i][0]; imports[wasmName] = trampImports[i][2]; } const traceInstance = new WebAssembly.Instance(traceModule, { - i: imports + i: imports, + c: builder.getConstants(), + m: { h: (Module).asm.memory }, }); for (let i = 0; i < jitQueue.length; i++) { diff --git a/src/mono/wasm/runtime/jiterpreter-support.ts b/src/mono/wasm/runtime/jiterpreter-support.ts index 322378efc9ccf6..82e18cb9d0b367 100644 --- a/src/mono/wasm/runtime/jiterpreter-support.ts +++ b/src/mono/wasm/runtime/jiterpreter-support.ts @@ -1,14 +1,15 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-import { NativePointer, VoidPtr } from "./types/emscripten"; +import { NativePointer, ManagedPointer, VoidPtr } from "./types/emscripten"; import { Module } from "./imports"; import { WasmOpcode } from "./jiterpreter-opcodes"; import cwraps from "./cwraps"; export const maxFailures = 2, maxMemsetSize = 64, - maxMemmoveSize = 64; + maxMemmoveSize = 64, + shortNameBase = 36; // uint16 export declare interface MintOpcodePtr extends NativePointer { @@ -34,13 +35,15 @@ export class WasmBuilder { traceBuf: Array = []; branchTargets = new Set(); options!: JiterpreterOptions; + constantSlots: Array = []; + nextConstantSlot = 0; - constructor () { + constructor (constantSlotCount: number) { this.stack = [new BlobBuilder()]; - this.clear(); + this.clear(constantSlotCount); } - clear () { + clear (constantSlotCount: number) { this.options = getOptions(); this.stackSize = 1; this.inSection = false; @@ -58,6 +61,10 @@ export class WasmBuilder { this.traceBuf.length = 0; this.branchTargets.clear(); this.activeBlocks = 0; + this.nextConstantSlot = 0; + this.constantSlots.length = this.options.useConstants ? constantSlotCount : 0; + for (let i = 0; i < this.constantSlots.length; i++) + this.constantSlots[i] = 0; } push () { @@ -134,9 +141,28 @@ export class WasmBuilder { this.appendU8(WasmOpcode.return_); } - i32_const (value: number) { + i32_const (value: number | ManagedPointer | NativePointer) { this.appendU8(WasmOpcode.i32_const); - this.appendLeb(value); + this.appendLeb(value); + } + + ptr_const (pointer: number | ManagedPointer | NativePointer) { + let idx = this.options.useConstants ? 
this.constantSlots.slice(0, this.nextConstantSlot).indexOf(pointer) : -1; + if ( + this.options.useConstants && + (idx < 0) && (this.nextConstantSlot < this.constantSlots.length) + ) { + idx = this.nextConstantSlot++; + this.constantSlots[idx] = pointer; + } + + if (idx >= 0) { + this.appendU8(WasmOpcode.get_global); + this.appendLeb(idx); + } else { + // console.log(`Warning: no constant slot for ${pointer} (${this.nextConstantSlot} slots used)`); + this.i32_const(pointer); + } } ip_const (value: MintOpcodePtr, highBit?: boolean) { @@ -206,7 +232,7 @@ export class WasmBuilder { generateImportSection () { // Import section this.beginSection(2); - this.appendULeb(1 + this.importsToEmit.length); + this.appendULeb(1 + this.importsToEmit.length + this.constantSlots.length); for (let i = 0; i < this.importsToEmit.length; i++) { const tup = this.importsToEmit[i]; @@ -216,7 +242,15 @@ export class WasmBuilder { this.appendULeb(tup[3]); } - this.appendName("i"); + for (let i = 0; i < this.constantSlots.length; i++) { + this.appendName("c"); + this.appendName(i.toString(shortNameBase)); + this.appendU8(0x03); // global + this.appendU8(WasmValtype.i32); // all constants are pointers right now + this.appendU8(0x00); // constant + } + + this.appendName("m"); this.appendName("h"); // memtype (limits = { min=0x01, max=infinity }) this.appendU8(0x02); @@ -444,6 +478,13 @@ export class WasmBuilder { throw new Error("Stack not empty"); return this.stack[0].getArrayView(fullCapacity); } + + getConstants () { + const result : { [key: string]: number } = {}; + for (let i = 0; i < this.constantSlots.length; i++) + result[i.toString(shortNameBase)] = this.constantSlots[i]; + return result; + } } export class BlobBuilder { @@ -857,8 +898,14 @@ export type JiterpreterOptions = { countBailouts: boolean; // Dump the wasm blob for all compiled traces dumpTraces: boolean; + // Use runtime imports for pointer constants + useConstants: boolean; minimumTraceLength: number; minimumTraceHitCount: number; + jitCallHitCount: 
number; + jitCallFlushThreshold: number; + interpEntryHitCount: number; + interpEntryFlushThreshold: number; } const optionNames : { [jsName: string] : string } = { @@ -873,8 +920,13 @@ const optionNames : { [jsName: string] : string } = { "estimateHeat": "jiterpreter-estimate-heat", "countBailouts": "jiterpreter-count-bailouts", "dumpTraces": "jiterpreter-dump-traces", + "useConstants": "jiterpreter-use-constants", "minimumTraceLength": "jiterpreter-minimum-trace-length", - "minimumTraceHitCount": "jiterpreter-minimum-trace-hit-count" + "minimumTraceHitCount": "jiterpreter-minimum-trace-hit-count", + "jitCallHitCount": "jiterpreter-jit-call-hit-count", + "jitCallFlushThreshold": "jiterpreter-jit-call-queue-flush-threshold", + "interpEntryHitCount": "jiterpreter-interp-entry-hit-count", + "interpEntryFlushThreshold": "jiterpreter-interp-entry-queue-flush-threshold", }; let optionsVersion = -1; diff --git a/src/mono/wasm/runtime/jiterpreter.ts b/src/mono/wasm/runtime/jiterpreter.ts index 65db771debddf7..71bb67fd43fd83 100644 --- a/src/mono/wasm/runtime/jiterpreter.ts +++ b/src/mono/wasm/runtime/jiterpreter.ts @@ -16,7 +16,7 @@ import { copyIntoScratchBuffer, _now, elapsedTimes, append_memset_dest, append_memmove_dest_src, counters, getRawCwrap, importDef, JiterpreterOptions, getOptions, recordFailure, try_append_memset_fast, - try_append_memmove_fast + try_append_memmove_fast, shortNameBase } from "./jiterpreter-support"; // Controls miscellaneous diagnostic output. 
@@ -311,11 +311,17 @@ function generate_wasm ( startOfBody: MintOpcodePtr, sizeOfBody: MintOpcodePtr, methodFullName: string | undefined ) : number { + // Pre-allocate a decent number of constant slots - this adds fixed size bloat + // to the trace but will make the actual pointer constants in the trace smaller + // If we run out of constant slots it will transparently fall back to i32_const + // For System.Runtime.Tests we only run out of slots ~50 times in 9100 test cases + const constantSlotCount = 8; + let builder = traceBuilder; if (!builder) - traceBuilder = builder = new WasmBuilder(); + traceBuilder = builder = new WasmBuilder(constantSlotCount); else - builder.clear(); + builder.clear(constantSlotCount); mostRecentOptions = builder.options; @@ -507,7 +513,7 @@ function generate_wasm ( // Emit function imports for (let i = 0; i < traceImports.length; i++) { mono_assert(traceImports[i], () => `trace #${i} missing`); - const wasmName = compress ? i.toString(16) : undefined; + const wasmName = compress ? i.toString(shortNameBase) : undefined; builder.defineImportedFunction("i", traceImports[i][0], traceImports[i][1], wasmName); } @@ -552,6 +558,10 @@ function generate_wasm ( if (getU16(ip) !== MintOpcode.MINT_TIER_PREPARE_JITERPRETER) throw new Error(`Expected *ip to be MINT_TIER_PREPARE_JITERPRETER but was ${getU16(ip)}`); + // TODO: Call generate_wasm_body before generating any of the sections and headers. + // This will allow us to do things like dynamically vary the number of locals, in addition + // to using global constants and figuring out how many constant slots we need in advance + // since a long trace might need many slots and that bloats the header. 
const opcodes_processed = generate_wasm_body( frame, traceName, ip, endOfBody, builder, instrumentedTraceId @@ -578,7 +588,6 @@ function generate_wasm ( const traceModule = new WebAssembly.Module(buffer); const imports : any = { - h: (Module).asm.memory }; // Place our function imports into the import dictionary for (let i = 0; i < traceImports.length; i++) { @@ -586,12 +595,14 @@ function generate_wasm ( const iname = traceImports[i][0]; if (!ifn || (typeof (ifn) !== "function")) throw new Error(`Import '${iname}' not found or not a function`); - const wasmName = compress ? i.toString(16) : iname; + const wasmName = compress ? i.toString(shortNameBase) : iname; imports[wasmName] = ifn; } const traceInstance = new WebAssembly.Instance(traceModule, { - i: imports + i: imports, + c: builder.getConstants(), + m: { h: (Module).asm.memory }, }); // Get the exported trace function @@ -850,7 +861,7 @@ function generate_wasm_body ( // We need to make sure to notify the interpreter about tiering opcodes // so that tiering up will still happen const iMethod = getU32(frame + offsetOfImethod); - builder.i32_const(iMethod); + builder.ptr_const(iMethod); // increase_entry_count will return 1 if we can continue, otherwise // we need to bail out into the interpreter so it can perform tiering builder.callImport("entry"); @@ -891,7 +902,7 @@ function generate_wasm_body ( // frame->imethod->data_items [ip [2]] const data = get_imethod_data(frame, getArgU16(ip, 2)); - builder.i32_const(data); + builder.ptr_const(data); append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store); break; @@ -901,7 +912,7 @@ function generate_wasm_body ( const klass = get_imethod_data(frame, getArgU16(ip, 3)); append_ldloc(builder, getArgU16(ip, 1), WasmOpcode.i32_load); append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load); - builder.i32_const(klass); + builder.ptr_const(klass); builder.callImport("value_copy"); break; } @@ -916,7 +927,7 @@ function generate_wasm_body ( const klass = 
get_imethod_data(frame, getArgU16(ip, 3)); append_ldloc(builder, getArgU16(ip, 1), WasmOpcode.i32_load); append_ldloca(builder, getArgU16(ip, 2)); - builder.i32_const(klass); + builder.ptr_const(klass); builder.callImport("value_copy"); break; } @@ -1007,7 +1018,7 @@ function generate_wasm_body ( case MintOpcode.MINT_LDTSFLDA: { append_ldloca(builder, getArgU16(ip, 1)); // This value is unsigned but I32 is probably right - builder.i32_const(getArgI32(ip, 2)); + builder.ptr_const(getArgI32(ip, 2)); builder.callImport("ldtsflda"); break; } @@ -1051,7 +1062,7 @@ function generate_wasm_body ( append_ldloca(builder, getArgU16(ip, 1)); append_ldloca(builder, getArgU16(ip, 2)); // klass - builder.i32_const(get_imethod_data(frame, getArgU16(ip, 3))); + builder.ptr_const(get_imethod_data(frame, getArgU16(ip, 3))); // opcode builder.i32_const(opcode); builder.callImport("cast"); @@ -1066,7 +1077,7 @@ function generate_wasm_body ( case MintOpcode.MINT_BOX: case MintOpcode.MINT_BOX_VT: { // MonoVTable *vtable = (MonoVTable*)frame->imethod->data_items [ip [3]]; - builder.i32_const(get_imethod_data(frame, getArgU16(ip, 3))); + builder.ptr_const(get_imethod_data(frame, getArgU16(ip, 3))); // dest, src append_ldloca(builder, getArgU16(ip, 1)); append_ldloca(builder, getArgU16(ip, 2)); @@ -1077,7 +1088,7 @@ function generate_wasm_body ( case MintOpcode.MINT_UNBOX: { builder.block(); // MonoClass *c = (MonoClass*)frame->imethod->data_items [ip [3]]; - builder.i32_const(get_imethod_data(frame, getArgU16(ip, 3))); + builder.ptr_const(get_imethod_data(frame, getArgU16(ip, 3))); // dest, src append_ldloca(builder, getArgU16(ip, 1)); append_ldloca(builder, getArgU16(ip, 2)); @@ -1094,7 +1105,7 @@ function generate_wasm_body ( builder.block(); // MonoObject *o = mono_gc_alloc_obj (vtable, m_class_get_instance_size (vtable->klass)); append_ldloca(builder, getArgU16(ip, 1)); - builder.i32_const(get_imethod_data(frame, getArgU16(ip, 2))); + builder.ptr_const(get_imethod_data(frame, 
getArgU16(ip, 2))); // LOCAL_VAR (ip [1], MonoObject*) = o; builder.callImport("newobj_i"); // If the newobj operation succeeded, continue, otherwise bailout @@ -1537,7 +1548,7 @@ function append_vtable_initialize (builder: WasmBuilder, pVtable: NativePointer, // TODO: Actually initialize the vtable instead of just checking and bailing out? builder.block(); // FIXME: This will prevent us from reusing traces between runs since the vtables can move - builder.i32_const(pVtable + get_offset_of_vtable_initialized_flag()); + builder.ptr_const(pVtable + get_offset_of_vtable_initialized_flag()); builder.appendU8(WasmOpcode.i32_load8_u); builder.appendMemarg(0, 0); builder.appendU8(WasmOpcode.br_if); @@ -1647,7 +1658,7 @@ function emit_fieldop ( case MintOpcode.MINT_STSFLD_O: // dest if (isStatic) { - builder.i32_const(pStaticData); + builder.ptr_const(pStaticData); } else { builder.local("cknull_ptr"); builder.i32_const(offsetBytes); @@ -1664,7 +1675,7 @@ function emit_fieldop ( append_ldloca(builder, valueOffset); // src if (isStatic) { - builder.i32_const(pStaticData); + builder.ptr_const(pStaticData); } else { builder.local("cknull_ptr"); builder.i32_const(offsetBytes); @@ -1681,7 +1692,7 @@ function emit_fieldop ( builder.appendU8(WasmOpcode.i32_add); // src = locals + ip [2] append_ldloca(builder, valueOffset); - builder.i32_const(klass); + builder.ptr_const(klass); builder.callImport("value_copy"); return true; } @@ -1689,7 +1700,7 @@ function emit_fieldop ( const sizeBytes = getArgU16(ip, 4); // dest if (isStatic) { - builder.i32_const(pStaticData); + builder.ptr_const(pStaticData); } else { builder.local("cknull_ptr"); builder.i32_const(offsetBytes); @@ -1705,7 +1716,7 @@ function emit_fieldop ( case MintOpcode.MINT_LDSFLDA: builder.local("pLocals"); if (isStatic) { - builder.i32_const(pStaticData); + builder.ptr_const(pStaticData); } else { // cknull_ptr isn't always initialized here append_ldloc(builder, objectOffset, WasmOpcode.i32_load); @@ -1722,7 +1733,7 @@ 
function emit_fieldop ( builder.local("pLocals"); if (isStatic) { - builder.i32_const(pStaticData); + builder.ptr_const(pStaticData); if (isLoad) { builder.appendU8(getter); builder.appendMemarg(offsetBytes, 0);