-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Stack Allocation Enhancements #104936
Comments
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch |
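For the delegate case, if we add the following change:
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 89e28c5978c..a069d98503e 100644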
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -719,12 +719,23 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parent
case GT_CALL:
{
- GenTreeCall* asCall = parent->AsCall();
+ GenTreeCall* const call = parent->AsCall();
- if (asCall->IsHelperCall())
+ if (call->IsHelperCall())
{
canLclVarEscapeViaParentStack =
- !Compiler::s_helperCallProperties.IsNoEscape(comp->eeGetHelperNum(asCall->gtCallMethHnd));
+ !Compiler::s_helperCallProperties.IsNoEscape(comp->eeGetHelperNum(call->gtCallMethHnd));
+ }
+ else if (call->gtCallType == CT_USER_FUNC)
+ {
+ // Delegate invoke won't escape the delegate which is passed as "this"
+ // And gets expanded inline later.
+ //
+ if ((call->gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) != 0)
+ {
+ GenTree* const thisArg = call->gtArgs.GetThisArg()->GetNode();
+ canLclVarEscapeViaParentStack = thisArg != tree;
+ }
Then the example above becomes:
; Method Y:Test():int (FullOpts)
G_M53607_IG01: ;; offset=0x0000
sub rsp, 88
vxorps xmm4, xmm4, xmm4
vmovdqu ymmword ptr [rsp+0x20], ymm4
vmovdqa xmmword ptr [rsp+0x40], xmm4
xor eax, eax
mov qword ptr [rsp+0x50], rax
;; size=27 bbWeight=1 PerfScore 5.83
G_M53607_IG02: ;; offset=0x001B
mov rcx, 0x7FFD4BB04580 ; Y+<>c__DisplayClass0_0
call CORINFO_HELP_NEWSFAST
mov dword ptr [rax+0x08], 100
mov rcx, 0x7FFD4BB04B30 ; System.Func`1[int]
mov qword ptr [rsp+0x20], rcx
mov gword ptr [rsp+0x28], rax
mov rcx, 0x7FFD4B8783F0 ; code for Y+<>c__DisplayClass0_0:<Test>b__0():int:this
mov qword ptr [rsp+0x38], rcx
lea rax, [rsp+0x20]
mov rcx, gword ptr [rax+0x08]
call [rax+0x18]System.Func`1[int]:Invoke():int:this
nop
;; size=70 bbWeight=1 PerfScore 11.50
G_M53607_IG03: ;; offset=0x0061
add rsp, 88
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 102
Here the closure is still on the heap, and we're invoking the delegate func "directly" but via a convoluted path: we store the func's (indirection cell) address into the stack-allocated delegate, then fetch it back and indirect through it. Ideally we'd like to be able to inline and perhaps realize the closure doesn't escape either, but that seems far off. Perhaps we can just summarily claim the closure can't escape; I am not sure. Moving delegate invoke expansion earlier does not look to be simple -- currently there is some prep work in morph and then the actual expansion in lower, and tail calls are a complication. |
@AndyAyersMS With the array (non-GC elements) support + my field analysis prototype + the above delegate handling (branch at https://github.com/hez2010/runtime/tree/field-stackalloc), the codegen becomes:
G_M30166_IG01: ;; offset=0x0000
sub rsp, 104
vxorps xmm4, xmm4, xmm4
vmovdqu ymmword ptr [rsp+0x20], ymm4
vmovdqa xmmword ptr [rsp+0x40], xmm4
xor eax, eax
mov qword ptr [rsp+0x50], rax
;; size=27 bbWeight=1 PerfScore 5.83
G_M30166_IG02: ;; offset=0x001B
xor ecx, ecx
mov qword ptr [rsp+0x58], rcx
mov dword ptr [rsp+0x60], ecx
mov rcx, 0x7FFEBB4211B8 ; Y+<>c__DisplayClass7_0
mov qword ptr [rsp+0x58], rcx
mov dword ptr [rsp+0x60], 100
mov rcx, 0x7FFEBB420EE8 ; System.Func`1[int]
mov qword ptr [rsp+0x20], rcx
lea rcx, [rsp+0x58]
mov qword ptr [rsp+0x28], rcx
mov rcx, 0x7FFEBB3F0678 ; code for Y+<>c__DisplayClass7_0:<Test>b__0():int:this
mov qword ptr [rsp+0x38], rcx
lea rax, [rsp+0x20]
mov rcx, gword ptr [rax+0x08]
call [rax+0x18]System.Func`1[int]:Invoke():int:this
nop
;; size=87 bbWeight=1 PerfScore 14.25
G_M30166_IG03: ;; offset=0x0072
add rsp, 104
ret |
Added |
Some more useful links:
An overview of the implementation of escape analysis, including interprocedural analysis, in the JVM: https://cr.openjdk.org/~cslucas/escape-analysis/EscapeAnalysis.html
Some insights on allocating objects in a loop, etc.: https://devblogs.microsoft.com/java/improving-openjdk-scalar-replacement-part-2-3/ |
Stack allocation of non-escaping ref classes and boxed value classes was enabled in #103361, but only works in limited cases. This issue tracks further enhancements (see also #11192).
Abilities:
a small tweak to escape analysis gets the delegate on the stack, but the invoke expansion currently happens in lower so we don't get any physical promotion. We would need to move this earlier.
See note below.
localloc
instead of fixed allocations (at least for non-GC types; for GC types there's currently no way to do proper GC reporting)
Analysis:
Implementation:
ALLOCOBJ
ALLOCOBJ
assigned to single-def temp in importer
ClassLayout
instances with GC refs #103362
NAOT:
Advanced:
Diagnostics:
FYI @dotnet/jit-contrib
The text was updated successfully, but these errors were encountered: