Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Allow spill-at-single-def for pure defs #85251

Merged
merged 1 commit into from
Apr 25, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions src/coreclr/jit/lsra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3423,15 +3423,11 @@ void LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition
}
}

// Only handle the singledef intervals whose firstRefPosition is RefTypeDef and is not yet marked as spillAfter.
// The singledef intervals whose firstRefPositions are already marked as spillAfter, no need to mark them as
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am trying to understand the situation where we should spill at single-def regardless of spillAfter is marked. Can you show a before and after def too?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is a diff:

@@ -1,142 +1,136 @@
 ; Assembly listing for method System.Numerics.Complex:MaxMagnitude(System.Numerics.Complex,System.Numerics.Complex):System.Numerics.Complex
 ; Emitting BLENDED_CODE for X64 CPU with AVX512 - Windows
 ; optimized code
 ; rsp based frame
 ; partially interruptible
 ; No matching PGO data
 ; 0 inlinees with PGO data; 8 single block inlinees; 0 inlinees without PGO data
 ; Final local variable assignments
 ;
 ;  V00 RetBuf       [V00,T02] (  7,  5   )   byref  ->  rsi         single-def
 ;  V01 arg0         [V01,T00] (  4,  8   )   byref  ->  rdx         single-def
 ;  V02 arg1         [V02,T01] (  4,  8   )   byref  ->   r8         single-def
-;  V03 loc0         [V03,T04] (  5,  3.50)  double  ->  [rsp+48H]  
+;  V03 loc0         [V03,T04] (  5,  3.50)  double  ->  [rsp+48H]   spill-single-def
 ;  V04 loc1         [V04,T08] (  3,  2.50)  double  ->  mm0        
 ;  V05 OutArgs      [V05    ] (  1,  1   )  struct (32) [rsp+00H]   do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
 ;* V06 tmp1         [V06    ] (  0,  0   )  double  ->  zero-ref    "Inlining Arg"
 ;* V07 tmp2         [V07    ] (  0,  0   )  double  ->  zero-ref    "Inlining Arg"
 ;* V08 tmp3         [V08    ] (  0,  0   )  double  ->  zero-ref    "Inlining Arg"
 ;* V09 tmp4         [V09    ] (  0,  0   )  double  ->  zero-ref    "Inlining Arg"
 ;* V10 tmp5         [V10    ] (  0,  0   )  double  ->  zero-ref    "Inlining Arg"
 ;  V11 tmp6         [V11,T03] (  2,  4   )   byref  ->  rax         single-def "Single return block return value"
 ;  V12 tmp7         [V12,T05] (  5,  3.50)  double  ->  [rsp+40H]   spill-single-def V16.m_real(offs=0x00) P-INDEP "field V01.m_real (fldOffset=0x0)"
 ;  V13 tmp8         [V13,T09] (  3,  2.50)  double  ->  [rsp+38H]   spill-single-def V16.m_imaginary(offs=0x08) P-INDEP "field V01.m_imaginary (fldOffset=0x8)"
-;  V14 tmp9         [V14,T07] (  4,  3   )  double  ->  [rsp+30H]   V17.m_real(offs=0x00) P-INDEP "field V02.m_real (fldOffset=0x0)"
-;  V15 tmp10        [V15,T06] (  5,  3.50)  double  ->  [rsp+28H]   V17.m_imaginary(offs=0x08) P-INDEP "field V02.m_imaginary (fldOffset=0x8)"
+;  V14 tmp9         [V14,T07] (  4,  3   )  double  ->  [rsp+30H]   spill-single-def V17.m_real(offs=0x00) P-INDEP "field V02.m_real (fldOffset=0x0)"
+;  V15 tmp10        [V15,T06] (  5,  3.50)  double  ->  [rsp+28H]   spill-single-def V17.m_imaginary(offs=0x08) P-INDEP "field V02.m_imaginary (fldOffset=0x8)"
 ;* V16 tmp11        [V16    ] (  0,  0   )  struct (16) zero-ref    "Promoted implicit byref"
 ;* V17 tmp12        [V17    ] (  0,  0   )  struct (16) zero-ref    "Promoted implicit byref"
 ;
 ; Lcl frame size = 80
 
 G_M13999_IG01:        ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
        push     rsi
        sub      rsp, 80
        vzeroupper 
        mov      rsi, rcx
        ; byrRegs +[rsi]
 						;; size=11 bbWeight=1 PerfScore 2.50
 G_M13999_IG02:        ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0144 {rdx rsi r8}, byref, isz
        ; byrRegs +[rdx r8]
        vmovsd   xmm0, qword ptr [r8]
        vmovsd   qword ptr [rsp+30H], xmm0
        vmovsd   xmm1, qword ptr [r8+08H]
        vmovsd   qword ptr [rsp+28H], xmm1
        vmovsd   xmm2, qword ptr [rdx]
        vmovsd   qword ptr [rsp+40H], xmm2
        vmovsd   xmm3, qword ptr [rdx+08H]
        vmovsd   qword ptr [rsp+38H], xmm3
        vmovaps  xmm0, xmm2
        vmovaps  xmm1, xmm3
        call     [System.Numerics.Complex:Hypot(double,double):double]
        ; byrRegs -[rdx r8]
        ; gcr arg pop 0
        vmovsd   qword ptr [rsp+48H], xmm0
        vmovsd   xmm0, qword ptr [rsp+30H]
        vmovsd   xmm1, qword ptr [rsp+28H]
        call     [System.Numerics.Complex:Hypot(double,double):double]
        ; gcr arg pop 0
        vmovsd   xmm1, qword ptr [rsp+48H]
        vucomisd xmm1, xmm0
        ja       SHORT G_M13999_IG04
 						;; size=94 bbWeight=1 PerfScore 39.50
 G_M13999_IG03:        ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref, isz
        vucomisd xmm1, xmm1
        jp       SHORT G_M13999_IG04
        je       SHORT G_M13999_IG05
 						;; size=8 bbWeight=0.50 PerfScore 2.00
-G_M13999_IG04:        ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref
+G_M13999_IG04:        ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref, isz
        vmovsd   xmm0, qword ptr [rsp+40H]
        vmovsd   qword ptr [rsi], xmm0
        vmovsd   xmm1, qword ptr [rsp+38H]
        vmovsd   qword ptr [rsi+08H], xmm1
-       jmp      G_M13999_IG08
-						;; size=26 bbWeight=0.50 PerfScore 6.00
+       jmp      SHORT G_M13999_IG08
+						;; size=23 bbWeight=0.50 PerfScore 6.00
 G_M13999_IG05:        ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref, isz
        vucomisd xmm1, xmm0
        jp       SHORT G_M13999_IG07
        jne      SHORT G_M13999_IG07
        vmovsd   xmm0, qword ptr [rsp+30H]
        vmovd    rax, xmm0
        test     rax, rax
        jge      SHORT G_M13999_IG06
        vmovsd   xmm1, qword ptr [rsp+28H]
        vmovd    rax, xmm1
        test     rax, rax
-       vmovsd   qword ptr [rsp+28H], xmm1
-       vmovsd   qword ptr [rsp+30H], xmm0
        jl       SHORT G_M13999_IG04
        vmovsd   xmm2, qword ptr [rsp+40H]
        vmovd    rax, xmm2
        test     rax, rax
        jge      SHORT G_M13999_IG04
        jmp      SHORT G_M13999_IG07
-						;; size=70 bbWeight=0.50 PerfScore 13.38
+						;; size=58 bbWeight=0.50 PerfScore 12.38
 G_M13999_IG06:        ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref, isz
        vmovsd   xmm1, qword ptr [rsp+28H]
        vmovd    rax, xmm1
        test     rax, rax
        jge      SHORT G_M13999_IG10
        vmovsd   xmm2, qword ptr [rsp+40H]
        vmovd    rax, xmm2
        test     rax, rax
-       vmovsd   qword ptr [rsp+28H], xmm1
-       vmovsd   qword ptr [rsp+30H], xmm0
-       jge      G_M13999_IG04
-						;; size=48 bbWeight=0.50 PerfScore 7.25
+       jge      SHORT G_M13999_IG04
+						;; size=32 bbWeight=0.50 PerfScore 6.25
 G_M13999_IG07:        ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref
        vmovsd   xmm0, qword ptr [rsp+30H]
        vmovsd   qword ptr [rsi], xmm0
        vmovsd   xmm1, qword ptr [rsp+28H]
        vmovsd   qword ptr [rsi+08H], xmm1
 						;; size=21 bbWeight=0.50 PerfScore 5.00
 G_M13999_IG08:        ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref
        mov      rax, rsi
        ; byrRegs +[rax]
 						;; size=3 bbWeight=1 PerfScore 0.25
 G_M13999_IG09:        ; bbWeight=1, epilog, nogc, extend
        add      rsp, 80
        pop      rsi
        ret      
 						;; size=6 bbWeight=1 PerfScore 1.75
 G_M13999_IG10:        ; bbWeight=0.25, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, gcvars, byref, isz
        ; byrRegs -[rax]
-       vmovsd   qword ptr [rsp+28H], xmm1
-       vmovsd   qword ptr [rsp+30H], xmm0
        jmp      SHORT G_M13999_IG07
-						;; size=14 bbWeight=0.25 PerfScore 1.00
+						;; size=2 bbWeight=0.25 PerfScore 0.50
 
-; Total bytes of code 301, prolog size 8, PerfScore 108.73, instruction count 69, allocated bytes for code 301 (MethodHash=3c1ac950) for method System.Numerics.Complex:MaxMagnitude(System.Numerics.Complex,System.Numerics.Complex):System.Numerics.Complex
+; Total bytes of code 258, prolog size 8, PerfScore 101.93, instruction count 63, allocated bytes for code 258 (MethodHash=3c1ac950) for method System.Numerics.Complex:MaxMagnitude(System.Numerics.Complex,System.Numerics.Complex):System.Numerics.Complex

From what I understand, spillAfter == false means spill at the def but the value computed in the register will also be used subsequently. spillAfter == true means spill at the def without such a subsequent use ("pure def")

// singleDefSpill because they will always get spilled at firstRefPosition.
// This helps in spilling the singleDef at definition
// Only handle the singledef intervals whose firstRefPosition is RefTypeDef.
//
// Note: Only mark "singleDefSpill" for those intervals who ever get spilled. The intervals that are never spilled
// will not be marked as "singleDefSpill" and hence won't get spilled at the first definition.
if (interval->isSingleDef && RefTypeIsDef(interval->firstRefPosition->refType) &&
!interval->firstRefPosition->spillAfter)
if (interval->isSingleDef && RefTypeIsDef(interval->firstRefPosition->refType))
{
// TODO-CQ: Check if it is beneficial to spill at def, meaning, if it is a hot block don't worry about
// doing the spill. Another option is to track number of refpositions and a interval has more than X
Expand Down Expand Up @@ -6280,6 +6276,11 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreeLclVar* treeNode, Ref
varDsc->SetRegNum(REG_STK);
interval->physReg = REG_NA;
writeLocalReg(treeNode->AsLclVar(), interval->varNum, REG_NA);

if (currentRefPosition->singleDefSpill)
{
varDsc->lvSpillAtSingleDef = true;
}
}
else // Not reload and Not pure-def that's spillAfter
{
Expand Down