Skip to content

Commit

Permalink
JIT: Transform multi-reg args to FIELD_LIST in physical promotion (#1…
Browse files Browse the repository at this point in the history
…04370)

This allows promoted fields to stay in registers when they are passed as
multi-reg arguments.

Example linux-x64 diff with `DOTNET_JitStressModeNames=STRESS_NO_OLD_PROMOTION`:

```csharp
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Test(int[] arr)
{
    Foo(arr);
}
```

Base:
```asm
; Assembly listing for method Program:Test(int[]) (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX - Unix
; FullOpts code
; optimized code
; rbp based frame
; partially interruptible
; No PGO data
; 1 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data
; invoked as altjit
; Final local variable assignments
;
;  V00 arg0         [V00,T01] (  5,  4   )     ref  ->  rdi         class-hnd single-def <int[]>
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;* V02 tmp1         [V02    ] (  0,  0   )  struct (16) zero-ref    do-not-enreg[S] "spilled call-like call argument" <System.Span`1[int]>
;  V03 tmp2         [V03,T00] (  4,  8   )  struct (16) [rbp-0x20]  do-not-enreg[SFA] multireg-arg must-init ld-addr-op "NewObj constructor temp" <System.Span`1[int]>
;  V04 tmp3         [V04,T02] (  3,  1.50)   byref  ->  rbx         "V03.[000..008)"
;  V05 tmp4         [V05,T03] (  3,  1.50)     int  ->  r15         "V03.[008..012)"
;
; Lcl frame size = 16

G_M55702_IG01:  ;; offset=0x0000
       push     rbp
       push     r15
       push     rbx
       sub      rsp, 16
       lea      rbp, [rsp+0x20]
       xor      eax, eax
       mov      qword ptr [rbp-0x20], rax
						;; size=19 bbWeight=1 PerfScore 5.00
G_M55702_IG02:  ;; offset=0x0013
       test     rdi, rdi
       je       SHORT G_M55702_IG06
						;; size=5 bbWeight=1 PerfScore 1.25
G_M55702_IG03:  ;; offset=0x0018
       lea      rbx, bword ptr [rdi+0x10]
       mov      r15d, dword ptr [rdi+0x08]
						;; size=8 bbWeight=0.50 PerfScore 1.25
G_M55702_IG04:  ;; offset=0x0020
       mov      bword ptr [rbp-0x20], rbx
       mov      dword ptr [rbp-0x18], r15d
       mov      rdi, bword ptr [rbp-0x20]
       mov      rsi, qword ptr [rbp-0x18]
       mov      rax, 0x7FF97F8FC648      ; code for Program:Foo(System.Span`1[int])
       call     [rax]Program:Foo(System.Span`1[int])
       nop      
						;; size=29 bbWeight=1 PerfScore 7.50
G_M55702_IG05:  ;; offset=0x003D
       add      rsp, 16
       pop      rbx
       pop      r15
       pop      rbp
       ret      
						;; size=9 bbWeight=1 PerfScore 2.75
G_M55702_IG06:  ;; offset=0x0046
       xor      rbx, rbx
       xor      r15d, r15d
       jmp      SHORT G_M55702_IG04
						;; size=7 bbWeight=0 PerfScore 0.00

; Total bytes of code 77, prolog size 19, PerfScore 17.75, instruction count 26, allocated bytes for code 77 (MethodHash=340b2669) for method Program:Test(int[]) (FullOpts)
; ============================================================
```

Diff:
```asm
; Assembly listing for method Program:Test(int[]) (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX - Unix
; FullOpts code
; optimized code
; rbp based frame
; partially interruptible
; No PGO data
; 1 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data
; invoked as altjit
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  5,  4   )     ref  ->  rdi         class-hnd single-def <int[]>
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;* V02 tmp1         [V02    ] (  0,  0   )  struct (16) zero-ref    do-not-enreg[S] "spilled call-like call argument" <System.Span`1[int]>
;* V03 tmp2         [V03    ] (  0,  0   )  struct (16) zero-ref    do-not-enreg[SF] ld-addr-op "NewObj constructor temp" <System.Span`1[int]>
;  V04 tmp3         [V04,T01] (  3,  1.50)   byref  ->  rax         "V03.[000..008)"
;  V05 tmp4         [V05,T02] (  3,  1.50)     int  ->  rsi         "V03.[008..012)"
;
; Lcl frame size = 0

G_M55702_IG01:  ;; offset=0x0000
       push     rbp
       mov      rbp, rsp
						;; size=4 bbWeight=1 PerfScore 1.25
G_M55702_IG02:  ;; offset=0x0004
       test     rdi, rdi
       je       SHORT G_M55702_IG06
						;; size=5 bbWeight=1 PerfScore 1.25
G_M55702_IG03:  ;; offset=0x0009
       lea      rax, bword ptr [rdi+0x10]
       mov      esi, dword ptr [rdi+0x08]
						;; size=7 bbWeight=0.50 PerfScore 1.25
G_M55702_IG04:  ;; offset=0x0010
       mov      rdi, rax
       mov      rax, 0x7FF97F91C648      ; code for Program:Foo(System.Span`1[int])
       call     [rax]Program:Foo(System.Span`1[int])
       nop      
						;; size=16 bbWeight=1 PerfScore 3.75
G_M55702_IG05:  ;; offset=0x0020
       pop      rbp
       ret      
						;; size=2 bbWeight=1 PerfScore 1.50
G_M55702_IG06:  ;; offset=0x0022
       xor      rax, rax
       xor      esi, esi
       jmp      SHORT G_M55702_IG04
						;; size=6 bbWeight=0 PerfScore 0.00

; Total bytes of code 40, prolog size 4, PerfScore 9.00, instruction count 15, allocated bytes for code 40 (MethodHash=340b2669) for method Program:Test(int[]) (FullOpts)
; ============================================================
```

Diffs aren't super large with old promotion enabled because most structs that
are candidates to be multi-register arguments are handled just fine by old
promotion. However, this moves us closer to being able to remove old promotion.
  • Loading branch information
jakobbotsch authored Jul 19, 2024
1 parent e0ecd1f commit 767e416
Show file tree
Hide file tree
Showing 5 changed files with 419 additions and 62 deletions.
3 changes: 3 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1389,6 +1389,7 @@ CallArgs::CallArgs()
, m_hasRetBuffer(false)
, m_isVarArgs(false)
, m_abiInformationDetermined(false)
, m_newAbiInformationDetermined(false)
, m_hasRegArgs(false)
, m_hasStackArgs(false)
, m_argsComplete(false)
Expand Down Expand Up @@ -2568,6 +2569,8 @@ bool GenTreeCall::Equals(GenTreeCall* c1, GenTreeCall* c2)
//
void CallArgs::ResetFinalArgsAndABIInfo()
{
m_newAbiInformationDetermined = false;

if (!IsAbiInformationDetermined())
{
return;
Expand Down
23 changes: 17 additions & 6 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -4824,10 +4824,11 @@ class CallArgs
// made for this call.
unsigned m_padStkAlign;
#endif
bool m_hasThisPointer : 1;
bool m_hasRetBuffer : 1;
bool m_isVarArgs : 1;
bool m_abiInformationDetermined : 1;
bool m_hasThisPointer : 1;
bool m_hasRetBuffer : 1;
bool m_isVarArgs : 1;
bool m_abiInformationDetermined : 1;
bool m_newAbiInformationDetermined : 1;
// True if we have one or more register arguments.
bool m_hasRegArgs : 1;
// True if we have one or more stack arguments.
Expand Down Expand Up @@ -4885,8 +4886,10 @@ class CallArgs
PushFront(comp, arg);
}

void ResetFinalArgsAndABIInfo();
void AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call);
void ResetFinalArgsAndABIInfo();

void DetermineNewABIInfo(Compiler* comp, GenTreeCall* call);

void ArgsComplete(Compiler* comp, GenTreeCall* call);
void EvalArgsToTemps(Compiler* comp, GenTreeCall* call);
Expand All @@ -4902,7 +4905,15 @@ class CallArgs
void SetIsVarArgs() { m_isVarArgs = true; }
void ClearIsVarArgs() { m_isVarArgs = false; }
bool IsAbiInformationDetermined() const { return m_abiInformationDetermined; }
bool AreArgsComplete() const { return m_argsComplete; }
bool IsNewAbiInformationDetermined() const { return m_newAbiInformationDetermined; }

// TODO-Remove: Workaround for bad codegen in MSVC versions < 19.41, see
// https://github.com/dotnet/runtime/pull/104370#issuecomment-2222910359
#ifdef _MSC_VER
__declspec(noinline)
#endif
bool AreArgsComplete() const { return m_argsComplete; }

bool HasRegArgs() const { return m_hasRegArgs; }
bool HasStackArgs() const { return m_hasStackArgs; }
bool NeedsTemps() const { return m_needsTemps; }
Expand Down
134 changes: 105 additions & 29 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1087,8 +1087,9 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call)
// In "fgMorphMultiRegStructArg" we will expand the arg into a GT_FIELD_LIST with multiple indirections, so
// here we consider spilling it into a local. We also need to spill it in case we have a node that we do not
// currently handle in multi-reg morphing.
// This logic can be skipped when the arg is already in the right multireg arg shape.
//
if (varTypeIsStruct(argx) && !arg.m_needTmp)
if (varTypeIsStruct(argx) && !arg.m_needTmp && !argx->OperIs(GT_FIELD_LIST))
{
if ((arg.AbiInfo.NumRegs > 0) && ((arg.AbiInfo.NumRegs + arg.AbiInfo.GetStackSlotsNumber()) > 1))
{
Expand Down Expand Up @@ -1650,36 +1651,64 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call)
noway_assert(argx->gtType != TYP_STRUCT);
#endif

unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect"));

setupArg = comp->gtNewTempStore(tmpVarNum, argx);

LclVarDsc* varDsc = comp->lvaGetDesc(tmpVarNum);
var_types lclVarType = genActualType(argx->gtType);
var_types scalarType = TYP_UNKNOWN;

if (setupArg->OperIsCopyBlkOp())
if (argx->OperIs(GT_FIELD_LIST))
{
setupArg = comp->fgMorphCopyBlock(setupArg);
#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
if ((lclVarType == TYP_STRUCT) && (arg.AbiInfo.ArgType != TYP_STRUCT))
GenTreeFieldList* fieldList = argx->AsFieldList();
fieldList->gtFlags &= ~GTF_ALL_EFFECT;
for (GenTreeFieldList::Use& use : fieldList->Uses())
{
scalarType = arg.AbiInfo.ArgType;
unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect"));
GenTree* store = comp->gtNewTempStore(tmpVarNum, use.GetNode());

if (setupArg == nullptr)
{
setupArg = store;
}
else
{
setupArg = comp->gtNewOperNode(GT_COMMA, TYP_VOID, setupArg, store);
}

use.SetNode(comp->gtNewLclvNode(tmpVarNum, genActualType(use.GetNode())));
fieldList->AddAllEffectsFlags(use.GetNode());
}
#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
}

// scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 =>
// 8)
if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
{
// Create a GT_LCL_FLD using the wider type to go to the late argument list
defArg = comp->gtNewLclFldNode(tmpVarNum, scalarType, 0);
// Keep the field list in the late list
defArg = fieldList;
}
else
{
// Create a copy of the temp to go to the late argument list
defArg = comp->gtNewLclvNode(tmpVarNum, lclVarType);
unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect"));

setupArg = comp->gtNewTempStore(tmpVarNum, argx);

LclVarDsc* varDsc = comp->lvaGetDesc(tmpVarNum);
var_types lclVarType = genActualType(argx->gtType);
var_types scalarType = TYP_UNKNOWN;

if (setupArg->OperIsCopyBlkOp())
{
setupArg = comp->fgMorphCopyBlock(setupArg);
#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
if ((lclVarType == TYP_STRUCT) && (arg.AbiInfo.ArgType != TYP_STRUCT))
{
scalarType = arg.AbiInfo.ArgType;
}
#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
}

// scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 =>
// 8)
if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
{
// Create a GT_LCL_FLD using the wider type to go to the late argument list
defArg = comp->gtNewLclFldNode(tmpVarNum, scalarType, 0);
}
else
{
// Create a copy of the temp to go to the late argument list
defArg = comp->gtNewLclvNode(tmpVarNum, lclVarType);
}
}

#ifdef DEBUG
Expand Down Expand Up @@ -2270,7 +2299,54 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
}
#endif

m_abiInformationDetermined = true;
m_abiInformationDetermined = true;
m_newAbiInformationDetermined = true;
}

//------------------------------------------------------------------------
// DetermineNewABIInfo:
// Determine the new ABI info for all call args without making any IR
// changes.
//
// Parameters:
// comp - The compiler object.
// call - The call to which the CallArgs belongs.
//
void CallArgs::DetermineNewABIInfo(Compiler* comp, GenTreeCall* call)
{
ClassifierInfo info;
info.CallConv = call->GetUnmanagedCallConv();
// X86 tailcall helper is considered varargs, but not for ABI classification purposes.
info.IsVarArgs = call->IsVarargs() && !call->IsTailCallViaJitHelper();
info.HasThis = call->gtArgs.HasThisPointer();
info.HasRetBuff = call->gtArgs.HasRetBuffer();
PlatformClassifier classifier(info);

for (CallArg& arg : Args())
{
const var_types argSigType = arg.GetSignatureType();
const CORINFO_CLASS_HANDLE argSigClass = arg.GetSignatureClassHandle();
ClassLayout* argLayout = argSigClass == NO_CLASS_HANDLE ? nullptr : comp->typGetObjLayout(argSigClass);

// Some well known args have custom register assignment.
// These should not affect the placement of any other args or stack space required.
// Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
// TODO-Cleanup: Integrate this into the new style ABI classifiers.
regNumber nonStdRegNum = GetCustomRegister(comp, call->GetUnmanagedCallConv(), arg.GetWellKnownArg());

if (nonStdRegNum == REG_NA)
{
arg.NewAbiInfo = classifier.Classify(comp, argSigType, argLayout, arg.GetWellKnownArg());
}
else
{
ABIPassingSegment segment = ABIPassingSegment::InRegister(nonStdRegNum, 0, TARGET_POINTER_SIZE);
arg.NewAbiInfo = ABIPassingInformation::FromSegment(comp, segment);
}
}

m_argsStackSize = classifier.StackSize();
m_newAbiInformationDetermined = true;
}

//------------------------------------------------------------------------
Expand Down Expand Up @@ -2436,10 +2512,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
GenTree* argObj = argx->gtEffectiveVal();
bool makeOutArgCopy = false;

if (isStructArg && !reMorphing)
if (isStructArg && !reMorphing && !argObj->OperIs(GT_FIELD_LIST))
{
unsigned originalSize;
if (argObj->TypeGet() == TYP_STRUCT)
if (argObj->TypeIs(TYP_STRUCT))
{
assert(argObj->OperIs(GT_BLK, GT_LCL_VAR, GT_LCL_FLD));
originalSize = argObj->GetLayout(this)->GetSize();
Expand Down Expand Up @@ -2763,12 +2839,12 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
{
if ((arg.AbiInfo.ArgType == TYP_STRUCT) && !arg.AbiInfo.PassedByRef)
{
foundStructArg = true;
GenTree*& argx = (arg.GetLateNode() != nullptr) ? arg.LateNodeRef() : arg.EarlyNodeRef();

if (!argx->OperIs(GT_FIELD_LIST))
{
argx = fgMorphMultiregStructArg(&arg);
foundStructArg = true;
argx = fgMorphMultiregStructArg(&arg);
}
}
}
Expand Down
Loading

0 comments on commit 767e416

Please sign in to comment.