Skip to content

Commit

Permalink
Use bytes in fgArgTabEntry.
Browse files Browse the repository at this point in the history
  • Loading branch information
Sergey Andreenko committed Sep 24, 2020
1 parent 49ed384 commit 1ca1a42
Show file tree
Hide file tree
Showing 5 changed files with 354 additions and 151 deletions.
124 changes: 106 additions & 18 deletions src/coreclr/src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1459,11 +1459,21 @@ struct fgArgTabEntry
unsigned structFloatRegs;
#endif // UNIX_AMD64_ABI

#if defined(DEBUG_NOT_OSX_ARM64_ABI)
// A slot is a pointer sized region in the OutArg area.
unsigned slotNum; // When an argument is passed in the OutArg area this is the slot number in the OutArg area
unsigned numSlots; // Count of number of slots that this argument uses
#endif // DEBUG_NOT_OSX_ARM64_ABI

unsigned alignment; // 1 or 2 (slots/registers)
// Return number of stack slots that this argument is taking.
// TODO-Cleanup: this funtion does not align with arm64 apple model,
// delete it.
unsigned GetStackSlotsNumber() const
{
return roundUp(GetStackByteSize(), TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
}

unsigned byteAlignment; // usually 8 or 16 bytes (slots/registers).
private:
unsigned _lateArgInx; // index into gtCallLateArgs list; UINT_MAX if this is not a late arg.
public:
Expand Down Expand Up @@ -1614,9 +1624,24 @@ struct fgArgTabEntry
}

// Get the number of bytes that this argument is occyping on the stack.
unsigned GetStackSize() const
unsigned GetStackByteSize() const
{
return (TARGET_POINTER_SIZE * this->numSlots);
if (IsHfaRegArg())
{
return 0;
}
assert(!IsHfaArg() || !IsSplit());

if (!IsSplit() && numRegs > 0)
{
return 0;
}
assert(GetByteSize() > TARGET_POINTER_SIZE * numRegs);
unsigned stackByteSize = GetByteSize() - TARGET_POINTER_SIZE * numRegs;
#if !defined(OSX_ARM64_ABI)

#endif
return GetByteSize() - TARGET_POINTER_SIZE * numRegs;
}

var_types GetHfaType() const
Expand All @@ -1634,7 +1659,7 @@ struct fgArgTabEntry
if (type != TYP_UNDEF)
{
// We must already have set the passing mode.
assert(numRegs != 0 || numSlots != 0);
assert(numRegs != 0 || GetStackSlotsNumber() != 0);
// We originally set numRegs according to the size of the struct, but if the size of the
// hfaType is not the same as the pointer size, we need to correct it.
// Note that hfaSlots is the number of registers we will use. For ARM, that is twice
Expand Down Expand Up @@ -1709,11 +1734,13 @@ struct fgArgTabEntry
#endif
}

bool isSingleRegOrSlot() const
// Can we replace the struct type of this node with a primitive type for argument passing?
bool TryPassAsPrimitive() const
{
return !IsSplit() && ((numRegs == 1) || (numSlots == 1));
return !IsSplit() && ((numRegs == 1) || (m_byteSize <= TARGET_POINTER_SIZE));
}

#if defined(DEBUG_NOT_OSX_ARM64_ABI)
// Returns the number of "slots" used, where for this purpose a
// register counts as a slot.
unsigned getSlotCount() const
Expand All @@ -1734,7 +1761,9 @@ struct fgArgTabEntry
}
return numSlots + numRegs;
}
#endif

#if defined(DEBUG_NOT_OSX_ARM64_ABI)
// Returns the size as a multiple of pointer-size.
// For targets without HFAs, this is the same as getSlotCount().
unsigned getSize() const
Expand Down Expand Up @@ -1770,6 +1799,43 @@ struct fgArgTabEntry
return size;
}

#endif // DEBUG && !OSX_ARM64_ABI

private:
unsigned m_byteOffset;
unsigned m_byteSize;

public:
void SetByteOffset(unsigned byteOffset)
{
DEBUG_NOT_OSX_ARM64_ASSERT(byteOffset / TARGET_POINTER_SIZE == slotNum);
m_byteOffset = byteOffset;
}

unsigned GetByteOffset() const
{
DEBUG_NOT_OSX_ARM64_ASSERT(m_byteOffset / TARGET_POINTER_SIZE == slotNum);
return m_byteOffset;
}

void SetByteSize(unsigned byteSize)
{
#if defined(DEBUG_NOT_OSX_ARM64_ABI)
assert(byteAlignment != 0);
if (!isStruct)
{
const unsigned alignedByteSize = roundUp(byteSize, byteAlignment);
assert(alignedByteSize == getSlotCount() * TARGET_POINTER_SIZE);
}
#endif
m_byteSize = byteSize;
}

unsigned GetByteSize() const
{
return m_byteSize;
}

// Set the register numbers for a multireg argument.
// There's nothing to do on x64/Ux because the structDesc has already been used to set the
// register numbers.
Expand Down Expand Up @@ -1799,6 +1865,7 @@ struct fgArgTabEntry
#endif // FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI)
}

#ifdef DEBUG
// Check that the value of 'isStruct' is consistent.
// A struct arg must be one of the following:
// - A node of struct type,
Expand All @@ -1816,7 +1883,8 @@ struct fgArgTabEntry
// This is the case where we are passing a struct as a primitive type.
// On most targets, this is always a single register or slot.
// However, on ARM this could be two slots if it is TYP_DOUBLE.
bool isPassedAsPrimitiveType = ((numRegs == 1) || ((numRegs == 0) && (numSlots == 1)));
bool isPassedAsPrimitiveType =
((numRegs == 1) || ((numRegs == 0) && (GetByteSize() <= TARGET_POINTER_SIZE)));
#ifdef TARGET_ARM
if (!isPassedAsPrimitiveType)
{
Expand All @@ -1835,7 +1903,6 @@ struct fgArgTabEntry
}
}

#ifdef DEBUG
void Dump() const;
#endif
};
Expand All @@ -1848,11 +1915,14 @@ struct fgArgTabEntry

class fgArgInfo
{
Compiler* compiler; // Back pointer to the compiler instance so that we can allocate memory
GenTreeCall* callTree; // Back pointer to the GT_CALL node for this fgArgInfo
unsigned argCount; // Updatable arg count value
unsigned nextSlotNum; // Updatable slot count value
unsigned stkLevel; // Stack depth when we make this call (for x86)
Compiler* compiler; // Back pointer to the compiler instance so that we can allocate memory
GenTreeCall* callTree; // Back pointer to the GT_CALL node for this fgArgInfo
unsigned argCount; // Updatable arg count value
#if defined(DEBUG_NOT_OSX_ARM64_ABI)
unsigned nextSlotNum; // Updatable slot count value
#endif
unsigned nextStackByteOffset;
unsigned stkLevel; // Stack depth when we make this call (for x86)

#if defined(UNIX_X86_ABI)
bool alignmentDone; // Updateable flag, set to 'true' after we've done any required alignment.
Expand Down Expand Up @@ -1887,7 +1957,8 @@ class fgArgInfo
GenTreeCall::Use* use,
regNumber regNum,
unsigned numRegs,
unsigned alignment,
unsigned byteSize,
unsigned byteAlignment,
bool isStruct,
bool isVararg = false);

Expand All @@ -1897,7 +1968,8 @@ class fgArgInfo
GenTreeCall::Use* use,
regNumber regNum,
unsigned numRegs,
unsigned alignment,
unsigned byteSize,
unsigned byteAlignment,
const bool isStruct,
const bool isVararg,
const regNumber otherRegNum,
Expand All @@ -1910,7 +1982,8 @@ class fgArgInfo
GenTree* node,
GenTreeCall::Use* use,
unsigned numSlots,
unsigned alignment,
unsigned byteSize,
unsigned byteAlignment,
bool isStruct,
bool isVararg = false);

Expand All @@ -1936,10 +2009,19 @@ class fgArgInfo
{
return argTable;
}

#if defined(DEBUG_NOT_OSX_ARM64_ABI)
unsigned GetNextSlotNum() const
{
return nextSlotNum;
}
#endif

unsigned GetNextSlotByteOffset() const
{
return nextStackByteOffset;
}

bool HasRegArgs() const
{
return hasRegArgs;
Expand Down Expand Up @@ -3212,6 +3294,12 @@ class Compiler
unsigned lvaOutgoingArgSpaceVar; // dummy TYP_LCLBLK var for fixed outgoing argument space
PhasedVar<unsigned> lvaOutgoingArgSpaceSize; // size of fixed outgoing argument space
#endif // FEATURE_FIXED_OUT_ARGS

static unsigned GetOutgoingArgByteSize(unsigned sizeWithoutPadding)
{
return roundUp(sizeWithoutPadding, TARGET_POINTER_SIZE);
}

// Variable representing the return address. The helper-based tailcall
// mechanism passes the address of the return address to a runtime helper
// where it is used to detect tail-call chains.
Expand Down Expand Up @@ -5727,8 +5815,8 @@ class Compiler
SpecialCodeKind acdKind; // what kind of a special block is this?
#if !FEATURE_FIXED_OUT_ARGS
bool acdStkLvlInit; // has acdStkLvl value been already set?
unsigned acdStkLvl;
#endif // !FEATURE_FIXED_OUT_ARGS
unsigned acdStkLvl; // stack level in stack slots.
#endif // !FEATURE_FIXED_OUT_ARGS
};

private:
Expand Down
14 changes: 14 additions & 0 deletions src/coreclr/src/jit/jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,20 @@
#define UNIX_AMD64_ABI_ONLY(x)
#endif // defined(UNIX_AMD64_ABI)

#if defined(DEBUG) && !defined(OSX_ARM64_ABI)
#define DEBUG_NOT_OSX_ARM64_ABI
#endif

#if defined(DEBUG_NOT_OSX_ARM64_ABI)
#define DEBUG_NOT_OSX_ARM64_ARG(x) , x
#define DEBUG_NOT_OSX_ARM64(x) x
#define DEBUG_NOT_OSX_ARM64_ASSERT(x) assert(x)
#else
#define DEBUG_NOT_OSX_ARM64_ARG(x)
#define DEBUG_NOT_OSX_ARM64(x)
#define DEBUG_NOT_OSX_ARM64_ASSERT(x)
#endif

#if defined(UNIX_AMD64_ABI) || !defined(TARGET_64BIT) || defined(TARGET_ARM64)
#define FEATURE_PUT_STRUCT_ARG_STK 1
#define PUT_STRUCT_ARG_STK_ONLY_ARG(x) , x
Expand Down
32 changes: 26 additions & 6 deletions src/coreclr/src/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1041,8 +1041,15 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
#endif // TARGET_ARM
}

unsigned slotNumber = info->GetByteOffset() / TARGET_POINTER_SIZE;
#if defined(FEATURE_PUT_STRUCT_ARG_STK)
unsigned numberOfStackSlots = info->GetStackSlotsNumber();
DEBUG_NOT_OSX_ARM64_ASSERT(numberOfStackSlots == info->numSlots);
#endif
DEBUG_NOT_OSX_ARM64_ASSERT(slotNumber == info->slotNum);

putArg = new (comp, GT_PUTARG_SPLIT)
GenTreePutArgSplit(arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots), info->numRegs,
GenTreePutArgSplit(arg, slotNumber PUT_STRUCT_ARG_STK_ONLY_ARG(numberOfStackSlots), info->numRegs,
call->IsFastTailCall(), call);

// If struct argument is morphed to GT_FIELD_LIST node(s),
Expand Down Expand Up @@ -1126,12 +1133,16 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
// Mark this one as tail call arg if it is a fast tail call.
// This provides the info to put this argument in in-coming arg area slot
// instead of in out-going arg area slot.
CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef DEBUG
// Make sure state is correct. The PUTARG_STK has TYP_VOID, as it doesn't produce
// a result. So the type of its operand must be the correct type to push on the stack.
// For a FIELD_LIST, this will be the type of the field (not the type of the arg),
// but otherwise it is generally the type of the operand.
info->checkIsStruct();
#endif

if ((arg->OperGet() != GT_FIELD_LIST))
{
#if defined(FEATURE_SIMD) && defined(FEATURE_PUT_STRUCT_ARG_STK)
Expand All @@ -1145,10 +1156,16 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
assert(genActualType(arg->TypeGet()) == type);
}
}
unsigned slotNumber = info->GetByteOffset() / TARGET_POINTER_SIZE;
#if defined(FEATURE_PUT_STRUCT_ARG_STK)
unsigned numberOfStackSlots = info->GetStackSlotsNumber();
DEBUG_NOT_OSX_ARM64_ASSERT(numberOfStackSlots == info->numSlots);
#endif
DEBUG_NOT_OSX_ARM64_ASSERT(slotNumber == info->slotNum);

putArg =
new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, TYP_VOID, arg,
info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots),
slotNumber PUT_STRUCT_ARG_STK_ONLY_ARG(numberOfStackSlots),
call->IsFastTailCall(), call);

#ifdef FEATURE_PUT_STRUCT_ARG_STK
Expand Down Expand Up @@ -2115,7 +2132,10 @@ GenTree* Lowering::LowerTailCallViaJitHelper(GenTreeCall* call, GenTree* callTar
// We need to figure out the size of the outgoing stack arguments, not including the special args.
// The number of 4-byte words is passed to the helper for the incoming and outgoing argument sizes.
// This number is exactly the next slot number in the call's argument info struct.
unsigned nNewStkArgsWords = call->fgArgInfo->GetNextSlotNum();
unsigned nNewStkArgsBytes = call->fgArgInfo->GetNextSlotByteOffset();
const int wordSize = 4;
unsigned nNewStkArgsWords = nNewStkArgsBytes / wordSize;
DEBUG_NOT_OSX_ARM64_ASSERT(call->fgArgInfo->GetNextSlotNum() == nNewStkArgsWords);
assert(nNewStkArgsWords >= 4); // There must be at least the four special stack args.
nNewStkArgsWords -= 4;

Expand Down Expand Up @@ -4167,7 +4187,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
// On x86 targets, PInvoke calls need the size of the stack args in InlinedCallFrame.m_Datum.
// This is because the callee pops stack arguments, and we need to keep track of this during stack
// walking
const unsigned numStkArgBytes = call->fgArgInfo->GetNextSlotNum() * TARGET_POINTER_SIZE;
const unsigned numStkArgBytes = call->fgArgInfo->GetNextSlotByteOffset();
GenTree* stackBytes = comp->gtNewIconNode(numStkArgBytes, TYP_INT);
GenTreeCall::Use* args = comp->gtNewCallArgs(frameAddr, stackBytes);
#else
Expand Down Expand Up @@ -4201,9 +4221,9 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
{
#if !defined(TARGET_64BIT)
// On 32-bit targets, indirect calls need the size of the stack args in InlinedCallFrame.m_Datum.
const unsigned numStkArgBytes = call->fgArgInfo->GetNextSlotNum() * TARGET_POINTER_SIZE;
const unsigned stackByteOffset = call->fgArgInfo->GetNextSlotByteOffset();

src = comp->gtNewIconNode(numStkArgBytes, TYP_INT);
src = comp->gtNewIconNode(stackByteOffset, TYP_INT);
#else
// On 64-bit targets, indirect calls may need the stub parameter value in InlinedCallFrame.m_Datum.
// If the stub parameter value is not needed, m_Datum will be initialized by the VM.
Expand Down
Loading

0 comments on commit 1ca1a42

Please sign in to comment.