JIT: Handle primitive-sized remainders overlapping padding/promotions in physical promotion (#88109)

The remainder may be separated by a bit of padding or by other promoted fields but
still fit into a primitive; in this case it is still beneficial to copy it as a
single primitive, instead of falling back to a full block copy.

Example:
```csharp
private S _s;

void Foo()
{
    S s = new();
    s.A = 10;
    s.D = 20;
    s.F = 30; // A, D, F get promoted
    _s = s;
}

private struct S
{
    public byte A;
    public byte B;
    public byte C;
    public byte D;
    public byte E;
    public byte F;
}
```
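For the struct above, the fields B, C and E are left unpromoted, so the remainder consists of the two byte ranges [001..003) and [004..005). The covering segment is simply the smallest range spanning all remainder segments, and when its size matches a primitive (1, 2, 4 or 8 bytes) the whole remainder can be moved with one primitive-typed copy. Below is a minimal sketch of that idea; the `Segment` shape mirrors the JIT's `StructSegments::Segment`, but `TryGetPrimitiveCover` is illustrative only (the actual `CoveringSegment` computes the cover and leaves the size check to its caller):

```cpp
#include <cstdio>
#include <vector>

// Half-open byte range [Start..End), mirroring StructSegments::Segment.
struct Segment
{
    unsigned Start;
    unsigned End;
};

// Compute the smallest segment covering every remainder segment and check
// whether its size fits a primitive of 1, 2, 4 or 8 bytes.
bool TryGetPrimitiveCover(const std::vector<Segment>& remainder, Segment* cover)
{
    if (remainder.empty())
    {
        return false;
    }

    *cover = remainder[0];
    for (const Segment& seg : remainder)
    {
        cover->Start = (seg.Start < cover->Start) ? seg.Start : cover->Start;
        cover->End   = (seg.End > cover->End) ? seg.End : cover->End;
    }

    unsigned size = cover->End - cover->Start;
    return (size == 1) || (size == 2) || (size == 4) || (size == 8);
}

int main()
{
    // Remainder from the example above: [001..003) and [004..005).
    std::vector<Segment> remainder{{1, 3}, {4, 5}};
    Segment cover;
    if (TryGetPrimitiveCover(remainder, &cover))
    {
        // Prints "4-byte primitive at +001", matching "int at +001" below.
        std::printf("%u-byte primitive at +%03u\n", cover.End - cover.Start, cover.Start);
    }
}
```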

```diff
 Processing block operation [000018] that involves replacements
   dst+003 <- V04 (V01.[003..004)) (last use)
   dst+005 <- V05 (V01.[005..006)) (last use)
   Block op remainder: [001..003) [004..005)
-  => Remainder strategy: retain a full block op
+  => Remainder strategy: int at +001
```

```diff
 ;  V00 this         [V00,T01] (  3,  3   )     ref  ->  rcx         this class-hnd single-def
 ;* V01 loc0         [V01    ] (  0,  0   )  struct ( 8) zero-ref    do-not-enreg[SF] ld-addr-op
 ;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+00H]   do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
 ;* V03 tmp1         [V03    ] (  0,  0   )   ubyte  ->  zero-ref    "V01.[000..001)"
 ;* V04 tmp2         [V04    ] (  0,  0   )   ubyte  ->  zero-ref    "V01.[003..004)"
 ;* V05 tmp3         [V05    ] (  0,  0   )   ubyte  ->  zero-ref    "V01.[005..006)"
 ;  V06 tmp4         [V06,T00] (  5, 10   )   byref  ->  rcx         single-def "Spilling address for field-by-field copy"
 ;
 ; Lcl frame size = 0
 
 G_M52879_IG01:  ;; offset=0000H
 						;; size=0 bbWeight=1 PerfScore 0.00
 G_M52879_IG02:  ;; offset=0000H
        add      rcx, 8
        xor      eax, eax
-       mov      dword ptr [rcx], eax
-       mov      dword ptr [rcx+02H], eax
+       mov      dword ptr [rcx+01H], eax
        mov      byte  ptr [rcx], 10
        mov      byte  ptr [rcx+03H], 20
        mov      byte  ptr [rcx+05H], 30
-						;; size=22 bbWeight=1 PerfScore 5.50
-G_M52879_IG03:  ;; offset=0016H
+						;; size=20 bbWeight=1 PerfScore 4.50
+G_M52879_IG03:  ;; offset=0014H
        ret      
 						;; size=1 bbWeight=1 PerfScore 1.00
 
-; Total bytes of code 23, prolog size 0, PerfScore 8.80, instruction count 8, allocated bytes for code 23 (MethodHash=1da13170) for method Program:Foo():this (FullOpts)
+; Total bytes of code 21, prolog size 0, PerfScore 7.60, instruction count 7, allocated bytes for code 21 (MethodHash=1da13170) for method Program:Foo():this (FullOpts)
 ; ============================================================

```

We have to be careful, however, since the covering segment can now contain
promoted fields. If this happens we need to make sure we write the promoted
fields _after_ the remainder, as the sketch below illustrates.
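
The hazard is easiest to see at the byte level. In the example, the int-sized remainder at +001 covers [001..005), which overlaps the promoted field D at offset 3; D's fresh value lives in a replacement local while the struct itself still holds a stale byte. The following is a self-contained sketch of that byte-level model (hypothetical, not JIT code) showing why the remainder must be copied before the overlapping replacement is stored:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    // Source struct: 6 bytes, layout A,B,C,D,E,F. A, D and F are promoted,
    // so their fresh values live in replacement locals; the bytes in the
    // struct itself may be stale.
    uint8_t src[6] = {0, 0, 0, /* stale D */ 0, 0, 0};
    uint8_t replA = 10, replD = 20, replF = 30; // fresh values

    uint8_t dst[6];
    std::memset(dst, 0xCC, sizeof(dst));

    // Remainder first: "int at +001" copies [001..005), including the
    // stale byte at D's offset.
    std::memcpy(&dst[1], &src[1], 4);

    // Promoted fields after the remainder, so the stale byte is overwritten.
    dst[0] = replA;
    dst[3] = replD;
    dst[5] = replF;

    // Prints 20: D's fresh value survives. Writing dst[3] before the
    // memcpy would have left the stale 0 there instead.
    std::printf("dst[3] = %d\n", dst[3]);
}
```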
jakobbotsch authored Jul 4, 2023
1 parent e5be81a commit 210a7a5
Showing 3 changed files with 150 additions and 86 deletions.
22 changes: 0 additions & 22 deletions src/coreclr/jit/promotion.cpp
@@ -1626,28 +1626,6 @@ bool StructSegments::IsEmpty()
return m_segments.size() == 0;
}

//------------------------------------------------------------------------
// IsSingleSegment:
// Check if the segment tree contains only a single segment, and return
// it if so.
//
// Parameters:
// result - [out] The single segment. Only valid if the method returns true.
//
// Returns:
// True if so.
//
bool StructSegments::IsSingleSegment(Segment* result)
{
if (m_segments.size() == 1)
{
*result = m_segments[0];
return true;
}

return false;
}

//------------------------------------------------------------------------
// CoveringSegment:
// Compute a segment that covers all contained segments in this segment tree.
1 change: 0 additions & 1 deletion src/coreclr/jit/promotion.h
@@ -75,7 +75,6 @@ class StructSegments
void Add(const Segment& segment);
void Subtract(const Segment& segment);
bool IsEmpty();
bool IsSingleSegment(Segment* result);
bool CoveringSegment(Segment* result);

#ifdef DEBUG
213 changes: 150 additions & 63 deletions src/coreclr/jit/promotiondecomposition.cpp
@@ -308,7 +308,7 @@ class DecompositionPlan

StructSegments::Segment segment;
// See if we can "plug the hole" with a single primitive.
if (remainder.IsSingleSegment(&segment))
if (remainder.CoveringSegment(&segment))
{
var_types primitiveType = TYP_UNDEF;
unsigned size = segment.End - segment.Start;
@@ -487,6 +487,13 @@ class DecompositionPlan
}
}

// We prefer to do the remainder at the end, if possible, since CQ
// analysis shows that this is best. However, handling the remainder
// may overwrite the destination with stale bits if the source has
// replacements (since handling the remainder copies from the struct,
// and the fresh values are usually in the replacement locals).
bool handleRemainderFirst = RemainderOverwritesDestinationWithStaleBits(remainderStrategy, dstDeaths);

GenTree* addr = nullptr;
target_ssize_t addrBaseOffs = 0;
FieldSeq* addrBaseOffsFldSeq = nullptr;
@@ -525,26 +532,29 @@ class DecompositionPlan

if (m_compiler->fgAddrCouldBeNull(addr))
{
switch (remainderStrategy.Type)
if (handleRemainderFirst)
{
needsNullCheck = (remainderStrategy.Type == RemainderStrategy::Primitive) &&
m_compiler->fgIsBigOffset(remainderStrategy.PrimitiveOffset);
}
else
{
case RemainderStrategy::NoRemainder:
case RemainderStrategy::Primitive:
needsNullCheck = true;
// See if our first indirection will subsume the null check (usual case).
for (int i = 0; i < m_entries.Height(); i++)
needsNullCheck = true;
// See if our first indirection will subsume the null check (usual case).
for (int i = 0; i < m_entries.Height(); i++)
{
if (CanSkipEntry(m_entries.BottomRef(i), dstDeaths, remainderStrategy))
{
if (CanSkipEntry(m_entries.BottomRef(i), dstDeaths, remainderStrategy))
{
continue;
}
const Entry& entry = m_entries.BottomRef(i);
assert((entry.FromReplacement == nullptr) || (entry.ToReplacement == nullptr));
needsNullCheck = m_compiler->fgIsBigOffset(entry.Offset);
break;
continue;
}
const Entry& entry = m_entries.BottomRef(i);
assert((entry.FromReplacement == nullptr) || (entry.ToReplacement == nullptr));
needsNullCheck = m_compiler->fgIsBigOffset(entry.Offset);
break;
}
}
}

if (needsNullCheck)
{
numAddrUses++;
@@ -606,26 +616,9 @@ class DecompositionPlan
statements->AddStatement(nullCheck);
}

if (remainderStrategy.Type == RemainderStrategy::FullBlock)
{
// We will reuse the existing block op. Rebase the address off of the new local we created.
if (m_src->OperIs(GT_BLK))
{
m_src->AsIndir()->Addr() = indirAccess->GrabAddress(0, m_compiler);
}
else if (m_store->OperIs(GT_STORE_BLK))
{
m_store->AsIndir()->Addr() = indirAccess->GrabAddress(0, m_compiler);
}
}

// If the source involves replacements then do the struct op first --
// we would overwrite the destination with stale bits if we did it last.
// If the source does not involve replacements then CQ analysis shows
// that it's best to do it last.
if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && m_srcInvolvesReplacements)
if (handleRemainderFirst)
{
statements->AddStatement(m_store);
CopyRemainder(storeAccess, srcAccess, remainderStrategy, statements);

if (m_src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
{
@@ -693,34 +686,9 @@ class DecompositionPlan
statements->AddStatement(store);
}

if ((remainderStrategy.Type == RemainderStrategy::FullBlock) && !m_srcInvolvesReplacements)
{
statements->AddStatement(m_store);
}

if (remainderStrategy.Type == RemainderStrategy::Primitive)
if (!handleRemainderFirst)
{
var_types primitiveType = remainderStrategy.PrimitiveType;
// The remainder might match a regularly promoted field exactly. If
// it does then use the promoted field's type so we can create a
// direct access.
unsigned srcPromField = srcAccess.FindRegularlyPromotedField(remainderStrategy.PrimitiveOffset, m_compiler);
unsigned storePromField =
storeAccess.FindRegularlyPromotedField(remainderStrategy.PrimitiveOffset, m_compiler);

if ((srcPromField != BAD_VAR_NUM) || (storePromField != BAD_VAR_NUM))
{
var_types regPromFieldType =
m_compiler->lvaGetDesc(srcPromField != BAD_VAR_NUM ? srcPromField : storePromField)->TypeGet();
if (genTypeSize(regPromFieldType) == genTypeSize(primitiveType))
{
primitiveType = regPromFieldType;
}
}

GenTree* src = srcAccess.CreateRead(remainderStrategy.PrimitiveOffset, primitiveType, m_compiler);
GenTree* store = storeAccess.CreateStore(remainderStrategy.PrimitiveOffset, primitiveType, src, m_compiler);
statements->AddStatement(store);
CopyRemainder(storeAccess, srcAccess, remainderStrategy, statements);
}

INDEBUG(storeAccess.CheckFullyUsed());
@@ -730,7 +698,7 @@ class DecompositionPlan
//------------------------------------------------------------------------
// CanSkipEntry:
// Check if the specified entry can be skipped because it is writing to a
// death replacement or because the remainder would handle it anyway.
// dead replacement or because the remainder would handle it anyway.
//
// Parameters:
// entry - The init/copy entry
Expand Down Expand Up @@ -861,7 +829,71 @@ class DecompositionPlan
return addrNode->IsInvariant();
}

private:
//------------------------------------------------------------------------
// RemainderOverwritesDestinationWithStaleBits:
// Check if handling the remainder is going to write stale bits to the
// destination.
//
// Parameters:
// remainderStrategy - The remainder strategy
// dstDeaths - Destination liveness
//
// Returns:
// True if so.
//
// Remarks:
// We usually prefer to write the remainder last as CQ analysis shows
// that to be most beneficial. However, if we do that we may overwrite
// the destination with stale bits. This occurs if the source has
// replacements. Handling the remainder copies from the source struct
// local, but the up-to-date values may be in its replacement locals. So
// we must take care to write the replacement locals _after_ the
// remainder has been written.
//
bool RemainderOverwritesDestinationWithStaleBits(const RemainderStrategy& remainderStrategy,
const StructDeaths& dstDeaths)
{
if (!m_srcInvolvesReplacements)
{
return false;
}

switch (remainderStrategy.Type)
{
case RemainderStrategy::FullBlock:
return true;
case RemainderStrategy::Primitive:
for (int i = 0; i < m_entries.Height(); i++)
{
const Entry& entry = m_entries.BottomRef(i);
if (entry.Offset + genTypeSize(entry.Type) <= remainderStrategy.PrimitiveOffset)
{
// Entry ends before remainder starts
continue;
}

// Remainder ends before entry starts
if (remainderStrategy.PrimitiveOffset + genTypeSize(remainderStrategy.PrimitiveType) <=
entry.Offset)
{
continue;
}

// Are we even going to write the entry?
if (!CanSkipEntry(entry, dstDeaths, remainderStrategy))
{
// Yep, so we need to be careful.
return true;
}
}

// No entry overlaps.
return false;
default:
return false;
}
}
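
(Editorial aside, not part of the change: the `Primitive` case above is a disjointness test over half-open byte ranges. An entry [Offset..Offset+size) can be ignored only if it ends before the remainder starts or starts after the remainder ends. A distilled, self-contained version of that predicate, with illustrative names:)

```cpp
#include <cassert>

// Half-open ranges [aStart..aEnd) and [bStart..bEnd) overlap exactly when
// each one starts before the other ends.
bool RangesOverlap(unsigned aStart, unsigned aEnd, unsigned bStart, unsigned bEnd)
{
    return (aStart < bEnd) && (bStart < aEnd);
}

int main()
{
    // Remainder "int at +001" covers [1..5).
    assert(RangesOverlap(1, 5, 3, 4));  // field D at [3..4) overlaps
    assert(!RangesOverlap(1, 5, 5, 6)); // field F at [5..6) is merely adjacent
    assert(!RangesOverlap(1, 5, 0, 1)); // field A at [0..1) ends where it starts
    return 0;
}
```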

// Helper class to create derived accesses off of a location: either a
// local, or as indirections off of an address.
class LocationAccess
Expand Down Expand Up @@ -1080,6 +1112,61 @@ class DecompositionPlan
return m_indirFlags;
}
};

//------------------------------------------------------------------------
// CopyRemainder:
// Create IR to copy the remainder.
//
// Parameters:
// storeAccess - Helper class to create derived stores
// srcAccess - Helper class to create derived source accesses
// remainderStrategy - The strategy to generate IR for
// statements - List to add IR to.
//
void CopyRemainder(LocationAccess& storeAccess,
LocationAccess& srcAccess,
const RemainderStrategy& remainderStrategy,
DecompositionStatementList* statements)
{
if (remainderStrategy.Type == RemainderStrategy::FullBlock)
{
// We will reuse the existing block op. Rebase the address off of the new local we created.
if (m_src->OperIs(GT_BLK))
{
m_src->AsIndir()->Addr() = srcAccess.GrabAddress(0, m_compiler);
}
else if (m_store->OperIs(GT_STORE_BLK))
{
m_store->AsIndir()->Addr() = storeAccess.GrabAddress(0, m_compiler);
}

statements->AddStatement(m_store);
}
else if (remainderStrategy.Type == RemainderStrategy::Primitive)
{
var_types primitiveType = remainderStrategy.PrimitiveType;
// The remainder might match a regularly promoted field exactly. If
// it does then use the promoted field's type so we can create a
// direct access.
unsigned srcPromField = srcAccess.FindRegularlyPromotedField(remainderStrategy.PrimitiveOffset, m_compiler);
unsigned storePromField =
storeAccess.FindRegularlyPromotedField(remainderStrategy.PrimitiveOffset, m_compiler);

if ((srcPromField != BAD_VAR_NUM) || (storePromField != BAD_VAR_NUM))
{
var_types regPromFieldType =
m_compiler->lvaGetDesc(srcPromField != BAD_VAR_NUM ? srcPromField : storePromField)->TypeGet();
if (genTypeSize(regPromFieldType) == genTypeSize(primitiveType))
{
primitiveType = regPromFieldType;
}
}

GenTree* src = srcAccess.CreateRead(remainderStrategy.PrimitiveOffset, primitiveType, m_compiler);
GenTree* store = storeAccess.CreateStore(remainderStrategy.PrimitiveOffset, primitiveType, src, m_compiler);
statements->AddStatement(store);
}
}
};

//------------------------------------------------------------------------