Skip to content

Commit

Permalink
JIT: Support some assignment decomposition in physical promotion (#85105
Browse files Browse the repository at this point in the history
)

Add support for directly initializing and copying into replacements
instead of performing the operation on the struct local and then reading
the replacements back. Physically promoted struct locals used as sources
are still handled conservatively (by first writing them back to stack,
then doing the copy).

For example, for a case like

```
void Foo()
{
    S s = _field;
    s.A = s.B + 3;
    Consume(s);
}

struct S
{
    public int A, B;
}
```

We see the following:

```
STMT00000 ( 0x000[E-] ... 0x006 )
               [000003] -A-XG------                         ▌  ASG       struct (copy)
               [000002] D------N---                         ├──▌  LCL_VAR   struct<Program+S, 8> V01 loc0
               [000001] ---XG------                         └──▌  FIELD     struct Program:_field
               [000000] -----------                            └──▌  LCL_VAR   ref    V00 this          (last use)
Processing block operation [000003] that involves replacements
New statement:
STMT00000 ( 0x000[E-] ... 0x006 )
               [000029] -A-XG------                         ▌  COMMA     int
               [000021] -A-XG------                         ├──▌  ASG       int
               [000015] D------N---                         │  ├──▌  LCL_VAR   int    V03 tmp1
               [000020] ---XG------                         │  └──▌  IND       int
               [000018] -----------                         │     └──▌  ADD       ref
               [000016] -----------                         │        ├──▌  LCL_VAR   ref    V00 this
               [000017] -----------                         │        └──▌  CNS_INT   long   8
               [000028] -A-XG------                         └──▌  ASG       int
               [000022] D------N---                            ├──▌  LCL_VAR   int    V04 tmp2
               [000027] ---XG------                            └──▌  IND       int
               [000025] -----------                               └──▌  ADD       ref
               [000023] -----------                                  ├──▌  LCL_VAR   ref    V00 this
               [000024] -----------                                  └──▌  CNS_INT   long   12
```

The logic is currently quite rudimentary when it comes to
holes/uncovered parts of the struct. For example, in the above case if
we add another unused field at the end of S then the result is:

```
STMT00000 ( 0x000[E-] ... 0x006 )
               [000003] -A-XG------                         ▌  ASG       struct (copy)
               [000002] D------N---                         ├──▌  LCL_VAR   struct<Program+S, 12> V01 loc0
               [000001] ---XG------                         └──▌  FIELD     struct Program:_field
               [000000] -----------                            └──▌  LCL_VAR   ref    V00 this          (last use)
Processing block operation [000003] that involves replacements
Struct operation is not fully covered by replaced fields. Keeping struct operation.
New statement:
STMT00000 ( 0x000[E-] ... 0x006 )
               [000030] -A-XG------                         ▌  COMMA     struct
               [000021] -A-XG------                         ├──▌  ASG       int
               [000015] D------N---                         │  ├──▌  LCL_VAR   int    V03 tmp1
               [000020] ---XG------                         │  └──▌  IND       int
               [000018] -----------                         │     └──▌  ADD       ref
               [000016] -----------                         │        ├──▌  LCL_VAR   ref    V00 this
               [000017] -----------                         │        └──▌  CNS_INT   long   8
               [000029] -A-XG------                         └──▌  COMMA     struct
               [000028] -A-XG------                            ├──▌  ASG       int
               [000022] D------N---                            │  ├──▌  LCL_VAR   int    V04 tmp2
               [000027] ---XG------                            │  └──▌  IND       int
               [000025] -----------                            │     └──▌  ADD       ref
               [000023] -----------                            │        ├──▌  LCL_VAR   ref    V00 this
               [000024] -----------                            │        └──▌  CNS_INT   long   12
               [000003] -A-XG------                            └──▌  ASG       struct (copy)
               [000002] D------N---                               ├──▌  LCL_VAR   struct<Program+S, 12> V01 loc0
               [000001] ---XG------                               └──▌  FIELD     struct Program:_field
               [000000] -----------                                  └──▌  LCL_VAR   ref    V00 this
```

In this case it would be significantly more efficient to copy only the
remainder, which is just a small part of the struct. However, in the
general case it is not easy to predict the most efficient way to do
this, and in some cases we cannot even represent the hole in JIT IR (if
it involves GC pointers), so I have left this for a future change for
now. Liveness should also be beneficial for that as there are many cases
where we would expect the remainder to be dead.
  • Loading branch information
jakobbotsch authored Apr 25, 2023
1 parent 6abedb8 commit 2e0033c
Show file tree
Hide file tree
Showing 9 changed files with 879 additions and 49 deletions.
2 changes: 2 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1063,6 +1063,7 @@ class LclVarDsc
unsigned short lvRefCnt(RefCountState state = RCS_NORMAL) const;
void incLvRefCnt(unsigned short delta, RefCountState state = RCS_NORMAL);
void setLvRefCnt(unsigned short newValue, RefCountState state = RCS_NORMAL);
void incLvRefCntSaturating(unsigned short delta, RefCountState state = RCS_NORMAL);

weight_t lvRefCntWtd(RefCountState state = RCS_NORMAL) const;
void incLvRefCntWtd(weight_t delta, RefCountState state = RCS_NORMAL);
Expand Down Expand Up @@ -2944,6 +2945,7 @@ class Compiler
static bool gtHasRef(GenTree* tree, unsigned lclNum);

bool gtHasLocalsWithAddrOp(GenTree* tree);
bool gtHasAddressExposedLocals(GenTree* tree);

unsigned gtSetCallArgsOrder(CallArgs* args, bool lateArgs, int* callCostEx, int* callCostSz);
unsigned gtSetMultiOpOrder(GenTreeMultiOp* multiOp);
Expand Down
22 changes: 20 additions & 2 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4361,10 +4361,9 @@ inline unsigned short LclVarDsc::lvRefCnt(RefCountState state) const
// Notes:
// It is currently the caller's responsibility to ensure this increment
// will not cause overflow.

//
inline void LclVarDsc::incLvRefCnt(unsigned short delta, RefCountState state)
{

#if defined(DEBUG)
assert(state != RCS_INVALID);
Compiler* compiler = JitTls::GetCompiler();
Expand All @@ -4376,6 +4375,25 @@ inline void LclVarDsc::incLvRefCnt(unsigned short delta, RefCountState state)
assert(m_lvRefCnt >= oldRefCnt);
}

//------------------------------------------------------------------------------
// incLvRefCntSaturating: add to the reference count of this local var,
//    clamping the result at USHRT_MAX instead of letting it wrap around.
//
// Arguments:
//    delta - the amount of the increment
//    state - the requestor's expected ref count state; defaults to RCS_NORMAL
//
inline void LclVarDsc::incLvRefCntSaturating(unsigned short delta, RefCountState state)
{
#if defined(DEBUG)
    assert(state != RCS_INVALID);
    assert(JitTls::GetCompiler()->lvaRefCountState == state);
#endif

    // Do the addition in 'int' so that the sum of two 16-bit values cannot
    // overflow before it is clamped.
    const int widenedSum = static_cast<int>(m_lvRefCnt) + static_cast<int>(delta);

    if (widenedSum > USHRT_MAX)
    {
        m_lvRefCnt = static_cast<unsigned short>(USHRT_MAX);
    }
    else
    {
        m_lvRefCnt = static_cast<unsigned short>(widenedSum);
    }
}

//------------------------------------------------------------------------------
// setLvRefCnt: set the reference count for this local var
//
Expand Down
59 changes: 53 additions & 6 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2994,8 +2994,6 @@ bool Compiler::gtHasLocalsWithAddrOp(GenTree* tree)
DoLclVarsOnly = true,
};

bool HasAddrTakenLocal = false;

LocalsWithAddrOpVisitor(Compiler* comp) : GenTreeVisitor(comp)
{
}
Expand All @@ -3005,7 +3003,6 @@ bool Compiler::gtHasLocalsWithAddrOp(GenTree* tree)
LclVarDsc* varDsc = m_compiler->lvaGetDesc((*use)->AsLclVarCommon());
if (varDsc->lvHasLdAddrOp || varDsc->IsAddressExposed())
{
HasAddrTakenLocal = true;
return WALK_ABORT;
}

Expand All @@ -3014,8 +3011,48 @@ bool Compiler::gtHasLocalsWithAddrOp(GenTree* tree)
};

LocalsWithAddrOpVisitor visitor(this);
visitor.WalkTree(&tree, nullptr);
return visitor.HasAddrTakenLocal;
return visitor.WalkTree(&tree, nullptr) == WALK_ABORT;
}

//------------------------------------------------------------------------------
// gtHasAddressExposedLocals:
//    Check if this tree contains locals with IsAddressExposed() flags set. Does
//    a full tree walk.
//
// Parameters:
//    tree - the tree
//
// Return Value:
//    True if any sub tree is such a local.
//
bool Compiler::gtHasAddressExposedLocals(GenTree* tree)
{
    struct AddressExposedLocalsVisitor : GenTreeVisitor<AddressExposedLocalsVisitor>
    {
        enum
        {
            DoPreOrder    = true,
            DoLclVarsOnly = true,
        };

        AddressExposedLocalsVisitor(Compiler* comp) : GenTreeVisitor(comp)
        {
        }

        fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
        {
            LclVarDsc* const lclDsc = m_compiler->lvaGetDesc((*use)->AsLclVarCommon());

            // Abort the walk at the first address-exposed local; the caller
            // detects the hit from the aborted walk result.
            return lclDsc->IsAddressExposed() ? WALK_ABORT : WALK_CONTINUE;
        }
    };

    AddressExposedLocalsVisitor exposedVisitor(this);
    return exposedVisitor.WalkTree(&tree, nullptr) == WALK_ABORT;
}

#ifdef DEBUG
Expand Down Expand Up @@ -16329,7 +16366,17 @@ bool Compiler::gtSplitTree(

bool IsValue(const UseInfo& useInf)
{
GenTree* node = (*useInf.Use)->gtEffectiveVal();
GenTree* node = *useInf.Use;

// Some places create void-typed commas that wrap actual values
// (e.g. VN-based dead store removal), so we need the double check
// here.
if (!node->IsValue())
{
return false;
}

node = node->gtEffectiveVal();
if (!node->IsValue())
{
return false;
Expand Down
5 changes: 1 addition & 4 deletions src/coreclr/jit/lclmorph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1548,10 +1548,7 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>

// Note we don't need accurate counts when the values are large.
//
if (varDsc->lvRefCnt(RCS_EARLY) < USHRT_MAX)
{
varDsc->incLvRefCnt(1, RCS_EARLY);
}
varDsc->incLvRefCntSaturating(1, RCS_EARLY);

if (!m_compiler->lvaIsImplicitByRefLocal(lclNum))
{
Expand Down
Loading

0 comments on commit 2e0033c

Please sign in to comment.