Skip to content

Commit

Permalink
JIT: Cache significant segments computations for layouts in physical promotion (#87745)
Browse files Browse the repository at this point in the history

Before:
JitEnablePhysicalPromotion=1
benchmarks.run_pgo:
Total num SignificantSegments calls: 2543
(Per context) SignificantSegments calls
     <=          0 ===>       0 count (  0% of total)
      1 ..       1 ===>     312 count ( 37% of total)
      2 ..       2 ===>      58 count ( 44% of total)
      3 ..       3 ===>     181 count ( 66% of total)
      4 ..       5 ===>     191 count ( 90% of total)
      6 ..      10 ===>      66 count ( 98% of total)
     11 ..      20 ===>      12 count ( 99% of total)
     21 ..      35 ===>       4 count (100% of total)
     36 ..      50 ===>       0 count (100% of total)
     51 ..      75 ===>       0 count (100% of total)
     76 ..     100 ===>       0 count (100% of total)
    101 ..     150 ===>       0 count (100% of total)
    151 ..     250 ===>       0 count (100% of total)
    251 ..     500 ===>       0 count (100% of total)
    501 ..    1000 ===>       0 count (100% of total)

libraries.pmi:
Total num SignificantSegments calls: 28525
(Per context) SignificantSegments calls
     <=          0 ===>       0 count (  0% of total)
      1 ..       1 ===>    1154 count ( 19% of total)
      2 ..       2 ===>    1878 count ( 50% of total)
      3 ..       3 ===>     809 count ( 64% of total)
      4 ..       5 ===>     782 count ( 77% of total)
      6 ..      10 ===>     831 count ( 91% of total)
     11 ..      20 ===>     357 count ( 97% of total)
     21 ..      35 ===>     101 count ( 98% of total)
     36 ..      50 ===>      29 count ( 99% of total)
     51 ..      75 ===>      29 count ( 99% of total)
     76 ..     100 ===>       8 count (100% of total)
    101 ..     150 ===>       0 count (100% of total)
    151 ..     250 ===>       0 count (100% of total)
    251 ..     500 ===>       0 count (100% of total)
    501 ..    1000 ===>       0 count (100% of total)

JitEnablePhysicalPromotion=1;JitStressModeNames=STRESS_NO_OLD_PROMOTION
benchmarks.run_pgo:
Total num SignificantSegments calls: 90839
(Per context) SignificantSegments calls
     <=          0 ===>       0 count (  0% of total)
      1 ..       1 ===>     681 count (  7% of total)
      2 ..       2 ===>    1035 count ( 17% of total)
      3 ..       3 ===>    1635 count ( 34% of total)
      4 ..       5 ===>    1053 count ( 45% of total)
      6 ..      10 ===>    3162 count ( 78% of total)
     11 ..      20 ===>     814 count ( 87% of total)
     21 ..      35 ===>     931 count ( 96% of total)
     36 ..      50 ===>     215 count ( 99% of total)
     51 ..      75 ===>      69 count ( 99% of total)
     76 ..     100 ===>       4 count ( 99% of total)
    101 ..     150 ===>       4 count ( 99% of total)
    151 ..     250 ===>       0 count ( 99% of total)
    251 ..     500 ===>      11 count (100% of total)
    501 ..    1000 ===>       0 count (100% of total)

libraries.pmi:
Total num SignificantSegments calls: 277708
(Per context) SignificantSegments calls
     <=          0 ===>       0 count (  0% of total)
      1 ..       1 ===>    6993 count ( 17% of total)
      2 ..       2 ===>    8197 count ( 38% of total)
      3 ..       3 ===>    5225 count ( 51% of total)
      4 ..       5 ===>    5380 count ( 65% of total)
      6 ..      10 ===>    7141 count ( 83% of total)
     11 ..      20 ===>    4094 count ( 93% of total)
     21 ..      35 ===>    1627 count ( 97% of total)
     36 ..      50 ===>     519 count ( 98% of total)
     51 ..      75 ===>     292 count ( 99% of total)
     76 ..     100 ===>      98 count ( 99% of total)
    101 ..     150 ===>      59 count ( 99% of total)
    151 ..     250 ===>      11 count ( 99% of total)
    251 ..     500 ===>       4 count (100% of total)
    501 ..    1000 ===>       0 count (100% of total)

After:
benchmarks.run_pgo:
Total num SignificantSegments calls: 915
(Per context) SignificantSegments calls
     <=          0 ===>       0 count (  0% of total)
      1 ..       1 ===>     741 count ( 89% of total)
      2 ..       2 ===>      75 count ( 99% of total)
      3 ..       3 ===>       8 count (100% of total)
      4 ..       5 ===>       0 count (100% of total)
      6 ..      10 ===>       0 count (100% of total)
     11 ..      20 ===>       0 count (100% of total)
     21 ..      35 ===>       0 count (100% of total)
     36 ..      50 ===>       0 count (100% of total)
     51 ..      75 ===>       0 count (100% of total)
     76 ..     100 ===>       0 count (100% of total)
    101 ..     150 ===>       0 count (100% of total)
    151 ..     250 ===>       0 count (100% of total)
    251 ..     500 ===>       0 count (100% of total)
    501 ..    1000 ===>       0 count (100% of total)

libraries.pmi:
Total num SignificantSegments calls: 9061
(Per context) SignificantSegments calls
     <=          0 ===>       0 count (  0% of total)
      1 ..       1 ===>    4172 count ( 69% of total)
      2 ..       2 ===>    1105 count ( 88% of total)
      3 ..       3 ===>     416 count ( 95% of total)
      4 ..       5 ===>     214 count ( 98% of total)
      6 ..      10 ===>      67 count ( 99% of total)
     11 ..      20 ===>       3 count ( 99% of total)
     21 ..      35 ===>       1 count (100% of total)
     36 ..      50 ===>       0 count (100% of total)
     51 ..      75 ===>       0 count (100% of total)
     76 ..     100 ===>       0 count (100% of total)
    101 ..     150 ===>       0 count (100% of total)
    151 ..     250 ===>       0 count (100% of total)
    251 ..     500 ===>       0 count (100% of total)
    501 ..    1000 ===>       0 count (100% of total)

JitEnablePhysicalPromotion=1;JitStressModeNames=STRESS_NO_OLD_PROMOTION
benchmarks.run_pgo:
Total num SignificantSegments calls: 15082
(Per context) SignificantSegments calls
     <=          0 ===>       0 count (  0% of total)
      1 ..       1 ===>    6709 count ( 69% of total)
      2 ..       2 ===>    1735 count ( 87% of total)
      3 ..       3 ===>     462 count ( 92% of total)
      4 ..       5 ===>     471 count ( 97% of total)
      6 ..      10 ===>     237 count (100% of total)
     11 ..      20 ===>       0 count (100% of total)
     21 ..      35 ===>       0 count (100% of total)
     36 ..      50 ===>       0 count (100% of total)
     51 ..      75 ===>       0 count (100% of total)
     76 ..     100 ===>       0 count (100% of total)
    101 ..     150 ===>       0 count (100% of total)
    151 ..     250 ===>       0 count (100% of total)
    251 ..     500 ===>       0 count (100% of total)
    501 ..    1000 ===>       0 count (100% of total)

libraries.pmi:
Total uncached SignificantSegments calls: 68938
(Per context) SignificantSegments calls
     <=          0 ===>       0 count (  0% of total)
      1 ..       1 ===>   24323 count ( 61% of total)
      2 ..       2 ===>    8687 count ( 83% of total)
      3 ..       3 ===>    3357 count ( 91% of total)
      4 ..       5 ===>    2378 count ( 97% of total)
      6 ..      10 ===>     789 count ( 99% of total)
     11 ..      20 ===>     105 count ( 99% of total)
     21 ..      35 ===>       1 count (100% of total)
     36 ..      50 ===>       0 count (100% of total)
     51 ..      75 ===>       0 count (100% of total)
     76 ..     100 ===>       0 count (100% of total)
    101 ..     150 ===>       0 count (100% of total)
    151 ..     250 ===>       0 count (100% of total)
    251 ..     500 ===>       0 count (100% of total)
    501 ..    1000 ===>       0 count (100% of total)
  • Loading branch information
jakobbotsch authored Jun 19, 2023
1 parent 2a3f5be commit e834d42
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 96 deletions.
27 changes: 10 additions & 17 deletions src/coreclr/jit/jitstd/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,36 +305,29 @@ template <typename T, typename Allocator>
template <typename Alt, typename AltAllocator>
vector<T, Allocator>::vector(const vector<Alt, AltAllocator>& vec)
: m_allocator(vec.m_allocator)
, m_pArray(NULL)
, m_nSize(0)
, m_nCapacity(0)
, m_pArray(m_allocator.allocate(vec.m_nSize))
, m_nSize(vec.m_nSize)
, m_nCapacity(vec.m_nSize)
{
ensure_capacity(vec.m_nSize);
for (size_type i = 0, j = 0; i < vec.m_nSize; ++i, ++j)
for (size_type i = 0; i < vec.m_nSize; ++i)
{
new (m_pArray + i, placement_t()) T((T) vec.m_pArray[j]);
new (m_pArray + i, placement_t()) T((T) vec.m_pArray[i]);
}

m_nSize = vec.m_nSize;
}

template <typename T, typename Allocator>
vector<T, Allocator>::vector(const vector<T, Allocator>& vec)
: m_allocator(vec.m_allocator)
, m_pArray(NULL)
, m_nSize(0)
, m_nCapacity(0)
, m_pArray(m_allocator.allocate(vec.m_nSize))
, m_nSize(vec.m_nSize)
, m_nCapacity(vec.m_nSize)
{
ensure_capacity(vec.m_nSize);
for (size_type i = 0, j = 0; i < vec.m_nSize; ++i, ++j)
for (size_type i = 0; i < vec.m_nSize; ++i)
{
new (m_pArray + i, placement_t()) T(vec.m_pArray[j]);
new (m_pArray + i, placement_t()) T(vec.m_pArray[i]);
}

m_nSize = vec.m_nSize;
}


template <typename T, typename Allocator>
vector<T, Allocator>::~vector()
{
Expand Down
75 changes: 15 additions & 60 deletions src/coreclr/jit/promotion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1103,7 +1103,7 @@ class LocalsUseVisitor : public GenTreeVisitor<LocalsUseVisitor>

JITDUMP("Computing unpromoted remainder for V%02u\n", agg->LclNum);
StructSegments unpromotedParts =
Promotion::SignificantSegments(m_compiler, m_compiler->lvaGetDesc(agg->LclNum)->GetLayout());
m_prom->SignificantSegments(m_compiler->lvaGetDesc(agg->LclNum)->GetLayout());
for (Replacement& rep : reps)
{
unpromotedParts.Subtract(StructSegments::Segment(rep.Offset, rep.Offset + genTypeSize(rep.AccessType)));
Expand Down Expand Up @@ -1576,42 +1576,6 @@ bool StructSegments::CoveringSegment(Segment* result)
}

#ifdef DEBUG
//------------------------------------------------------------------------
// Check:
// Validate that the data structure is normalized and that it equals a
// specific fixed bit vector.
//
// Parameters:
// vect - The bit vector
//
// Remarks:
// This validates that the internal representation is normalized (i.e.
// all adjacent intervals are merged) and that it contains an index iff
// the specified vector contains that index.
//
void StructSegments::Check(FixedBitVect* vect)
{
// 'first' suppresses the ordering check for the first segment; 'last' holds the
// End of the previously visited segment (0 before any segment has been seen).
bool first = true;
unsigned last = 0;
for (const Segment& segment : m_segments)
{
// Segments must be strictly increasing with a gap between them: a segment
// starting exactly at 'last' would be adjacent to its predecessor and
// should have been merged into it.
assert(first || (last < segment.Start));
// The segment must lie within the index range covered by the bit vector.
assert(segment.End <= vect->bitVectGetSize());

// Every index in the gap before this segment must be absent from the vector.
for (unsigned i = last; i < segment.Start; i++)
assert(!vect->bitVectTest(i));

// Every index covered by this segment must be present in the vector.
for (unsigned i = segment.Start; i < segment.End; i++)
assert(vect->bitVectTest(i));

first = false;
last = segment.End;
}

// Indices past the end of the final segment must also be absent from the vector.
for (unsigned i = last, size = vect->bitVectGetSize(); i < size; i++)
assert(!vect->bitVectTest(i));
}

//------------------------------------------------------------------------
// Dump:
// Dump a string representation of the segment tree to stdout.
Expand Down Expand Up @@ -1640,18 +1604,20 @@ void StructSegments::Dump()
// for the specified class layout.
//
// Parameters:
// compiler - Compiler instance
// layout - The layout
// bitVectRepr - In debug, a bit vector that represents the same segments as the returned segment tree.
// Used for verification purposes.
//
// Returns:
// Segment tree containing all significant parts of the layout.
//
StructSegments Promotion::SignificantSegments(Compiler* compiler,
ClassLayout* layout DEBUGARG(FixedBitVect** bitVectRepr))
StructSegments Promotion::SignificantSegments(ClassLayout* layout)
{
COMP_HANDLE compHnd = compiler->info.compCompHnd;
StructSegments* cached;
if ((m_significantSegmentsCache != nullptr) && m_significantSegmentsCache->Lookup(layout, &cached))
{
return StructSegments(*cached);
}

COMP_HANDLE compHnd = m_compiler->info.compCompHnd;

bool significantPadding;
if (layout->IsBlockLayout())
Expand Down Expand Up @@ -1683,19 +1649,11 @@ StructSegments Promotion::SignificantSegments(Compiler* compiler,
}
}

StructSegments segments(compiler->getAllocator(CMK_Promotion));

// Validate with "obviously correct" but less scalable fixed bit vector implementation.
INDEBUG(FixedBitVect* segmentBitVect = FixedBitVect::bitVectInit(layout->GetSize(), compiler));
StructSegments segments(m_compiler->getAllocator(CMK_Promotion));

if (significantPadding)
{
segments.Add(StructSegments::Segment(0, layout->GetSize()));

#ifdef DEBUG
for (unsigned i = 0; i < layout->GetSize(); i++)
segmentBitVect->bitVectSet(i);
#endif
}
else
{
Expand All @@ -1720,19 +1678,16 @@ StructSegments Promotion::SignificantSegments(Compiler* compiler,
}

segments.Add(StructSegments::Segment(fldOffset, fldOffset + size));
#ifdef DEBUG
for (unsigned i = 0; i < size; i++)
segmentBitVect->bitVectSet(fldOffset + i);
#endif
}
}

#ifdef DEBUG
if (bitVectRepr != nullptr)
if (m_significantSegmentsCache == nullptr)
{
*bitVectRepr = segmentBitVect;
m_significantSegmentsCache =
new (m_compiler, CMK_Promotion) ClassLayoutStructSegmentsMap(m_compiler->getAllocator(CMK_Promotion));
}
#endif

m_significantSegmentsCache->Set(layout, new (m_compiler, CMK_Promotion) StructSegments(segments));

return segments;
}
Expand Down
12 changes: 7 additions & 5 deletions src/coreclr/jit/promotion.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ class StructSegments
bool CoveringSegment(Segment* result);

#ifdef DEBUG
void Check(FixedBitVect* vect);
void Dump();
#endif
};
Expand All @@ -104,9 +103,12 @@ struct AggregateInfo
Replacement** endReplacement);
};

typedef JitHashTable<ClassLayout*, JitPtrKeyFuncs<ClassLayout>, class StructSegments*> ClassLayoutStructSegmentsMap;

class Promotion
{
Compiler* m_compiler;
Compiler* m_compiler;
ClassLayoutStructSegmentsMap* m_significantSegmentsCache = nullptr;

friend class LocalUses;
friend class LocalsUseVisitor;
Expand All @@ -116,8 +118,7 @@ class Promotion
friend class DecompositionPlan;
friend class StructSegments;

static StructSegments SignificantSegments(Compiler* compiler,
ClassLayout* layout DEBUGARG(FixedBitVect** bitVectRepr = nullptr));
StructSegments SignificantSegments(ClassLayout* layout);

void ExplicitlyZeroInitReplacementLocals(unsigned lclNum,
const jitstd::vector<Replacement>& replacements,
Expand Down Expand Up @@ -252,6 +253,7 @@ class ReplaceVisitor : public GenTreeVisitor<ReplaceVisitor>
{
friend class DecompositionPlan;

Promotion* m_promotion;
jitstd::vector<AggregateInfo*>& m_aggregates;
PromotionLiveness* m_liveness;
bool m_madeChanges = false;
Expand All @@ -269,7 +271,7 @@ class ReplaceVisitor : public GenTreeVisitor<ReplaceVisitor>
};

ReplaceVisitor(Promotion* prom, jitstd::vector<AggregateInfo*>& aggregates, PromotionLiveness* liveness)
: GenTreeVisitor(prom->m_compiler), m_aggregates(aggregates), m_liveness(liveness)
: GenTreeVisitor(prom->m_compiler), m_promotion(prom), m_aggregates(aggregates), m_liveness(liveness)
{
}

Expand Down
21 changes: 7 additions & 14 deletions src/coreclr/jit/promotiondecomposition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class DecompositionPlan
var_types Type;
};

Promotion* m_promotion;
Compiler* m_compiler;
ReplaceVisitor* m_replacer;
jitstd::vector<AggregateInfo*>& m_aggregates;
Expand All @@ -61,23 +62,24 @@ class DecompositionPlan
bool m_hasNonRemainderUseOfStructLocal = false;

public:
DecompositionPlan(Compiler* comp,
DecompositionPlan(Promotion* prom,
ReplaceVisitor* replacer,
jitstd::vector<AggregateInfo*>& aggregates,
PromotionLiveness* liveness,
GenTree* store,
GenTree* src,
bool dstInvolvesReplacements,
bool srcInvolvesReplacements)
: m_compiler(comp)
: m_promotion(prom)
, m_compiler(prom->m_compiler)
, m_replacer(replacer)
, m_aggregates(aggregates)
, m_liveness(liveness)
, m_store(store)
, m_src(src)
, m_dstInvolvesReplacements(dstInvolvesReplacements)
, m_srcInvolvesReplacements(srcInvolvesReplacements)
, m_entries(comp->getAllocator(CMK_Promotion))
, m_entries(prom->m_compiler->getAllocator(CMK_Promotion))
{
}

Expand Down Expand Up @@ -274,25 +276,16 @@ class DecompositionPlan
{
ClassLayout* dstLayout = m_store->GetLayout(m_compiler);

// Validate with "obviously correct" but less scalable fixed bit vector implementation.
INDEBUG(FixedBitVect * segmentBitVect);
StructSegments segments = Promotion::SignificantSegments(m_compiler, dstLayout DEBUGARG(&segmentBitVect));
StructSegments segments = m_promotion->SignificantSegments(dstLayout);

for (int i = 0; i < m_entries.Height(); i++)
{
const Entry& entry = m_entries.BottomRef(i);

segments.Subtract(StructSegments::Segment(entry.Offset, entry.Offset + genTypeSize(entry.Type)));

#ifdef DEBUG
for (unsigned i = 0; i < genTypeSize(entry.Type); i++)
segmentBitVect->bitVectClear(entry.Offset + i);
#endif
}

#ifdef DEBUG
segments.Check(segmentBitVect);

if (m_compiler->verbose)
{
printf(" Remainder: ");
Expand Down Expand Up @@ -1084,7 +1077,7 @@ void ReplaceVisitor::HandleStructStore(GenTree** use, GenTree* user)
DecompositionStatementList result;
EliminateCommasInBlockOp(store, &result);

DecompositionPlan plan(m_compiler, this, m_aggregates, m_liveness, store, src, dstInvolvesReplacements,
DecompositionPlan plan(m_promotion, this, m_aggregates, m_liveness, store, src, dstInvolvesReplacements,
srcInvolvesReplacements);

if (dstInvolvesReplacements)
Expand Down

0 comments on commit e834d42

Please sign in to comment.