[SLP] Vectorize non-power-of-2 ops with padding.
This patch introduces a new VectorizeWithPadding node type for root and
leaf nodes to allow vectorizing loads/stores with a non-power-of-2 number
of elements.

VectorizeWithPadding load nodes will pad the result to the next power of 2
with poison elements.
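
(For illustration only, a minimal C++ sketch of the padding step; the helper
name is hypothetical and this is not the patch's actual code. The patch builds
an equivalent identity-plus-poison shuffle mask internally when connecting a
padded leaf to its power-of-2 parents.)

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instructions.h" // PoisonMaskElem
  #include "llvm/Support/MathExtras.h"
  #include <numeric>

  // Widen a <NumElts x T> value to the next power-of-2 width, filling the
  // extra lanes with poison.
  static llvm::Value *padToNextPow2(llvm::IRBuilderBase &Builder,
                                    llvm::Value *Vec, unsigned NumElts) {
    unsigned PaddedElts = llvm::PowerOf2Ceil(NumElts);
    if (PaddedElts == NumElts)
      return Vec;
    // Identity mapping for the real lanes, poison (-1) for the padding lanes.
    llvm::SmallVector<int> Mask(PaddedElts, llvm::PoisonMaskElem);
    std::iota(Mask.begin(), Mask.begin() + NumElts, 0);
    return Builder.CreateShuffleVector(Vec, Mask);
  }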

Non-leaf nodes will operate on normal power-of-2 vectors. For those
non-leaf nodes, we still track the number of padding elements needed to
reach the next power of 2, for use in various places such as cost
computation.

VectorizeWithPadding store nodes strip away the padding elements and
store the non-power-of-2 number of data elements.
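
(Conversely, a sketch of the stripping step before the narrow store; again a
hypothetical helper rather than the patch's code, reusing the includes from
the sketch above.)

  // Keep only the first NumElts lanes of a padded power-of-2 vector, so a
  // plain store of the narrow vector type writes exactly the original
  // non-power-of-2 number of elements.
  static llvm::Value *stripPadding(llvm::IRBuilderBase &Builder,
                                   llvm::Value *PaddedVec, unsigned NumElts) {
    // Select lanes 0 .. NumElts-1; the poison padding lanes are dropped.
    llvm::SmallVector<int> Mask(NumElts);
    std::iota(Mask.begin(), Mask.end(), 0);
    // The narrow result can then be stored with a regular store instruction.
    return Builder.CreateShuffleVector(PaddedVec, Mask);
  }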

Note that re-ordering and shuffling are not yet implemented for nodes
requiring padding, to keep the initial implementation simpler.

The initial implementation also only tries to vectorize with padding if
the original number of elements + 1 is a power of 2, i.e. if only a single
padding element is needed (e.g. groups of 3, 7, or 15 elements).

The feature is guarded by a new flag, -slp-vectorize-non-power-of-2, which is off by default for now.
fhahn committed Jan 19, 2024
1 parent 5330daa commit 72a0396
Showing 7 changed files with 686 additions and 350 deletions.
115 changes: 99 additions & 16 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -179,6 +179,10 @@ static cl::opt<bool>
ViewSLPTree("view-slp-tree", cl::Hidden,
cl::desc("Display the SLP trees with Graphviz"));

static cl::opt<bool> VectorizeNonPowerOf2(
"slp-vectorize-non-power-of-2", cl::init(false), cl::Hidden,
cl::desc("Try to vectorize with non-power-of-2 with number of elements."));

// Limit the number of alias checks. The limit is chosen so that
// it has no negative effect on the llvm benchmarks.
static const unsigned AliasedCheckLimit = 10;
@@ -2733,6 +2737,9 @@ class BoUpSLP {
SmallVectorImpl<Value *> *OpScalars = nullptr,
SmallVectorImpl<Value *> *AltScalars = nullptr) const;

/// Return true if this entry contains a non-power-of-2 number of scalars.
bool isNonPowOf2Vec() const { return !isPowerOf2_32(Scalars.size()); }

#ifndef NDEBUG
/// Debug printer.
LLVM_DUMP_METHOD void dump() const {
@@ -2891,9 +2898,13 @@ class BoUpSLP {
ValueToGatherNodes.try_emplace(V).first->getSecond().insert(Last);
}

if (UserTreeIdx.UserTE)
if (UserTreeIdx.UserTE) {
Last->UserTreeIndices.push_back(UserTreeIdx);

if (!isPowerOf2_32(Last->Scalars.size())) {
assert((Last->ReorderIndices.empty()) &&
"Reodering isn't implemented for nodes with padding yet");
}
}
return Last;
}

@@ -2921,7 +2932,8 @@ class BoUpSLP {
/// and fills required data before actual scheduling of the instructions.
TreeEntry::EntryState getScalarsVectorizationState(
InstructionsState &S, ArrayRef<Value *> VL, bool IsScatterVectorizeUserTE,
OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) const;
OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps,
bool HasPadding) const;

/// Maps a specific scalar to its tree entry.
SmallDenseMap<Value *, TreeEntry *> ScalarToTreeEntry;
@@ -3881,6 +3893,9 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
Order.clear();
// Check the order of pointer operands or that all pointers are the same.
bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order);
if (!Order.empty() && !isPowerOf2_32(VL.size()))
return LoadsState::Gather;

if (IsSorted || all_of(PointerOps, [&](Value *P) {
return arePointersCompatible(P, PointerOps.front(), TLI);
})) {
@@ -4568,6 +4583,10 @@ bool BoUpSLP::canReorderOperands(
TreeEntry *UserTE, SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
ArrayRef<TreeEntry *> ReorderableGathers,
SmallVectorImpl<TreeEntry *> &GatherOps) {
// Reordering isn't implemented for nodes with padding yet.
if (UserTE->isNonPowOf2Vec())
return false;

for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I) {
if (any_of(Edges, [I](const std::pair<unsigned, TreeEntry *> &OpData) {
return OpData.first == I &&
@@ -4746,6 +4765,9 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
auto Res = OrdersUses.insert(std::make_pair(OrdersType(), 0));
const auto &&AllowsReordering = [IgnoreReorder, &GathersToOrders](
const TreeEntry *TE) {
// Reordering for nodes with padding not implemented yet.
if (TE->isNonPowOf2Vec())
return false;
if (!TE->ReorderIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
(TE->State == TreeEntry::Vectorize && TE->isAltShuffle()) ||
(IgnoreReorder && TE->Idx == 0))
@@ -5233,7 +5255,8 @@ static bool isAlternateInstruction(const Instruction *I,

BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
InstructionsState &S, ArrayRef<Value *> VL, bool IsScatterVectorizeUserTE,
OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) const {
OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps,
bool HasPadding) const {
assert(S.MainOp && "Expected instructions with same/alternate opcodes only.");

unsigned ShuffleOrOp =
@@ -5256,7 +5279,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
}
case Instruction::ExtractValue:
case Instruction::ExtractElement: {
bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
bool Reuse = !HasPadding && canReuseExtract(VL, VL0, CurrentOrder);
if (Reuse || !CurrentOrder.empty())
return TreeEntry::Vectorize;
LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
@@ -5583,6 +5606,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (PWSz == VL.size()) {
ReuseShuffleIndicies.clear();
} else {
if (UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) {
LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
"for nodes with padding.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return false;
}
NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end());
NonUniqueValueVL.append(PWSz - UniqueValues.size(),
UniqueValues.back());
@@ -5594,6 +5623,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return false;
}
if (UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) {
LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported for "
"nodes with padding.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return false;
}
VL = UniqueValues;
}
return true;
@@ -5859,7 +5894,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
OrdersType CurrentOrder;
SmallVector<Value *> PointerOps;
TreeEntry::EntryState State = getScalarsVectorizationState(
S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps);
S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps,
UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec());
if (State == TreeEntry::NeedToGather) {
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
@@ -6955,7 +6991,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return Constant::getAllOnesValue(Ty);
}

InstructionCost getBuildVectorCost(ArrayRef<Value *> VL, Value *Root) {
InstructionCost getBuildVectorCost(ArrayRef<Value *> VL, Value *Root,
bool WithPadding = false) {
if ((!Root && allConstant(VL)) || all_of(VL, UndefValue::classof))
return TTI::TCC_Free;
auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
@@ -6966,7 +7003,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
InstructionsState S = getSameOpcode(VL, *R.TLI);
const unsigned Sz = R.DL->getTypeSizeInBits(VL.front()->getType());
unsigned MinVF = R.getMinVF(2 * Sz);
if (VL.size() > 2 &&
if (!WithPadding && VL.size() > 2 &&
((S.getOpcode() == Instruction::Load && !S.isAltShuffle()) ||
(InVectors.empty() &&
any_of(seq<unsigned>(0, VL.size() / MinVF),
@@ -7077,7 +7114,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
GatherCost -= ScalarsCost;
}
} else if (!Root && isSplat(VL)) {
} else if (!WithPadding && !Root && isSplat(VL)) {
// Found the broadcasting of the single scalar, calculate the cost as
// the broadcast.
const auto *It =
@@ -7640,8 +7677,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
CommonMask[Idx] = Mask[Idx] + VF;
}
Value *gather(ArrayRef<Value *> VL, unsigned MaskVF = 0,
Value *Root = nullptr) {
Cost += getBuildVectorCost(VL, Root);
Value *Root = nullptr, bool WithPadding = false) {
Cost += getBuildVectorCost(VL, Root, WithPadding);
if (!Root) {
// FIXME: Need to find a way to avoid use of getNullValue here.
SmallVector<Constant *> Vals;
@@ -9710,6 +9747,9 @@ BoUpSLP::isGatherShuffledEntry(
// No need to check for the topmost gather node.
if (TE == VectorizableTree.front().get())
return {};
// Gathering for nodes with padding is not implemented yet.
if (TE->isNonPowOf2Vec())
return {};
Mask.assign(VL.size(), PoisonMaskElem);
assert(TE->UserTreeIndices.size() == 1 &&
"Expected only single user of the gather node.");
@@ -10422,7 +10462,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
add(V1, NewMask);
}
Value *gather(ArrayRef<Value *> VL, unsigned MaskVF = 0,
Value *Root = nullptr) {
Value *Root = nullptr, bool WithPadding = false) {
return R.gather(VL, Root);
}
Value *createFreeze(Value *V) { return Builder.CreateFreeze(V); }
@@ -10491,7 +10531,6 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
SmallVector<int> Mask(E->ReorderIndices.begin(), E->ReorderIndices.end());
reorderScalars(VL, Mask);
}
const unsigned VF = VL.size();
InstructionsState S = getSameOpcode(VL, *TLI);
// Special processing for GEPs bundle, which may include non-gep values.
if (!S.getOpcode() && VL.front()->getType()->isPointerTy()) {
@@ -10533,6 +10572,7 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
ShuffleBuilder.add(V, Mask);
return ShuffleBuilder.finalize(std::nullopt);
};
const unsigned VF = VL.size();
Value *V = vectorizeTree(VE, PostponedPHIs);
if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
if (!VE->ReuseShuffleIndices.empty()) {
@@ -10659,6 +10699,14 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
return true;
};
BVTy ShuffleBuilder(Params...);
if (E->isNonPowOf2Vec()) {
Value *BV = ShuffleBuilder.gather(E->Scalars, 0, nullptr, true);
SmallVector<int> Mask(VF, PoisonMaskElem);
std::iota(Mask.begin(), Mask.begin() + E->Scalars.size(), 0);
ShuffleBuilder.add(BV, Mask);
return ShuffleBuilder.finalize(E->ReuseShuffleIndices);
}

ResTy Res = ResTy();
SmallVector<int> Mask;
SmallVector<int> ExtractMask(GatheredScalars.size(), PoisonMaskElem);
@@ -13422,8 +13470,13 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
const unsigned Sz = R.getVectorElementSize(Chain[0]);
unsigned VF = Chain.size();

if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF)
return false;
if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF) {
// Check if vectorizing with a non-power-of-2 VF should be considered. At
// the moment, only consider cases where VF + 1 is a power-of-2, i.e. almost
// all vector lanes are used.
if (!VectorizeNonPowerOf2 || (VF < MinVF && VF + 1 != MinVF))
return false;
}

LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << Idx
<< "\n");
@@ -13519,9 +13572,39 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
<< "MinVF (" << MinVF << ")\n");
}

unsigned StartIdx = 0;
if (VectorizeNonPowerOf2) {
// Try vectorizing with a non-power-of-2 VF. At the moment, only
// consider cases where VF + 1 is a power-of-2, i.e. almost all vector
// lanes are used.
unsigned CandVF = Operands.size() + 1;
if (isPowerOf2_32(CandVF) && CandVF <= MaxVF) {
assert(
all_of(
Operands,
[&](Value *V) {
return cast<StoreInst>(V)->getValueOperand()->getType() ==
cast<StoreInst>(Operands.front())
->getValueOperand()
->getType();
}) &&
"Expected all operands of same type.");
if (!VectorizedStores.count(Operands.front()) &&
!VectorizedStores.count(Operands.back()) &&
TriedSequences
.insert(std::make_pair(Operands.front(), Operands.back()))
.second &&
vectorizeStoreChain(Operands, R, Operands.size(), MinVF)) {
// Mark the vectorized stores so that we don't vectorize them again.
VectorizedStores.insert(Operands.begin(), Operands.end());
Changed = true;
StartIdx += Operands.size();
}
}
}

// FIXME: Is division-by-2 the correct step? Should we assert that the
// register size is a power-of-2?
unsigned StartIdx = 0;
for (unsigned Size = MaxVF; Size >= MinVF; Size /= 2) {
for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
70 changes: 39 additions & 31 deletions llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll
@@ -1,35 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -passes=slp-vectorizer -mtriple=arm64-apple-ios -S %s | FileCheck %s
; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -mtriple=arm64-apple-ios -S %s | FileCheck --check-prefixes=NON-POW2 %s
; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -mtriple=arm64-apple-ios -S %s | FileCheck --check-prefixes=POW2-ONLY %s

define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) {
; CHECK-LABEL: define void @v15_load_i8_mul_by_constant_store(
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
; CHECK-NEXT: store <8 x i8> [[TMP1]], ptr [[DST]], align 1
; CHECK-NEXT: [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8
; CHECK-NEXT: [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], <i8 10, i8 10, i8 10, i8 10>
; CHECK-NEXT: store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1
; CHECK-NEXT: [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12
; CHECK-NEXT: [[L_SRC_12:%.*]] = load i8, ptr [[GEP_SRC_12]], align 4
; CHECK-NEXT: [[MUL_12:%.*]] = mul nsw i8 [[L_SRC_12]], 10
; CHECK-NEXT: [[DST_12:%.*]] = getelementptr i8, ptr [[DST]], i8 12
; CHECK-NEXT: store i8 [[MUL_12]], ptr [[DST_12]], align 1
; CHECK-NEXT: [[GEP_SRC_13:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 13
; CHECK-NEXT: [[L_SRC_13:%.*]] = load i8, ptr [[GEP_SRC_13]], align 4
; CHECK-NEXT: [[MUL_13:%.*]] = mul nsw i8 [[L_SRC_13]], 10
; CHECK-NEXT: [[DST_13:%.*]] = getelementptr i8, ptr [[DST]], i8 13
; CHECK-NEXT: store i8 [[MUL_13]], ptr [[DST_13]], align 1
; CHECK-NEXT: [[GEP_SRC_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 14
; CHECK-NEXT: [[L_SRC_14:%.*]] = load i8, ptr [[GEP_SRC_14]], align 4
; CHECK-NEXT: [[MUL_14:%.*]] = mul nsw i8 [[L_SRC_14]], 10
; CHECK-NEXT: [[DST_14:%.*]] = getelementptr i8, ptr [[DST]], i8 14
; CHECK-NEXT: store i8 [[MUL_14]], ptr [[DST_14]], align 1
; CHECK-NEXT: ret void
; NON-POW2-LABEL: define void @v15_load_i8_mul_by_constant_store(
; NON-POW2-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; NON-POW2-NEXT: entry:
; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
; NON-POW2-NEXT: [[TMP0:%.*]] = load <15 x i8>, ptr [[GEP_SRC_0]], align 4
; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
; NON-POW2-NEXT: store <15 x i8> [[TMP1]], ptr [[DST]], align 1
; NON-POW2-NEXT: ret void
;
; POW2-ONLY-LABEL: define void @v15_load_i8_mul_by_constant_store(
; POW2-ONLY-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4
; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
; POW2-ONLY-NEXT: store <8 x i8> [[TMP1]], ptr [[DST]], align 1
; POW2-ONLY-NEXT: [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8
; POW2-ONLY-NEXT: [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8
; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4
; POW2-ONLY-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], <i8 10, i8 10, i8 10, i8 10>
; POW2-ONLY-NEXT: store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1
; POW2-ONLY-NEXT: [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12
; POW2-ONLY-NEXT: [[L_SRC_12:%.*]] = load i8, ptr [[GEP_SRC_12]], align 4
; POW2-ONLY-NEXT: [[MUL_12:%.*]] = mul nsw i8 [[L_SRC_12]], 10
; POW2-ONLY-NEXT: [[DST_12:%.*]] = getelementptr i8, ptr [[DST]], i8 12
; POW2-ONLY-NEXT: store i8 [[MUL_12]], ptr [[DST_12]], align 1
; POW2-ONLY-NEXT: [[GEP_SRC_13:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 13
; POW2-ONLY-NEXT: [[L_SRC_13:%.*]] = load i8, ptr [[GEP_SRC_13]], align 4
; POW2-ONLY-NEXT: [[MUL_13:%.*]] = mul nsw i8 [[L_SRC_13]], 10
; POW2-ONLY-NEXT: [[DST_13:%.*]] = getelementptr i8, ptr [[DST]], i8 13
; POW2-ONLY-NEXT: store i8 [[MUL_13]], ptr [[DST_13]], align 1
; POW2-ONLY-NEXT: [[GEP_SRC_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 14
; POW2-ONLY-NEXT: [[L_SRC_14:%.*]] = load i8, ptr [[GEP_SRC_14]], align 4
; POW2-ONLY-NEXT: [[MUL_14:%.*]] = mul nsw i8 [[L_SRC_14]], 10
; POW2-ONLY-NEXT: [[DST_14:%.*]] = getelementptr i8, ptr [[DST]], i8 14
; POW2-ONLY-NEXT: store i8 [[MUL_14]], ptr [[DST_14]], align 1
; POW2-ONLY-NEXT: ret void
;
entry:
%gep.src.0 = getelementptr inbounds i8, ptr %src, i8 0
@@ -123,5 +133,3 @@ entry:

ret void
}

