diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 74471cf6f222fb..9f477aa81b0c0f 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -270,7 +270,7 @@ class fir_AllocatableOp traits = []> : bool hasLenParams() { return bool{getAttr(lenpName())}; } unsigned numLenParams() { - if (auto val = getAttrOfType(lenpName())) + if (auto val = (*this)->getAttrOfType(lenpName())) return val.getInt(); return 0; } @@ -291,7 +291,7 @@ class fir_AllocatableOp traits = []> : /// Get the input type of the allocation mlir::Type getInType() { - return getAttrOfType(inType()).getValue(); + return (*this)->getAttrOfType(inType()).getValue(); } }]; @@ -567,7 +567,7 @@ class fir_SwitchTerminatorOp traits = []> : // The number of destination conditions that may be tested unsigned getNumConditions() { - return getAttrOfType(getCasesAttr()).size(); + return (*this)->getAttrOfType(getCasesAttr()).size(); } // The selector is the value being tested to determine the destination @@ -577,7 +577,7 @@ class fir_SwitchTerminatorOp traits = []> : } // The number of blocks that may be branched to - unsigned getNumDest() { return getOperation()->getNumSuccessors(); } + unsigned getNumDest() { return (*this)->getNumSuccessors(); } llvm::Optional getCompareOperands(unsigned cond); llvm::Optional> getCompareOperands( @@ -1561,11 +1561,11 @@ def fir_CoordinateOp : fir_Op<"coordinate_of", [NoSideEffect]> { let parser = "return parseCoordinateOp(parser, result);"; let printer = [{ - p << getOperationName() << ' ' << getOperation()->getOperands(); + p << getOperationName() << ' ' << (*this)->getOperands(); p.printOptionalAttrDict(getAttrs(), /*elidedAttrs=*/{baseType()}); p << " : "; - p.printFunctionalType(getOperation()->getOperandTypes(), - getOperation()->getResultTypes()); + p.printFunctionalType((*this)->getOperandTypes(), + (*this)->getResultTypes()); }]; let verifier = [{ @@ -1940,9 +1940,9 @@ def fir_LoopOp : region_Op<"do_loop", return getOperands().drop_front(getNumControlOperands()); } - void setLowerBound(Value bound) { getOperation()->setOperand(0, bound); } - void setUpperBound(Value bound) { getOperation()->setOperand(1, bound); } - void setStep(Value step) { getOperation()->setOperand(2, step); } + void setLowerBound(Value bound) { (*this)->setOperand(0, bound); } + void setUpperBound(Value bound) { (*this)->setOperand(1, bound); } + void setStep(Value step) { (*this)->setOperand(2, step); } /// Number of region arguments for loop-carried values unsigned getNumRegionIterArgs() { @@ -1952,18 +1952,18 @@ def fir_LoopOp : region_Op<"do_loop", unsigned getNumControlOperands() { return 3; } /// Does the operation hold operands for loop-carried values bool hasIterOperands() { - return getOperation()->getNumOperands() > getNumControlOperands(); + return (*this)->getNumOperands() > getNumControlOperands(); } /// Get Number of loop-carried values unsigned getNumIterOperands() { - return getOperation()->getNumOperands() - getNumControlOperands(); + return (*this)->getNumOperands() - getNumControlOperands(); } /// Get the body of the loop mlir::Block *getBody() { return ®ion().front(); } void setUnordered() { - getOperation()->setAttr(unorderedAttrName(), + (*this)->setAttr(unorderedAttrName(), mlir::UnitAttr::get(getContext())); } }]; @@ -2062,9 +2062,9 @@ def fir_IterWhileOp : region_Op<"iterate_while", return getOperands().drop_front(getNumControlOperands()); } - void setLowerBound(Value bound) 
{ getOperation()->setOperand(0, bound); } - void setUpperBound(Value bound) { getOperation()->setOperand(1, bound); } - void setStep(mlir::Value step) { getOperation()->setOperand(2, step); } + void setLowerBound(Value bound) { (*this)->setOperand(0, bound); } + void setUpperBound(Value bound) { (*this)->setOperand(1, bound); } + void setStep(mlir::Value step) { (*this)->setOperand(2, step); } /// Number of region arguments for loop-carried values unsigned getNumRegionIterArgs() { @@ -2074,11 +2074,11 @@ def fir_IterWhileOp : region_Op<"iterate_while", unsigned getNumControlOperands() { return 3; } /// Does the operation hold operands for loop-carried values bool hasIterOperands() { - return getOperation()->getNumOperands() > getNumControlOperands(); + return (*this)->getNumOperands() > getNumControlOperands(); } /// Get Number of loop-carried values unsigned getNumIterOperands() { - return getOperation()->getNumOperands() - getNumControlOperands(); + return (*this)->getNumOperands() - getNumControlOperands(); } }]; } @@ -2705,7 +2705,7 @@ def fir_GlobalOp : fir_Op<"global", [IsolatedFromAbove, Symbol]> { p << " : "; p.printType(getType()); if (hasInitializationBody()) - p.printRegion(getOperation()->getRegion(0), /*printEntryBlockArgs=*/false, + p.printRegion((*this)->getRegion(0), /*printEntryBlockArgs=*/false, /*printBlockTerminators=*/true); }]; @@ -2754,7 +2754,7 @@ def fir_GlobalOp : fir_Op<"global", [IsolatedFromAbove, Symbol]> { void appendInitialValue(mlir::Operation *op); /// A GlobalOp has one region. - mlir::Region &getRegion() { return getOperation()->getRegion(0); } + mlir::Region &getRegion() { return (*this)->getRegion(0); } /// A GlobalOp has one block. mlir::Block &getBlock() { return getRegion().front(); } @@ -2763,7 +2763,7 @@ def fir_GlobalOp : fir_Op<"global", [IsolatedFromAbove, Symbol]> { static mlir::ParseResult verifyValidLinkage(StringRef linkage); bool hasInitializationBody() { - return (getOperation()->getNumRegions() == 1) && !getRegion().empty() && + return ((*this)->getNumRegions() == 1) && !getRegion().empty() && !isa(getBlock().front()); } @@ -2869,7 +2869,7 @@ def fir_DispatchTableOp : fir_Op<"dispatch_table", mlir::SymbolTable::getSymbolAttrName()).getValue(); p << getOperationName() << " @" << tableName; - Region &body = getOperation()->getRegion(0); + Region &body = (*this)->getRegion(0); if (!body.empty()) p.printRegion(body, /*printEntryBlockArgs=*/false, /*printBlockTerminators=*/false); @@ -2900,7 +2900,7 @@ def fir_DispatchTableOp : fir_Op<"dispatch_table", void appendTableEntry(mlir::Operation *op); mlir::Region &getRegion() { - return this->getOperation()->getRegion(0); + return (*this)->getRegion(0); } mlir::Block &getBlock() { diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 6724a4019030d8..b29eb589e2d727 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -16095,6 +16095,81 @@ Arguments: """""""""" The argument to this intrinsic must be a vector of floating-point values. +'``llvm.experimental.vector.insert``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. You can use ``llvm.experimental.vector.insert`` +to insert a fixed-width vector into a scalable vector, but not the other way +around. 
+ +:: + + declare @llvm.experimental.vector.insert.v4f32( %vec, <4 x float> %subvec, i64 %idx) + declare @llvm.experimental.vector.insert.v2f64( %vec, <2 x double> %subvec, i64 %idx) + +Overview: +""""""""" + +The '``llvm.experimental.vector.insert.*``' intrinsics insert a vector into another vector +starting from a given index. The return type matches the type of the vector we +insert into. Conceptually, this can be used to build a scalable vector out of +non-scalable vectors. + +Arguments: +"""""""""" + +The ``vec`` is the vector which ``subvec`` will be inserted into. +The ``subvec`` is the vector that will be inserted. + +``idx`` represents the starting element number at which ``subvec`` will be +inserted. ``idx`` must be a constant multiple of ``subvec``'s known minimum +vector length. If ``subvec`` is a scalable vector, ``idx`` is first scaled by +the runtime scaling factor of ``subvec``. The elements of ``vec`` starting at +``idx`` are overwritten with ``subvec``. Elements ``idx`` through (``idx`` + +num_elements(``subvec``) - 1) must be valid ``vec`` indices. If this condition +cannot be determined statically but is false at runtime, then the result vector +is undefined. + + +'``llvm.experimental.vector.extract``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. You can use +``llvm.experimental.vector.extract`` to extract a fixed-width vector from a +scalable vector, but not the other way around. + +:: + + declare <4 x float> @llvm.experimental.vector.extract.v4f32( %vec, i64 %idx) + declare <2 x double> @llvm.experimental.vector.extract.v2f64( %vec, i64 %idx) + +Overview: +""""""""" + +The '``llvm.experimental.vector.extract.*``' intrinsics extract a vector from +within another vector starting from a given index. The return type must be +explicitly specified. Conceptually, this can be used to decompose a scalable +vector into non-scalable parts. + +Arguments: +"""""""""" + +The ``vec`` is the vector from which we will extract a subvector. + +The ``idx`` specifies the starting element number within ``vec`` from which a +subvector is extracted. ``idx`` must be a constant multiple of the known-minimum +vector length of the result type. If the result type is a scalable vector, +``idx`` is first scaled by the result type's runtime scaling factor. Elements +``idx`` through (``idx`` + num_elements(result_type) - 1) must be valid vector +indices. If this condition cannot be determined statically but is false at +runtime, then the result vector is undefined. The ``idx`` parameter must be a +vector index constant type (for most targets this will be an integer pointer +type). 
+ Matrix Intrinsics ----------------- diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index d454c4ea8d9b70..d73155aa2f2fad 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1362,7 +1362,7 @@ class SelectionDAG { ISD::MemIndexedMode AM); SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO, - ISD::MemIndexType IndexType); + ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy); SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 1e71d110730e0e..aa81a31bf23ad4 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -512,6 +512,7 @@ BEGIN_TWO_BYTE_PACK() class LoadSDNodeBitfields { friend class LoadSDNode; friend class MaskedLoadSDNode; + friend class MaskedGatherSDNode; uint16_t : NumLSBaseSDNodeBits; @@ -2451,12 +2452,18 @@ class MaskedGatherSDNode : public MaskedGatherScatterSDNode { MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO, - ISD::MemIndexType IndexType) + ISD::MemIndexType IndexType, ISD::LoadExtType ETy) : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO, - IndexType) {} + IndexType) { + LoadSDNodeBits.ExtTy = ETy; + } const SDValue &getPassThru() const { return getOperand(1); } + ISD::LoadExtType getExtensionType() const { + return ISD::LoadExtType(LoadSDNodeBits.ExtTy); + } + static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MGATHER; } diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index db215094a7e498..c2b3446d159f2f 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -879,6 +879,10 @@ class IRBuilderBase { Type *ResultType, const Twine &Name = ""); + /// Create a call to llvm.vscale, multiplied by \p Scaling. The type of VScale + /// will be the same type as that of \p Scaling. + Value *CreateVScale(Constant *Scaling, const Twine &Name = ""); + /// Create a call to intrinsic \p ID with 1 operand which is mangled on its /// type. 
CallInst *CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V, diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 710479103459b5..eb6c408b4f85b0 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1614,6 +1614,15 @@ def int_preserve_struct_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty], //===---------- Intrinsics to query properties of scalable vectors --------===// def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; +//===---------- Intrinsics to perform subvector insertion/extraction ------===// +def int_experimental_vector_insert : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i64_ty], + [IntrNoMem, ImmArg>]>; + +def int_experimental_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, llvm_i64_ty], + [IntrNoMem, ImmArg>]>; + //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5054ed81fcfc23..4ab0c60399f2ae 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -787,6 +787,16 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, (void)InsertedReg; } + // Do not emit CSInfo for undef forwarding registers. + for (auto &MO : CallMI->uses()) { + if (!MO.isReg() || !MO.isUndef()) + continue; + auto It = ForwardedRegWorklist.find(MO.getReg()); + if (It == ForwardedRegWorklist.end()) + continue; + ForwardedRegWorklist.erase(It); + } + // We erase, from the ForwardedRegWorklist, those forwarding registers for // which we successfully describe a loaded value (by using // the describeLoadedValue()). For those remaining arguments in the working diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8f0c9542b3e7a2..212e0a2ea9884b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -932,6 +932,33 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const { return false; } +static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) { + if (!ScalarTy.isSimple()) + return false; + + uint64_t MaskForTy = 0ULL; + switch (ScalarTy.getSimpleVT().SimpleTy) { + case MVT::i8: + MaskForTy = 0xFFULL; + break; + case MVT::i16: + MaskForTy = 0xFFFFULL; + break; + case MVT::i32: + MaskForTy = 0xFFFFFFFFULL; + break; + default: + return false; + break; + } + + APInt Val; + if (ISD::isConstantSplatVector(N, Val)) + return Val.getLimitedValue() == MaskForTy; + + return false; +} + // Returns the SDNode if it is a constant float BuildVector // or constant float. 
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { @@ -5622,6 +5649,28 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } + // fold (and (masked_gather x)) -> (zext_masked_gather x) + if (auto *GN0 = dyn_cast(N0)) { + EVT MemVT = GN0->getMemoryVT(); + EVT ScalarVT = MemVT.getScalarType(); + + if (SDValue(GN0, 0).hasOneUse() && + isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) && + TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) { + SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(), + GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()}; + + SDValue ZExtLoad = DAG.getMaskedGather( + DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops, + GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD); + + CombineTo(N, ZExtLoad); + AddToWorklist(ZExtLoad.getNode()); + // Avoid recheck of N. + return SDValue(N, 0); + } + } + // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) @@ -9499,14 +9548,16 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other), PassThru.getValueType(), DL, Ops, - MGT->getMemOperand(), MGT->getIndexType()); + MGT->getMemOperand(), MGT->getIndexType(), + MGT->getExtensionType()); } if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) { SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other), PassThru.getValueType(), DL, Ops, - MGT->getMemOperand(), MGT->getIndexType()); + MGT->getMemOperand(), MGT->getIndexType(), + MGT->getExtensionType()); } return SDValue(); @@ -11595,6 +11646,25 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { } } + // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x) + if (auto *GN0 = dyn_cast(N0)) { + if (SDValue(GN0, 0).hasOneUse() && + ExtVT == GN0->getMemoryVT() && + TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) { + SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(), + GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()}; + + SDValue ExtLoad = DAG.getMaskedGather( + DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops, + GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD); + + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + AddToWorklist(ExtLoad.getNode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) { if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 8468f51a922cf7..5c8a562ed9d7d3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -679,12 +679,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { assert(NVT == ExtPassThru.getValueType() && "Gather result type and the passThru argument type should be the same"); + ISD::LoadExtType ExtType = N->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) + ExtType = ISD::EXTLOAD; + SDLoc dl(N); SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(), N->getIndex(), N->getScale() }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand(), N->getIndexType()); + N->getMemOperand(), N->getIndexType(), + ExtType); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 1525543a60b67e..86a1f6bff9f7f9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1748,6 +1748,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue Scale = MGT->getScale(); EVT MemoryVT = MGT->getMemoryVT(); Align Alignment = MGT->getOriginalAlign(); + ISD::LoadExtType ExtType = MGT->getExtensionType(); // Split Mask operand SDValue MaskLo, MaskHi; @@ -1783,11 +1784,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo, - MMO, MGT->getIndexType()); + MMO, MGT->getIndexType(), ExtType); SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi, - MMO, MGT->getIndexType()); + MMO, MGT->getIndexType(), ExtType); // Build a factor node to remember that this load is independent of the // other one. @@ -2392,6 +2393,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue Mask = MGT->getMask(); SDValue PassThru = MGT->getPassThru(); Align Alignment = MGT->getOriginalAlign(); + ISD::LoadExtType ExtType = MGT->getExtensionType(); SDValue MaskLo, MaskHi; if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) @@ -2423,11 +2425,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, - OpsLo, MMO, MGT->getIndexType()); + OpsLo, MMO, MGT->getIndexType(), ExtType); SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, - OpsHi, MMO, MGT->getIndexType()); + OpsHi, MMO, MGT->getIndexType(), ExtType); // Build a factor node to remember that this load is independent of the // other one. 
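The extension-type plumbing in these legalizer hunks, together with the new DAGCombiner folds and the AArch64 GLD1S opcodes further down, targets input of roughly the following shape: a masked gather of narrow elements followed by an extend, which can now become a single extending gather (for example SVE ld1b/ld1sb) rather than a gather plus a separate widening operation. This is only an illustrative sketch with invented names, not one of the patch's tests.

    ; Gather i8 elements and zero-extend them to i32. After type legalization
    ; the zext typically shows up as an 'and' with a 0xFF splat on the gather
    ; result, which the new visitAND combine can fold into a zero-extending
    ; masked gather.
    define <vscale x 4 x i32> @gather_zext(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %mask) {
      %vals = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> undef)
      %ext = zext <vscale x 4 x i8> %vals to <vscale x 4 x i32>
      ret <vscale x 4 x i32> %ext
    }

    declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)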
@@ -3928,7 +3930,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { Scale }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand(), N->getIndexType()); + N->getMemOperand(), N->getIndexType(), + N->getExtensionType()); // Legalize the chain result - switch anything that used the old chain to // use the new one. @@ -4722,7 +4725,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) { SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index, Scale}; SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops, - MG->getMemOperand(), MG->getIndexType()); + MG->getMemOperand(), MG->getIndexType(), + MG->getExtensionType()); ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); return SDValue(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index dd837d4d495fcb..4661b0d9189b16 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7295,14 +7295,15 @@ SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO, - ISD::MemIndexType IndexType) { + ISD::MemIndexType IndexType, + ISD::LoadExtType ExtTy) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData( - dl.getIROrder(), VTs, VT, MMO, IndexType)); + dl.getIROrder(), VTs, VT, MMO, IndexType, ExtTy)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7312,7 +7313,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, IndexType = TLI->getCanonicalIndexType(IndexType, VT, Ops[4]); auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO, IndexType); + VTs, VT, MMO, IndexType, ExtTy); createOperands(N, Ops); assert(N->getPassThru().getValueType() == N->getValueType(0) && diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fbbe54a1d34ada..170f3f2c4e1395 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4421,7 +4421,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { } SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, - Ops, MMO, IndexType); + Ops, MMO, IndexType, ISD::NON_EXTLOAD); PendingLoads.push_back(Gather.getValue(1)); setValue(&I, Gather); @@ -6932,6 +6932,27 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SetCC)); return; } + case Intrinsic::experimental_vector_insert: { + auto DL = getCurSDLoc(); + + SDValue Vec = getValue(I.getOperand(0)); + SDValue SubVec = getValue(I.getOperand(1)); + SDValue Index = getValue(I.getOperand(2)); + EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec, + Index)); + return; + } + case Intrinsic::experimental_vector_extract: { + auto DL = getCurSDLoc(); + + SDValue Vec = getValue(I.getOperand(0)); + SDValue Index = getValue(I.getOperand(1)); + EVT 
ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + + setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index)); + return; + } } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 82f102f27a089f..82b4de3d5449b0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -743,6 +743,25 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (MSt->isCompressingStore()) OS << ", compressing"; + OS << ">"; + } else if (const auto *MGather = dyn_cast(this)) { + OS << "<"; + printMemOperand(OS, *MGather->getMemOperand(), G); + + bool doExt = true; + switch (MGather->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << MGather->getMemoryVT().getEVTString(); + + auto Signed = MGather->isIndexSigned() ? "signed" : "unsigned"; + auto Scaled = MGather->isIndexScaled() ? "scaled" : "unscaled"; + OS << ", " << Signed << " " << Scaled << " offset"; + OS << ">"; } else if (const auto *MScatter = dyn_cast(this)) { OS << "<"; diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index c0e4451f52003c..f936f5756b6f0b 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -80,6 +80,17 @@ static CallInst *createCallHelper(Function *Callee, ArrayRef Ops, return CI; } +Value *IRBuilderBase::CreateVScale(Constant *Scaling, const Twine &Name) { + Module *M = GetInsertBlock()->getParent()->getParent(); + assert(isa(Scaling) && "Expected constant integer"); + Function *TheFn = + Intrinsic::getDeclaration(M, Intrinsic::vscale, {Scaling->getType()}); + CallInst *CI = createCallHelper(TheFn, {}, this, Name); + return cast(Scaling)->getSExtValue() == 1 + ? 
CI + : CreateMul(CI, Scaling); +} + CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size, MaybeAlign Align, bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag, diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index bc24d488d2f785..e3a2828be41aa8 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5138,6 +5138,26 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { break; } + case Intrinsic::experimental_vector_insert: { + VectorType *VecTy = cast(Call.getArgOperand(0)->getType()); + VectorType *SubVecTy = cast(Call.getArgOperand(1)->getType()); + + Assert(VecTy->getElementType() == SubVecTy->getElementType(), + "experimental_vector_insert parameters must have the same element " + "type.", + &Call); + break; + } + case Intrinsic::experimental_vector_extract: { + VectorType *ResultTy = cast(Call.getType()); + VectorType *VecTy = cast(Call.getArgOperand(0)->getType()); + + Assert(ResultTy->getElementType() == VecTy->getElementType(), + "experimental_vector_extract result must have the same element " + "type as the input vector.", + &Call); + break; + } }; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 20f5ded9935085..5d9c66e170eab7 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3836,6 +3836,26 @@ unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) { return AddrModes.find(Key)->second; } +unsigned getSignExtendedGatherOpcode(unsigned Opcode) { + switch (Opcode) { + default: + llvm_unreachable("unimplemented opcode"); + return Opcode; + case AArch64ISD::GLD1_MERGE_ZERO: + return AArch64ISD::GLD1S_MERGE_ZERO; + case AArch64ISD::GLD1_UXTW_MERGE_ZERO: + return AArch64ISD::GLD1S_UXTW_MERGE_ZERO; + case AArch64ISD::GLD1_SXTW_MERGE_ZERO: + return AArch64ISD::GLD1S_SXTW_MERGE_ZERO; + case AArch64ISD::GLD1_SCALED_MERGE_ZERO: + return AArch64ISD::GLD1S_SCALED_MERGE_ZERO; + case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO: + return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO; + case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO: + return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO; + } +} + bool getGatherScatterIndexIsExtended(SDValue Index) { unsigned Opcode = Index.getOpcode(); if (Opcode == ISD::SIGN_EXTEND_INREG) @@ -3865,6 +3885,7 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, SDValue PassThru = MGT->getPassThru(); SDValue Mask = MGT->getMask(); SDValue BasePtr = MGT->getBasePtr(); + ISD::LoadExtType ExtTy = MGT->getExtensionType(); ISD::MemIndexType IndexType = MGT->getIndexType(); bool IsScaled = @@ -3874,6 +3895,7 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, bool IdxNeedsExtend = getGatherScatterIndexIsExtended(Index) || Index.getSimpleValueType().getVectorElementType() == MVT::i32; + bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD; EVT VT = PassThru.getSimpleValueType(); EVT MemVT = MGT->getMemoryVT(); @@ -3900,9 +3922,12 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, if (getGatherScatterIndexIsExtended(Index)) Index = Index.getOperand(0); + unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend); + if (ResNeedsSignExtend) + Opcode = getSignExtendedGatherOpcode(Opcode); + SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT, PassThru}; - return DAG.getNode(getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend), DL, - VTs, Ops); + return DAG.getNode(Opcode, DL, VTs, Ops); } SDValue 
AArch64TargetLowering::LowerMSCATTER(SDValue Op, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e437b9291148d0..135b13b34f3329 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47438,7 +47438,8 @@ static SDValue rebuildGatherScatter(MaskedGatherScatterSDNode *GorS, return DAG.getMaskedGather(Gather->getVTList(), Gather->getMemoryVT(), DL, Ops, Gather->getMemOperand(), - Gather->getIndexType()); + Gather->getIndexType(), + Gather->getExtensionType()); } auto *Scatter = cast(GorS); SDValue Ops[] = { Scatter->getChain(), Scatter->getValue(), diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 3b0b12d4d7424f..22e4fd7d995589 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1652,6 +1652,102 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } break; } + case Intrinsic::experimental_vector_insert: { + Value *Vec = II->getArgOperand(0); + Value *SubVec = II->getArgOperand(1); + Value *Idx = II->getArgOperand(2); + auto *DstTy = dyn_cast(II->getType()); + auto *VecTy = dyn_cast(Vec->getType()); + auto *SubVecTy = dyn_cast(SubVec->getType()); + + // Only canonicalize if the destination vector, Vec, and SubVec are all + // fixed vectors. + if (DstTy && VecTy && SubVecTy) { + unsigned DstNumElts = DstTy->getNumElements(); + unsigned VecNumElts = VecTy->getNumElements(); + unsigned SubVecNumElts = SubVecTy->getNumElements(); + unsigned IdxN = cast(Idx)->getZExtValue(); + + // The result of this call is undefined if IdxN is not a constant multiple + // of the SubVec's minimum vector length OR the insertion overruns Vec. + if (IdxN % SubVecNumElts != 0 || IdxN + SubVecNumElts > VecNumElts) { + replaceInstUsesWith(CI, UndefValue::get(CI.getType())); + return eraseInstFromFunction(CI); + } + + // An insert that entirely overwrites Vec with SubVec is a nop. + if (VecNumElts == SubVecNumElts) { + replaceInstUsesWith(CI, SubVec); + return eraseInstFromFunction(CI); + } + + // Widen SubVec into a vector of the same width as Vec, since + // shufflevector requires the two input vectors to be the same width. + // Elements beyond the bounds of SubVec within the widened vector are + // undefined. + SmallVector WidenMask; + unsigned i; + for (i = 0; i != SubVecNumElts; ++i) + WidenMask.push_back(i); + for (; i != VecNumElts; ++i) + WidenMask.push_back(UndefMaskElem); + + Value *WidenShuffle = Builder.CreateShuffleVector( + SubVec, llvm::UndefValue::get(SubVecTy), WidenMask); + + SmallVector Mask; + for (unsigned i = 0; i != IdxN; ++i) + Mask.push_back(i); + for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i) + Mask.push_back(i); + for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i) + Mask.push_back(i); + + Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask); + replaceInstUsesWith(CI, Shuffle); + return eraseInstFromFunction(CI); + } + break; + } + case Intrinsic::experimental_vector_extract: { + Value *Vec = II->getArgOperand(0); + Value *Idx = II->getArgOperand(1); + + auto *DstTy = dyn_cast(II->getType()); + auto *VecTy = dyn_cast(Vec->getType()); + + // Only canonicalize if the the destination vector and Vec are fixed + // vectors. 
+ if (DstTy && VecTy) { + unsigned DstNumElts = DstTy->getNumElements(); + unsigned VecNumElts = VecTy->getNumElements(); + unsigned IdxN = cast(Idx)->getZExtValue(); + + // The result of this call is undefined if IdxN is not a constant multiple + // of the result type's minimum vector length OR the extraction overruns + // Vec. + if (IdxN % DstNumElts != 0 || IdxN + DstNumElts > VecNumElts) { + replaceInstUsesWith(CI, UndefValue::get(CI.getType())); + return eraseInstFromFunction(CI); + } + + // Extracting the entirety of Vec is a nop. + if (VecNumElts == DstNumElts) { + replaceInstUsesWith(CI, Vec); + return eraseInstFromFunction(CI); + } + + SmallVector Mask; + for (unsigned i = 0; i != DstNumElts; ++i) + Mask.push_back(IdxN + i); + + Value *Shuffle = + Builder.CreateShuffleVector(Vec, UndefValue::get(VecTy), Mask); + replaceInstUsesWith(CI, Shuffle); + return eraseInstFromFunction(CI); + } + break; + } default: { // Handle target specific intrinsics Optional V = targetInstCombineIntrinsic(*II); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6ba14e942ff804..a91fb988badf61 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -367,7 +367,6 @@ static Type *getMemInstValueType(Value *I) { /// type is irregular if its allocated size doesn't equal the store size of an /// element of the corresponding vector type at the given vectorization factor. static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) { - assert(!VF.isScalable() && "scalable vectors not yet supported."); // Determine if an array of VF elements of type Ty is "bitcast compatible" // with a vector. if (VF.isVector()) { @@ -1122,6 +1121,15 @@ static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName, return R; } +/// Return a value for Step multiplied by VF. +static Value *createStepForVF(IRBuilder<> &B, Constant *Step, ElementCount VF) { + assert(isa(Step) && "Expected an integer step"); + Constant *StepVal = ConstantInt::get( + Step->getType(), + cast(Step)->getSExtValue() * VF.getKnownMinValue()); + return VF.isScalable() ? B.CreateVScale(StepVal) : StepVal; +} + namespace llvm { void reportVectorizationFailure(const StringRef DebugMsg, @@ -1387,9 +1395,7 @@ class LoopVectorizationCostModel { /// width \p VF. Return CM_Unknown if this instruction did not pass /// through the cost modeling. InstWidening getWideningDecision(Instruction *I, ElementCount VF) { - assert(!VF.isScalable() && "scalable vectors not yet supported."); - assert(VF.isVector() && "Expected VF >=2"); - + assert(VF.isVector() && "Expected VF to be a vector VF"); // Cost model is not run in the VPlan-native path - return conservative // result until this changes. if (EnableVPlanNativePath) @@ -2280,8 +2286,6 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, const InductionDescriptor &ID) { // We shouldn't have to build scalar steps if we aren't vectorizing. assert(VF.isVector() && "VF should be greater than one"); - assert(!VF.isScalable() && - "the code below assumes a fixed number of elements at compile time"); // Get the value type and ensure it and the step have the same integer type. Type *ScalarIVTy = ScalarIV->getType()->getScalarType(); assert(ScalarIVTy == Step->getType() && @@ -2306,11 +2310,24 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, Cost->isUniformAfterVectorization(cast(EntryVal), VF) ? 
1 : VF.getKnownMinValue(); + assert((!VF.isScalable() || Lanes == 1) && + "Should never scalarize a scalable vector"); // Compute the scalar steps and save the results in VectorLoopValueMap. for (unsigned Part = 0; Part < UF; ++Part) { for (unsigned Lane = 0; Lane < Lanes; ++Lane) { - auto *StartIdx = getSignedIntOrFpConstant( - ScalarIVTy, VF.getKnownMinValue() * Part + Lane); + auto *IntStepTy = IntegerType::get(ScalarIVTy->getContext(), + ScalarIVTy->getScalarSizeInBits()); + Value *StartIdx = + createStepForVF(Builder, ConstantInt::get(IntStepTy, Part), VF); + if (ScalarIVTy->isFloatingPointTy()) + StartIdx = Builder.CreateSIToFP(StartIdx, ScalarIVTy); + StartIdx = addFastMathFlag(Builder.CreateBinOp( + AddOp, StartIdx, getSignedIntOrFpConstant(ScalarIVTy, Lane))); + // The step returned by `createStepForVF` is a runtime-evaluated value + // when VF is scalable. Otherwise, it should be folded into a Constant. + assert((VF.isScalable() || isa(StartIdx)) && + "Expected StartIdx to be folded to a constant when VF is not " + "scalable"); auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step)); auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul)); VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add); @@ -2353,10 +2370,11 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) { // is known to be uniform after vectorization, this corresponds to lane zero // of the Part unroll iteration. Otherwise, the last instruction is the one // we created for the last vector lane of the Part unroll iteration. - assert(!VF.isScalable() && "scalable vectors not yet supported."); unsigned LastLane = Cost->isUniformAfterVectorization(I, VF) ? 0 : VF.getKnownMinValue() - 1; + assert((!VF.isScalable() || LastLane == 0) && + "Scalable vectorization can't lead to any scalarized values."); auto *LastInst = cast( VectorLoopValueMap.getScalarValue(V, {Part, LastLane})); @@ -2698,7 +2716,6 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction( Type *ScalarDataTy = getMemInstValueType(Instr); - assert(!VF.isScalable() && "scalable vectors not yet supported."); auto *DataTy = VectorType::get(ScalarDataTy, VF); const Align Alignment = getLoadStoreAlignment(Instr); @@ -2731,6 +2748,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction( InBounds = gep->isInBounds(); if (Reverse) { + assert(!VF.isScalable() && + "Reversing vectors is not yet supported for scalable vectors."); + // If the address is consecutive but reversed, then the // wide store needs to start at the last vector element. PartPtr = cast(Builder.CreateGEP( @@ -2742,8 +2762,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction( if (isMaskRequired) // Reverse of a null all-one mask is a null mask. BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]); } else { - PartPtr = cast(Builder.CreateGEP( - ScalarDataTy, Ptr, Builder.getInt32(Part * VF.getKnownMinValue()))); + Value *Increment = createStepForVF(Builder, Builder.getInt32(Part), VF); + PartPtr = cast( + Builder.CreateGEP(ScalarDataTy, Ptr, Increment)); PartPtr->setIsInBounds(InBounds); } @@ -2948,8 +2969,7 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) { Type *Ty = TC->getType(); // This is where we can make the step a runtime constant. 
- assert(!VF.isScalable() && "scalable vectorization is not supported yet"); - Constant *Step = ConstantInt::get(Ty, VF.getKnownMinValue() * UF); + Value *Step = createStepForVF(Builder, ConstantInt::get(Ty, UF), VF); // If the tail is to be folded by masking, round the number of iterations N // up to a multiple of Step instead of rounding down. This is done by first @@ -2960,6 +2980,8 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) { if (Cost->foldTailByMasking()) { assert(isPowerOf2_32(VF.getKnownMinValue() * UF) && "VF*UF must be a power of 2 when folding tail by masking"); + assert(!VF.isScalable() && + "Tail folding not yet supported for scalable vectors"); TC = Builder.CreateAdd( TC, ConstantInt::get(Ty, VF.getKnownMinValue() * UF - 1), "n.rnd.up"); } @@ -3038,11 +3060,9 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, // If tail is to be folded, vector loop takes care of all iterations. Value *CheckMinIters = Builder.getFalse(); if (!Cost->foldTailByMasking()) { - assert(!VF.isScalable() && "scalable vectors not yet supported."); - CheckMinIters = Builder.CreateICmp( - P, Count, - ConstantInt::get(Count->getType(), VF.getKnownMinValue() * UF), - "min.iters.check"); + Value *Step = + createStepForVF(Builder, ConstantInt::get(Count->getType(), UF), VF); + CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check"); } // Create new preheader for vector loop. LoopVectorPreHeader = @@ -3521,8 +3541,8 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { Value *StartIdx = ConstantInt::get(IdxTy, 0); // The loop step is equal to the vectorization factor (num of SIMD elements) // times the unroll factor (num of SIMD instructions). - assert(!VF.isScalable() && "scalable vectors not yet supported."); - Constant *Step = ConstantInt::get(IdxTy, VF.getKnownMinValue() * UF); + Builder.SetInsertPoint(&*Lp->getHeader()->getFirstInsertionPt()); + Value *Step = createStepForVF(Builder, ConstantInt::get(IdxTy, UF), VF); Value *CountRoundDown = getOrCreateVectorTripCount(Lp); Induction = createInductionVariable(Lp, StartIdx, CountRoundDown, Step, @@ -3902,8 +3922,10 @@ void InnerLoopVectorizer::fixVectorizedLoop() { // profile is not inherently precise anyway. Note also possible bypass of // vector code caused by legality checks is ignored, assigning all the weight // to the vector loop, optimistically. - assert(!VF.isScalable() && - "cannot use scalable ElementCount to determine unroll factor"); + // + // For scalable vectorization we can't know at compile time how many iterations + // of the loop are handled in one vector iteration, so instead assume a pessimistic + // vscale of '1'. setProfileInfoAfterUnrolling( LI->getLoopFor(LoopScalarBody), LI->getLoopFor(LoopVectorBody), LI->getLoopFor(LoopScalarBody), VF.getKnownMinValue() * UF); @@ -4366,7 +4388,6 @@ void InnerLoopVectorizer::clearReductionWrapFlags( } void InnerLoopVectorizer::fixLCSSAPHIs() { - assert(!VF.isScalable() && "the code below assumes fixed width vectors"); for (PHINode &LCSSAPhi : LoopExitBlock->phis()) { if (LCSSAPhi.getNumIncomingValues() == 1) { auto *IncomingValue = LCSSAPhi.getIncomingValue(0); @@ -4377,6 +4398,8 @@ void InnerLoopVectorizer::fixLCSSAPHIs() { cast(IncomingValue), VF) ? 0 : VF.getKnownMinValue() - 1; + assert((!VF.isScalable() || LastLane == 0) && + "scalable vectors dont support non-uniform scalars yet"); // Can be a loop invariant incoming value or the last scalar value to be // extracted from the vectorized loop. 
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); @@ -4709,7 +4732,6 @@ static bool mayDivideByZero(Instruction &I) { void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def, VPUser &User, VPTransformState &State) { - assert(!VF.isScalable() && "scalable vectors not yet supported."); switch (I.getOpcode()) { case Instruction::Call: case Instruction::Br: @@ -4797,7 +4819,6 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def, setDebugLocFromInst(Builder, CI); /// Vectorize casts. - assert(!VF.isScalable() && "VF is assumed to be non scalable."); Type *DestTy = (VF.isScalar()) ? CI->getType() : VectorType::get(CI->getType(), VF); @@ -5099,7 +5120,6 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, ElementCount VF) { - assert(!VF.isScalable() && "scalable vectors not yet supported."); if (!blockNeedsPredication(I->getParent())) return false; switch(I->getOpcode()) { @@ -5532,7 +5552,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { ElementCount LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount, ElementCount UserVF) { - assert(!UserVF.isScalable() && "scalable vectorization not yet handled"); MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); unsigned SmallestType, WidestType; std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes(); @@ -5545,6 +5564,11 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount, unsigned MaxSafeVectorWidthInBits = Legal->getMaxSafeVectorWidthInBits(); if (UserVF.isNonZero()) { + // For now, don't verify legality of scalable vectors. + // This will be addressed properly in https://reviews.llvm.org/D91718. + if (UserVF.isScalable()) + return UserVF; + // If legally unsafe, clamp the user vectorization factor to a safe value. unsigned MaxSafeVF = PowerOf2Floor(MaxSafeVectorWidthInBits / WidestType); if (UserVF.getFixedValue() <= MaxSafeVF) @@ -5633,6 +5657,9 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount, VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(ElementCount MaxVF) { + // FIXME: This can be fixed for scalable vectors later, because at this stage + // the LoopVectorizer will only consider vectorizing a loop with scalable + // vectors when the loop has a hint to enable vectorization for a given VF. assert(!MaxVF.isScalable() && "scalable vectors not yet supported"); float Cost = expectedCost(ElementCount::getFixed(1)).first; @@ -5942,7 +5969,6 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, } // Clamp the interleave ranges to reasonable counts. - assert(!VF.isScalable() && "scalable vectors not yet supported."); unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF.getKnownMinValue()); @@ -5958,6 +5984,13 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, // If trip count is known or estimated compile time constant, limit the // interleave count to be less than the trip count divided by VF, provided it // is at least 1. + // + // For scalable vectors we can't know if interleaving is beneficial. It may + // not be beneficial for small loops if none of the lanes in the second vector + // iterations is enabled. However, for larger loops, there is likely to be a + // similar benefit as for fixed-width vectors. 
For now, we choose to leave + // the InterleaveCount as if vscale is '1', although if some information about + // the vector is known (e.g. min vector size), we can make a better decision. if (BestKnownTC) { MaxInterleaveCount = std::min(*BestKnownTC / VF.getKnownMinValue(), MaxInterleaveCount); @@ -6001,7 +6034,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, // potentially expose ILP opportunities. LLVM_DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n' << "LV: IC is " << IC << '\n' - << "LV: VF is " << VF.getKnownMinValue() << '\n'); + << "LV: VF is " << VF << '\n'); const bool AggressivelyInterleaveReductions = TTI.enableAggressiveInterleaving(HasReductions); if (!InterleavingRequiresRuntimePointerCheck && LoopCost < SmallLoopCost) { @@ -6420,7 +6453,6 @@ int LoopVectorizationCostModel::computePredInstDiscount( LoopVectorizationCostModel::VectorizationCostTy LoopVectorizationCostModel::expectedCost(ElementCount VF) { - assert(!VF.isScalable() && "scalable vectors not yet supported."); VectorizationCostTy Cost; // For each block. @@ -6669,8 +6701,6 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I, LoopVectorizationCostModel::VectorizationCostTy LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF) { - assert(!VF.isScalable() && - "the cost model is not yet implemented for scalable vectorization"); // If we know that this instruction will remain uniform, check the cost of // the scalar version. if (isUniformAfterVectorization(I, VF)) @@ -6734,7 +6764,6 @@ unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I, } void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) { - assert(!VF.isScalable() && "scalable vectors not yet supported."); if (VF.isScalar()) return; NumPredStores = 0; @@ -7321,7 +7350,6 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) { Optional LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { - assert(!UserVF.isScalable() && "scalable vectorization not yet handled"); assert(OrigLoop->isInnermost() && "Inner loop expected."); Optional MaybeMaxVF = CM.computeMaxVF(UserVF, UserIC); if (!MaybeMaxVF) // Cases that should not to be vectorized nor interleaved. @@ -7344,9 +7372,9 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { ElementCount MaxVF = MaybeMaxVF.getValue(); assert(MaxVF.isNonZero() && "MaxVF is zero."); - if (!UserVF.isZero() && UserVF.getFixedValue() <= MaxVF.getFixedValue()) { + if (!UserVF.isZero() && ElementCount::isKnownLE(UserVF, MaxVF)) { LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); - assert(isPowerOf2_32(UserVF.getFixedValue()) && + assert(isPowerOf2_32(UserVF.getKnownMinValue()) && "VF needs to be a power of two"); // Collect the instructions (and their associated costs) that will be more // profitable to scalarize. @@ -7357,6 +7385,9 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { return {{UserVF, 0}}; } + assert(!MaxVF.isScalable() && + "Scalable vectors not yet supported beyond this point"); + for (ElementCount VF = ElementCount::getFixed(1); ElementCount::isKnownLE(VF, MaxVF); VF *= 2) { // Collect Uniform and Scalar instructions after vectorization with VF. 
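To make the vectorizer changes more concrete: where the loop skeleton previously used the compile-time constant VF * UF as its step, createStepForVF now routes that value through IRBuilder::CreateVScale whenever VF is scalable, so the step becomes a runtime quantity. A minimal sketch of the IR this produces, assuming VF = vscale x 4 and UF = 2 (names invented for illustration):

    ; createStepForVF(Builder, ConstantInt::get(Ty, 2 /*UF*/), VF) with
    ; VF = vscale x 4 yields "vscale * 8" instead of the old constant 8.
    define i64 @step_for_vf_sketch() {
      %vscale = call i64 @llvm.vscale.i64()
      %step = mul i64 %vscale, 8
      ret i64 %step
    }

    declare i64 @llvm.vscale.i64()

The same runtime step feeds the min.iters.check comparison, the primary induction increment in the vector loop, and the per-part pointer GEPs in vectorizeMemoryInstruction.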
@@ -7935,7 +7966,6 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, "Must be called with either a load or store"); auto willWiden = [&](ElementCount VF) -> bool { - assert(!VF.isScalable() && "unexpected scalable ElementCount"); if (VF.isScalar()) return false; LoopVectorizationCostModel::InstWidening Decision = @@ -8701,6 +8731,7 @@ void VPReductionRecipe::execute(VPTransformState &State) { void VPReplicateRecipe::execute(VPTransformState &State) { if (State.Instance) { // Generate a single instance. + assert(!State.VF.isScalable() && "Can't scalarize a scalable vector"); State.ILV->scalarizeInstruction(getUnderlyingInstr(), *this, *State.Instance, IsPredicated, State); // Insert scalar instance packing it into a vector. @@ -8723,6 +8754,8 @@ void VPReplicateRecipe::execute(VPTransformState &State) { // instruction is uniform inwhich case generate only the first lane for each // of the UF parts. unsigned EndLane = IsUniform ? 1 : State.VF.getKnownMinValue(); + assert((!State.VF.isScalable() || IsUniform) && + "Can't scalarize a scalable vector"); for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) State.ILV->scalarizeInstruction(getUnderlyingInstr(), *this, {Part, Lane}, @@ -8876,12 +8909,6 @@ static bool processLoopInVPlanNativePath( // Get user vectorization factor. ElementCount UserVF = Hints.getWidth(); - if (UserVF.isScalable()) { - // TODO: Use scalable UserVF once we've added initial support for scalable - // vectorization. For now we convert it to fixed width, but this will be - // removed in a later patch. - UserVF = ElementCount::getFixed(UserVF.getKnownMinValue()); - } // Plan how to best vectorize, return the best VF and its cost. const VectorizationFactor VF = LVP.planInVPlanNativePath(UserVF); @@ -9047,13 +9074,6 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Get user vectorization factor and interleave count. ElementCount UserVF = Hints.getWidth(); - if (UserVF.isScalable()) { - // TODO: Use scalable UserVF once we've added initial support for scalable - // vectorization. For now we convert it to fixed width, but this will be - // removed in a later patch. - UserVF = ElementCount::getFixed(UserVF.getKnownMinValue()); - } - unsigned UserIC = Hints.getInterleave(); // Plan how to best vectorize, return the best VF and its cost. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 1dd81fa30af34e..d24f19e6bffa94 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -163,7 +163,6 @@ struct VectorizerValueMap { assert(Instance.Part < UF && "Queried Scalar Part is too large."); assert(Instance.Lane < VF.getKnownMinValue() && "Queried Scalar Lane is too large."); - assert(!VF.isScalable() && "VF is assumed to be non scalable."); if (!hasAnyScalarValue(Key)) return false; diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll new file mode 100644 index 00000000000000..c9a2223946e65f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll @@ -0,0 +1,138 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s --check-prefixes=CHECK +; RUN: FileCheck --check-prefix=WARN --allow-empty %s < %t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. +; WARN-NOT: warning + +; Should codegen to a nop, since idx is zero. 
+define <2 x i64> @extract_v2i64_nxv2i64( %vec) nounwind { +; CHECK-LABEL: extract_v2i64_nxv2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64( %vec, i64 0) + ret <2 x i64> %retval +} + +; Goes through memory currently; idx != 0. +define <2 x i64> @extract_v2i64_nxv2i64_idx1( %vec) nounwind { +; CHECK-LABEL: extract_v2i64_nxv2i64_idx1: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntd x8 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: cmp x8, #1 // =1 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: csinc x8, x8, xzr, lo +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: lsl x8, x8, #3 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +%retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64( %vec, i64 1) +ret <2 x i64> %retval +} + +; Should codegen to a nop, since idx is zero. +define <4 x i32> @extract_v4i32_nxv4i32( %vec) nounwind { +; CHECK-LABEL: extract_v4i32_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret +%retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32( %vec, i64 0) +ret <4 x i32> %retval +} + +; Goes through memory currently; idx != 0. +define <4 x i32> @extract_v4i32_nxv4i32_idx1( %vec) nounwind { +; CHECK-LABEL: extract_v4i32_nxv4i32_idx1: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntw x8 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: cmp x8, #1 // =1 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: csinc x8, x8, xzr, lo +; CHECK-NEXT: st1w { z0.s }, p0, [sp] +; CHECK-NEXT: lsl x8, x8, #2 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32( %vec, i64 1) + ret <4 x i32> %retval +} + +; Should codegen to a nop, since idx is zero. +define <8 x i16> @extract_v8i16_nxv8i16( %vec) nounwind { +; CHECK-LABEL: extract_v8i16_nxv8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16( %vec, i64 0) + ret <8 x i16> %retval +} + +; Goes through memory currently; idx != 0. +define <8 x i16> @extract_v8i16_nxv8i16_idx1( %vec) nounwind { +; CHECK-LABEL: extract_v8i16_nxv8i16_idx1: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cnth x8 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: cmp x8, #1 // =1 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: csinc x8, x8, xzr, lo +; CHECK-NEXT: st1h { z0.h }, p0, [sp] +; CHECK-NEXT: lsl x8, x8, #1 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16( %vec, i64 1) + ret <8 x i16> %retval +} + +; Should codegen to a nop, since idx is zero. 
+define <16 x i8> @extract_v16i8_nxv16i8( %vec) nounwind { +; CHECK-LABEL: extract_v16i8_nxv16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8( %vec, i64 0) + ret <16 x i8> %retval +} + +; Goes through memory currently; idx != 0. +define <16 x i8> @extract_v16i8_nxv16i8_idx1( %vec) nounwind { +; CHECK-LABEL: extract_v16i8_nxv16i8_idx1: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: cmp x8, #1 // =1 +; CHECK-NEXT: st1b { z0.b }, p0, [sp] +; CHECK-NEXT: csinc x8, x8, xzr, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8( %vec, i64 1) + ret <16 x i8> %retval +} + +declare <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(, i64) +declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(, i64) +declare <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(, i64) +declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(, i64) diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll new file mode 100644 index 00000000000000..c1de878a32ac9b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -0,0 +1,184 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s --check-prefixes=CHECK +; RUN: FileCheck --check-prefix=WARN --allow-empty %s < %t +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. +; WARN-NOT: warning + +define @insert_v2i64_nxv2i64( %vec, <2 x i64> %subvec) nounwind { +; CHECK-LABEL: insert_v2i64_nxv2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntd x8 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: cmp x8, #0 // =0 +; CHECK-NEXT: csel x8, x8, xzr, lo +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: lsl x8, x8, #3 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call @llvm.experimental.vector.insert.nxv2i64.v2i64( %vec, <2 x i64> %subvec, i64 0) + ret %retval +} + +define @insert_v2i64_nxv2i64_idx1( %vec, <2 x i64> %subvec) nounwind { +; CHECK-LABEL: insert_v2i64_nxv2i64_idx1: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntd x8 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: cmp x8, #1 // =1 +; CHECK-NEXT: csinc x8, x8, xzr, lo +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: lsl x8, x8, #3 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call @llvm.experimental.vector.insert.nxv2i64.v2i64( %vec, <2 x i64> %subvec, i64 1) + ret %retval +} + +define @insert_v4i32_nxv4i32( %vec, <4 x i32> %subvec) nounwind { +; CHECK-LABEL: insert_v4i32_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntw x8 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: cmp x8, #0 // =0 +; CHECK-NEXT: csel x8, x8, xzr, lo +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl x8, x8, #2 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1w { z0.s }, p0, [sp] +; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call @llvm.experimental.vector.insert.nxv4i32.v4i32( %vec, <4 x i32> %subvec, i64 0) + ret %retval +} + +define @insert_v4i32_nxv4i32_idx1( %vec, <4 x i32> %subvec) nounwind { +; CHECK-LABEL: insert_v4i32_nxv4i32_idx1: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntw x8 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: cmp x8, #1 // =1 +; CHECK-NEXT: csinc x8, x8, xzr, lo +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl x8, x8, #2 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1w { z0.s }, p0, [sp] +; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call @llvm.experimental.vector.insert.nxv4i32.v4i32( %vec, <4 x i32> %subvec, i64 1) + ret %retval +} + +define @insert_v8i16_nxv8i16( %vec, <8 x i16> %subvec) nounwind { +; CHECK-LABEL: insert_v8i16_nxv8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cnth x8 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: cmp x8, #0 // =0 +; CHECK-NEXT: csel x8, x8, xzr, lo +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: lsl x8, x8, #1 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1h { z0.h }, p0, [sp] +; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call @llvm.experimental.vector.insert.nxv8i16.v8i16( %vec, <8 x i16> %subvec, i64 0) + ret %retval +} + +define @insert_v8i16_nxv8i16_idx1( %vec, <8 x i16> %subvec) nounwind { +; CHECK-LABEL: insert_v8i16_nxv8i16_idx1: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cnth x8 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: cmp x8, #1 // =1 +; CHECK-NEXT: csinc x8, x8, xzr, lo +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: lsl x8, x8, #1 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1h { z0.h }, p0, [sp] +; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call @llvm.experimental.vector.insert.nxv8i16.v8i16( %vec, <8 x i16> %subvec, i64 1) + ret %retval +} + +define @insert_v16i8_nxv16i8( %vec, <16 x i8> %subvec) nounwind { +; CHECK-LABEL: insert_v16i8_nxv16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: cmp x8, #0 // =0 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: csel x8, x8, xzr, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1b { z0.b }, p0, [sp] +; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call @llvm.experimental.vector.insert.nxv16i8.v16i8( %vec, <16 x i8> %subvec, i64 0) + ret %retval +} + +define @insert_v16i8_nxv16i8_idx1( %vec, <16 x i8> %subvec) nounwind { +; CHECK-LABEL: insert_v16i8_nxv16i8_idx1: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: cmp x8, #1 // =1 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: csinc x8, x8, xzr, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1b { z0.b }, p0, [sp] +; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call @llvm.experimental.vector.insert.nxv16i8.v16i8( %vec, <16 x i8> %subvec, i64 1) + ret %retval +} + +declare @llvm.experimental.vector.insert.nxv2i64.v2i64(, <2 x i64>, i64) +declare @llvm.experimental.vector.insert.nxv4i32.v4i32(, <4 x i32>, i64) +declare @llvm.experimental.vector.insert.nxv8i16.v8i16(, <8 x i16>, i64) +declare @llvm.experimental.vector.insert.nxv16i8.v16i8(, <16 x i8>, i64) diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-signed-scaled.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-signed-scaled.ll index 32dca0d26cdc07..e6b89b0070d6de 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-signed-scaled.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-signed-scaled.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; unscaled unpacked 32-bit offsets @@ -9,7 +10,6 @@ define @masked_gather_nxv2i16(i16* %base, ; CHECK-LABEL: masked_gather_nxv2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1] -; CHECK-NEXT: and z0.d, z0.d, #0xffff ; CHECK-NEXT: ret %ptrs = getelementptr i16, i16* %base, %offsets %vals = call @llvm.masked.gather.nxv2i16( %ptrs, i32 2, %mask, undef) @@ -21,7 +21,6 @@ define @masked_gather_nxv2i32(i32* %base, ; 
CHECK-LABEL: masked_gather_nxv2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2] -; CHECK-NEXT: and z0.d, z0.d, #0xffffffff ; CHECK-NEXT: ret %ptrs = getelementptr i32, i32* %base, %offsets %vals = call @llvm.masked.gather.nxv2i32( %ptrs, i32 4, %mask, undef) @@ -72,9 +71,7 @@ define @masked_gather_nxv2f64(double* %base, @masked_sgather_nxv2i16(i16* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxth z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1] ; CHECK-NEXT: ret %ptrs = getelementptr i16, i16* %base, %offsets %vals = call @llvm.masked.gather.nxv2i16( %ptrs, i32 2, %mask, undef) @@ -85,9 +82,7 @@ define @masked_sgather_nxv2i16(i16* %base, define @masked_sgather_nxv2i32(i32* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2] ; CHECK-NEXT: ret %ptrs = getelementptr i32, i32* %base, %offsets %vals = call @llvm.masked.gather.nxv2i32( %ptrs, i32 4, %mask, undef) @@ -103,7 +98,6 @@ define @masked_gather_nxv4i16(i16* %base, ; CHECK-LABEL: masked_gather_nxv4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1] -; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: ret %ptrs = getelementptr i16, i16* %base, %offsets %vals = call @llvm.masked.gather.nxv4i16( %ptrs, i32 2, %mask, undef) @@ -144,9 +138,7 @@ define @masked_gather_nxv4f32(float* %base, @masked_sgather_nxv4i16(i16* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sxth z0.s, p0/m, z0.s +; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1] ; CHECK-NEXT: ret %ptrs = getelementptr i16, i16* %base, %offsets %vals = call @llvm.masked.gather.nxv4i16( %ptrs, i32 2, %mask, undef) diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-signed-unscaled.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-signed-unscaled.ll index 1fc048a3adf7e1..2d4ce50e8464fc 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-signed-unscaled.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-signed-unscaled.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; unscaled unpacked 32-bit offsets @@ -9,7 +10,6 @@ define @masked_gather_nxv2i8(i8* %base, %o ; CHECK-LABEL: masked_gather_nxv2i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d, sxtw] -; CHECK-NEXT: and z0.d, z0.d, #0xff ; CHECK-NEXT: ret %ptrs = getelementptr i8, i8* %base, %offsets %vals = call @llvm.masked.gather.nxv2i8( %ptrs, i32 1, %mask, undef) @@ -21,7 +21,6 @@ define @masked_gather_nxv2i16(i8* %base, % ; CHECK-LABEL: masked_gather_nxv2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw] -; CHECK-NEXT: and z0.d, z0.d, #0xffff ; CHECK-NEXT: ret %byte_ptrs = getelementptr i8, i8* %base, %offsets %ptrs = bitcast %byte_ptrs to @@ -34,7 +33,6 @@ define 
@masked_gather_nxv2i32(i8* %base, % ; CHECK-LABEL: masked_gather_nxv2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw] -; CHECK-NEXT: and z0.d, z0.d, #0xffffffff ; CHECK-NEXT: ret %byte_ptrs = getelementptr i8, i8* %base, %offsets %ptrs = bitcast %byte_ptrs to @@ -90,9 +88,7 @@ define @masked_gather_nxv2f64(i8* %base, @masked_sgather_nxv2i8(i8* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d, sxtw] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxtb z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d, sxtw] ; CHECK-NEXT: ret %ptrs = getelementptr i8, i8* %base, %offsets %vals = call @llvm.masked.gather.nxv2i8( %ptrs, i32 1, %mask, undef) @@ -103,9 +99,7 @@ define @masked_sgather_nxv2i8(i8* %base, % define @masked_sgather_nxv2i16(i8* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxth z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw] ; CHECK-NEXT: ret %byte_ptrs = getelementptr i8, i8* %base, %offsets %ptrs = bitcast %byte_ptrs to @@ -117,9 +111,7 @@ define @masked_sgather_nxv2i16(i8* %base, define @masked_sgather_nxv2i32(i8* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw] ; CHECK-NEXT: ret %byte_ptrs = getelementptr i8, i8* %base, %offsets %ptrs = bitcast %byte_ptrs to @@ -136,7 +128,6 @@ define @masked_gather_nxv4i8(i8* %base, %o ; CHECK-LABEL: masked_gather_nxv4i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, z0.s, sxtw] -; CHECK-NEXT: and z0.s, z0.s, #0xff ; CHECK-NEXT: ret %ptrs = getelementptr i8, i8* %base, %offsets %vals = call @llvm.masked.gather.nxv4i8( %ptrs, i32 1, %mask, undef) @@ -148,7 +139,6 @@ define @masked_gather_nxv4i16(i8* %base, % ; CHECK-LABEL: masked_gather_nxv4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw] -; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: ret %byte_ptrs = getelementptr i8, i8* %base, %offsets %ptrs = bitcast %byte_ptrs to @@ -193,9 +183,7 @@ define @masked_gather_nxv4f32(i8* %base, define @masked_sgather_nxv4i8(i8* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, z0.s, sxtw] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sxtb z0.s, p0/m, z0.s +; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw] ; CHECK-NEXT: ret %ptrs = getelementptr i8, i8* %base, %offsets %vals = call @llvm.masked.gather.nxv4i8( %ptrs, i32 1, %mask, undef) @@ -206,9 +194,7 @@ define @masked_sgather_nxv4i8(i8* %base, % define @masked_sgather_nxv4i16(i8* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sxth z0.s, p0/m, z0.s +; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw] ; CHECK-NEXT: ret %byte_ptrs = getelementptr i8, i8* %base, %offsets %ptrs = bitcast %byte_ptrs to diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-unsigned-scaled.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-unsigned-scaled.ll index ada49b7fecbc66..41f1eb4e94d4c4 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-unsigned-scaled.ll +++ 
b/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-unsigned-scaled.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; unscaled unpacked 32-bit offsets @@ -9,7 +10,6 @@ define @masked_gather_nxv2i16(i16* %base, ; CHECK-LABEL: masked_gather_nxv2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1] -; CHECK-NEXT: and z0.d, z0.d, #0xffff ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %ptrs = getelementptr i16, i16* %base, %offsets.zext @@ -22,7 +22,6 @@ define @masked_gather_nxv2i32(i32* %base, ; CHECK-LABEL: masked_gather_nxv2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2] -; CHECK-NEXT: and z0.d, z0.d, #0xffffffff ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %ptrs = getelementptr i32, i32* %base, %offsets.zext @@ -78,9 +77,7 @@ define @masked_gather_nxv2f64(double* %base, @masked_sgather_nxv2i16(i16* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxth z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1] ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %ptrs = getelementptr i16, i16* %base, %offsets.zext @@ -92,9 +89,7 @@ define @masked_sgather_nxv2i16(i16* %base, define @masked_sgather_nxv2i32(i32* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2] ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %ptrs = getelementptr i32, i32* %base, %offsets.zext @@ -111,7 +106,6 @@ define @masked_gather_nxv4i16(i16* %base, ; CHECK-LABEL: masked_gather_nxv4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1] -; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %ptrs = getelementptr i16, i16* %base, %offsets.zext @@ -156,9 +150,7 @@ define @masked_gather_nxv4f32(float* %base, @masked_sgather_nxv4i16(i16* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sxth z0.s, p0/m, z0.s +; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw #1] ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %ptrs = getelementptr i16, i16* %base, %offsets.zext diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-unsigned-unscaled.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-unsigned-unscaled.ll index 61b8e3e53e23c5..51ab73c14ac9e0 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-unsigned-unscaled.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-32b-unsigned-unscaled.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; 
unscaled unpacked 32-bit offsets @@ -9,7 +10,6 @@ define @masked_gather_nxv2i8(i8* %base, %o ; CHECK-LABEL: masked_gather_nxv2i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d, uxtw] -; CHECK-NEXT: and z0.d, z0.d, #0xff ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %ptrs = getelementptr i8, i8* %base, %offsets.zext @@ -22,7 +22,6 @@ define @masked_gather_nxv2i16(i8* %base, % ; CHECK-LABEL: masked_gather_nxv2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw] -; CHECK-NEXT: and z0.d, z0.d, #0xffff ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %byte_ptrs = getelementptr i8, i8* %base, %offsets.zext @@ -36,7 +35,6 @@ define @masked_gather_nxv2i32(i8* %base, % ; CHECK-LABEL: masked_gather_nxv2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw] -; CHECK-NEXT: and z0.d, z0.d, #0xffffffff ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %byte_ptrs = getelementptr i8, i8* %base, %offsets.zext @@ -97,9 +95,7 @@ define @masked_gather_nxv2f64(i8* %base, @masked_sgather_nxv2i8(i8* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d, uxtw] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxtb z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d, uxtw] ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %ptrs = getelementptr i8, i8* %base, %offsets.zext @@ -111,9 +107,7 @@ define @masked_sgather_nxv2i8(i8* %base, % define @masked_sgather_nxv2i16(i8* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxth z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw] ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %byte_ptrs = getelementptr i8, i8* %base, %offsets.zext @@ -126,9 +120,7 @@ define @masked_sgather_nxv2i16(i8* %base, define @masked_sgather_nxv2i32(i8* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw] ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %byte_ptrs = getelementptr i8, i8* %base, %offsets.zext @@ -146,7 +138,6 @@ define @masked_gather_nxv4i8(i8* %base, %o ; CHECK-LABEL: masked_gather_nxv4i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw] -; CHECK-NEXT: and z0.s, z0.s, #0xff ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %ptrs = getelementptr i8, i8* %base, %offsets.zext @@ -159,7 +150,6 @@ define @masked_gather_nxv4i16(i8* %base, % ; CHECK-LABEL: masked_gather_nxv4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw] -; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %byte_ptrs = getelementptr i8, i8* %base, %offsets.zext @@ -208,9 +198,7 @@ define @masked_gather_nxv4f32(i8* %base, define @masked_sgather_nxv4i8(i8* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sxtb z0.s, p0/m, z0.s +; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw] ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %ptrs = getelementptr i8, i8* %base, %offsets.zext @@ -222,9 +210,7 @@ define @masked_sgather_nxv4i8(i8* %base, % define @masked_sgather_nxv4i16(i8* %base, %offsets, %mask) { ; CHECK-LABEL: 
masked_sgather_nxv4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sxth z0.s, p0/m, z0.s +; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw] ; CHECK-NEXT: ret %offsets.zext = zext %offsets to %byte_ptrs = getelementptr i8, i8* %base, %offsets.zext diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-scaled.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-scaled.ll index 197ed69ee52f55..15dfcc61316e10 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-scaled.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-scaled.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s define @masked_gather_nxv2i16(i16* %base, %offsets, %mask) { ; CHECK-LABEL: masked_gather_nxv2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, lsl #1] -; CHECK-NEXT: and z0.d, z0.d, #0xffff ; CHECK-NEXT: ret %ptrs = getelementptr i16, i16* %base, %offsets %vals = call @llvm.masked.gather.nxv2i16( %ptrs, i32 2, %mask, undef) @@ -17,7 +17,6 @@ define @masked_gather_nxv2i32(i32* %base, ; CHECK-LABEL: masked_gather_nxv2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, lsl #2] -; CHECK-NEXT: and z0.d, z0.d, #0xffffffff ; CHECK-NEXT: ret %ptrs = getelementptr i32, i32* %base, %offsets %vals = call @llvm.masked.gather.nxv2i32( %ptrs, i32 4, %mask, undef) @@ -68,9 +67,7 @@ define @masked_gather_nxv2f64(double* %base, @masked_sgather_nxv2i16(i16* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, lsl #1] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxth z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d, lsl #1] ; CHECK-NEXT: ret %ptrs = getelementptr i16, i16* %base, %offsets %vals = call @llvm.masked.gather.nxv2i16( %ptrs, i32 2, %mask, undef) @@ -81,9 +78,7 @@ define @masked_sgather_nxv2i16(i16* %base, define @masked_sgather_nxv2i32(i32* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, lsl #2] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d, lsl #2] ; CHECK-NEXT: ret %ptrs = getelementptr i32, i32* %base, %offsets %vals = call @llvm.masked.gather.nxv2i32( %ptrs, i32 4, %mask, undef) diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-unscaled.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-unscaled.ll index 3f4f54c5d8393f..3320b88691eefd 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-unscaled.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-64b-unscaled.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s define @masked_gather_nxv2i8(i8* %base, %offsets, %mask) { ; CHECK-LABEL: masked_gather_nxv2i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d] -; CHECK-NEXT: and z0.d, z0.d, #0xff ; CHECK-NEXT: ret %ptrs = getelementptr i8, i8* %base, %offsets %vals = call @llvm.masked.gather.nxv2i8( %ptrs, i32 1, %mask, undef) 
@@ -17,7 +17,6 @@ define @masked_gather_nxv2i16(i8* %base, % ; CHECK-LABEL: masked_gather_nxv2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d] -; CHECK-NEXT: and z0.d, z0.d, #0xffff ; CHECK-NEXT: ret %byte_ptrs = getelementptr i8, i8* %base, %offsets %ptrs = bitcast %byte_ptrs to @@ -30,7 +29,6 @@ define @masked_gather_nxv2i32(i8* %base, % ; CHECK-LABEL: masked_gather_nxv2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d] -; CHECK-NEXT: and z0.d, z0.d, #0xffffffff ; CHECK-NEXT: ret %byte_ptrs = getelementptr i8, i8* %base, %offsets %ptrs = bitcast %byte_ptrs to @@ -86,9 +84,7 @@ define @masked_gather_nxv2f64(i8* %base, @masked_sgather_nxv2i8(i8* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxtb z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d] ; CHECK-NEXT: ret %ptrs = getelementptr i8, i8* %base, %offsets %vals = call @llvm.masked.gather.nxv2i8( %ptrs, i32 1, %mask, undef) @@ -99,9 +95,7 @@ define @masked_sgather_nxv2i8(i8* %base, % define @masked_sgather_nxv2i16(i8* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxth z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d] ; CHECK-NEXT: ret %byte_ptrs = getelementptr i8, i8* %base, %offsets %ptrs = bitcast %byte_ptrs to @@ -113,9 +107,7 @@ define @masked_sgather_nxv2i16(i8* %base, define @masked_sgather_nxv2i32(i8* %base, %offsets, %mask) { ; CHECK-LABEL: masked_sgather_nxv2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d] ; CHECK-NEXT: ret %byte_ptrs = getelementptr i8, i8* %base, %offsets %ptrs = bitcast %byte_ptrs to diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll index 962ba079ca9ef1..076edc1fd86da4 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll @@ -1,5 +1,46 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s + +; Test for multiple uses of the mgather where the s/zext should not be combined + +define @masked_sgather_sext(i8* %base, %offsets, %mask, %vals) { +; CHECK-LABEL: masked_sgather_sext: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtb z2.d, p0/m, z0.d +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: sxtb z0.d, p0/m, z0.d +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ret + %ptrs = getelementptr i8, i8* %base, %offsets + %data = call @llvm.masked.gather.nxv2i8( %ptrs, i32 1, %mask, undef) + %data.sext = sext %data to + %add = add %data, %vals + %add.sext = sext %add to + %mul = mul %data.sext, %add.sext + ret %mul +} + +define @masked_sgather_zext(i8* %base, %offsets, %mask, %vals) { +; CHECK-LABEL: masked_sgather_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: add z1.d, z0.d, z1.d 
+; CHECK-NEXT: and z0.d, z0.d, #0xff +; CHECK-NEXT: and z1.d, z1.d, #0xff +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %ptrs = getelementptr i8, i8* %base, %offsets + %data = call @llvm.masked.gather.nxv2i8( %ptrs, i32 1, %mask, undef) + %data.zext = zext %data to + %add = add %data, %vals + %add.zext = zext %add to + %mul = mul %data.zext, %add.zext + ret %mul +} ; Tests that exercise various type legalisation scenarios for ISD::MGATHER. @@ -7,7 +48,7 @@ define @masked_gather_nxv2i32( %ptrs, %mask) { ; CHECK-LABEL: masked_gather_nxv2i32: ; CHECK-DAG: mov x8, xzr -; CHECK-DAG: ld1w { z0.d }, p0/z, [x8, z0.d] +; CHECK-DAG: ld1sw { z0.d }, p0/z, [x8, z0.d] ; CHECK: ret %data = call @llvm.masked.gather.nxv2i32( %ptrs, i32 4, %mask, undef) ret %data @@ -41,8 +82,8 @@ define @masked_sgather_nxv4i8( %ptrs, @llvm.experimental.vector.extract.v10i32.v8i32(<8 x i32> %vec, i64 %idx) +declare <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 %idx) +declare <3 x i32> @llvm.experimental.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 %idx) +declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32( %vec, i64 %idx) +declare <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 %idx) +declare <8 x i32> @llvm.experimental.vector.extract.v8i32.v8i32(<8 x i32> %vec, i64 %idx) + +; ============================================================================ ; +; Trivial cases +; ============================================================================ ; + +; Extracting the entirety of a vector is a nop. +define <8 x i32> @trivial_nop(<8 x i32> %vec) { +; CHECK-LABEL: @trivial_nop( +; CHECK-NEXT: ret <8 x i32> [[VEC:%.*]] +; + %1 = call <8 x i32> @llvm.experimental.vector.extract.v8i32.v8i32(<8 x i32> %vec, i64 0) + ret <8 x i32> %1 +} + +; ============================================================================ ; +; Valid canonicalizations +; ============================================================================ ; + +define <2 x i32> @valid_extraction_a(<8 x i32> %vec) { +; CHECK-LABEL: @valid_extraction_a( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP1]] +; + %1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 0) + ret <2 x i32> %1 +} + +define <2 x i32> @valid_extraction_b(<8 x i32> %vec) { +; CHECK-LABEL: @valid_extraction_b( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP1]] +; + %1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 2) + ret <2 x i32> %1 +} + +define <2 x i32> @valid_extraction_c(<8 x i32> %vec) { +; CHECK-LABEL: @valid_extraction_c( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP1]] +; + %1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 4) + ret <2 x i32> %1 +} + +define <2 x i32> @valid_extraction_d(<8 x i32> %vec) { +; CHECK-LABEL: @valid_extraction_d( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP1]] +; + %1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 6) + ret <2 x i32> %1 +} + +define <4 x i32> @valid_extraction_e(<8 x i32> %vec) { +; CHECK-LABEL: @valid_extraction_e( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x 
i32> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; + %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 0) + ret <4 x i32> %1 +} + +define <4 x i32> @valid_extraction_f(<8 x i32> %vec) { +; CHECK-LABEL: @valid_extraction_f( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; + %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 4) + ret <4 x i32> %1 +} + +define <3 x i32> @valid_extraction_g(<8 x i32> %vec) { +; CHECK-LABEL: @valid_extraction_g( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <3 x i32> +; CHECK-NEXT: ret <3 x i32> [[TMP1]] +; + %1 = call <3 x i32> @llvm.experimental.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 0) + ret <3 x i32> %1 +} + +define <3 x i32> @valid_extraction_h(<8 x i32> %vec) { +; CHECK-LABEL: @valid_extraction_h( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <3 x i32> +; CHECK-NEXT: ret <3 x i32> [[TMP1]] +; + %1 = call <3 x i32> @llvm.experimental.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 3) + ret <3 x i32> %1 +} + +; ============================================================================ ; +; Invalid canonicalizations +; ============================================================================ ; + +; Idx must be the be a constant multiple of the destination vector's length, +; otherwise the result is undefined. +define <4 x i32> @idx_not_constant_multiple(<8 x i32> %vec) { +; CHECK-LABEL: @idx_not_constant_multiple( +; CHECK-NEXT: ret <4 x i32> undef +; + %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1) + ret <4 x i32> %1 +} + +; If the extraction overruns the vector, the result is undefined. +define <10 x i32> @extract_overrun(<8 x i32> %vec) { +; CHECK-LABEL: @extract_overrun( +; CHECK-NEXT: ret <10 x i32> undef +; + %1 = call <10 x i32> @llvm.experimental.vector.extract.v10i32.v8i32(<8 x i32> %vec, i64 0) + ret <10 x i32> %1 +} + +; ============================================================================ ; +; Scalable cases +; ============================================================================ ; + +; Scalable extractions should not be canonicalized. This will be lowered to the +; EXTRACT_SUBVECTOR ISD node later. +define <4 x i32> @scalable_extract( %vec) { +; CHECK-LABEL: @scalable_extract( +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32( [[VEC:%.*]], i64 0) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; + %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32( %vec, i64 0) + ret <4 x i32> %1 +} diff --git a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll b/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll new file mode 100644 index 00000000000000..413d8155787f16 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll @@ -0,0 +1,147 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +; llvm.experimental.vector.insert canonicalizes to shufflevector in the fixed case. In the +; scalable case, we lower to the INSERT_SUBVECTOR ISD node. 
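+;
+; As a sketch of the canonical form checked below: inserting <2 x i32> %sub at
+; index 2 of <8 x i32> %vec first widens the subvector with undef lanes and then
+; blends it into the original vector, e.g.
+;   %widened = shufflevector <2 x i32> %sub, <2 x i32> undef,
+;                            <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+;   %result  = shufflevector <8 x i32> %vec, <8 x i32> %widened,
+;                            <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>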
+ +declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 %idx) +declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 %idx) +declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 %idx) +declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 %idx) +declare @llvm.experimental.vector.insert.nxv4i32.v4i32( %vec, <4 x i32> %subvec, i64 %idx) + +; ============================================================================ ; +; Trivial cases +; ============================================================================ ; + +; An insert that entirely overwrites an with another is a +; nop. +define <8 x i32> @trivial_nop(<8 x i32> %vec, <8 x i32> %subvec) { +; CHECK-LABEL: @trivial_nop( +; CHECK-NEXT: ret <8 x i32> [[SUBVEC:%.*]] +; + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 0) + ret <8 x i32> %1 +} + +; ============================================================================ ; +; Valid canonicalizations +; ============================================================================ ; + +define <8 x i32> @valid_insertion_a(<8 x i32> %vec, <2 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_a( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @valid_insertion_b(<8 x i32> %vec, <2 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_b( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2) + ret <8 x i32> %1 +} + +define <8 x i32> @valid_insertion_c(<8 x i32> %vec, <2 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_c( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 4) + ret <8 x i32> %1 +} + +define <8 x i32> @valid_insertion_d(<8 x i32> %vec, <2 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_d( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 6) + ret <8 x i32> %1 +} + +define <8 x i32> @valid_insertion_e(<8 x i32> %vec, <4 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_e( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> 
@llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @valid_insertion_f(<8 x i32> %vec, <4 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_f( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 4) + ret <8 x i32> %1 +} + +define <8 x i32> @valid_insertion_g(<8 x i32> %vec, <3 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_g( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_h( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 3) + ret <8 x i32> %1 +} + +; ============================================================================ ; +; Invalid canonicalizations +; ============================================================================ ; + +; Idx must be the be a constant multiple of the subvector's minimum vector +; length, otherwise the result is undefined. +define <8 x i32> @idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) { +; CHECK-LABEL: @idx_not_constant_multiple( +; CHECK-NEXT: ret <8 x i32> undef +; + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2) + ret <8 x i32> %1 +} + +; If the insertion overruns the vector, the result is undefined. +define <8 x i32> @insert_overrun(<8 x i32> %vec, <8 x i32> %subvec) { +; CHECK-LABEL: @insert_overrun( +; CHECK-NEXT: ret <8 x i32> undef +; + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 4) + ret <8 x i32> %1 +} + +; ============================================================================ ; +; Scalable cases +; ============================================================================ ; + +; Scalable insertions should not be canonicalized. This will be lowered to the +; INSERT_SUBVECTOR ISD node later. 
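+; (A shufflevector mask is a compile-time constant with one entry per result
+; lane, so no mask can express blending a <4 x i32> into a <vscale x 4 x i32>
+; whose lane count is unknown until run time; the intrinsic call is kept as-is.)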
+define @scalable_insert( %vec, <4 x i32> %subvec) { +; CHECK-LABEL: @scalable_insert( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.experimental.vector.insert.nxv4i32.v4i32( [[VEC:%.*]], <4 x i32> [[SUBVEC:%.*]], i64 0) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call @llvm.experimental.vector.insert.nxv4i32.v4i32( %vec, <4 x i32> %subvec, i64 0) + ret %1 +} diff --git a/llvm/test/Transforms/LoopVectorize/metadata-width.ll b/llvm/test/Transforms/LoopVectorize/metadata-width.ll index db9c6c9d862ac0..e107de672c8be3 100644 --- a/llvm/test/Transforms/LoopVectorize/metadata-width.ll +++ b/llvm/test/Transforms/LoopVectorize/metadata-width.ll @@ -13,8 +13,7 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv - %0 = trunc i64 %indvars.iv to i32 - store i32 %0, i32* %arrayidx, align 4 + store i32 42, i32* %arrayidx, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -25,7 +24,7 @@ for.end: ; preds = %for.body, %entry } ; CHECK-LABEL: @test2( -; CHECK: store <8 x i32> +; CHECK: store ; CHECK: ret void define void @test2(i32* nocapture %a, i32 %n) #0 { entry: @@ -35,8 +34,7 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv - %0 = trunc i64 %indvars.iv to i32 - store i32 %0, i32* %arrayidx, align 4 + store i32 42, i32* %arrayidx, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -57,8 +55,7 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv - %0 = trunc i64 %indvars.iv to i32 - store i32 %0, i32* %arrayidx, align 4 + store i32 42, i32* %arrayidx, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll new file mode 100644 index 00000000000000..692b6e5c4ce179 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll @@ -0,0 +1,101 @@ +; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=1 < %s | FileCheck %s --check-prefix=CHECKUF1 +; RUN: opt -S -loop-vectorize -instcombine -force-vector-interleave=2 < %s | FileCheck %s --check-prefix=CHECKUF2 + +; CHECKUF1: for.body.preheader: +; CHECKUF1-DAG: %wide.trip.count = zext i32 %N to i64 +; CHECKUF1-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() +; CHECKUF1-DAG: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2 +; CHECKUF1-DAG: %min.iters.check = icmp ugt i64 %[[VSCALEX4]], %wide.trip.count + +; CHECKUF1: vector.ph: +; CHECKUF1-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() +; CHECKUF1-DAG: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2 +; CHECKUF1-DAG: %n.mod.vf = urem i64 %wide.trip.count, %[[VSCALEX4]] +; CHECKUF1: %n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf + +; CHECKUF1: vector.body: +; CHECKUF1: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] +; CHECKUF1: %[[IDXB:.*]] = getelementptr inbounds double, double* %b, i64 %index +; CHECKUF1: %[[IDXB_CAST:.*]] = bitcast 
double* %[[IDXB]] to * +; CHECKUF1: %wide.load = load , * %[[IDXB_CAST]], align 8, !alias.scope !0 +; CHECKUF1: %[[FADD:.*]] = fadd %wide.load, shufflevector ( insertelement ( undef, double 1.000000e+00, i32 0), undef, zeroinitializer) +; CHECKUF1: %[[IDXA:.*]] = getelementptr inbounds double, double* %a, i64 %index +; CHECKUF1: %[[IDXA_CAST:.*]] = bitcast double* %[[IDXA]] to * +; CHECKUF1: store %[[FADD]], * %[[IDXA_CAST]], align 8, !alias.scope !3, !noalias !0 +; CHECKUF1: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() +; CHECKUF1: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2 +; CHECKUF1: %index.next = add i64 %index, %[[VSCALEX4]] +; CHECKUF1: %[[CMP:.*]] = icmp eq i64 %index.next, %n.vec +; CHECKUF1: br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !5 + + +; For an interleave factor of 2, vscale is scaled by 8 instead of 4 (and thus shifted left by 3 instead of 2). +; There is also the increment for the next iteration, e.g. instead of indexing IDXB, it indexes at IDXB + vscale * 4. + +; CHECKUF2: for.body.preheader: +; CHECKUF2-DAG: %wide.trip.count = zext i32 %N to i64 +; CHECKUF2-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() +; CHECKUF2-DAG: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3 +; CHECKUF2-DAG: %min.iters.check = icmp ugt i64 %[[VSCALEX8]], %wide.trip.count + +; CHECKUF2: vector.ph: +; CHECKUF2-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() +; CHECKUF2-DAG: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3 +; CHECKUF2-DAG: %n.mod.vf = urem i64 %wide.trip.count, %[[VSCALEX8]] +; CHECKUF2: %n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf + +; CHECKUF2: vector.body: +; CHECKUF2: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] +; CHECKUF2: %[[IDXB:.*]] = getelementptr inbounds double, double* %b, i64 %index +; CHECKUF2: %[[IDXB_CAST:.*]] = bitcast double* %[[IDXB]] to * +; CHECKUF2: %wide.load = load , * %[[IDXB_CAST]], align 8, !alias.scope !0 +; CHECKUF2: %[[VSCALE:.*]] = call i32 @llvm.vscale.i32() +; CHECKUF2: %[[VSCALE2:.*]] = shl i32 %[[VSCALE]], 2 +; CHECKUF2: %[[VSCALE2_EXT:.*]] = sext i32 %[[VSCALE2]] to i64 +; CHECKUF2: %[[IDXB_NEXT:.*]] = getelementptr inbounds double, double* %[[IDXB]], i64 %[[VSCALE2_EXT]] +; CHECKUF2: %[[IDXB_NEXT_CAST:.*]] = bitcast double* %[[IDXB_NEXT]] to * +; CHECKUF2: %wide.load{{[0-9]+}} = load , * %[[IDXB_NEXT_CAST]], align 8, !alias.scope !0 +; CHECKUF2: %[[FADD:.*]] = fadd %wide.load, shufflevector ( insertelement ( undef, double 1.000000e+00, i32 0), undef, zeroinitializer) +; CHECKUF2: %[[FADD_NEXT:.*]] = fadd %wide.load{{[0-9]+}}, shufflevector ( insertelement ( undef, double 1.000000e+00, i32 0), undef, zeroinitializer) +; CHECKUF2: %[[IDXA:.*]] = getelementptr inbounds double, double* %a, i64 %index +; CHECKUF2: %[[IDXA_CAST:.*]] = bitcast double* %[[IDXA]] to * +; CHECKUF2: store %[[FADD]], * %[[IDXA_CAST]], align 8, !alias.scope !3, !noalias !0 +; CHECKUF2: %[[VSCALE:.*]] = call i32 @llvm.vscale.i32() +; CHECKUF2: %[[VSCALE2:.*]] = shl i32 %[[VSCALE]], 2 +; CHECKUF2: %[[VSCALE2_EXT:.*]] = sext i32 %[[VSCALE2]] to i64 +; CHECKUF2: %[[IDXA_NEXT:.*]] = getelementptr inbounds double, double* %[[IDXA]], i64 %[[VSCALE2_EXT]] +; CHECKUF2: %[[IDXA_NEXT_CAST:.*]] = bitcast double* %[[IDXA_NEXT]] to * +; CHECKUF2: store %[[FADD_NEXT]], * %[[IDXA_NEXT_CAST]], align 8, !alias.scope !3, !noalias !0 +; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() +; CHECKUF2: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3 +; CHECKUF2: %index.next = add i64 %index, %[[VSCALEX8]] +; CHECKUF2: %[[CMP:.*]] = icmp eq i64 %index.next, 
%n.vec +; CHECKUF2: br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !5 + +define void @loop(i32 %N, double* nocapture %a, double* nocapture readonly %b) { +entry: + %cmp7 = icmp sgt i32 %N, 0 + br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv + %0 = load double, double* %arrayidx, align 8 + %add = fadd double %0, 1.000000e+00 + %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv + store double %add, double* %arrayidx2, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1 +} + +!1 = distinct !{!1, !2, !3} +!2 = !{!"llvm.loop.vectorize.width", i32 4} +!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} diff --git a/llvm/test/Verifier/extract-vector-mismatched-element-types.ll b/llvm/test/Verifier/extract-vector-mismatched-element-types.ll new file mode 100644 index 00000000000000..b8a10854938646 --- /dev/null +++ b/llvm/test/Verifier/extract-vector-mismatched-element-types.ll @@ -0,0 +1,9 @@ +; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s + +; CHECK: experimental_vector_extract result must have the same element type as the input vector. +define <16 x i16> @invalid_mismatched_element_types( %vec) nounwind { + %retval = call <16 x i16> @llvm.experimental.vector.extract.v16i16.nxv16i8( %vec, i64 0) + ret <16 x i16> %retval +} + +declare <16 x i16> @llvm.experimental.vector.extract.v16i16.nxv16i8(, i64) diff --git a/llvm/test/Verifier/insert-vector-mismatched-element-types.ll b/llvm/test/Verifier/insert-vector-mismatched-element-types.ll new file mode 100644 index 00000000000000..84376f5a3ec752 --- /dev/null +++ b/llvm/test/Verifier/insert-vector-mismatched-element-types.ll @@ -0,0 +1,9 @@ +; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s + +; CHECK: experimental_vector_insert parameters must have the same element type. +define @invalid_mismatched_element_types( %vec, <4 x i16> %subvec) nounwind { + %retval = call @llvm.experimental.vector.insert.nxv16i8.v4i16( %vec, <4 x i16> %subvec, i64 0) + ret %retval +} + +declare @llvm.experimental.vector.insert.nxv16i8.v4i16(, <4 x i16>, i64) diff --git a/mlir/examples/toy/Ch2/mlir/Dialect.cpp b/mlir/examples/toy/Ch2/mlir/Dialect.cpp index 0e715c9aafa4cc..e4391e3b6fa909 100644 --- a/mlir/examples/toy/Ch2/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch2/mlir/Dialect.cpp @@ -191,7 +191,7 @@ void MulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, static mlir::LogicalResult verify(ReturnOp op) { // We know that the parent operation is a function, because of the 'HasParent' // trait attached to the operation definition. - auto function = cast(op.getParentOp()); + auto function = cast(op->getParentOp()); /// ReturnOps can only have a single optional operand. 
if (op.getNumOperands() > 1) diff --git a/mlir/examples/toy/Ch3/mlir/Dialect.cpp b/mlir/examples/toy/Ch3/mlir/Dialect.cpp index 0e715c9aafa4cc..e4391e3b6fa909 100644 --- a/mlir/examples/toy/Ch3/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch3/mlir/Dialect.cpp @@ -191,7 +191,7 @@ void MulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, static mlir::LogicalResult verify(ReturnOp op) { // We know that the parent operation is a function, because of the 'HasParent' // trait attached to the operation definition. - auto function = cast(op.getParentOp()); + auto function = cast(op->getParentOp()); /// ReturnOps can only have a single optional operand. if (op.getNumOperands() > 1) diff --git a/mlir/examples/toy/Ch4/mlir/Dialect.cpp b/mlir/examples/toy/Ch4/mlir/Dialect.cpp index 09875f7bdac198..0a3ec29b570740 100644 --- a/mlir/examples/toy/Ch4/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch4/mlir/Dialect.cpp @@ -246,7 +246,7 @@ void GenericCallOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, /// Return the callee of the generic call operation, this is required by the /// call interface. CallInterfaceCallable GenericCallOp::getCallableForCallee() { - return getAttrOfType("callee"); + return (*this)->getAttrOfType("callee"); } /// Get the argument operands to the called function, this is required by the @@ -272,7 +272,7 @@ void MulOp::inferShapes() { getResult().setType(getOperand(0).getType()); } static mlir::LogicalResult verify(ReturnOp op) { // We know that the parent operation is a function, because of the 'HasParent' // trait attached to the operation definition. - auto function = cast(op.getParentOp()); + auto function = cast(op->getParentOp()); /// ReturnOps can only have a single optional operand. if (op.getNumOperands() > 1) diff --git a/mlir/examples/toy/Ch5/mlir/Dialect.cpp b/mlir/examples/toy/Ch5/mlir/Dialect.cpp index 045673e67f7961..d4356f61f83bea 100644 --- a/mlir/examples/toy/Ch5/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch5/mlir/Dialect.cpp @@ -246,7 +246,7 @@ void GenericCallOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, /// Return the callee of the generic call operation, this is required by the /// call interface. CallInterfaceCallable GenericCallOp::getCallableForCallee() { - return getAttrOfType("callee"); + return (*this)->getAttrOfType("callee"); } /// Get the argument operands to the called function, this is required by the @@ -272,7 +272,7 @@ void MulOp::inferShapes() { getResult().setType(getOperand(0).getType()); } static mlir::LogicalResult verify(ReturnOp op) { // We know that the parent operation is a function, because of the 'HasParent' // trait attached to the operation definition. - auto function = cast(op.getParentOp()); + auto function = cast(op->getParentOp()); /// ReturnOps can only have a single optional operand. if (op.getNumOperands() > 1) diff --git a/mlir/examples/toy/Ch6/mlir/Dialect.cpp b/mlir/examples/toy/Ch6/mlir/Dialect.cpp index 045673e67f7961..d4356f61f83bea 100644 --- a/mlir/examples/toy/Ch6/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch6/mlir/Dialect.cpp @@ -246,7 +246,7 @@ void GenericCallOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, /// Return the callee of the generic call operation, this is required by the /// call interface. 
CallInterfaceCallable GenericCallOp::getCallableForCallee() { - return getAttrOfType("callee"); + return (*this)->getAttrOfType("callee"); } /// Get the argument operands to the called function, this is required by the @@ -272,7 +272,7 @@ void MulOp::inferShapes() { getResult().setType(getOperand(0).getType()); } static mlir::LogicalResult verify(ReturnOp op) { // We know that the parent operation is a function, because of the 'HasParent' // trait attached to the operation definition. - auto function = cast(op.getParentOp()); + auto function = cast(op->getParentOp()); /// ReturnOps can only have a single optional operand. if (op.getNumOperands() > 1) diff --git a/mlir/examples/toy/Ch7/mlir/Dialect.cpp b/mlir/examples/toy/Ch7/mlir/Dialect.cpp index 032d7a61fb904f..5c87e5a886fbdc 100644 --- a/mlir/examples/toy/Ch7/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch7/mlir/Dialect.cpp @@ -298,7 +298,7 @@ void GenericCallOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, /// Return the callee of the generic call operation, this is required by the /// call interface. CallInterfaceCallable GenericCallOp::getCallableForCallee() { - return getAttrOfType("callee"); + return (*this)->getAttrOfType("callee"); } /// Get the argument operands to the called function, this is required by the @@ -324,7 +324,7 @@ void MulOp::inferShapes() { getResult().setType(getOperand(0).getType()); } static mlir::LogicalResult verify(ReturnOp op) { // We know that the parent operation is a function, because of the 'HasParent' // trait attached to the operation definition. - auto function = cast(op.getParentOp()); + auto function = cast(op->getParentOp()); /// ReturnOps can only have a single optional operand. if (op.getNumOperands() > 1) diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td index 33c00ca9b22c02..953a2d5c282c91 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -207,8 +207,8 @@ def GPU_GPUFuncOp : GPU_Op<"func", [HasParent<"GPUModuleOp">, /// Returns `true` if the GPU function defined by this Op is a kernel, i.e. /// it is intended to be launched from host. bool isKernel() { - return getAttrOfType(GPUDialect::getKernelFuncAttrName()) != - nullptr; + return (*this)->getAttrOfType( + GPUDialect::getKernelFuncAttrName()) != nullptr; } /// Change the type of this function in place. This is an extremely @@ -223,8 +223,8 @@ def GPU_GPUFuncOp : GPU_Op<"func", [HasParent<"GPUModuleOp">, /// Returns the number of buffers located in the workgroup memory. 
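Editorial aside, not part of the patch: the rewrite repeated throughout these hunks relies on OpState::operator-> returning the underlying mlir::Operation*, so attribute queries go straight to Operation instead of through the deprecated forwarding helpers on OpState. A minimal sketch of the before/after spelling; OpTy and the "callee" attribute name are illustrative.

#include "mlir/IR/Operation.h"

template <typename OpTy>
mlir::FlatSymbolRefAttr getCalleeAttrSketch(OpTy op) {
  // Old, deprecated spelling (forwarder on OpState):
  //   op.getAttrOfType<mlir::FlatSymbolRefAttr>("callee")
  // New spelling, directly on the Operation; `template` is only required here
  // because OpTy is a template parameter, as in the AwaitOp lowering below.
  return op->template getAttrOfType<mlir::FlatSymbolRefAttr>("callee");
}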
unsigned getNumWorkgroupAttributions() { - return getAttrOfType(getNumWorkgroupAttributionsAttrName()) - .getInt(); + return (*this)->getAttrOfType( + getNumWorkgroupAttributionsAttrName()).getInt(); } /// Returns a list of block arguments that correspond to buffers located in diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index e4dee53560bd3d..807ea8826ef842 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -766,7 +766,7 @@ def LLVM_LLVMFuncOp Block *addEntryBlock(); LLVMType getType() { - return getAttrOfType(getTypeAttrName()) + return (*this)->getAttrOfType(getTypeAttrName()) .getValue().cast(); } bool isVarArg() { diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 96b111f7c50828..1f9b860eb52eb5 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -107,7 +107,7 @@ def NVVM_ShflBflyOp : let parser = [{ return parseNVVMShflSyncBflyOp(parser, result); }]; let printer = [{ printNVVMIntrinsicOp(p, this->getOperation()); }]; let verifier = [{ - if (!getAttrOfType("return_value_and_is_valid")) + if (!(*this)->getAttrOfType("return_value_and_is_valid")) return success(); auto type = getType().cast(); if (!type.isStructTy() || type.getStructNumElements() != 2 || diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h index e2b9470f19a785..7302bd486657cc 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h @@ -59,7 +59,9 @@ class ConstantFloatOp : public ConstantOp { static void build(OpBuilder &builder, OperationState &result, const APFloat &value, FloatType type); - APFloat getValue() { return getAttrOfType("value").getValue(); } + APFloat getValue() { + return (*this)->getAttrOfType("value").getValue(); + } static bool classof(Operation *op); }; @@ -81,7 +83,9 @@ class ConstantIntOp : public ConstantOp { static void build(OpBuilder &builder, OperationState &result, int64_t value, Type type); - int64_t getValue() { return getAttrOfType("value").getInt(); } + int64_t getValue() { + return (*this)->getAttrOfType("value").getInt(); + } static bool classof(Operation *op); }; @@ -98,7 +102,9 @@ class ConstantIndexOp : public ConstantOp { /// Build a constant int op producing an index. static void build(OpBuilder &builder, OperationState &result, int64_t value); - int64_t getValue() { return getAttrOfType("value").getInt(); } + int64_t getValue() { + return (*this)->getAttrOfType("value").getInt(); + } static bool classof(Operation *op); }; @@ -159,8 +165,8 @@ class DmaStartOp } // Returns the source memref indices for this DMA operation. operand_range getSrcIndices() { - return {getOperation()->operand_begin() + 1, - getOperation()->operand_begin() + 1 + getSrcMemRefRank()}; + return {(*this)->operand_begin() + 1, + (*this)->operand_begin() + 1 + getSrcMemRefRank()}; } // Returns the destination MemRefType for this DMA operations. @@ -178,8 +184,8 @@ class DmaStartOp // Returns the destination memref indices for this DMA operation. 
operand_range getDstIndices() { - return {getOperation()->operand_begin() + 1 + getSrcMemRefRank() + 1, - getOperation()->operand_begin() + 1 + getSrcMemRefRank() + 1 + + return {(*this)->operand_begin() + 1 + getSrcMemRefRank() + 1, + (*this)->operand_begin() + 1 + getSrcMemRefRank() + 1 + getDstMemRefRank()}; } @@ -201,9 +207,8 @@ class DmaStartOp operand_range getTagIndices() { unsigned tagIndexStartPos = 1 + getSrcMemRefRank() + 1 + getDstMemRefRank() + 1 + 1; - return {getOperation()->operand_begin() + tagIndexStartPos, - getOperation()->operand_begin() + tagIndexStartPos + - getTagMemRefRank()}; + return {(*this)->operand_begin() + tagIndexStartPos, + (*this)->operand_begin() + tagIndexStartPos + getTagMemRefRank()}; } /// Returns true if this is a DMA from a faster memory space to a slower one. @@ -279,8 +284,8 @@ class DmaWaitOp // Returns the tag memref index for this DMA operation. operand_range getTagIndices() { - return {getOperation()->operand_begin() + 1, - getOperation()->operand_begin() + 1 + getTagMemRefRank()}; + return {(*this)->operand_begin() + 1, + (*this)->operand_begin() + 1 + getTagMemRefRank()}; } // Returns the rank (number of indices) of the tag memref. diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 441cff497ed26f..5368880a7cb134 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -820,7 +820,7 @@ def CallOp : Std_Op<"call", /// Return the callee of this operation. CallInterfaceCallable getCallableForCallee() { - return getAttrOfType("callee"); + return (*this)->getAttrOfType("callee"); } }]; @@ -1040,8 +1040,8 @@ def CmpFOp : Std_Op<"cmpf", static CmpFPredicate getPredicateByName(StringRef name); CmpFPredicate getPredicate() { - return (CmpFPredicate)getAttrOfType(getPredicateAttrName()) - .getInt(); + return (CmpFPredicate)(*this)->getAttrOfType( + getPredicateAttrName()).getInt(); } }]; @@ -1162,8 +1162,8 @@ def CmpIOp : Std_Op<"cmpi", static CmpIPredicate getPredicateByName(StringRef name); CmpIPredicate getPredicate() { - return (CmpIPredicate)getAttrOfType(getPredicateAttrName()) - .getInt(); + return (CmpIPredicate)(*this)->getAttrOfType( + getPredicateAttrName()).getInt(); } }]; diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index 76b98c1f3f36ca..de77e3b034830f 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -1937,7 +1937,8 @@ def Vector_TupleGetOp : return getResult().getType().cast(); } int64_t getIndex() { - return getAttrOfType("index").getValue().getSExtValue(); + auto index = (*this)->getAttrOfType("index"); + return index.getValue().getSExtValue(); } static StringRef getIndexAttrName() { return "index"; } }]; diff --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp index 361bfa2b6fade4..75c23d37a53a54 100644 --- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp +++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp @@ -466,7 +466,7 @@ static void addSuspensionPoint(CoroMachinery coro, Value coroState, // Note that this is not reversible transformation. 
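Editorial aside, not part of the patch: the DMA ops above carve ranges out of the flat operand list with iterator arithmetic on the underlying Operation. A generic sketch of that shape; the offsets are illustrative, whereas the real ops derive them from the memref ranks.

#include "mlir/IR/Operation.h"

// Equivalent in spirit to getSrcIndices/getDstIndices/getTagIndices, but with
// explicit offsets instead of rank-derived ones.
static mlir::Operation::operand_range
getOperandSliceSketch(mlir::Operation *op, unsigned first, unsigned count) {
  return {op->operand_begin() + first, op->operand_begin() + first + count};
}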
static std::pair outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) { - ModuleOp module = execute.getParentOfType(); + ModuleOp module = execute->getParentOfType(); MLIRContext *ctx = module.getContext(); Location loc = execute.getLoc(); @@ -727,7 +727,7 @@ class AwaitOpLoweringBase : public ConversionPattern { return failure(); // Check if await operation is inside the outlined coroutine function. - auto func = await.template getParentOfType(); + auto func = await->template getParentOfType(); auto outlined = outlinedFunctions.find(func); const bool isInCoroutine = outlined != outlinedFunctions.end(); diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp index 810511194f682b..3b4b39e57d557c 100644 --- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp @@ -587,7 +587,8 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite( launchOp, launchOp.getKernelModuleName()); assert(kernelModule && "expected a kernel module"); - auto binaryAttr = kernelModule.getAttrOfType(gpuBinaryAnnotation); + auto binaryAttr = + kernelModule->getAttrOfType(gpuBinaryAnnotation); if (!binaryAttr) { kernelModule.emitOpError() << "missing " << gpuBinaryAnnotation << " attribute"; diff --git a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp index 16f30c3fe02617..355bced96ae750 100644 --- a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp +++ b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp @@ -186,7 +186,7 @@ void VulkanLaunchFuncToVulkanCallsPass::collectSPIRVAttributes( // Check that `kSPIRVBinary` and `kSPIRVEntryPoint` are present in attributes // for the given vulkan launch call. auto spirvBlobAttr = - vulkanLaunchCallOp.getAttrOfType(kSPIRVBlobAttrName); + vulkanLaunchCallOp->getAttrOfType(kSPIRVBlobAttrName); if (!spirvBlobAttr) { vulkanLaunchCallOp.emitError() << "missing " << kSPIRVBlobAttrName << " attribute"; @@ -194,7 +194,7 @@ void VulkanLaunchFuncToVulkanCallsPass::collectSPIRVAttributes( } auto spirvEntryPointNameAttr = - vulkanLaunchCallOp.getAttrOfType(kSPIRVEntryPointAttrName); + vulkanLaunchCallOp->getAttrOfType(kSPIRVEntryPointAttrName); if (!spirvEntryPointNameAttr) { vulkanLaunchCallOp.emitError() << "missing " << kSPIRVEntryPointAttrName << " attribute"; diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp index b7b4e7aab859c4..39b0d62c76453a 100644 --- a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp +++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp @@ -381,7 +381,7 @@ static LogicalResult processParallelLoop( // TODO: Verify that this is a valid GPU mapping. // processor ids: 0-2 block [x/y/z], 3-5 -> thread [x/y/z], 6-> sequential ArrayAttr mapping = - parallelOp.getAttrOfType(gpu::getMappingAttrName()); + parallelOp->getAttrOfType(gpu::getMappingAttrName()); // TODO: Support reductions. 
if (!mapping || parallelOp.getNumResults() != 0) @@ -390,7 +390,7 @@ static LogicalResult processParallelLoop( Location loc = parallelOp.getLoc(); auto launchIndependent = [&launchOp](Value val) { - return val.getParentRegion()->isAncestor(launchOp.getParentRegion()); + return val.getParentRegion()->isAncestor(launchOp->getParentRegion()); }; auto ensureLaunchIndependent = [&rewriter, @@ -568,7 +568,7 @@ ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp, PatternRewriter &rewriter) const { // We can only transform starting at the outer-most loop. Launches inside of // parallel loops are not supported. - if (auto parentLoop = parallelOp.getParentOfType()) + if (auto parentLoop = parallelOp->getParentOfType()) return failure(); // Create a launch operation. We start with bound one for all grid/block // sizes. Those will be refined later as we discover them from mappings. diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 906ee8bafbbb55..3adb02af15c604 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -66,7 +66,7 @@ static void insertOpenMPParallel(FuncOp func) { SmallVector topLevelParallelOps; func.walk([&topLevelParallelOps](scf::ParallelOp parallelOp) { // Ignore ops that are already within OpenMP parallel construct. - if (!parallelOp.getParentOfType()) + if (!parallelOp->getParentOfType()) topLevelParallelOps.push_back(parallelOp); }); @@ -87,7 +87,7 @@ static LogicalResult applyPatterns(FuncOp func) { ConversionTarget target(*func.getContext()); target.addIllegalOp(); target.addDynamicallyLegalOp( - [](scf::YieldOp op) { return !isa(op.getParentOp()); }); + [](scf::YieldOp op) { return !isa(op->getParentOp()); }); target.addLegalDialect(); OwningRewritePatternList patterns; diff --git a/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp b/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp index 1030f0dbd288b2..754e1937a7833c 100644 --- a/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp +++ b/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp @@ -269,11 +269,11 @@ LogicalResult TerminatorOpConversion::matchAndRewrite( // VariableOp created during lowering of the parent region. if (!operands.empty()) { auto loc = terminatorOp.getLoc(); - auto &allocas = scfToSPIRVContext->outputVars[terminatorOp.getParentOp()]; + auto &allocas = scfToSPIRVContext->outputVars[terminatorOp->getParentOp()]; assert(allocas.size() == operands.size()); for (unsigned i = 0, e = operands.size(); i < e; i++) rewriter.create(loc, allocas[i], operands[i]); - if (isa(terminatorOp.getParentOp())) { + if (isa(terminatorOp->getParentOp())) { // For loops we also need to update the branch jumping back to the header. auto br = cast(rewriter.getInsertionBlock()->getTerminator()); diff --git a/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp b/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp index f83f72d1d10ebe..b01c443ddc779f 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp @@ -52,7 +52,7 @@ static std::string bindingName() { /// i -> (0, i) /// which is implemented under `LowerABIAttributesPass`. 
static unsigned calculateGlobalIndex(spirv::GlobalVariableOp op) { - IntegerAttr binding = op.getAttrOfType(bindingName()); + IntegerAttr binding = op->getAttrOfType(bindingName()); return binding.getInt(); } @@ -75,8 +75,8 @@ static std::string createGlobalVariableWithBindName(spirv::GlobalVariableOp op, StringRef kernelModuleName) { IntegerAttr descriptorSet = - op.getAttrOfType(descriptorSetName()); - IntegerAttr binding = op.getAttrOfType(bindingName()); + op->getAttrOfType(descriptorSetName()); + IntegerAttr binding = op->getAttrOfType(bindingName()); return llvm::formatv("{0}_{1}_descriptor_set{2}_binding{3}", kernelModuleName.str(), op.sym_name().str(), std::to_string(descriptorSet.getInt()), @@ -87,8 +87,8 @@ createGlobalVariableWithBindName(spirv::GlobalVariableOp op, /// and a binding number. static bool hasDescriptorSetAndBinding(spirv::GlobalVariableOp op) { IntegerAttr descriptorSet = - op.getAttrOfType(descriptorSetName()); - IntegerAttr binding = op.getAttrOfType(bindingName()); + op->getAttrOfType(descriptorSetName()); + IntegerAttr binding = op->getAttrOfType(bindingName()); return descriptorSet && binding; } @@ -155,7 +155,7 @@ class GPULaunchLowering : public ConvertOpToLLVMPattern { ConversionPatternRewriter &rewriter) const override { auto *op = launchOp.getOperation(); MLIRContext *context = rewriter.getContext(); - auto module = launchOp.getParentOfType(); + auto module = launchOp->getParentOfType(); // Get the SPIR-V module that represents the gpu kernel module. The module // is named: diff --git a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp index f81ae9e9c1d519..5309daec5e523e 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp @@ -647,7 +647,7 @@ class ExecutionModePattern // First, create the global struct's name that would be associated with // this entry point's execution mode. We set it to be: // __spv__{SPIR-V module name}_{function name}_execution_mode_info - ModuleOp module = op.getParentOfType(); + ModuleOp module = op->getParentOfType(); std::string moduleName; if (module.getName().hasValue()) moduleName = "_" + module.getName().getValue().str(); @@ -1530,8 +1530,9 @@ void mlir::encodeBindAttribute(ModuleOp module) { auto spvModules = module.getOps(); for (auto spvModule : spvModules) { spvModule.walk([&](spirv::GlobalVariableOp op) { - IntegerAttr descriptorSet = op.getAttrOfType(kDescriptorSet); - IntegerAttr binding = op.getAttrOfType(kBinding); + IntegerAttr descriptorSet = + op->getAttrOfType(kDescriptorSet); + IntegerAttr binding = op->getAttrOfType(kBinding); // For every global variable in the module, get the ones with descriptor // set and binding numbers. if (descriptorSet && binding) { diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 17a06546329700..2f6856dc275a76 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -1349,7 +1349,7 @@ struct FuncOpConversionBase : public ConvertOpToLLVMPattern { ConversionPatternRewriter &rewriter) const { // Convert the original function arguments. They are converted using the // LLVMTypeConverter provided to this legalization pattern. 
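Editorial aside, not part of the patch: several of the lowerings in this area first climb to the enclosing module and then resolve or declare a runtime function there. A minimal sketch of that lookup, reusing the "free" symbol that the dealloc lowering below queries; treat the helper and the include set (taken from this revision) as illustrative.

#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Module.h"

static mlir::LLVM::LLVMFuncOp lookupFreeDeclSketch(mlir::Operation *op) {
  // Walk up to the closest ModuleOp, then do a symbol-table lookup in it;
  // returns a null op when no `free` declaration exists yet.
  auto module = op->getParentOfType<mlir::ModuleOp>();
  return module.lookupSymbol<mlir::LLVM::LLVMFuncOp>("free");
}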
- auto varargsAttr = funcOp.getAttrOfType("std.varargs"); + auto varargsAttr = funcOp->getAttrOfType("std.varargs"); TypeConverter::SignatureConversion result(funcOp.getNumArguments()); auto llvmType = getTypeConverter()->convertFunctionSignature( funcOp.getType(), varargsAttr && varargsAttr.getValue(), result); @@ -1407,7 +1407,7 @@ struct FuncOpConversion : public FuncOpConversionBase { return failure(); if (getTypeConverter()->getOptions().emitCWrappers || - funcOp.getAttrOfType(kEmitIfaceAttrName)) { + funcOp->getAttrOfType(kEmitIfaceAttrName)) { if (newFuncOp.isExternal()) wrapExternalFunction(rewriter, funcOp.getLoc(), *getTypeConverter(), funcOp, newFuncOp); @@ -1717,7 +1717,7 @@ struct AssertOpLowering : public ConvertOpToLLVMPattern { AssertOp::Adaptor transformed(operands); // Insert the `abort` declaration if necessary. - auto module = op.getParentOfType(); + auto module = op->getParentOfType(); auto abortFunc = module.lookupSymbol("abort"); if (!abortFunc) { OpBuilder::InsertionGuard guard(rewriter); @@ -2056,7 +2056,7 @@ struct AllocOpLowering : public AllocLikeOpLowering { Type elementPtrType = this->getElementPtrType(memRefType); Value allocatedPtr = createAllocCall(loc, "malloc", elementPtrType, {sizeBytes}, - allocOp.getParentOfType(), rewriter); + allocOp->getParentOfType(), rewriter); Value alignedPtr = allocatedPtr; if (alignment) { @@ -2138,7 +2138,7 @@ struct AlignedAllocOpLowering : public AllocLikeOpLowering { Type elementPtrType = this->getElementPtrType(memRefType); Value allocatedPtr = createAllocCall( loc, "aligned_alloc", elementPtrType, {allocAlignment, sizeBytes}, - allocOp.getParentOfType(), rewriter); + allocOp->getParentOfType(), rewriter); return std::make_tuple(allocatedPtr, allocatedPtr); } @@ -2363,11 +2363,11 @@ struct DeallocOpLowering : public ConvertOpToLLVMPattern { // Insert the `free` declaration if it is not already present. auto freeFunc = - op.getParentOfType().lookupSymbol("free"); + op->getParentOfType().lookupSymbol("free"); if (!freeFunc) { OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointToStart( - op.getParentOfType().getBody()); + op->getParentOfType().getBody()); freeFunc = rewriter.create( rewriter.getUnknownLoc(), "free", LLVM::LLVMType::getFunctionTy(getVoidType(), getVoidPtrType(), diff --git a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp index 1c7aec1f55a3ef..cd5079f50425b0 100644 --- a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp +++ b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp @@ -280,7 +280,7 @@ class AllocOpPattern final : public SPIRVOpLowering { // Insert spv.globalVariable for this allocation. Operation *parent = - SymbolTable::getNearestSymbolTable(operation.getParentOp()); + SymbolTable::getNearestSymbolTable(operation->getParentOp()); if (!parent) return failure(); Location loc = operation.getLoc(); @@ -868,9 +868,9 @@ IntLoadOpPattern::matchAndRewrite(LoadOp loadOp, ArrayRef operands, srcBits, dstBits, rewriter); Value spvLoadOp = rewriter.create( loc, dstType, adjustedPtr, - loadOp.getAttrOfType( + loadOp->getAttrOfType( spirv::attributeName()), - loadOp.getAttrOfType("alignment")); + loadOp->getAttrOfType("alignment")); // Shift the bits to the rightmost. 
// ____XXXX________ -> ____________XXXX diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp index 7c3d1b762d7ee3..005db18c54e51d 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -1670,7 +1670,7 @@ void AffineForOp::setLowerBound(ValueRange lbOperands, AffineMap map) { newOperands.append(ubOperands.begin(), ubOperands.end()); auto iterOperands = getIterOperands(); newOperands.append(iterOperands.begin(), iterOperands.end()); - getOperation()->setOperands(newOperands); + (*this)->setOperands(newOperands); setAttr(getLowerBoundAttrName(), AffineMapAttr::get(map)); } @@ -1683,7 +1683,7 @@ void AffineForOp::setUpperBound(ValueRange ubOperands, AffineMap map) { newOperands.append(ubOperands.begin(), ubOperands.end()); auto iterOperands = getIterOperands(); newOperands.append(iterOperands.begin(), iterOperands.end()); - getOperation()->setOperands(newOperands); + (*this)->setOperands(newOperands); setAttr(getUpperBoundAttrName(), AffineMapAttr::get(map)); } @@ -1902,7 +1902,7 @@ struct SimplifyDeadElse : public OpRewritePattern { static LogicalResult verify(AffineIfOp op) { // Verify that we have a condition attribute. auto conditionAttr = - op.getAttrOfType(op.getConditionAttrName()); + op->getAttrOfType(op.getConditionAttrName()); if (!conditionAttr) return op.emitOpError( "requires an integer set attribute named 'condition'"); @@ -1975,7 +1975,7 @@ static ParseResult parseAffineIfOp(OpAsmParser &parser, static void print(OpAsmPrinter &p, AffineIfOp op) { auto conditionAttr = - op.getAttrOfType(op.getConditionAttrName()); + op->getAttrOfType(op.getConditionAttrName()); p << "affine.if " << conditionAttr; printDimAndSymbolList(op.operand_begin(), op.operand_end(), conditionAttr.getValue().getNumDims(), p); @@ -1999,7 +1999,9 @@ static void print(OpAsmPrinter &p, AffineIfOp op) { } IntegerSet AffineIfOp::getIntegerSet() { - return getAttrOfType(getConditionAttrName()).getValue(); + return (*this) + ->getAttrOfType(getConditionAttrName()) + .getValue(); } void AffineIfOp::setIntegerSet(IntegerSet newSet) { setAttr(getConditionAttrName(), IntegerSetAttr::get(newSet)); @@ -2007,7 +2009,7 @@ void AffineIfOp::setIntegerSet(IntegerSet newSet) { void AffineIfOp::setConditional(IntegerSet set, ValueRange operands) { setIntegerSet(set); - getOperation()->setOperands(operands); + (*this)->setOperands(operands); } void AffineIfOp::build(OpBuilder &builder, OperationState &result, @@ -2120,7 +2122,7 @@ static ParseResult parseAffineLoadOp(OpAsmParser &parser, static void print(OpAsmPrinter &p, AffineLoadOp op) { p << "affine.load " << op.getMemRef() << '['; if (AffineMapAttr mapAttr = - op.getAttrOfType(op.getMapAttrName())) + op->getAttrOfType(op.getMapAttrName())) p.printAffineMapOfSSAIds(mapAttr, op.getMapOperands()); p << ']'; p.printOptionalAttrDict(op.getAttrs(), /*elidedAttrs=*/{op.getMapAttrName()}); @@ -2163,7 +2165,7 @@ LogicalResult verify(AffineLoadOp op) { if (failed(verifyMemoryOpIndexing( op.getOperation(), - op.getAttrOfType(op.getMapAttrName()), + op->getAttrOfType(op.getMapAttrName()), op.getMapOperands(), memrefType, /*numIndexOperands=*/op.getNumOperands() - 1))) return failure(); @@ -2236,7 +2238,7 @@ static void print(OpAsmPrinter &p, AffineStoreOp op) { p << "affine.store " << op.getValueToStore(); p << ", " << op.getMemRef() << '['; if (AffineMapAttr mapAttr = - op.getAttrOfType(op.getMapAttrName())) + op->getAttrOfType(op.getMapAttrName())) p.printAffineMapOfSSAIds(mapAttr, 
op.getMapOperands()); p << ']'; p.printOptionalAttrDict(op.getAttrs(), /*elidedAttrs=*/{op.getMapAttrName()}); @@ -2252,7 +2254,7 @@ LogicalResult verify(AffineStoreOp op) { if (failed(verifyMemoryOpIndexing( op.getOperation(), - op.getAttrOfType(op.getMapAttrName()), + op->getAttrOfType(op.getMapAttrName()), op.getMapOperands(), memrefType, /*numIndexOperands=*/op.getNumOperands() - 2))) return failure(); @@ -2438,7 +2440,7 @@ static ParseResult parseAffinePrefetchOp(OpAsmParser &parser, static void print(OpAsmPrinter &p, AffinePrefetchOp op) { p << AffinePrefetchOp::getOperationName() << " " << op.memref() << '['; - AffineMapAttr mapAttr = op.getAttrOfType(op.getMapAttrName()); + AffineMapAttr mapAttr = op->getAttrOfType(op.getMapAttrName()); if (mapAttr) { SmallVector operands(op.getMapOperands()); p.printAffineMapOfSSAIds(mapAttr, operands); @@ -2454,7 +2456,7 @@ static void print(OpAsmPrinter &p, AffinePrefetchOp op) { } static LogicalResult verify(AffinePrefetchOp op) { - auto mapAttr = op.getAttrOfType(op.getMapAttrName()); + auto mapAttr = op->getAttrOfType(op.getMapAttrName()); if (mapAttr) { AffineMap map = mapAttr.getValue(); if (map.getNumResults() != op.getMemRefType().getRank()) @@ -2624,7 +2626,7 @@ void AffineParallelOp::setLowerBounds(ValueRange lbOperands, AffineMap map) { SmallVector newOperands(lbOperands); newOperands.append(ubOperands.begin(), ubOperands.end()); - getOperation()->setOperands(newOperands); + (*this)->setOperands(newOperands); lowerBoundsMapAttr(AffineMapAttr::get(map)); } @@ -2636,7 +2638,7 @@ void AffineParallelOp::setUpperBounds(ValueRange ubOperands, AffineMap map) { SmallVector newOperands(getLowerBoundsOperands()); newOperands.append(ubOperands.begin(), ubOperands.end()); - getOperation()->setOperands(newOperands); + (*this)->setOperands(newOperands); upperBoundsMapAttr(AffineMapAttr::get(map)); } @@ -2880,7 +2882,7 @@ static ParseResult parseAffineParallelOp(OpAsmParser &parser, //===----------------------------------------------------------------------===// static LogicalResult verify(AffineYieldOp op) { - auto *parentOp = op.getParentOp(); + auto *parentOp = op->getParentOp(); auto results = parentOp->getResults(); auto operands = op.getOperands(); @@ -2960,7 +2962,7 @@ static ParseResult parseAffineVectorLoadOp(OpAsmParser &parser, static void print(OpAsmPrinter &p, AffineVectorLoadOp op) { p << "affine.vector_load " << op.getMemRef() << '['; if (AffineMapAttr mapAttr = - op.getAttrOfType(op.getMapAttrName())) + op->getAttrOfType(op.getMapAttrName())) p.printAffineMapOfSSAIds(mapAttr, op.getMapOperands()); p << ']'; p.printOptionalAttrDict(op.getAttrs(), /*elidedAttrs=*/{op.getMapAttrName()}); @@ -2981,7 +2983,7 @@ static LogicalResult verify(AffineVectorLoadOp op) { MemRefType memrefType = op.getMemRefType(); if (failed(verifyMemoryOpIndexing( op.getOperation(), - op.getAttrOfType(op.getMapAttrName()), + op->getAttrOfType(op.getMapAttrName()), op.getMapOperands(), memrefType, /*numIndexOperands=*/op.getNumOperands() - 1))) return failure(); @@ -3048,7 +3050,7 @@ static void print(OpAsmPrinter &p, AffineVectorStoreOp op) { p << "affine.vector_store " << op.getValueToStore(); p << ", " << op.getMemRef() << '['; if (AffineMapAttr mapAttr = - op.getAttrOfType(op.getMapAttrName())) + op->getAttrOfType(op.getMapAttrName())) p.printAffineMapOfSSAIds(mapAttr, op.getMapOperands()); p << ']'; p.printOptionalAttrDict(op.getAttrs(), /*elidedAttrs=*/{op.getMapAttrName()}); @@ -3059,7 +3061,7 @@ static LogicalResult verify(AffineVectorStoreOp op) { 
MemRefType memrefType = op.getMemRefType(); if (failed(verifyMemoryOpIndexing( op.getOperation(), - op.getAttrOfType(op.getMapAttrName()), + op->getAttrOfType(op.getMapAttrName()), op.getMapOperands(), memrefType, /*numIndexOperands=*/op.getNumOperands() - 2))) return failure(); diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp index e5f5a6d8998fd5..b57b2908d63507 100644 --- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp @@ -221,7 +221,7 @@ LogicalResult mlir::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) { // Canonicalize to remove dead else blocks (happens whenever an 'if' moves up // a sequence of affine.fors that are all perfectly nested). applyPatternsAndFoldGreedily( - hoistedIfOp.getParentWithTrait(), + hoistedIfOp->getParentWithTrait(), frozenPatterns); return success(); diff --git a/mlir/lib/Dialect/Async/IR/Async.cpp b/mlir/lib/Dialect/Async/IR/Async.cpp index 1e84ba3418bb1b..36b3393118c3ad 100644 --- a/mlir/lib/Dialect/Async/IR/Async.cpp +++ b/mlir/lib/Dialect/Async/IR/Async.cpp @@ -102,7 +102,7 @@ Type ValueType::getValueType() { return getImpl()->valueType; } static LogicalResult verify(YieldOp op) { // Get the underlying value types from async values returned from the // parent `async.execute` operation. - auto executeOp = op.getParentOfType(); + auto executeOp = op->getParentOfType(); auto types = llvm::map_range(executeOp.results(), [](const OpResult &result) { return result.getType().cast().getValueType(); }); diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index e9d8009fb2e9dc..ee66ede112146a 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -80,13 +80,13 @@ LogicalResult GPUDialect::verifyOperationAttribute(Operation *op, auto walkResult = module.walk([&module](LaunchFuncOp launchOp) -> WalkResult { // Ignore launches that are nested more or less deep than functions in the // module we are currently checking. - if (!launchOp.getParentOp() || - launchOp.getParentOp()->getParentOp() != module) + if (!launchOp->getParentOp() || + launchOp->getParentOp()->getParentOp() != module) return success(); // Ignore launch ops with missing attributes here. The errors will be // reported by the verifiers of those ops. 
- if (!launchOp.getAttrOfType( + if (!launchOp->getAttrOfType( LaunchFuncOp::getKernelAttrName())) return success(); @@ -434,7 +434,7 @@ void LaunchFuncOp::build(OpBuilder &builder, OperationState &result, result.addOperands({gridSize.x, gridSize.y, gridSize.z, blockSize.x, blockSize.y, blockSize.z}); result.addOperands(kernelOperands); - auto kernelModule = kernelFunc.getParentOfType(); + auto kernelModule = kernelFunc->getParentOfType(); auto kernelSymbol = builder.getSymbolRefAttr( kernelModule.getName(), {builder.getSymbolRefAttr(kernelFunc.getName())}); result.addAttribute(getKernelAttrName(), kernelSymbol); @@ -470,16 +470,17 @@ KernelDim3 LaunchFuncOp::getBlockSizeOperandValues() { } static LogicalResult verify(LaunchFuncOp op) { - auto module = op.getParentOfType(); + auto module = op->getParentOfType(); if (!module) return op.emitOpError("expected to belong to a module"); - if (!module.getAttrOfType(GPUDialect::getContainerModuleAttrName())) + if (!module->getAttrOfType( + GPUDialect::getContainerModuleAttrName())) return op.emitOpError( "expected the closest surrounding module to have the '" + GPUDialect::getContainerModuleAttrName() + "' attribute"); - auto kernelAttr = op.getAttrOfType(op.getKernelAttrName()); + auto kernelAttr = op->getAttrOfType(op.getKernelAttrName()); if (!kernelAttr) return op.emitOpError("symbol reference attribute '" + op.getKernelAttrName() + "' must be specified"); @@ -522,7 +523,7 @@ static void printLaunchFuncOperands(OpAsmPrinter &printer, Operation *, /// workgroup memory. BlockArgument GPUFuncOp::addWorkgroupAttribution(Type type) { auto attrName = getNumWorkgroupAttributionsAttrName(); - auto attr = getAttrOfType(attrName); + auto attr = (*this)->getAttrOfType(attrName); setAttr(attrName, IntegerAttr::get(attr.getType(), attr.getValue() + 1)); return getBody().insertArgument(getType().getNumInputs() + attr.getInt(), type); @@ -777,7 +778,7 @@ static ParseResult parseReturnOp(OpAsmParser &parser, OperationState &result) { } static LogicalResult verify(gpu::ReturnOp returnOp) { - GPUFuncOp function = returnOp.getParentOfType(); + GPUFuncOp function = returnOp->getParentOfType(); FunctionType funType = function.getType(); diff --git a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp index f2c7010be29189..eaa777c380604b 100644 --- a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp @@ -140,7 +140,7 @@ struct GpuAsyncRegionPass::DeferWaitCallback { ~DeferWaitCallback() { for (size_t i = 0; i < worklist.size(); ++i) { auto waitOp = worklist[i]; - auto executeOp = waitOp.getParentOfType(); + auto executeOp = waitOp->getParentOfType(); auto numDependencies = waitOp.asyncDependencies().size(); // Erase `gpu.wait` and return async dependencies from region instead. 
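Editorial aside, not part of the patch: a condensed sketch of the launch-site checks in the verify(LaunchFuncOp) hunk above, spelled with the Operation-level accessors this patch switches to. The error messages are abbreviated, the helper is illustrative, and the includes assume the GPU dialect header layout of this revision.

#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/IR/Module.h"

static mlir::LogicalResult verifyLaunchSiteSketch(mlir::Operation *launchOp) {
  auto module = launchOp->getParentOfType<mlir::ModuleOp>();
  if (!module)
    return launchOp->emitOpError("expected to belong to a module");
  // The surrounding module must carry the gpu.container_module unit attribute.
  if (!module->getAttrOfType<mlir::UnitAttr>(
          mlir::gpu::GPUDialect::getContainerModuleAttrName()))
    return launchOp->emitOpError("missing container-module attribute");
  return mlir::success();
}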
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp index c7be304236c366..ac5fe8c9458d34 100644 --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -243,7 +243,7 @@ class GpuKernelOutliningPass auto funcWalkResult = func.walk([&](gpu::LaunchOp op) { llvm::SetVector operands; std::string kernelFnName = - Twine(op.getParentOfType().getName(), "_kernel").str(); + Twine(op->getParentOfType().getName(), "_kernel").str(); // Pull in instructions that can be sunk if (failed(sinkOperationsIntoLaunchOp(op))) diff --git a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp index b953bad6762760..43a27db9df6924 100644 --- a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp @@ -123,7 +123,7 @@ static void mapParallelOp(ParallelOp parallelOp, MappingLevel mappingLevel = MapGrid) { // Do not try to add a mapping to already mapped loops or nested loops. if (parallelOp.getAttr(getMappingAttrName()) || - ((mappingLevel == MapGrid) && parallelOp.getParentOfType())) + ((mappingLevel == MapGrid) && parallelOp->getParentOfType())) return; MLIRContext *ctx = parallelOp.getContext(); diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index f481b702822b6f..4c48dccad096fa 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -457,7 +457,7 @@ static ParseResult parseInvokeOp(OpAsmParser &parser, OperationState &result) { static LogicalResult verify(LandingpadOp op) { Value value; - if (LLVMFuncOp func = op.getParentOfType()) { + if (LLVMFuncOp func = op->getParentOfType()) { if (!func.personality().hasValue()) return op.emitError( "llvm.landingpad needs to be in a function with a personality"); @@ -985,11 +985,13 @@ static OpTy lookupSymbolInModule(Operation *parent, StringRef name) { } GlobalOp AddressOfOp::getGlobal() { - return lookupSymbolInModule(getParentOp(), global_name()); + return lookupSymbolInModule((*this)->getParentOp(), + global_name()); } LLVMFuncOp AddressOfOp::getFunction() { - return lookupSymbolInModule(getParentOp(), global_name()); + return lookupSymbolInModule((*this)->getParentOp(), + global_name()); } static LogicalResult verify(AddressOfOp op) { @@ -1203,7 +1205,7 @@ static LogicalResult verify(GlobalOp op) { if (!LLVMPointerType::isValidElementType(op.getType())) return op.emitOpError( "expects type to be a valid element type for an LLVM pointer"); - if (op.getParentOp() && !satisfiesLLVMModule(op.getParentOp())) + if (op->getParentOp() && !satisfiesLLVMModule(op->getParentOp())) return op.emitOpError("must appear at the module level"); if (auto strAttr = op.getValueOrNull().dyn_cast_or_null()) { diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 19c42c1e990cb8..707ff7c1b089ba 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -109,8 +109,8 @@ static LogicalResult verify(MmaOp op) { "s or 8 floats"); } - auto alayout = op.getAttrOfType("alayout"); - auto blayout = op.getAttrOfType("blayout"); + auto alayout = op->getAttrOfType("alayout"); + auto blayout = op->getAttrOfType("blayout"); if (!(alayout && blayout) || !(alayout.getValue() == "row" || alayout.getValue() == "col") || diff --git 
a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 92668be811f687..9d7148fe68dd4b 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -1139,7 +1139,7 @@ static LogicalResult verifyYield(linalg::YieldOp op, } static LogicalResult verify(linalg::YieldOp op) { - auto *parentOp = op.getParentOp(); + auto *parentOp = op->getParentOp(); if (parentOp->getNumRegions() != 1 || parentOp->getRegion(0).empty()) return op.emitOpError("expected single non-empty parent region"); diff --git a/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp b/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp index bc86dcd9e05011..652a036838ede7 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp @@ -67,7 +67,7 @@ void mlir::linalg::CodegenStrategy::transform(FuncOp func) const { // Post staged patterns transforms //===--------------------------------------------------------------------===// - ModuleOp module = func.getParentOfType(); + ModuleOp module = func->getParentOfType(); // Programmatic splitting of slow/fast path vector transfers. OwningRewritePatternList patterns; diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp index 9aeb39e6b565ab..9e7e7efdd1361f 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp @@ -89,8 +89,8 @@ void mlir::linalg::hoistRedundantVectorTransfers(FuncOp func) { func.walk([&](vector::TransferReadOp transferRead) { LLVM_DEBUG(DBGS() << "Candidate for hoisting: " << *transferRead.getOperation() << "\n"); - auto loop = dyn_cast(transferRead.getParentOp()); - LLVM_DEBUG(DBGS() << "Parent op: " << *transferRead.getParentOp() + auto loop = dyn_cast(transferRead->getParentOp()); + LLVM_DEBUG(DBGS() << "Parent op: " << *transferRead->getParentOp() << "\n"); if (!loop) return WalkResult::advance(); diff --git a/mlir/lib/Dialect/PDL/IR/PDL.cpp b/mlir/lib/Dialect/PDL/IR/PDL.cpp index 951c76246b1247..49f7f273af19f8 100644 --- a/mlir/lib/Dialect/PDL/IR/PDL.cpp +++ b/mlir/lib/Dialect/PDL/IR/PDL.cpp @@ -97,7 +97,7 @@ static LogicalResult verify(AttributeOp op) { Value attrType = op.type(); Optional attrValue = op.value(); - if (!attrValue && isa(op.getParentOp())) + if (!attrValue && isa(op->getParentOp())) return op.emitOpError("expected constant value when specified within a " "`pdl.rewrite`"); if (attrValue && attrType) @@ -273,7 +273,7 @@ static LogicalResult verifyResultTypesAreInferrable(OperationOp op, } static LogicalResult verify(OperationOp op) { - bool isWithinRewrite = isa(op.getParentOp()); + bool isWithinRewrite = isa(op->getParentOp()); if (isWithinRewrite && !op.name()) return op.emitOpError("must have an operation name when nested within " "a `pdl.rewrite`"); diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp index 5bff924b8e7df7..1ea0571cf69024 100644 --- a/mlir/lib/Dialect/SCF/SCF.cpp +++ b/mlir/lib/Dialect/SCF/SCF.cpp @@ -1172,7 +1172,7 @@ static void print(OpAsmPrinter &p, ReduceOp op) { static LogicalResult verify(ReduceReturnOp op) { // The type of the return value should be the same type as the type of the // operand of the enclosing ReduceOp. 
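Editorial aside, not part of the patch: the terminator verifiers touched here (linalg.yield above, scf.reduce.return just below, shape.yield later on) share one shape, fetch the parent through the Operation and compare against it. A stripped-down sketch with an illustrative diagnostic.

#include "mlir/IR/Operation.h"
#include "mlir/Support/LogicalResult.h"

static mlir::LogicalResult verifyYieldLikeSketch(mlir::Operation *yieldOp) {
  mlir::Operation *parent = yieldOp->getParentOp();
  if (!parent || parent->getNumResults() != yieldOp->getNumOperands())
    return yieldOp->emitOpError("operand count does not match parent results");
  return mlir::success();
}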
- auto reduceOp = cast(op.getParentOp()); + auto reduceOp = cast(op->getParentOp()); Type reduceType = reduceOp.operand().getType(); if (reduceType != op.result().getType()) return op.emitOpError() << "needs to have type " << reduceType diff --git a/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp b/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp index bf9f797a118ea1..9197375cc237b6 100644 --- a/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp @@ -144,7 +144,7 @@ void mlir::scf::populateSCFStructuralTypeConversionsAndLegality( target.addDynamicallyLegalOp([&](scf::YieldOp op) { // We only have conversions for a subset of ops that use scf.yield // terminators. - if (!isa(op.getParentOp())) + if (!isa(op->getParentOp())) return true; return typeConverter.isLegal(op.getOperandTypes()); }); diff --git a/mlir/lib/Dialect/SCF/Transforms/Utils.cpp b/mlir/lib/Dialect/SCF/Transforms/Utils.cpp index a451c167aeea33..c30c823792ae2b 100644 --- a/mlir/lib/Dialect/SCF/Transforms/Utils.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/Utils.cpp @@ -89,7 +89,7 @@ void mlir::outlineIfOp(OpBuilder &b, scf::IfOp ifOp, FuncOp *thenFn, // Outline before current function. OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(ifOp.getParentOfType()); + b.setInsertionPoint(ifOp->getParentOfType()); llvm::SetVector captures; getUsedValuesDefinedAbove(ifOrElseRegion, captures); diff --git a/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp b/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp index 10260b8029c6a4..756e318ac39939 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp @@ -523,7 +523,7 @@ static spirv::GlobalVariableOp getBuiltinVariable(Block &body, // Look through all global variables in the given `body` block and check if // there is a spv.globalVariable that has the same `builtin` attribute. 
for (auto varOp : body.getOps()) { - if (auto builtinAttr = varOp.getAttrOfType( + if (auto builtinAttr = varOp->getAttrOfType( spirv::SPIRVDialect::getAttributeName( spirv::Decoration::BuiltIn))) { auto varBuiltIn = spirv::symbolizeBuiltIn(builtinAttr.getValue()); diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp index 19e0f98f26e96b..03e416e9544140 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp @@ -1046,7 +1046,7 @@ void spirv::AddressOfOp::build(OpBuilder &builder, OperationState &state, static LogicalResult verify(spirv::AddressOfOp addressOfOp) { auto varOp = dyn_cast_or_null( - SymbolTable::lookupNearestSymbolFrom(addressOfOp.getParentOp(), + SymbolTable::lookupNearestSymbolFrom(addressOfOp->getParentOp(), addressOfOp.variable())); if (!varOp) { return addressOfOp.emitOpError("expected spv.globalVariable symbol"); @@ -1849,7 +1849,7 @@ static LogicalResult verify(spirv::FunctionCallOp functionCallOp) { auto funcOp = dyn_cast_or_null(SymbolTable::lookupNearestSymbolFrom( - functionCallOp.getParentOp(), fnName)); + functionCallOp->getParentOp(), fnName)); if (!funcOp) { return functionCallOp.emitOpError("callee function '") << fnName << "' not found in nearest symbol table"; @@ -1898,7 +1898,7 @@ static LogicalResult verify(spirv::FunctionCallOp functionCallOp) { } CallInterfaceCallable spirv::FunctionCallOp::getCallableForCallee() { - return getAttrOfType(kCallee); + return (*this)->getAttrOfType(kCallee); } Operation::operand_range spirv::FunctionCallOp::getArgOperands() { @@ -2005,9 +2005,9 @@ static LogicalResult verify(spirv::GlobalVariableOp varOp) { } if (auto init = - varOp.getAttrOfType(kInitializerAttrName)) { + varOp->getAttrOfType(kInitializerAttrName)) { Operation *initOp = SymbolTable::lookupNearestSymbolFrom( - varOp.getParentOp(), init.getValue()); + varOp->getParentOp(), init.getValue()); // TODO: Currently only variable initialization with specialization // constants and other variables is supported. They could be normal // constants in the module scope as well. @@ -2066,7 +2066,7 @@ static LogicalResult verify(spirv::GroupNonUniformBroadcastOp broadcastOp) { // SPIR-V spec: "Before version 1.5, Id must come from a // constant instruction. 
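Editorial aside, not part of the patch: the SPIR-V verifiers above (AddressOf, FunctionCall, GlobalVariable initializers, SpecConstantComposite) all share one resolution step, looking a symbol up relative to the op's parent. A generic sketch; the dyn_cast to the concrete op kind is left to the caller.

#include "mlir/IR/SymbolTable.h"

static mlir::Operation *resolveNearestSymbolSketch(mlir::Operation *op,
                                                   llvm::StringRef symbol) {
  // Start at the parent so the lookup covers the op's own enclosing symbol
  // table; returns nullptr when the symbol is not found.
  return mlir::SymbolTable::lookupNearestSymbolFrom(op->getParentOp(), symbol);
}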
auto targetEnv = spirv::getDefaultTargetEnv(broadcastOp.getContext()); - if (auto spirvModule = broadcastOp.getParentOfType()) + if (auto spirvModule = broadcastOp->getParentOfType()) targetEnv = spirv::lookupTargetEnvOrDefault(spirvModule); if (targetEnv.getVersion() < spirv::Version::V_1_5) { @@ -2407,12 +2407,12 @@ void spirv::LoopOp::addEntryAndMergeBlock() { //===----------------------------------------------------------------------===// static LogicalResult verify(spirv::MergeOp mergeOp) { - auto *parentOp = mergeOp.getParentOp(); + auto *parentOp = mergeOp->getParentOp(); if (!parentOp || !isa(parentOp)) return mergeOp.emitOpError( "expected parent op to be 'spv.selection' or 'spv.loop'"); - Block &parentLastBlock = mergeOp.getParentRegion()->back(); + Block &parentLastBlock = mergeOp->getParentRegion()->back(); if (mergeOp.getOperation() != parentLastBlock.getTerminator()) return mergeOp.emitOpError( "can only be used in the last block of 'spv.selection' or 'spv.loop'"); @@ -2577,7 +2577,7 @@ static LogicalResult verify(spirv::ModuleOp moduleOp) { static LogicalResult verify(spirv::ReferenceOfOp referenceOfOp) { auto *specConstSym = SymbolTable::lookupNearestSymbolFrom( - referenceOfOp.getParentOp(), referenceOfOp.spec_const()); + referenceOfOp->getParentOp(), referenceOfOp.spec_const()); Type constType; auto specConstOp = dyn_cast_or_null(specConstSym); @@ -2792,13 +2792,13 @@ static ParseResult parseSpecConstantOp(OpAsmParser &parser, static void print(spirv::SpecConstantOp constOp, OpAsmPrinter &printer) { printer << spirv::SpecConstantOp::getOperationName() << ' '; printer.printSymbolName(constOp.sym_name()); - if (auto specID = constOp.getAttrOfType(kSpecIdAttrName)) + if (auto specID = constOp->getAttrOfType(kSpecIdAttrName)) printer << ' ' << kSpecIdAttrName << '(' << specID.getInt() << ')'; printer << " = " << constOp.default_value(); } static LogicalResult verify(spirv::SpecConstantOp constOp) { - if (auto specID = constOp.getAttrOfType(kSpecIdAttrName)) + if (auto specID = constOp->getAttrOfType(kSpecIdAttrName)) if (specID.getValue().isNegative()) return constOp.emitOpError("SpecId cannot be negative"); @@ -3383,7 +3383,7 @@ static LogicalResult verify(spirv::SpecConstantCompositeOp constOp) { auto constituentSpecConstOp = dyn_cast(SymbolTable::lookupNearestSymbolFrom( - constOp.getParentOp(), constituent.getValue())); + constOp->getParentOp(), constituent.getValue())); if (constituentSpecConstOp.default_value().getType() != cType.getElementType(index)) @@ -3400,7 +3400,7 @@ static LogicalResult verify(spirv::SpecConstantCompositeOp constOp) { //===----------------------------------------------------------------------===// static LogicalResult verify(spirv::YieldOp yieldOp) { - Operation *parentOp = yieldOp.getParentOp(); + Operation *parentOp = yieldOp->getParentOp(); if (!parentOp || !isa(parentOp)) return yieldOp.emitOpError( diff --git a/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp b/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp index b7236c17c00040..691cf973f06cbc 100644 --- a/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp +++ b/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp @@ -638,8 +638,8 @@ void Serializer::processExtension() { } void Serializer::processMemoryModel() { - uint32_t mm = module.getAttrOfType("memory_model").getInt(); - uint32_t am = module.getAttrOfType("addressing_model").getInt(); + uint32_t mm = module->getAttrOfType("memory_model").getInt(); + uint32_t am = module->getAttrOfType("addressing_model").getInt(); 
encodeInstructionInto(memoryModel, spirv::Opcode::OpMemoryModel, {am, mm}); } @@ -656,7 +656,7 @@ LogicalResult Serializer::processSpecConstantOp(spirv::SpecConstantOp op) { if (auto resultID = prepareConstantScalar(op.getLoc(), op.default_value(), /*isSpec=*/true)) { // Emit the OpDecorate instruction for SpecId. - if (auto specID = op.getAttrOfType("spec_id")) { + if (auto specID = op->getAttrOfType("spec_id")) { auto val = static_cast(specID.getInt()); emitDecoration(resultID, spirv::Decoration::SpecId, {val}); } @@ -1973,7 +1973,7 @@ Serializer::processOp(spirv::ControlBarrierOp op) { SmallVector operands; for (auto argName : argNames) { - auto argIntAttr = op.getAttrOfType(argName); + auto argIntAttr = op->getAttrOfType(argName); auto operand = prepareConstantInt(op.getLoc(), argIntAttr); if (!operand) { return failure(); @@ -2020,7 +2020,7 @@ Serializer::processOp(spirv::MemoryBarrierOp op) { SmallVector operands; for (auto argName : argNames) { - auto argIntAttr = op.getAttrOfType(argName); + auto argIntAttr = op->getAttrOfType(argName); auto operand = prepareConstantInt(op.getLoc(), argIntAttr); if (!operand) { return failure(); diff --git a/mlir/lib/Dialect/SPIRV/Transforms/DecorateSPIRVCompositeTypeLayoutPass.cpp b/mlir/lib/Dialect/SPIRV/Transforms/DecorateSPIRVCompositeTypeLayoutPass.cpp index 53160427cf39c2..282c6bbfb65194 100644 --- a/mlir/lib/Dialect/SPIRV/Transforms/DecorateSPIRVCompositeTypeLayoutPass.cpp +++ b/mlir/lib/Dialect/SPIRV/Transforms/DecorateSPIRVCompositeTypeLayoutPass.cpp @@ -63,7 +63,7 @@ class SPIRVAddressOfOpLayoutInfoDecoration LogicalResult matchAndRewrite(spirv::AddressOfOp op, PatternRewriter &rewriter) const override { - auto spirvModule = op.getParentOfType(); + auto spirvModule = op->getParentOfType(); auto varName = op.variable(); auto varOp = spirvModule.lookupSymbol(varName); diff --git a/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp b/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp index 24679e4d523094..12f0152079ccbe 100644 --- a/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp +++ b/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp @@ -27,7 +27,7 @@ static spirv::GlobalVariableOp createGlobalVarForEntryPointArgument(OpBuilder &builder, spirv::FuncOp funcOp, unsigned argIndex, spirv::InterfaceVarABIAttr abiInfo) { - auto spirvModule = funcOp.getParentOfType(); + auto spirvModule = funcOp->getParentOfType(); if (!spirvModule) return nullptr; @@ -70,7 +70,7 @@ createGlobalVarForEntryPointArgument(OpBuilder &builder, spirv::FuncOp funcOp, static LogicalResult getInterfaceVariables(spirv::FuncOp funcOp, SmallVectorImpl &interfaceVars) { - auto module = funcOp.getParentOfType(); + auto module = funcOp->getParentOfType(); if (!module) { return failure(); } @@ -108,13 +108,13 @@ static LogicalResult lowerEntryPointABIAttr(spirv::FuncOp funcOp, OpBuilder &builder) { auto entryPointAttrName = spirv::getEntryPointABIAttrName(); auto entryPointAttr = - funcOp.getAttrOfType(entryPointAttrName); + funcOp->getAttrOfType(entryPointAttrName); if (!entryPointAttr) { return failure(); } OpBuilder::InsertionGuard moduleInsertionGuard(builder); - auto spirvModule = funcOp.getParentOfType(); + auto spirvModule = funcOp->getParentOfType(); builder.setInsertionPoint(spirvModule.body().front().getTerminator()); // Adds the spv.EntryPointOp after collecting all the interface variables @@ -169,7 +169,7 @@ class LowerABIAttributesPass final LogicalResult ProcessInterfaceVarABI::matchAndRewrite( spirv::FuncOp funcOp, 
ArrayRef operands, ConversionPatternRewriter &rewriter) const { - if (!funcOp.getAttrOfType( + if (!funcOp->getAttrOfType( spirv::getEntryPointABIAttrName())) { // TODO: Non-entry point functions are not handled. return failure(); @@ -271,7 +271,7 @@ void LowerABIAttributesPass::runOnOperation() { SmallVector entryPointFns; auto entryPointAttrName = spirv::getEntryPointABIAttrName(); module.walk([&](spirv::FuncOp funcOp) { - if (funcOp.getAttrOfType(entryPointAttrName)) { + if (funcOp->getAttrOfType(entryPointAttrName)) { entryPointFns.push_back(funcOp); } }); diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp index acb35b916f7e75..44f897cbf505b9 100644 --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -832,7 +832,7 @@ void SizeToIndexOp::getCanonicalizationPatterns( //===----------------------------------------------------------------------===// static LogicalResult verify(shape::YieldOp op) { - auto *parentOp = op.getParentOp(); + auto *parentOp = op->getParentOp(); auto results = parentOp->getResults(); auto operands = op.getOperands(); diff --git a/mlir/lib/Dialect/Shape/Transforms/ShapeToShapeLowering.cpp b/mlir/lib/Dialect/Shape/Transforms/ShapeToShapeLowering.cpp index 49af5d7ce9a203..6190ff351ebf48 100644 --- a/mlir/lib/Dialect/Shape/Transforms/ShapeToShapeLowering.cpp +++ b/mlir/lib/Dialect/Shape/Transforms/ShapeToShapeLowering.cpp @@ -34,7 +34,7 @@ NumElementsOpConverter::matchAndRewrite(NumElementsOp op, PatternRewriter &rewriter) const { auto loc = op.getLoc(); Type valueType = op.getResult().getType(); - Value init = op.getDialect() + Value init = op->getDialect() ->materializeConstant(rewriter, rewriter.getIndexAttr(1), valueType, loc) ->getResult(0); diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 300b8a42656c8c..34c3da9b5eca04 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -287,7 +287,7 @@ static LogicalResult verify(AllocOp op) { return verifyAllocLikeOp(op); } static LogicalResult verify(AllocaOp op) { // An alloca op needs to have an ancestor with an allocation scope trait. - if (!op.getParentWithTrait()) + if (!op->getParentWithTrait()) return op.emitOpError( "requires an ancestor op with AutomaticAllocationScope trait"); @@ -547,7 +547,7 @@ static void print(OpAsmPrinter &p, GenericAtomicRMWOp op) { //===----------------------------------------------------------------------===// static LogicalResult verify(AtomicYieldOp op) { - Type parentType = op.getParentOp()->getResultTypes().front(); + Type parentType = op->getParentOp()->getResultTypes().front(); Type resultType = op.result().getType(); if (parentType != resultType) return op.emitOpError() << "types mismatch between yield op: " << resultType @@ -660,9 +660,7 @@ Block *BranchOp::getDest() { return getSuccessor(); } void BranchOp::setDest(Block *block) { return setSuccessor(block); } -void BranchOp::eraseOperand(unsigned index) { - getOperation()->eraseOperand(index); -} +void BranchOp::eraseOperand(unsigned index) { (*this)->eraseOperand(index); } void BranchOp::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { @@ -684,7 +682,7 @@ Block *BranchOp::getSuccessorForOperands(ArrayRef) { return dest(); } LogicalResult CallOp::verifySymbolUses(SymbolTableCollection &symbolTable) { // Check that the callee attribute was specified. 
- auto fnAttr = getAttrOfType("callee"); + auto fnAttr = (*this)->getAttrOfType("callee"); if (!fnAttr) return emitOpError("requires a 'callee' symbol reference attribute"); FuncOp fn = symbolTable.lookupNearestSymbolFrom(*this, fnAttr); @@ -1176,7 +1174,7 @@ static LogicalResult verify(ConstantOp &op) { // Try to find the referenced function. auto fn = - op.getParentOfType().lookupSymbol(fnAttr.getValue()); + op->getParentOfType().lookupSymbol(fnAttr.getValue()); if (!fn) return op.emitOpError() << "reference to undefined function '" << fnAttr.getValue() << "'"; @@ -2626,7 +2624,7 @@ OpFoldResult RankOp::fold(ArrayRef operands) { //===----------------------------------------------------------------------===// static LogicalResult verify(ReturnOp op) { - auto function = cast(op.getParentOp()); + auto function = cast(op->getParentOp()); // The operand number and types must match the function signature. const auto &results = function.getType().getResults(); diff --git a/mlir/lib/Dialect/Vector/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransferOpTransforms.cpp index 8c00cadf7032aa..b7de983dd3b1cc 100644 --- a/mlir/lib/Dialect/Vector/VectorTransferOpTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransferOpTransforms.cpp @@ -190,7 +190,7 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) { if (lastwrite == nullptr) return; - Region *topRegion = lastwrite.getParentRegion(); + Region *topRegion = lastwrite->getParentRegion(); Operation *readAncestor = findAncestorOpInRegion(topRegion, read); assert(readAncestor && "read op should be recursively part of the top region"); diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index f15bfe90132686..f7bfc66c3fdce6 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -2417,7 +2417,7 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer( // Top of the function `alloc` for transient storage. Value alloc; { - FuncOp funcOp = xferOp.getParentOfType(); + FuncOp funcOp = xferOp->getParentOfType(); OpBuilder::InsertionGuard guard(b); b.setInsertionPointToStart(&funcOp.getRegion().front()); auto shape = xferOp.getVectorType().getShape(); diff --git a/mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp b/mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp index 92c0cb52a395e6..c091c72c7702f2 100644 --- a/mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp @@ -86,7 +86,8 @@ mlir::translateModuleToROCDLIR(Operation *m, llvm::LLVMContext &llvmContext, // 2. Insert amdgpu-flat-workgroup-size(1, 1024) attribute. for (auto func : ModuleTranslation::getModuleBody(m).getOps()) { - if (!func.getAttrOfType(gpu::GPUDialect::getKernelFuncAttrName())) + if (!func->getAttrOfType( + gpu::GPUDialect::getKernelFuncAttrName())) continue; auto *llvmFunc = llvmModule->getFunction(func.getName()); diff --git a/mlir/lib/Transforms/Inliner.cpp b/mlir/lib/Transforms/Inliner.cpp index 6f144e7a0e833f..64c7ca86dc1e38 100644 --- a/mlir/lib/Transforms/Inliner.cpp +++ b/mlir/lib/Transforms/Inliner.cpp @@ -414,7 +414,7 @@ static bool shouldInline(ResolvedCall &resolvedCall) { // Don't allow inlining if the target is an ancestor of the call. This // prevents inlining recursively. if (resolvedCall.targetNode->getCallableRegion()->isAncestor( - resolvedCall.call.getParentRegion())) + resolvedCall.call->getParentRegion())) return false; // Otherwise, inline. 
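Editorial aside, not part of the patch: both the vector-transfer splitting and the loop-promotion hunks in this area materialize values at the top of the enclosing function, reached via op->getParentOfType<FuncOp>(). A small sketch of that insertion pattern; the constant being created and the include set (per this revision) are illustrative.

#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Function.h"

static mlir::Value createEntryBlockIndexSketch(mlir::OpBuilder &b,
                                               mlir::Operation *op,
                                               int64_t value) {
  auto func = op->getParentOfType<mlir::FuncOp>();
  // Temporarily hop to the function entry block, then restore the builder.
  mlir::OpBuilder::InsertionGuard guard(b);
  b.setInsertionPointToStart(&func.getBody().front());
  return b.create<mlir::ConstantIndexOp>(op->getLoc(), value).getResult();
}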
diff --git a/mlir/lib/Transforms/LoopCoalescing.cpp b/mlir/lib/Transforms/LoopCoalescing.cpp index 59b2fb9333dabc..ed7bff36321e1b 100644 --- a/mlir/lib/Transforms/LoopCoalescing.cpp +++ b/mlir/lib/Transforms/LoopCoalescing.cpp @@ -25,7 +25,7 @@ struct LoopCoalescingPass : public LoopCoalescingBase { func.walk([](scf::ForOp op) { // Ignore nested loops. - if (op.getParentOfType()) + if (op->getParentOfType()) return; SmallVector loops; diff --git a/mlir/lib/Transforms/Utils/InliningUtils.cpp b/mlir/lib/Transforms/Utils/InliningUtils.cpp index 8b1e2fa630ec66..7d18de076e4bf2 100644 --- a/mlir/lib/Transforms/Utils/InliningUtils.cpp +++ b/mlir/lib/Transforms/Utils/InliningUtils.cpp @@ -329,7 +329,7 @@ LogicalResult mlir::inlineCall(InlinerInterface &interface, // Builder used for any conversion operations that need to be materialized. OpBuilder castBuilder(call); Location castLoc = call.getLoc(); - auto *callInterface = interface.getInterfaceFor(call.getDialect()); + const auto *callInterface = interface.getInterfaceFor(call->getDialect()); // Map the provided call operands to the arguments of the region. BlockAndValueMapping mapper; diff --git a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp b/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp index 77b6744b07a6a9..9759300f2e42ca 100644 --- a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp @@ -402,7 +402,7 @@ void mlir::fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, bool mlir::getLoopNestStats(AffineForOp forOpRoot, LoopNestStats *stats) { auto walkResult = forOpRoot.walk([&](AffineForOp forOp) { auto *childForOp = forOp.getOperation(); - auto *parentForOp = forOp.getParentOp(); + auto *parentForOp = forOp->getParentOp(); if (!llvm::isa(parentForOp)) { if (!isa(parentForOp)) { LLVM_DEBUG(llvm::dbgs() << "Expected parent AffineForOp"); diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index bfbc2211e06172..1f549ca9efda11 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -163,7 +163,7 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) { auto *parentBlock = forOp->getBlock(); if (!iv.use_empty()) { if (forOp.hasConstantLowerBound()) { - OpBuilder topBuilder(forOp.getParentOfType().getBody()); + OpBuilder topBuilder(forOp->getParentOfType().getBody()); auto constOp = topBuilder.create( forOp.getLoc(), forOp.getConstantLowerBound()); iv.replaceAllUsesWith(constOp); @@ -1484,7 +1484,7 @@ mlir::isPerfectlyNested(ArrayRef loops) { auto enclosingLoop = loops.front(); for (auto loop : loops.drop_front()) { - auto parentForOp = dyn_cast(loop.getParentOp()); + auto parentForOp = dyn_cast(loop->getParentOp()); // parentForOp's body should be just this loop and the terminator. if (parentForOp != enclosingLoop || !hasTwoElements(parentForOp.getBody())) return false; @@ -3073,7 +3073,7 @@ mlir::separateFullTiles(MutableArrayRef inputNest, // Each successive for op has to be nested in the other. 
auto prevLoop = firstLoop; for (auto loop : inputNest.drop_front(1)) { - assert(loop.getParentOp() == prevLoop && "input not contiguously nested"); + assert(loop->getParentOp() == prevLoop && "input not contiguously nested"); prevLoop = loop; } diff --git a/mlir/test/lib/Dialect/Shape/TestShapeFunctions.cpp b/mlir/test/lib/Dialect/Shape/TestShapeFunctions.cpp index 688f24e5ec47c8..b7127c5edf326f 100644 --- a/mlir/test/lib/Dialect/Shape/TestShapeFunctions.cpp +++ b/mlir/test/lib/Dialect/Shape/TestShapeFunctions.cpp @@ -57,7 +57,7 @@ void ReportShapeFnPass::runOnOperation() { module.getBodyRegion().walk([&](FuncOp func) { // Skip ops in the shape function library. - if (isa(func.getParentOp())) + if (isa(func->getParentOp())) return; func.walk([&](Operation *op) { remarkShapeFn(op); }); diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp index 4e2c5b5c6a95ee..c7e1b7f48f4378 100644 --- a/mlir/test/lib/Dialect/Test/TestDialect.cpp +++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp @@ -726,7 +726,7 @@ struct TestResource : public SideEffects::Resource::Base { void SideEffectOp::getEffects( SmallVectorImpl &effects) { // Check for an effects attribute on the op instance. - ArrayAttr effectsAttr = getAttrOfType("effects"); + ArrayAttr effectsAttr = (*this)->getAttrOfType("effects"); if (!effectsAttr) return; @@ -761,7 +761,7 @@ void SideEffectOp::getEffects( void SideEffectOp::getEffects( SmallVectorImpl &effects) { - auto effectsAttr = getAttrOfType("effect_parameter"); + auto effectsAttr = (*this)->getAttrOfType("effect_parameter"); if (!effectsAttr) return; diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 7547500b2835d7..1579e53e5277e8 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -321,7 +321,7 @@ def ConversionCallOp : TEST_Op<"conversion_call_op", /// Return the callee of this operation. CallInterfaceCallable getCallableForCallee() { - return getAttrOfType("callee"); + return (*this)->getAttrOfType("callee"); } }]; } diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index 437a72176cf6e9..5aab1bbfe46363 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -899,7 +899,7 @@ struct TestMergeSingleBlockOps matchAndRewrite(SingleBlockImplicitTerminatorOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { SingleBlockImplicitTerminatorOp parentOp = - op.getParentOfType(); + op->getParentOfType(); if (!parentOp) return failure(); Block &innerBlock = op.region().front(); @@ -936,14 +936,14 @@ struct TestMergeBlocksPatternDriver /// Only allow `test.br` within test.merge_blocks op. target.addDynamicallyLegalOp([&](TestBranchOp op) -> bool { - return op.getParentOfType(); + return op->getParentOfType(); }); /// Expect that all nested test.SingleBlockImplicitTerminator ops are /// inlined. 
target.addDynamicallyLegalOp( [&](SingleBlockImplicitTerminatorOp op) -> bool { - return !op.getParentOfType(); + return !op->getParentOfType(); }); DenseSet unlegalizedOps; diff --git a/mlir/test/lib/IR/TestFunc.cpp b/mlir/test/lib/IR/TestFunc.cpp index cdb4663e0b3675..10326ebe9631d0 100644 --- a/mlir/test/lib/IR/TestFunc.cpp +++ b/mlir/test/lib/IR/TestFunc.cpp @@ -68,7 +68,7 @@ struct TestFuncSetType SymbolTable symbolTable(module); for (FuncOp func : module.getOps()) { - auto sym = func.getAttrOfType("test.set_type_from"); + auto sym = func->getAttrOfType("test.set_type_from"); if (!sym) continue; func.setType(symbolTable.lookup(sym.getValue()).getType()); diff --git a/mlir/test/lib/Transforms/TestAffineLoopParametricTiling.cpp b/mlir/test/lib/Transforms/TestAffineLoopParametricTiling.cpp index cd4d91f60bc28e..4872dcc6e77c53 100644 --- a/mlir/test/lib/Transforms/TestAffineLoopParametricTiling.cpp +++ b/mlir/test/lib/Transforms/TestAffineLoopParametricTiling.cpp @@ -33,7 +33,7 @@ static void checkIfTilingParametersExist(ArrayRef band) { assert(!band.empty() && "no loops in input band"); AffineForOp topLoop = band[0]; - if (FuncOp funcOp = dyn_cast(topLoop.getParentOp())) + if (FuncOp funcOp = dyn_cast(topLoop->getParentOp())) assert(funcOp.getNumArguments() >= band.size() && "Too few tile sizes"); } @@ -44,7 +44,7 @@ static void checkIfTilingParametersExist(ArrayRef band) { static void getTilingParameters(ArrayRef band, SmallVectorImpl &tilingParameters) { AffineForOp topLoop = band[0]; - Region *funcOpRegion = topLoop.getParentRegion(); + Region *funcOpRegion = topLoop->getParentRegion(); unsigned nestDepth = band.size(); for (BlockArgument blockArgument : diff --git a/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp index 1591a7435c72a4..046fad43c3bf96 100644 --- a/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp +++ b/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp @@ -192,7 +192,7 @@ struct TestLinalgGreedyFusion pm.addPass(createLoopInvariantCodeMotionPass()); pm.addPass(createCanonicalizerPass()); pm.addPass(createCSEPass()); - LogicalResult res = pm.run(getFunction().getParentOfType()); + LogicalResult res = pm.run(getFunction()->getParentOfType()); if (failed(res)) this->signalPassFailure(); } diff --git a/mlir/test/lib/Transforms/TestLoopMapping.cpp b/mlir/test/lib/Transforms/TestLoopMapping.cpp index d373b3933128ad..591fac32698f61 100644 --- a/mlir/test/lib/Transforms/TestLoopMapping.cpp +++ b/mlir/test/lib/Transforms/TestLoopMapping.cpp @@ -43,7 +43,7 @@ class TestLoopMappingPass func.walk([&processorIds, &numProcessors](scf::ForOp op) { // Ignore nested loops. - if (op.getParentRegion()->getParentOfType()) + if (op->getParentRegion()->getParentOfType()) return; mapLoopToProcessorIds(op, processorIds, numProcessors); }); diff --git a/mlir/test/lib/Transforms/TestLoopParametricTiling.cpp b/mlir/test/lib/Transforms/TestLoopParametricTiling.cpp index 26491018233542..095a3f1e7ef49a 100644 --- a/mlir/test/lib/Transforms/TestLoopParametricTiling.cpp +++ b/mlir/test/lib/Transforms/TestLoopParametricTiling.cpp @@ -35,7 +35,7 @@ class SimpleParametricLoopTilingPass FuncOp func = getFunction(); func.walk([this](scf::ForOp op) { // Ignore nested loops. 
- if (op.getParentRegion()->getParentOfType()) + if (op->getParentRegion()->getParentOfType()) return; extractFixedOuterLoops(op, sizes); }); diff --git a/mlir/test/mlir-tblgen/op-attribute.td b/mlir/test/mlir-tblgen/op-attribute.td index 171b5f5757782e..e170d9f9fe0acc 100644 --- a/mlir/test/mlir-tblgen/op-attribute.td +++ b/mlir/test/mlir-tblgen/op-attribute.td @@ -71,11 +71,11 @@ def AOp : NS_Op<"a_op", []> { // --- // DEF: void AOp::aAttrAttr(some-attr-kind attr) { -// DEF-NEXT: this->getOperation()->setAttr("aAttr", attr); +// DEF-NEXT: (*this)->setAttr("aAttr", attr); // DEF: void AOp::bAttrAttr(some-attr-kind attr) { -// DEF-NEXT: this->getOperation()->setAttr("bAttr", attr); +// DEF-NEXT: (*this)->setAttr("bAttr", attr); // DEF: void AOp::cAttrAttr(some-attr-kind attr) { -// DEF-NEXT: this->getOperation()->setAttr("cAttr", attr); +// DEF-NEXT: (*this)->setAttr("cAttr", attr); // Test build methods // --- diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index ccfb13fa34363f..04bc10d338d7ca 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -90,7 +90,8 @@ const char *adapterSegmentSizeAttrInitCode = R"( auto sizeAttr = odsAttrs.get("{0}").cast<::mlir::DenseIntElementsAttr>(); )"; const char *opSegmentSizeAttrInitCode = R"( - auto sizeAttr = getAttrOfType<::mlir::DenseIntElementsAttr>("{0}"); + auto sizeAttr = + getOperation()->getAttrOfType<::mlir::DenseIntElementsAttr>("{0}"); )"; const char *attrSizedSegmentValueRangeCalcCode = R"( unsigned start = 0; @@ -614,7 +615,7 @@ void OpEmitter::genAttrSetters() { if (!method) return; auto &body = method->body(); - body << " this->getOperation()->setAttr(\"" << name << "\", attr);"; + body << " (*this)->setAttr(\"" << name << "\", attr);"; }; for (auto &namedAttr : op.getAttributes()) {