From 1907f28b47cfe9c951df43309d121679895b0edf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= <1.int32@gmail.com> Date: Mon, 18 May 2020 17:14:39 +0200 Subject: [PATCH 01/14] [Analyzer][StreamChecker] Fixed compile error - NFC. --- clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp index d079951221d082..c94cae04523980 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp @@ -108,10 +108,10 @@ struct StreamState { return StreamState{L, Opened, ES}; } static StreamState getClosed(const FnDescription *L) { - return StreamState{L, Closed}; + return StreamState{L, Closed, {}}; } static StreamState getOpenFailed(const FnDescription *L) { - return StreamState{L, OpenFailed}; + return StreamState{L, OpenFailed, {}}; } void Profile(llvm::FoldingSetNodeID &ID) const { From 10e2e7de0c0cb73a71eb0047f0a23db1f91361dc Mon Sep 17 00:00:00 2001 From: Wouter van Oortmerssen Date: Fri, 15 May 2020 16:09:20 -0700 Subject: [PATCH 02/14] [WebAssembly] iterate stack in DebugFixup from the top. Differential Revision: https://reviews.llvm.org/D80045 --- llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp index 8f1f77e23b8e31..655e30a29eff45 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp @@ -86,9 +86,9 @@ bool WebAssemblyDebugFixup::runOnMachineFunction(MachineFunction &MF) { // Search for register rather than assume it is on top (which it // typically is if it appears right after the def), since // DBG_VALUE's may shift under some circumstances. - size_t Depth = 0; - for (auto &Elem : Stack) { + for (auto &Elem : reverse(Stack)) { if (MO.getReg() == Elem.Reg) { + auto Depth = static_cast(&Elem - &Stack[0]); LLVM_DEBUG(dbgs() << "Debug Value VReg " << MO.getReg() << " -> Stack Relative " << Depth << "\n"); MO.ChangeToTargetIndex(WebAssembly::TI_OPERAND_STACK, Depth); @@ -98,7 +98,6 @@ bool WebAssemblyDebugFixup::runOnMachineFunction(MachineFunction &MF) { Elem.DebugValue = &MI; break; } - Depth++; } // If the Reg was not found, we have a DBG_VALUE outside of its // def-use range, and we leave it unmodified as reg, which means From 364c595403c00431374dbcc965b6117e33a7f140 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Wed, 6 May 2020 11:23:04 +0100 Subject: [PATCH 03/14] [SVE] Ignore scalable vectors in InterleavedLoadCombinePass I have changed the pass so that we ignore shuffle vectors with scalable vector types, and replaced VectorType with FixedVectorType in the rest of the pass. I couldn't think of an easy way to test this change, since for scalable vectors we shouldn't be using shufflevectors for interleaving. 
This change fixes up some type size assert warnings I found in the following test: CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll Differential Revision: https://reviews.llvm.org/D79700 --- llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 64a8ff31624ce8..a0ed5eea065152 100644 --- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -673,9 +673,9 @@ struct VectorInfo { ElementInfo *EI; /// Vector Type - VectorType *const VTy; + FixedVectorType *const VTy; - VectorInfo(VectorType *VTy) + VectorInfo(FixedVectorType *VTy) : BB(nullptr), PV(nullptr), LIs(), Is(), SVI(nullptr), VTy(VTy) { EI = new ElementInfo[VTy->getNumElements()]; } @@ -735,7 +735,7 @@ struct VectorInfo { if (!Op) return false; - VectorType *VTy = dyn_cast(Op->getType()); + FixedVectorType *VTy = dyn_cast(Op->getType()); if (!VTy) return false; @@ -785,8 +785,8 @@ struct VectorInfo { /// \returns false if no sensible information can be gathered. static bool computeFromSVI(ShuffleVectorInst *SVI, VectorInfo &Result, const DataLayout &DL) { - VectorType *ArgTy = dyn_cast(SVI->getOperand(0)->getType()); - assert(ArgTy && "ShuffleVector Operand is not a VectorType"); + FixedVectorType *ArgTy = + cast(SVI->getOperand(0)->getType()); // Compute the left hand vector information. VectorInfo LHS(ArgTy); @@ -1201,7 +1201,7 @@ bool InterleavedLoadCombineImpl::combine(std::list &InterleavedLoad, Type *ETy = InterleavedLoad.front().SVI->getType()->getElementType(); unsigned ElementsPerSVI = InterleavedLoad.front().SVI->getType()->getNumElements(); - VectorType *ILTy = VectorType::get(ETy, Factor * ElementsPerSVI); + FixedVectorType *ILTy = FixedVectorType::get(ETy, Factor * ElementsPerSVI); SmallVector Indices; for (unsigned i = 0; i < Factor; i++) @@ -1265,8 +1265,11 @@ bool InterleavedLoadCombineImpl::run() { for (BasicBlock &BB : F) { for (Instruction &I : BB) { if (auto SVI = dyn_cast(&I)) { + // We don't support scalable vectors in this pass. + if (isa(SVI->getType())) + continue; - Candidates.emplace_back(SVI->getType()); + Candidates.emplace_back(cast(SVI->getType())); if (!VectorInfo::computeFromSVI(SVI, Candidates.back(), DL)) { Candidates.pop_back(); From 36cdc17f8cfeffe7edb4486c02fc97faf73b23ac Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Mon, 18 May 2020 11:40:59 -0400 Subject: [PATCH 04/14] [mlir][Vector] Make minor identity permutation map optional in transfer op printing and parsing Summary: This revision makes the use of vector transfer operatons more idiomatic by allowing to omit and inferring the permutation_map. 
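As an illustration, here is a minimal hypothetical sketch of the syntactic effect (the function below is not taken from this patch's tests): when the permutation_map is the canonical minor identity, it can now be left out of the textual form; the parser infers it and the printer elides it. Any non-identity map must still be written explicitly.

  func @transfer_map_elision(%A : memref<?x?xf32>, %i : index, %j : index) {
    %pad = constant 0.0 : f32
    // Explicit form: the canonical minor identity map, accepted before and
    // after this change.
    %0 = vector.transfer_read %A[%i, %j], %pad
        {permutation_map = affine_map<(d0, d1) -> (d1)>}
        : memref<?x?xf32>, vector<128xf32>
    // Equivalent implicit form enabled by this change: the minor identity map
    // is inferred when parsing and elided when printing.
    %1 = vector.transfer_read %A[%i, %j], %pad : memref<?x?xf32>, vector<128xf32>
    vector.transfer_write %1, %A[%i, %j] : vector<128xf32>, memref<?x?xf32>
    return
  }
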
Differential Revision: https://reviews.llvm.org/D80092 --- mlir/include/mlir/Dialect/Vector/VectorOps.h | 9 ++ mlir/include/mlir/Dialect/Vector/VectorOps.td | 97 +++++++++------ .../Conversion/VectorToSCF/VectorToSCF.cpp | 15 +-- .../Affine/Transforms/SuperVectorize.cpp | 5 +- mlir/lib/Dialect/Vector/VectorOps.cpp | 114 +++++++++++++++--- .../lower-affine-to-vector.mlir | 18 +-- .../VectorToLoops/vector-to-loops.mlir | 4 +- .../Affine/SuperVectorize/vectorize_1d.mlir | 47 ++++---- .../Affine/SuperVectorize/vectorize_2d.mlir | 20 +-- .../Affine/SuperVectorize/vectorize_3d.mlir | 2 +- mlir/test/Dialect/Vector/invalid.mlir | 59 ++++++--- mlir/test/Dialect/Vector/ops.mlir | 4 +- .../Dialect/Vector/vector-transforms.mlir | 32 ++--- 13 files changed, 282 insertions(+), 144 deletions(-) diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.h b/mlir/include/mlir/Dialect/Vector/VectorOps.h index a3376d53fc9590..6394fae2137507 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.h +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.h @@ -13,6 +13,7 @@ #ifndef MLIR_DIALECT_VECTOR_VECTOROPS_H #define MLIR_DIALECT_VECTOR_VECTOROPS_H +#include "mlir/IR/AffineMap.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" @@ -71,6 +72,14 @@ IntegerType getVectorSubscriptType(Builder &builder); /// the integer type required for subscripts in the vector dialect. ArrayAttr getVectorSubscriptAttr(Builder &b, ArrayRef values); +namespace impl { +/// Build the default minor identity map suitable for a vector transfer. This +/// also handles the case memref<... x vector<...>> -> vector<...> in which the +/// rank of the identity map must take the vector element type into account. +AffineMap getTransferMinorIdentityMap(MemRefType memRefType, + VectorType vectorType); +} // namespace impl + #define GET_OP_CLASSES #include "mlir/Dialect/Vector/VectorOps.h.inc" diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index 4c71a168dae737..b8a47a27e41f46 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -863,6 +863,18 @@ def Vector_ExtractStridedSliceOp : let assemblyFormat = "$vector attr-dict `:` type($vector) `to` type(results)"; } +def Vector_TransferOpUtils { + code extraTransferDeclaration = [{ + static StringRef getPermutationMapAttrName() { return "permutation_map"; } + MemRefType getMemRefType() { + return memref().getType().cast(); + } + VectorType getVectorType() { + return vector().getType().cast(); + } + }]; +} + def Vector_TransferReadOp : Vector_Op<"transfer_read">, Arguments<(ins AnyMemRef:$memref, Variadic:$indices, @@ -884,15 +896,21 @@ def Vector_TransferReadOp : supplied as the operands `2 .. 1 + rank(memref)`. The permutation_map [attribute](../LangRef.md#attributes) is an [affine-map](Affine.md#affine-maps) which specifies the transposition on the - slice to match the vector shape. The size of the slice is specified by the - size of the vector, given as the return type. An `ssa-value` of the same - elemental type as the MemRef is provided as the last operand to specify - padding in the case of out-of-bounds accesses. This operation is called - 'read' by opposition to 'load' because the super-vector granularity is - generally not representable with a single hardware register. - A `vector.transfer_read` is thus a mid-level - abstraction that supports super-vectorization with non-effecting padding for - full-tile-only code. 
+ slice to match the vector shape. The permutation map may be implicit and + ommitted from parsing and printing if it is the canonical minor identity map + (i.e. if it does not permute or broadcast any dimension). + + The size of the slice is specified by the size of the vector, given as the + return type. + + An `ssa-value` of the same elemental type as the MemRef is provided as the + last operand to specify padding in the case of out-of-bounds accesses. + + This operation is called 'read' by opposition to 'load' because the + super-vector granularity is generally not representable with a single + hardware register. A `vector.transfer_read` is thus a mid-level abstraction + that supports super-vectorization with non-effecting padding for full-tile + only operations. More precisely, let's dive deeper into the permutation_map for the following MLIR: @@ -995,19 +1013,25 @@ def Vector_TransferReadOp : }]; let builders = [ - // Builder that sets permutation map and padding to 'getMinorIdentityMap' - // and zero, respectively, by default. + // Builder that sets padding to zero. + OpBuilder<"OpBuilder &builder, OperationState &result, VectorType vector, " + "Value memref, ValueRange indices, AffineMap permutationMap">, + // Builder that sets permutation map (resp. padding) to + // 'getMinorIdentityMap' (resp. zero). OpBuilder<"OpBuilder &builder, OperationState &result, VectorType vector, " "Value memref, ValueRange indices"> ]; - let extraClassDeclaration = [{ - MemRefType getMemRefType() { - return memref().getType().cast(); - } - VectorType getVectorType() { - return vector().getType().cast(); - } + let extraClassDeclaration = Vector_TransferOpUtils.extraTransferDeclaration # + [{ + /// Build the default minor identity map suitable for a vector transfer. + /// This also handles the case memref<... x vector<...>> -> vector<...> in + /// which the rank of the identity map must take the vector element type + /// into account. + static AffineMap getTransferMinorIdentityMap( + MemRefType memRefType, VectorType vectorType) { + return impl::getTransferMinorIdentityMap(memRefType, vectorType); + } }]; } @@ -1033,10 +1057,15 @@ def Vector_TransferWriteOp : supplied as the operands `3 .. 2 + rank(memref)`. The permutation_map [attribute](../LangRef.md#attributes) is an [affine-map](Affine.md#affine-maps) which specifies the transposition on the - slice to match the vector shape. The size of the slice is specified by the - size of the vector. This operation is called 'write' by opposition to - 'store' because the super-vector granularity is generally not representable - with a single hardware register. A `vector.transfer_write` is thus a + slice to match the vector shape. The permutation map may be implicit and + ommitted from parsing and printing if it is the canonical minor identity map + (i.e. if it does not permute or broadcast any dimension). + + The size of the slice is specified by the size of the vector. + + This operation is called 'write' by opposition to 'store' because the + super-vector granularity is generally not representable with a single + hardware register. A `vector.transfer_write` is thus a mid-level abstraction that supports super-vectorization with non-effecting padding for full-tile-only code. It is the responsibility of `vector.transfer_write`'s implementation to ensure the memory writes are @@ -1066,23 +1095,21 @@ def Vector_TransferWriteOp : }]; let builders = [ - // Builder that sets permutation map and padding to 'getMinorIdentityMap' - // by default. 
+ // Builder that sets permutation map to 'getMinorIdentityMap'. OpBuilder<"OpBuilder &builder, OperationState &result, Value vector, " "Value memref, ValueRange indices"> ]; - let extraClassDeclaration = [{ - VectorType getVectorType() { - return vector().getType().cast(); - } - MemRefType getMemRefType() { - return memref().getType().cast(); - } - }]; - let assemblyFormat = [{ - $vector `,` $memref `[` $indices `]` attr-dict `:` type($vector) `,` - type($memref) + let extraClassDeclaration = Vector_TransferOpUtils.extraTransferDeclaration # + [{ + /// Build the default minor identity map suitable for a vector transfer. + /// This also handles the case memref<... x vector<...>> -> vector<...> in + /// which the rank of the identity map must take the vector element type + /// into account. + static AffineMap getTransferMinorIdentityMap( + MemRefType memRefType, VectorType vectorType) { + return impl::getTransferMinorIdentityMap(memRefType, vectorType); + } }]; } diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index c9cd605afb84cd..d3da7bff7b5b7a 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -187,15 +187,15 @@ LogicalResult NDTransferOpHelper::doReplace() { MemRefBoundsCapture &memrefBounds) { // If in-bounds, index into memref and lower to 1-D transfer read. auto thenBlockBuilder = [&](ValueRange majorIvsPlusOffsets) { - auto map = AffineMap::getMinorIdentityMap( - xferOp.getMemRefType().getRank(), minorRank, xferOp.getContext()); - // Lower to 1-D vector_transfer_read and let recursion handle it. - Value memref = xferOp.memref(); SmallVector indexing; indexing.reserve(leadingRank + majorRank + minorRank); indexing.append(leadingOffsets.begin(), leadingOffsets.end()); indexing.append(majorIvsPlusOffsets.begin(), majorIvsPlusOffsets.end()); indexing.append(minorOffsets.begin(), minorOffsets.end()); + // Lower to 1-D vector_transfer_read and let recursion handle it. + Value memref = xferOp.memref(); + auto map = TransferReadOp::getTransferMinorIdentityMap( + xferOp.getMemRefType(), minorVectorType); auto loaded1D = vector_transfer_read(minorVectorType, memref, indexing, AffineMapAttr::get(map), xferOp.padding()); @@ -230,14 +230,15 @@ LogicalResult NDTransferOpHelper::doReplace() { MemRefBoundsCapture &memrefBounds) { auto thenBlockBuilder = [&](ValueRange majorIvsPlusOffsets) { // Lower to 1-D vector_transfer_write and let recursion handle it. - Value loaded1D = std_load(alloc, majorIvs); - auto map = AffineMap::getMinorIdentityMap( - xferOp.getMemRefType().getRank(), minorRank, xferOp.getContext()); SmallVector indexing; indexing.reserve(leadingRank + majorRank + minorRank); indexing.append(leadingOffsets.begin(), leadingOffsets.end()); indexing.append(majorIvsPlusOffsets.begin(), majorIvsPlusOffsets.end()); indexing.append(minorOffsets.begin(), minorOffsets.end()); + // Lower to 1-D vector_transfer_write and let recursion handle it. 
+ Value loaded1D = std_load(alloc, majorIvs); + auto map = TransferWriteOp::getTransferMinorIdentityMap( + xferOp.getMemRefType(), minorVectorType); vector_transfer_write(loaded1D, xferOp.memref(), indexing, AffineMapAttr::get(map)); }; diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp index fe669624f6cb62..c72b835fc51ace 100644 --- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp @@ -793,10 +793,7 @@ static LogicalResult vectorizeRootOrTerminal(Value iv, LLVM_DEBUG(permutationMap.print(dbgs())); auto transfer = b.create( opInst->getLoc(), vectorType, memoryOp.getMemRef(), indices, - AffineMapAttr::get(permutationMap), - // TODO(b/144455320) add a proper padding value, not just 0.0 : f32 - state->folder->create(b, opInst->getLoc(), - APFloat(0.0f), b.getF32Type())); + permutationMap); state->registerReplacement(opInst, transfer.getOperation()); } else { state->registerTerminal(opInst); diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 8385b253e9fb60..94695b6473ded0 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -1281,28 +1281,60 @@ static LogicalResult verifyTransferOp(Operation *op, MemRefType memrefType, if (permutationMap.getNumInputs() != memrefType.getRank()) return op->emitOpError("requires a permutation_map with input dims of the " "same rank as the memref type"); + return success(); } +/// Build the default minor identity map suitable for a vector transfer. This +/// also handles the case memref<... x vector<...>> -> vector<...> in which the +/// rank of the identity map must take the vector element type into account. +AffineMap +mlir::vector::impl::getTransferMinorIdentityMap(MemRefType memRefType, + VectorType vectorType) { + int64_t elementVectorRank = 0; + VectorType elementVectorType = + memRefType.getElementType().dyn_cast(); + if (elementVectorType) + elementVectorRank += elementVectorType.getRank(); + return AffineMap::getMinorIdentityMap( + memRefType.getRank(), vectorType.getRank() - elementVectorRank, + memRefType.getContext()); +} + /// Builder that sets permutation map and padding to 'getMinorIdentityMap' and /// zero, respectively, by default. void TransferReadOp::build(OpBuilder &builder, OperationState &result, - VectorType vector, Value memref, - ValueRange indices) { - auto permMap = AffineMap::getMinorIdentityMap( - memref.getType().cast().getRank(), vector.getRank(), - builder.getContext()); + VectorType vector, Value memref, ValueRange indices, + AffineMap permutationMap) { Type elemType = vector.cast().getElementType(); Value padding = builder.create(result.location, elemType, builder.getZeroAttr(elemType)); + build(builder, result, vector, memref, indices, permutationMap, padding); +} - build(builder, result, vector, memref, indices, permMap, padding); +/// Builder that sets permutation map (resp. padding) to 'getMinorIdentityMap' +/// (resp. zero). 
+void TransferReadOp::build(OpBuilder &builder, OperationState &result, + VectorType vectorType, Value memref, + ValueRange indices) { + build(builder, result, vectorType, memref, indices, + getTransferMinorIdentityMap(memref.getType().cast(), + vectorType)); +} + +template +void printTransferAttrs(OpAsmPrinter &p, TransferOp op) { + SmallVector elidedAttrs; + if (op.permutation_map() == TransferOp::getTransferMinorIdentityMap( + op.getMemRefType(), op.getVectorType())) + elidedAttrs.push_back(op.getPermutationMapAttrName()); + p.printOptionalAttrDict(op.getAttrs(), elidedAttrs); } static void print(OpAsmPrinter &p, TransferReadOp op) { p << op.getOperationName() << " " << op.memref() << "[" << op.indices() - << "], " << op.padding() << " "; - p.printOptionalAttrDict(op.getAttrs()); + << "], " << op.padding(); + printTransferAttrs(p, op); p << " : " << op.getMemRefType() << ", " << op.getVectorType(); } @@ -1313,7 +1345,7 @@ static ParseResult parseTransferReadOp(OpAsmParser &parser, SmallVector indexInfo; OpAsmParser::OperandType paddingInfo; SmallVector types; - // Parsing with support for optional paddingValue. + // Parsing with support for paddingValue. if (parser.parseOperand(memrefInfo) || parser.parseOperandList(indexInfo, OpAsmParser::Delimiter::Square) || parser.parseComma() || parser.parseOperand(paddingInfo) || @@ -1321,12 +1353,21 @@ static ParseResult parseTransferReadOp(OpAsmParser &parser, parser.getCurrentLocation(&typesLoc) || parser.parseColonTypeList(types)) return failure(); if (types.size() != 2) - return parser.emitError(typesLoc, "two types required"); + return parser.emitError(typesLoc, "requires two types"); auto indexType = parser.getBuilder().getIndexType(); MemRefType memRefType = types[0].dyn_cast(); if (!memRefType) - return parser.emitError(typesLoc, "memref type required"), failure(); - Type vectorType = types[1]; + return parser.emitError(typesLoc, "requires memref type"); + VectorType vectorType = types[1].dyn_cast(); + if (!vectorType) + return parser.emitError(typesLoc, "requires vector type"); + auto permutationAttrName = TransferReadOp::getPermutationMapAttrName(); + auto attr = result.attributes.get(permutationAttrName); + if (!attr) { + auto permMap = + TransferReadOp::getTransferMinorIdentityMap(memRefType, vectorType); + result.attributes.set(permutationAttrName, AffineMapAttr::get(permMap)); + } return failure( parser.resolveOperand(memrefInfo, memRefType, result.operands) || parser.resolveOperands(indexInfo, indexType, result.operands) || @@ -1376,17 +1417,56 @@ static LogicalResult verify(TransferReadOp op) { // TransferWriteOp //===----------------------------------------------------------------------===// -/// Builder that sets permutation map and padding to 'getMinorIdentityMap' by -/// default. +/// Builder that sets permutation map to 'getMinorIdentityMap'. 
void TransferWriteOp::build(OpBuilder &builder, OperationState &result, Value vector, Value memref, ValueRange indices) { auto vectorType = vector.getType().cast(); - auto permMap = AffineMap::getMinorIdentityMap( - memref.getType().cast().getRank(), vectorType.getRank(), - builder.getContext()); + auto permMap = getTransferMinorIdentityMap( + memref.getType().cast(), vectorType); build(builder, result, vector, memref, indices, permMap); } +static ParseResult parseTransferWriteOp(OpAsmParser &parser, + OperationState &result) { + llvm::SMLoc typesLoc; + OpAsmParser::OperandType vectorInfo, memrefInfo; + SmallVector indexInfo; + SmallVector types; + if (parser.parseOperand(vectorInfo) || parser.parseComma() || + parser.parseOperand(memrefInfo) || + parser.parseOperandList(indexInfo, OpAsmParser::Delimiter::Square) || + parser.parseOptionalAttrDict(result.attributes) || + parser.getCurrentLocation(&typesLoc) || parser.parseColonTypeList(types)) + return failure(); + if (types.size() != 2) + return parser.emitError(typesLoc, "requires two types"); + auto indexType = parser.getBuilder().getIndexType(); + VectorType vectorType = types[0].dyn_cast(); + if (!vectorType) + return parser.emitError(typesLoc, "requires vector type"); + MemRefType memRefType = types[1].dyn_cast(); + if (!memRefType) + return parser.emitError(typesLoc, "requires memref type"); + auto permutationAttrName = TransferWriteOp::getPermutationMapAttrName(); + auto attr = result.attributes.get(permutationAttrName); + if (!attr) { + auto permMap = + TransferWriteOp::getTransferMinorIdentityMap(memRefType, vectorType); + result.attributes.set(permutationAttrName, AffineMapAttr::get(permMap)); + } + return failure( + parser.resolveOperand(vectorInfo, vectorType, result.operands) || + parser.resolveOperand(memrefInfo, memRefType, result.operands) || + parser.resolveOperands(indexInfo, indexType, result.operands)); +} + +static void print(OpAsmPrinter &p, TransferWriteOp op) { + p << op.getOperationName() << " " << op.vector() << ", " << op.memref() << "[" + << op.indices() << "]"; + printTransferAttrs(p, op); + p << " : " << op.getVectorType() << ", " << op.getMemRefType(); +} + static LogicalResult verify(TransferWriteOp op) { // Consistency of elemental types in memref and vector. 
MemRefType memrefType = op.getMemRefType(); diff --git a/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir b/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir index f9a78aa495a5c3..7fba0996d8f560 100644 --- a/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir +++ b/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir @@ -1,6 +1,5 @@ // RUN: mlir-opt -lower-affine --split-input-file %s | FileCheck %s -// CHECK: #[[perm_map:.*]] = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @affine_vector_load func @affine_vector_load(%arg0 : index) { %0 = alloc() : memref<100xf32> @@ -12,13 +11,12 @@ func @affine_vector_load(%arg0 : index) { // CHECK-NEXT: %[[c7:.*]] = constant 7 : index // CHECK-NEXT: %[[b:.*]] = addi %[[a]], %[[c7]] : index // CHECK-NEXT: %[[pad:.*]] = constant 0.0 -// CHECK-NEXT: vector.transfer_read %[[buf]][%[[b]]], %[[pad]] {permutation_map = #[[perm_map]]} : memref<100xf32>, vector<8xf32> +// CHECK-NEXT: vector.transfer_read %[[buf]][%[[b]]], %[[pad]] : memref<100xf32>, vector<8xf32> return } // ----- -// CHECK: #[[perm_map:.*]] = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @affine_vector_store func @affine_vector_store(%arg0 : index) { %0 = alloc() : memref<100xf32> @@ -33,13 +31,12 @@ func @affine_vector_store(%arg0 : index) { // CHECK-NEXT: %[[b:.*]] = addi %{{.*}}, %[[a]] : index // CHECK-NEXT: %[[c7:.*]] = constant 7 : index // CHECK-NEXT: %[[c:.*]] = addi %[[b]], %[[c7]] : index -// CHECK-NEXT: vector.transfer_write %[[val]], %[[buf]][%[[c]]] {permutation_map = #[[perm_map]]} : vector<4xf32>, memref<100xf32> +// CHECK-NEXT: vector.transfer_write %[[val]], %[[buf]][%[[c]]] : vector<4xf32>, memref<100xf32> return } // ----- -// CHECK: #[[perm_map:.*]] = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @affine_vector_load func @affine_vector_load(%arg0 : index) { %0 = alloc() : memref<100xf32> @@ -51,13 +48,12 @@ func @affine_vector_load(%arg0 : index) { // CHECK-NEXT: %[[c7:.*]] = constant 7 : index // CHECK-NEXT: %[[b:.*]] = addi %[[a]], %[[c7]] : index // CHECK-NEXT: %[[pad:.*]] = constant 0.0 -// CHECK-NEXT: vector.transfer_read %[[buf]][%[[b]]], %[[pad]] {permutation_map = #[[perm_map]]} : memref<100xf32>, vector<8xf32> +// CHECK-NEXT: vector.transfer_read %[[buf]][%[[b]]], %[[pad]] : memref<100xf32>, vector<8xf32> return } // ----- -// CHECK: #[[perm_map:.*]] = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @affine_vector_store func @affine_vector_store(%arg0 : index) { %0 = alloc() : memref<100xf32> @@ -72,13 +68,12 @@ func @affine_vector_store(%arg0 : index) { // CHECK-NEXT: %[[b:.*]] = addi %{{.*}}, %[[a]] : index // CHECK-NEXT: %[[c7:.*]] = constant 7 : index // CHECK-NEXT: %[[c:.*]] = addi %[[b]], %[[c7]] : index -// CHECK-NEXT: vector.transfer_write %[[val]], %[[buf]][%[[c]]] {permutation_map = #[[perm_map]]} : vector<4xf32>, memref<100xf32> +// CHECK-NEXT: vector.transfer_write %[[val]], %[[buf]][%[[c]]] : vector<4xf32>, memref<100xf32> return } // ----- -// CHECK: #[[perm_map:.*]] = affine_map<(d0, d1) -> (d0, d1)> // CHECK-LABEL: func @vector_load_2d func @vector_load_2d() { %0 = alloc() : memref<100x100xf32> @@ -89,7 +84,7 @@ func @vector_load_2d() { // CHECK: scf.for %[[i0:.*]] = // CHECK: scf.for %[[i1:.*]] = // CHECK-NEXT: %[[pad:.*]] = constant 0.0 -// CHECK-NEXT: vector.transfer_read %[[buf]][%[[i0]], %[[i1]]], %[[pad]] {permutation_map = #[[perm_map]]} : memref<100x100xf32>, vector<2x8xf32> +// CHECK-NEXT: vector.transfer_read %[[buf]][%[[i0]], %[[i1]]], %[[pad]] : memref<100x100xf32>, 
vector<2x8xf32> } } return @@ -97,7 +92,6 @@ func @vector_load_2d() { // ----- -// CHECK: #[[perm_map:.*]] = affine_map<(d0, d1) -> (d0, d1)> // CHECK-LABEL: func @vector_store_2d func @vector_store_2d() { %0 = alloc() : memref<100x100xf32> @@ -109,7 +103,7 @@ func @vector_store_2d() { // CHECK: %[[val:.*]] = constant dense // CHECK: scf.for %[[i0:.*]] = // CHECK: scf.for %[[i1:.*]] = -// CHECK-NEXT: vector.transfer_write %[[val]], %[[buf]][%[[i0]], %[[i1]]] {permutation_map = #[[perm_map]]} : vector<2x8xf32>, memref<100x100xf32> +// CHECK-NEXT: vector.transfer_write %[[val]], %[[buf]][%[[i0]], %[[i1]]] : vector<2x8xf32>, memref<100x100xf32> } } return diff --git a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir index 491196c91efb0f..5c1e6361adb976 100644 --- a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir @@ -229,7 +229,7 @@ func @transfer_read_progressive(%A : memref, %base: index) -> vector<17 // CHECK: %[[cmp:.*]] = cmpi "slt", %[[add]], %[[dim]] : index // CHECK: %[[cond1:.*]] = and %[[cmp]], %[[cond0]] : i1 // CHECK: scf.if %[[cond1]] { - // CHECK: %[[vec_1d:.*]] = vector.transfer_read %[[A]][%[[add]], %[[base]]], %[[cst]] {permutation_map = #[[MAP1]]} : memref, vector<15xf32> + // CHECK: %[[vec_1d:.*]] = vector.transfer_read %[[A]][%[[add]], %[[base]]], %[[cst]] : memref, vector<15xf32> // CHECK: store %[[vec_1d]], %[[alloc]][%[[I]]] : memref<17xvector<15xf32>> // CHECK: } else { // CHECK: store %[[splat]], %[[alloc]][%[[I]]] : memref<17xvector<15xf32>> @@ -264,7 +264,7 @@ func @transfer_write_progressive(%A : memref, %base: index, %vec: vecto // CHECK: %[[cond1:.*]] = and %[[cmp]], %[[cond0]] : i1 // CHECK: scf.if %[[cond1]] { // CHECK: %[[vec_1d:.*]] = load %0[%[[I]]] : memref<17xvector<15xf32>> - // CHECK: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] {permutation_map = #[[MAP1]]} : vector<15xf32>, memref + // CHECK: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] : vector<15xf32>, memref // CHECK: } vector.transfer_write %vec, %A[%base, %base] {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} : diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir index b577e229ba7639..10bf5009d5f630 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir @@ -2,7 +2,6 @@ // Permutation maps used in vectorization. // CHECK: #[[map_proj_d0d1_0:map[0-9]+]] = affine_map<(d0, d1) -> (0)> -// CHECK: #[[map_proj_d0d1_d1:map[0-9]+]] = affine_map<(d0, d1) -> (d1)> #map0 = affine_map<(d0) -> (d0)> #mapadd1 = affine_map<(d0) -> (d0 + 1)> @@ -13,7 +12,6 @@ // Maps introduced to vectorize fastest varying memory index. 
// CHECK-LABEL: func @vec1d_1 func @vec1d_1(%A : memref, %B : memref) { -// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, 1 : memref @@ -22,10 +20,11 @@ func @vec1d_1(%A : memref, %B : memref) { %N = dim %A, 1 : memref %P = dim %B, 2 : memref %cst0 = constant 0 : index -// + // CHECK: for {{.*}} step 128 // CHECK-NEXT: %{{.*}} = affine.apply #map0(%[[C0]]) // CHECK-NEXT: %{{.*}} = affine.apply #map0(%[[C0]]) +// CHECK-NEXT: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_0]]} : memref, vector<128xf32> affine.for %i0 = 0 to %M { // vectorized due to scalar -> vector %a0 = affine.load %A[%cst0, %cst0] : memref @@ -35,7 +34,6 @@ func @vec1d_1(%A : memref, %B : memref) { // CHECK-LABEL: func @vec1d_2 func @vec1d_2(%A : memref, %B : memref) { -// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, 1 : memref @@ -46,7 +44,8 @@ func @vec1d_2(%A : memref, %B : memref) { %cst0 = constant 0 : index // // CHECK:for [[IV3:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128 -// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref, vector<128xf32> +// CHECK-NEXT: %[[CST:.*]] = constant 0.0{{.*}}: f32 +// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %[[CST]] : memref, vector<128xf32> affine.for %i3 = 0 to %M { // vectorized %a3 = affine.load %A[%cst0, %i3] : memref } @@ -55,7 +54,6 @@ func @vec1d_2(%A : memref, %B : memref) { // CHECK-LABEL: func @vec1d_3 func @vec1d_3(%A : memref, %B : memref) { -// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %arg0, 0 : memref // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %arg0, 1 : memref @@ -69,7 +67,8 @@ func @vec1d_3(%A : memref, %B : memref) { // CHECK-NEXT: for [[IV9:%[arg0-9]*]] = 0 to [[ARG_N]] { // CHECK-NEXT: %[[APP9_0:[0-9]+]] = affine.apply {{.*}}([[IV9]], [[IV8]]) // CHECK-NEXT: %[[APP9_1:[0-9]+]] = affine.apply {{.*}}([[IV9]], [[IV8]]) -// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%[[APP9_0]], %[[APP9_1]]], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref, vector<128xf32> +// CHECK-NEXT: %[[CST:.*]] = constant 0.0{{.*}}: f32 +// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%[[APP9_0]], %[[APP9_1]]], %[[CST]] : memref, vector<128xf32> affine.for %i8 = 0 to %M { // vectorized affine.for %i9 = 0 to %N { %a9 = affine.load %A[%i9, %i8 + %i9] : memref @@ -87,31 +86,31 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %f2 = constant 2.0 : f32 affine.for %i0 = 0 to %M { affine.for %i1 = 0 to %N { - // CHECK: [[C1:%.*]] = constant dense<1.000000e+00> : vector<128xf32> - // CHECK: vector.transfer_write [[C1]], {{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref + // CHECK: %[[C1:.*]] = constant dense<1.000000e+00> : vector<128xf32> + // CHECK: vector.transfer_write %[[C1]], {{.*}} : vector<128xf32>, memref // non-scoped %f1 affine.store %f1, %A[%i0, %i1] : memref } } affine.for %i2 = 0 to %M { affine.for %i3 = 0 to %N { - // CHECK: [[C3:%.*]] = constant dense<2.000000e+00> : vector<128xf32> - // CHECK: vector.transfer_write [[C3]], {{.*}} 
{permutation_map = #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref + // CHECK: %[[C3:.*]] = constant dense<2.000000e+00> : vector<128xf32> + // CHECK: vector.transfer_write %[[C3]], {{.*}} : vector<128xf32>, memref // non-scoped %f2 affine.store %f2, %B[%i2, %i3] : memref } } affine.for %i4 = 0 to %M { affine.for %i5 = 0 to %N { - // CHECK: [[A5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref, vector<128xf32> - // CHECK: [[B5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref, vector<128xf32> - // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<128xf32> - // CHECK: [[SPLAT1:%.*]] = constant dense<1.000000e+00> : vector<128xf32> - // CHECK: [[S6:%.*]] = addf [[S5]], [[SPLAT1]] : vector<128xf32> - // CHECK: [[SPLAT2:%.*]] = constant dense<2.000000e+00> : vector<128xf32> - // CHECK: [[S7:%.*]] = addf [[S5]], [[SPLAT2]] : vector<128xf32> - // CHECK: [[S8:%.*]] = addf [[S7]], [[S6]] : vector<128xf32> - // CHECK: vector.transfer_write [[S8]], {{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref + // CHECK: %[[A5:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{[a-zA-Z0-9_]*}} : memref, vector<128xf32> + // CHECK: %[[B5:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{[a-zA-Z0-9_]*}} : memref, vector<128xf32> + // CHECK: %[[S5:.*]] = addf %[[A5]], %[[B5]] : vector<128xf32> + // CHECK: %[[SPLAT1:.*]] = constant dense<1.000000e+00> : vector<128xf32> + // CHECK: %[[S6:.*]] = addf %[[S5]], %[[SPLAT1]] : vector<128xf32> + // CHECK: %[[SPLAT2:.*]] = constant dense<2.000000e+00> : vector<128xf32> + // CHECK: %[[S7:.*]] = addf %[[S5]], %[[SPLAT2]] : vector<128xf32> + // CHECK: %[[S8:.*]] = addf %[[S7]], %[[S6]] : vector<128xf32> + // CHECK: vector.transfer_write %[[S8]], {{.*}} : vector<128xf32>, memref %a5 = affine.load %A[%i4, %i5] : memref %b5 = affine.load %B[%i4, %i5] : memref %s5 = addf %a5, %b5 : f32 @@ -168,7 +167,6 @@ func @vec_rejected_2(%A : memref, %B : memref) { // CHECK-LABEL: func @vec_rejected_3 func @vec_rejected_3(%A : memref, %B : memref) { -// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-DAG: [[C0:%[a-z0-9_]+]] = constant 0 : index // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, 1 : memref @@ -180,7 +178,8 @@ func @vec_rejected_3(%A : memref, %B : memref) { // // CHECK:for [[IV4:%[arg0-9]+]] = 0 to [[ARG_M]] step 128 { // CHECK-NEXT: for [[IV5:%[arg0-9]*]] = 0 to [[ARG_N]] { -// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref, vector<128xf32> +// CHECK-NEXT: %{{.*}} = constant 0.0{{.*}}: f32 +// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{[a-zA-Z0-9_]*}} : memref, vector<128xf32> affine.for %i4 = 0 to %M { // vectorized affine.for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1 %a5 = affine.load %A[%i5, %i4] : memref @@ -277,7 +276,6 @@ func @vec_rejected_7(%A : memref, %B : memref) { // CHECK-LABEL: func @vec_rejected_8 func @vec_rejected_8(%A : memref, %B : memref) { -// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, 1 : memref @@ -291,6 +289,7 @@ func @vec_rejected_8(%A : memref, %B : memref) { // CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128 // CHECK: %{{.*}} = affine.apply 
#map0(%{{.*}}) // CHECK: %{{.*}} = affine.apply #map0(%{{.*}}) +// CHECK: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_0]]} : memref, vector<128xf32> affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %{{.*}} in DFS post-order prevents vectorizing %{{.*}} affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector @@ -302,7 +301,6 @@ func @vec_rejected_8(%A : memref, %B : memref) { // CHECK-LABEL: func @vec_rejected_9 func @vec_rejected_9(%A : memref, %B : memref) { -// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, 1 : memref @@ -316,6 +314,7 @@ func @vec_rejected_9(%A : memref, %B : memref) { // CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128 // CHECK: %{{.*}} = affine.apply #map0(%{{.*}}) // CHECK-NEXT: %{{.*}} = affine.apply #map0(%{{.*}}) +// CHECK-NEXT: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_0]]} : memref, vector<128xf32> affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %{{.*}} affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir index 884907024bb115..3352644da63d8b 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir @@ -54,7 +54,7 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { affine.for %i0 = 0 to %M { affine.for %i1 = 0 to %N { // CHECK: [[C1:%.*]] = constant dense<1.000000e+00> : vector<32x256xf32> - // CHECK: vector.transfer_write [[C1]], {{.*}} {permutation_map = #[[map_id2]]} : vector<32x256xf32>, memref + // CHECK: vector.transfer_write [[C1]], {{.*}} : vector<32x256xf32>, memref // non-scoped %f1 affine.store %f1, %A[%i0, %i1] : memref } @@ -62,22 +62,22 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { affine.for %i2 = 0 to %M { affine.for %i3 = 0 to %N { // CHECK: [[C3:%.*]] = constant dense<2.000000e+00> : vector<32x256xf32> - // CHECK: vector.transfer_write [[C3]], {{.*}} {permutation_map = #[[map_id2]]} : vector<32x256xf32>, memref + // CHECK: vector.transfer_write [[C3]], {{.*}} : vector<32x256xf32>, memref // non-scoped %f2 affine.store %f2, %B[%i2, %i3] : memref } } affine.for %i4 = 0 to %M { affine.for %i5 = 0 to %N { - // CHECK: [[A5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {permutation_map = #[[map_id2]]} : memref, vector<32x256xf32> - // CHECK: [[B5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {permutation_map = #[[map_id2]]} : memref, vector<32x256xf32> + // CHECK: [[A5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} : memref, vector<32x256xf32> + // CHECK: [[B5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} : memref, vector<32x256xf32> // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32> // CHECK: [[SPLAT1:%.*]] = constant dense<1.000000e+00> : vector<32x256xf32> // CHECK: [[S6:%.*]] = addf [[S5]], [[SPLAT1]] : vector<32x256xf32> // CHECK: [[SPLAT2:%.*]] = constant dense<2.000000e+00> : vector<32x256xf32> // CHECK: [[S7:%.*]] = addf [[S5]], [[SPLAT2]] : vector<32x256xf32> // CHECK: [[S8:%.*]] = addf 
[[S7]], [[S6]] : vector<32x256xf32> - // CHECK: vector.transfer_write [[S8]], {{.*}} {permutation_map = #[[map_id2]]} : vector<32x256xf32>, memref + // CHECK: vector.transfer_write [[S8]], {{.*}} : vector<32x256xf32>, memref // %a5 = affine.load %A[%i4, %i5] : memref %b5 = affine.load %B[%i4, %i5] : memref @@ -110,7 +110,7 @@ func @vectorize_matmul(%arg0: memref, %arg1: memref, %arg2: me // VECT: {{.*}} #[[map_id1]](%[[M]]) step 4 { // VECT-NEXT: {{.*}} #[[map_id1]](%[[N]]) step 8 { // VECT: %[[VC0:.*]] = constant dense<0.000000e+00> : vector<4x8xf32> - // VECT-NEXT: vector.transfer_write %[[VC0]], %{{.*}}[%{{.*}}, %{{.*}}] {permutation_map = #[[map_id2]]} : vector<4x8xf32>, memref + // VECT-NEXT: vector.transfer_write %[[VC0]], %{{.*}}[%{{.*}}, %{{.*}}] : vector<4x8xf32>, memref affine.for %i0 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%M) { affine.for %i1 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%N) { %cst = constant 0.000000e+00 : f32 @@ -120,12 +120,12 @@ func @vectorize_matmul(%arg0: memref, %arg1: memref, %arg2: me // VECT: affine.for %[[I2:.*]] = #[[map_id1]](%[[C0]]) to #[[map_id1]](%[[M]]) step 4 { // VECT-NEXT: affine.for %[[I3:.*]] = #[[map_id1]](%[[C0]]) to #[[map_id1]](%[[N]]) step 8 { // VECT-NEXT: affine.for %[[I4:.*]] = #map5(%[[C0]]) to #[[map_id1]](%[[K]]) { - // VECT-NEXT: %[[A:.*]] = vector.transfer_read %{{.*}}[%[[I4]], %[[I3]]], %{{.*}} {permutation_map = #[[map_proj_d0d1_zerod1]]} : memref, vector<4x8xf32> - // VECT-NEXT: %[[B:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I4]]], %{{.*}} {permutation_map = #[[map_proj_d0d1_d0zero]]} : memref, vector<4x8xf32> + // VECT: %[[A:.*]] = vector.transfer_read %{{.*}}[%[[I4]], %[[I3]]], %{{.*}} {permutation_map = #[[map_proj_d0d1_zerod1]]} : memref, vector<4x8xf32> + // VECT: %[[B:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I4]]], %{{.*}} {permutation_map = #[[map_proj_d0d1_d0zero]]} : memref, vector<4x8xf32> // VECT-NEXT: %[[C:.*]] = mulf %[[B]], %[[A]] : vector<4x8xf32> - // VECT-NEXT: %[[D:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I3]]], %{{.*}} {permutation_map = #[[map_id2]]} : memref, vector<4x8xf32> + // VECT: %[[D:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I3]]], %{{.*}} : memref, vector<4x8xf32> // VECT-NEXT: %[[E:.*]] = addf %[[D]], %[[C]] : vector<4x8xf32> - // VECT-NEXT: vector.transfer_write %[[E]], %{{.*}}[%[[I2]], %[[I3]]] {permutation_map = #[[map_id2]]} : vector<4x8xf32>, memref + // VECT: vector.transfer_write %[[E]], %{{.*}}[%[[I2]], %[[I3]]] : vector<4x8xf32>, memref affine.for %i2 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%M) { affine.for %i3 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%N) { affine.for %i4 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%K) { diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_3d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_3d.mlir index 2980ee30d90868..5b6517ea390e5d 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_3d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_3d.mlir @@ -12,7 +12,7 @@ func @vec3d(%A : memref) { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32 { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 64 { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256 { - // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d0d1d2]]} : memref, vector<32x64x256xf32> + // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} 
: memref, vector<32x64x256xf32> affine.for %t0 = 0 to %0 { affine.for %t1 = 0 to %0 { affine.for %i0 = 0 to %0 { diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index ab50c566f9b3a1..1b0b0e38c4d5a7 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -238,17 +238,28 @@ func @outerproduct_operand_3_result_type_generic(%arg0: vector<4xf32>, %arg1: ve func @test_vector.transfer_read(%arg0: memref) { %c3 = constant 3 : index %cst = constant 3.0 : f32 - // expected-error@+1 {{two types required}} + // expected-error@+1 {{requires two types}} %0 = vector.transfer_read %arg0[%c3, %c3], %cst { permutation_map = affine_map<()->(0)> } : memref } // ----- -func @test_vector.transfer_read(%arg0: memref) { +func @test_vector.transfer_read(%arg0: vector<4x3xf32>) { %c3 = constant 3 : index - %cst = constant 3.0 : f32 - // expected-error@+1 {{requires 2 indices}} - %0 = vector.transfer_read %arg0[%c3, %c3, %c3], %cst { permutation_map = affine_map<()->(0)> } : memref, vector<128xf32> + %f0 = constant 0.0 : f32 + %vf0 = splat %f0 : vector<4x3xf32> + // expected-error@+1 {{ requires memref type}} + %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 : vector<4x3xf32>, vector<1x1x2x3xf32> +} + +// ----- + +func @test_vector.transfer_read(%arg0: memref<4x3xf32>) { + %c3 = constant 3 : index + %f0 = constant 0.0 : f32 + %vf0 = splat %f0 : vector<4x3xf32> + // expected-error@+1 {{ requires vector type}} + %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 : memref<4x3xf32>, f32 } // ----- @@ -256,8 +267,8 @@ func @test_vector.transfer_read(%arg0: memref) { func @test_vector.transfer_read(%arg0: memref) { %c3 = constant 3 : index %cst = constant 3.0 : f32 - // expected-error@+1 {{requires attribute 'permutation_map'}} - %0 = vector.transfer_read %arg0[%c3, %c3], %cst {perm = affine_map<(d0)->(d0)>} : memref, vector<128xf32> + // expected-error@+1 {{requires 2 indices}} + %0 = vector.transfer_read %arg0[%c3, %c3, %c3], %cst { permutation_map = affine_map<()->(0)> } : memref, vector<128xf32> } // ----- @@ -339,9 +350,29 @@ func @test_vector.transfer_read(%arg0: memref>) { func @test_vector.transfer_write(%arg0: memref) { %c3 = constant 3 : index - %cst = constant dense<3.0> : vector<128 x f32> - // expected-error@+1 {{expected 5 operand types but had 4}} - %0 = "vector.transfer_write"(%cst, %arg0, %c3, %c3, %c3) {permutation_map = affine_map<()->(0)>} : (vector<128xf32>, memref, index, index) -> () + %cst = constant 3.0 : f32 + // expected-error@+1 {{requires two types}} + vector.transfer_write %arg0, %arg0[%c3, %c3] : memref +} + +// ----- + +func @test_vector.transfer_write(%arg0: memref>) { + %c3 = constant 3 : index + %f0 = constant 0.0 : f32 + %vf0 = splat %f0 : vector<4x3xf32> + // expected-error@+1 {{ requires vector type}} + vector.transfer_write %arg0, %arg0[%c3, %c3] : memref>, vector<4x3xf32> +} + +// ----- + +func @test_vector.transfer_write(%arg0: vector<4x3xf32>) { + %c3 = constant 3 : index + %f0 = constant 0.0 : f32 + %vf0 = splat %f0 : vector<4x3xf32> + // expected-error@+1 {{ requires memref type}} + vector.transfer_write %arg0, %arg0[%c3, %c3] : vector<4x3xf32>, f32 } // ----- @@ -349,8 +380,8 @@ func @test_vector.transfer_write(%arg0: memref) { func @test_vector.transfer_write(%arg0: memref) { %c3 = constant 3 : index %cst = constant dense<3.0> : vector<128 x f32> - // expected-error@+1 {{requires 2 indices}} - vector.transfer_write %cst, %arg0[%c3, %c3, %c3] {permutation_map = affine_map<()->(0)>} : 
vector<128xf32>, memref + // expected-error@+1 {{expected 5 operand types but had 4}} + %0 = "vector.transfer_write"(%cst, %arg0, %c3, %c3, %c3) {permutation_map = affine_map<()->(0)>} : (vector<128xf32>, memref, index, index) -> () } // ----- @@ -358,8 +389,8 @@ func @test_vector.transfer_write(%arg0: memref) { func @test_vector.transfer_write(%arg0: memref) { %c3 = constant 3 : index %cst = constant dense<3.0> : vector<128 x f32> - // expected-error@+1 {{requires attribute 'permutation_map'}} - vector.transfer_write %cst, %arg0[%c3, %c3] {perm = affine_map<(d0)->(d0)>} : vector<128xf32>, memref + // expected-error@+1 {{requires 2 indices}} + vector.transfer_write %cst, %arg0[%c3, %c3, %c3] {permutation_map = affine_map<()->(0)>} : vector<128xf32>, memref } // ----- diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir index 73690d6ebcc86a..aacfdf75d028fe 100644 --- a/mlir/test/Dialect/Vector/ops.mlir +++ b/mlir/test/Dialect/Vector/ops.mlir @@ -20,14 +20,14 @@ func @vector_transfer_ops(%arg0: memref, %2 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = affine_map<(d0, d1)->(d0)>} : memref, vector<128xf32> // CHECK: vector.transfer_read %3 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = affine_map<(d0, d1)->(d1)>} : memref, vector<128xf32> - // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref>, vector<1x1x4x3xf32> + // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : memref>, vector<1x1x4x3xf32> %4 = vector.transfer_read %arg1[%c3, %c3], %vf0 {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : memref>, vector<1x1x4x3xf32> // CHECK: vector.transfer_write vector.transfer_write %0, %arg0[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d0)>} : vector<128xf32>, memref // CHECK: vector.transfer_write vector.transfer_write %1, %arg0[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d1, d0)>} : vector<3x7xf32>, memref - // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] {permutation_map = #[[MAP0]]} : vector<1x1x4x3xf32>, memref> + // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, memref> vector.transfer_write %4, %arg1[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : vector<1x1x4x3xf32>, memref> return diff --git a/mlir/test/Dialect/Vector/vector-transforms.mlir b/mlir/test/Dialect/Vector/vector-transforms.mlir index 2e4e9033fb81e6..8de153adf73108 100644 --- a/mlir/test/Dialect/Vector/vector-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-transforms.mlir @@ -231,26 +231,26 @@ func @contraction4x4_ikj(%arg0 : vector<4x2xf32>, %arg1 : vector<2x4xf32>, // Check LHS vector.transfer read is split for each user. 
-// CHECK: %[[VTR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<4x2xf32>, vector<2x2xf32> -// CHECK-NEXT: %[[VTR1:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<4x2xf32>, vector<2x2xf32> +// CHECK: %[[VTR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<4x2xf32>, vector<2x2xf32> +// CHECK-NEXT: %[[VTR1:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<4x2xf32>, vector<2x2xf32> -// CHECK-NEXT: %[[VTR2:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<2x4xf32>, vector<2x2xf32> -// CHECK-NEXT: %[[VTR3:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<2x4xf32>, vector<2x2xf32> +// CHECK-NEXT: %[[VTR2:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<2x4xf32>, vector<2x2xf32> +// CHECK-NEXT: %[[VTR3:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<2x4xf32>, vector<2x2xf32> -// CHECK-NEXT: %[[VTR4:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<4x4xf32>, vector<2x2xf32> -// CHECK-NEXT: %[[VTR5:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<4x4xf32>, vector<2x2xf32> -// CHECK-NEXT: %[[VTR6:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<4x4xf32>, vector<2x2xf32> -// CHECK-NEXT: %[[VTR7:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C2]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<4x4xf32>, vector<2x2xf32> +// CHECK-NEXT: %[[VTR4:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32> +// CHECK-NEXT: %[[VTR5:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32> +// CHECK-NEXT: %[[VTR6:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32> +// CHECK-NEXT: %[[VTR7:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32> // CHECK-NEXT: %[[R0:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"]} %[[VTR0]], %[[VTR2]], %[[VTR4]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> // CHECK-NEXT: %[[R1:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"]} %[[VTR0]], %[[VTR3]], %[[VTR5]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> // CHECK-NEXT: %[[R2:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"]} %[[VTR1]], %[[VTR2]], %[[VTR6]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> // CHECK-NEXT: %[[R3:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"]} %[[VTR1]], %[[VTR3]], %[[VTR7]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> -// CHECK-NEXT: vector.transfer_write %[[R0]], %{{.*}}[%[[C0]], %[[C0]]] {permutation_map = #[[MAP0]]} : vector<2x2xf32>, memref<4x4xf32> -// CHECK-NEXT: vector.transfer_write %[[R1]], %{{.*}}[%[[C0]], %[[C2]]] {permutation_map = #[[MAP0]]} : vector<2x2xf32>, memref<4x4xf32> -// CHECK-NEXT: vector.transfer_write %[[R2]], %{{.*}}[%[[C2]], %[[C0]]] {permutation_map = #[[MAP0]]} : vector<2x2xf32>, memref<4x4xf32> -// CHECK-NEXT: vector.transfer_write %[[R3]], 
%{{.*}}[%[[C2]], %[[C2]]] {permutation_map = #[[MAP0]]} : vector<2x2xf32>, memref<4x4xf32> +// CHECK-NEXT: vector.transfer_write %[[R0]], %{{.*}}[%[[C0]], %[[C0]]] : vector<2x2xf32>, memref<4x4xf32> +// CHECK-NEXT: vector.transfer_write %[[R1]], %{{.*}}[%[[C0]], %[[C2]]] : vector<2x2xf32>, memref<4x4xf32> +// CHECK-NEXT: vector.transfer_write %[[R2]], %{{.*}}[%[[C2]], %[[C0]]] : vector<2x2xf32>, memref<4x4xf32> +// CHECK-NEXT: vector.transfer_write %[[R3]], %{{.*}}[%[[C2]], %[[C2]]] : vector<2x2xf32>, memref<4x4xf32> // CHECK-NEXT: return func @contraction4x4_ikj_xfer_read(%arg0 : memref<4x2xf32>, @@ -425,10 +425,10 @@ func @cancelling_shape_cast_ops(%arg0 : vector<2x4xf32>) -> vector<2x4xf32> { // CHECK-LABEL: func @vector_transfers_vector_element_type // CHECK: %[[C0:.*]] = constant 0 : index // CHECK: %[[C1:.*]] = constant 1 : index -// CHECK: %[[VTR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP1]]} : memref<6x2x1xvector<2x4xf32>>, vector<1x1x2x4xf32> -// CHECK-NEXT: %[[VTR1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C1]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP1]]} : memref<6x2x1xvector<2x4xf32>>, vector<1x1x2x4xf32> -// CHECK-NEXT: vector.transfer_write %[[VTR0]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {permutation_map = #[[MAP1]]} : vector<1x1x2x4xf32>, memref<6x2x1xvector<2x4xf32>> -// CHECK-NEXT: vector.transfer_write %[[VTR1]], %{{.*}}[%[[C0]], %[[C1]], %[[C0]]] {permutation_map = #[[MAP1]]} : vector<1x1x2x4xf32>, memref<6x2x1xvector<2x4xf32>> +// CHECK: %[[VTR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} : memref<6x2x1xvector<2x4xf32>>, vector<1x1x2x4xf32> +// CHECK-NEXT: %[[VTR1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C1]], %[[C0]]], %{{.*}} : memref<6x2x1xvector<2x4xf32>>, vector<1x1x2x4xf32> +// CHECK-NEXT: vector.transfer_write %[[VTR0]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] : vector<1x1x2x4xf32>, memref<6x2x1xvector<2x4xf32>> +// CHECK-NEXT: vector.transfer_write %[[VTR1]], %{{.*}}[%[[C0]], %[[C1]], %[[C0]]] : vector<1x1x2x4xf32>, memref<6x2x1xvector<2x4xf32>> func @vector_transfers_vector_element_type() { %c0 = constant 0 : index From 1870e787af961d1b409e18a18ddf297f02333a78 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Mon, 18 May 2020 11:51:56 -0400 Subject: [PATCH 05/14] [mlir][Vector] Add an optional "masked" boolean array attribute to vector transfer operations Summary: Vector transfer ops semantic is extended to allow specifying a per-dimension `masked` attribute. When the attribute is false on a particular dimension, lowering to LLVM emits unmasked load and store operations. 
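As an illustration, here is a minimal hypothetical sketch of the intended usage (the function and its in-bounds assumption are not taken from this patch's tests): each entry of `masked` corresponds to one transfer dimension, and a `false` entry asserts that dimension never accesses out-of-bounds memory, so the LLVM lowering may emit plain loads and stores for it; omitting the attribute keeps the fully masked behavior.

  func @masked_transfer(%A : memref<?x?xf32>, %i : index, %j : index) {
    %pad = constant 0.0 : f32
    // No `masked` attribute: all dimensions are masked (previous behavior).
    %0 = vector.transfer_read %A[%i, %j], %pad
        : memref<?x?xf32>, vector<4x8xf32>
    // The caller guarantees in-bounds access in both dimensions, so masking
    // is disabled and the lowering may use unmasked loads/stores.
    %1 = vector.transfer_read %A[%i, %j], %pad {masked = [false, false]}
        : memref<?x?xf32>, vector<4x8xf32>
    vector.transfer_write %1, %A[%i, %j] {masked = [false, false]}
        : vector<4x8xf32>, memref<?x?xf32>
    return
  }
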
Differential Revision: https://reviews.llvm.org/D80098 --- mlir/include/mlir/Dialect/Vector/VectorOps.td | 46 ++++++-- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 69 ++++++++---- .../Conversion/VectorToSCF/VectorToSCF.cpp | 52 ++++++--- .../Affine/Transforms/SuperVectorize.cpp | 3 +- mlir/lib/Dialect/Vector/VectorOps.cpp | 101 ++++++++++++------ mlir/lib/Dialect/Vector/VectorTransforms.cpp | 10 +- .../VectorToLLVM/vector-to-llvm.mlir | 18 ++++ .../VectorToLoops/vector-to-loops.mlir | 31 ++++-- mlir/test/Dialect/Vector/invalid.mlir | 10 ++ mlir/test/Dialect/Vector/ops.mlir | 4 + 10 files changed, 255 insertions(+), 89 deletions(-) diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index b8a47a27e41f46..29e72857b291e4 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -865,7 +865,12 @@ def Vector_ExtractStridedSliceOp : def Vector_TransferOpUtils { code extraTransferDeclaration = [{ + static StringRef getMaskedAttrName() { return "masked"; } static StringRef getPermutationMapAttrName() { return "permutation_map"; } + bool isMaskedDim(unsigned dim) { + return !masked() || + masked()->cast()[dim].cast().getValue(); + } MemRefType getMemRefType() { return memref().getType().cast(); } @@ -878,14 +883,15 @@ def Vector_TransferOpUtils { def Vector_TransferReadOp : Vector_Op<"transfer_read">, Arguments<(ins AnyMemRef:$memref, Variadic:$indices, - AffineMapAttr:$permutation_map, AnyType:$padding)>, + AffineMapAttr:$permutation_map, AnyType:$padding, + OptionalAttr:$masked)>, Results<(outs AnyVector:$vector)> { let summary = "Reads a supervector from memory into an SSA vector value."; let description = [{ - The `vector.transfer_read` op performs a blocking read from a slice within - a [MemRef](../LangRef.md#memref-type) supplied as its first operand + The `vector.transfer_read` op performs a read from a slice within a + [MemRef](../LangRef.md#memref-type) supplied as its first operand into a [vector](../LangRef.md#vector-type) of the same base elemental type. A memref operand with vector element type, must have its vector element @@ -893,8 +899,9 @@ def Vector_TransferReadOp : memref<3x2x6x4x3xf32>, vector<1x1x4x3xf32>). The slice is further defined by a full-rank index within the MemRef, - supplied as the operands `2 .. 1 + rank(memref)`. The permutation_map - [attribute](../LangRef.md#attributes) is an + supplied as the operands `2 .. 1 + rank(memref)`. + + The permutation_map [attribute](../LangRef.md#attributes) is an [affine-map](Affine.md#affine-maps) which specifies the transposition on the slice to match the vector shape. The permutation map may be implicit and ommitted from parsing and printing if it is the canonical minor identity map @@ -906,6 +913,12 @@ def Vector_TransferReadOp : An `ssa-value` of the same elemental type as the MemRef is provided as the last operand to specify padding in the case of out-of-bounds accesses. + An optional boolean array attribute is provided to specify which dimensions + of the transfer need masking. When a dimension is specified as not requiring + masking, the `vector.transfer_read` may be lowered to simple loads. The + absence of this `masked` attribute signifies that all dimensions of the + transfer need to be masked. + This operation is called 'read' by opposition to 'load' because the super-vector granularity is generally not representable with a single hardware register. 
A `vector.transfer_read` is thus a mid-level abstraction @@ -1015,11 +1028,13 @@ def Vector_TransferReadOp : let builders = [ // Builder that sets padding to zero. OpBuilder<"OpBuilder &builder, OperationState &result, VectorType vector, " - "Value memref, ValueRange indices, AffineMap permutationMap">, + "Value memref, ValueRange indices, AffineMap permutationMap, " + "ArrayRef maybeMasked = {}">, // Builder that sets permutation map (resp. padding) to // 'getMinorIdentityMap' (resp. zero). OpBuilder<"OpBuilder &builder, OperationState &result, VectorType vector, " - "Value memref, ValueRange indices"> + "Value memref, ValueRange indices, " + "ArrayRef maybeMasked = {}"> ]; let extraClassDeclaration = Vector_TransferOpUtils.extraTransferDeclaration # @@ -1039,12 +1054,13 @@ def Vector_TransferWriteOp : Vector_Op<"transfer_write">, Arguments<(ins AnyVector:$vector, AnyMemRef:$memref, Variadic:$indices, - AffineMapAttr:$permutation_map)> { + AffineMapAttr:$permutation_map, + OptionalAttr:$masked)> { let summary = "The vector.transfer_write op writes a supervector to memory."; let description = [{ - The `vector.transfer_write` performs a blocking write from a + The `vector.transfer_write` op performs a write from a [vector](../LangRef.md#vector-type), supplied as its first operand, into a slice within a [MemRef](../LangRef.md#memref-type) of the same base elemental type, supplied as its second operand. @@ -1055,6 +1071,7 @@ def Vector_TransferWriteOp : The slice is further defined by a full-rank index within the MemRef, supplied as the operands `3 .. 2 + rank(memref)`. + The permutation_map [attribute](../LangRef.md#attributes) is an [affine-map](Affine.md#affine-maps) which specifies the transposition on the slice to match the vector shape. The permutation map may be implicit and @@ -1063,6 +1080,12 @@ def Vector_TransferWriteOp : The size of the slice is specified by the size of the vector. + An optional boolean array attribute is provided to specify which dimensions + of the transfer need masking. When a dimension is specified as not requiring + masking, the `vector.transfer_write` may be lowered to simple stores. The + absence of this `mask` attribute signifies that all dimensions of the + transfer need to be masked. + This operation is called 'write' by opposition to 'store' because the super-vector granularity is generally not representable with a single hardware register. A `vector.transfer_write` is thus a @@ -1097,7 +1120,10 @@ def Vector_TransferWriteOp : let builders = [ // Builder that sets permutation map to 'getMinorIdentityMap'. 
OpBuilder<"OpBuilder &builder, OperationState &result, Value vector, " - "Value memref, ValueRange indices"> + "Value memref, ValueRange indices, " + "ArrayRef maybeMasked = {}">, + OpBuilder<"OpBuilder &builder, OperationState &result, Value vector, " + "Value memref, ValueRange indices, AffineMap permutationMap">, ]; let extraClassDeclaration = Vector_TransferOpUtils.extraTransferDeclaration # diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index eb25bf3abf85ed..975807ca86712f 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -746,12 +746,6 @@ class VectorTypeCastOpConversion : public ConvertToLLVMPattern { } }; -template -LogicalResult replaceTransferOp(ConversionPatternRewriter &rewriter, - LLVMTypeConverter &typeConverter, Location loc, - Operation *op, ArrayRef operands, - Value dataPtr, Value mask); - LogicalResult getLLVMTypeAndAlignment(LLVMTypeConverter &typeConverter, Type type, LLVM::LLVMType &llvmType, unsigned &align) { @@ -765,12 +759,25 @@ LogicalResult getLLVMTypeAndAlignment(LLVMTypeConverter &typeConverter, return success(); } -template <> -LogicalResult replaceTransferOp( - ConversionPatternRewriter &rewriter, LLVMTypeConverter &typeConverter, - Location loc, Operation *op, ArrayRef operands, Value dataPtr, - Value mask) { - auto xferOp = cast(op); +LogicalResult +replaceTransferOpWithLoadOrStore(ConversionPatternRewriter &rewriter, + LLVMTypeConverter &typeConverter, Location loc, + TransferReadOp xferOp, + ArrayRef operands, Value dataPtr) { + LLVM::LLVMType vecTy; + unsigned align; + if (failed(getLLVMTypeAndAlignment(typeConverter, xferOp.getVectorType(), + vecTy, align))) + return failure(); + rewriter.replaceOpWithNewOp(xferOp, dataPtr); + return success(); +} + +LogicalResult replaceTransferOpWithMasked(ConversionPatternRewriter &rewriter, + LLVMTypeConverter &typeConverter, + Location loc, TransferReadOp xferOp, + ArrayRef operands, + Value dataPtr, Value mask) { auto toLLVMTy = [&](Type t) { return typeConverter.convertType(t); }; VectorType fillType = xferOp.getVectorType(); Value fill = rewriter.create(loc, fillType, xferOp.padding()); @@ -783,19 +790,32 @@ LogicalResult replaceTransferOp( return failure(); rewriter.replaceOpWithNewOp( - op, vecTy, dataPtr, mask, ValueRange{fill}, + xferOp, vecTy, dataPtr, mask, ValueRange{fill}, rewriter.getI32IntegerAttr(align)); return success(); } -template <> -LogicalResult replaceTransferOp( - ConversionPatternRewriter &rewriter, LLVMTypeConverter &typeConverter, - Location loc, Operation *op, ArrayRef operands, Value dataPtr, - Value mask) { +LogicalResult +replaceTransferOpWithLoadOrStore(ConversionPatternRewriter &rewriter, + LLVMTypeConverter &typeConverter, Location loc, + TransferWriteOp xferOp, + ArrayRef operands, Value dataPtr) { auto adaptor = TransferWriteOpOperandAdaptor(operands); + LLVM::LLVMType vecTy; + unsigned align; + if (failed(getLLVMTypeAndAlignment(typeConverter, xferOp.getVectorType(), + vecTy, align))) + return failure(); + rewriter.replaceOpWithNewOp(xferOp, adaptor.vector(), dataPtr); + return success(); +} - auto xferOp = cast(op); +LogicalResult replaceTransferOpWithMasked(ConversionPatternRewriter &rewriter, + LLVMTypeConverter &typeConverter, + Location loc, TransferWriteOp xferOp, + ArrayRef operands, + Value dataPtr, Value mask) { + auto adaptor = TransferWriteOpOperandAdaptor(operands); LLVM::LLVMType vecTy; unsigned 
align; if (failed(getLLVMTypeAndAlignment(typeConverter, xferOp.getVectorType(), @@ -803,7 +823,8 @@ LogicalResult replaceTransferOp( return failure(); rewriter.replaceOpWithNewOp( - op, adaptor.vector(), dataPtr, mask, rewriter.getI32IntegerAttr(align)); + xferOp, adaptor.vector(), dataPtr, mask, + rewriter.getI32IntegerAttr(align)); return success(); } @@ -877,6 +898,10 @@ class VectorTransferConversion : public ConvertToLLVMPattern { vectorDataPtr = rewriter.create( loc, vecTy.getPointerTo(), dataPtr); + if (!xferOp.isMaskedDim(0)) + return replaceTransferOpWithLoadOrStore(rewriter, typeConverter, loc, + xferOp, operands, vectorDataPtr); + // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. unsigned vecWidth = vecTy.getVectorNumElements(); VectorType vectorCmpType = VectorType::get(vecWidth, i64Type); @@ -910,8 +935,8 @@ class VectorTransferConversion : public ConvertToLLVMPattern { mask); // 5. Rewrite as a masked read / write. - return replaceTransferOp(rewriter, typeConverter, loc, op, - operands, vectorDataPtr, mask); + return replaceTransferOpWithMasked(rewriter, typeConverter, loc, xferOp, + operands, vectorDataPtr, mask); } }; diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index d3da7bff7b5b7a..03b78491fa1222 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -157,25 +157,34 @@ void NDTransferOpHelper::emitInBounds( ValueRange majorIvs, ValueRange majorOffsets, MemRefBoundsCapture &memrefBounds, LambdaThen thenBlockBuilder, LambdaElse elseBlockBuilder) { - Value inBounds = std_constant_int(/*value=*/1, /*width=*/1); + Value inBounds; SmallVector majorIvsPlusOffsets; majorIvsPlusOffsets.reserve(majorIvs.size()); + unsigned idx = 0; for (auto it : llvm::zip(majorIvs, majorOffsets, memrefBounds.getUbs())) { Value iv = std::get<0>(it), off = std::get<1>(it), ub = std::get<2>(it); using namespace mlir::edsc::op; majorIvsPlusOffsets.push_back(iv + off); - Value inBounds2 = majorIvsPlusOffsets.back() < ub; - inBounds = inBounds && inBounds2; + if (xferOp.isMaskedDim(leadingRank + idx)) { + Value inBounds2 = majorIvsPlusOffsets.back() < ub; + inBounds = (inBounds) ? (inBounds && inBounds2) : inBounds2; + } + ++idx; } - auto ifOp = ScopedContext::getBuilderRef().create( - ScopedContext::getLocation(), TypeRange{}, inBounds, - /*withElseRegion=*/std::is_same()); - BlockBuilder(&ifOp.thenRegion().front(), - Append())([&] { thenBlockBuilder(majorIvsPlusOffsets); }); - if (std::is_same()) - BlockBuilder(&ifOp.elseRegion().front(), - Append())([&] { elseBlockBuilder(majorIvsPlusOffsets); }); + if (inBounds) { + auto ifOp = ScopedContext::getBuilderRef().create( + ScopedContext::getLocation(), TypeRange{}, inBounds, + /*withElseRegion=*/std::is_same()); + BlockBuilder(&ifOp.thenRegion().front(), + Append())([&] { thenBlockBuilder(majorIvsPlusOffsets); }); + if (std::is_same()) + BlockBuilder(&ifOp.elseRegion().front(), + Append())([&] { elseBlockBuilder(majorIvsPlusOffsets); }); + } else { + // Just build the body of the then block right here. + thenBlockBuilder(majorIvsPlusOffsets); + } } template <> @@ -192,13 +201,18 @@ LogicalResult NDTransferOpHelper::doReplace() { indexing.append(leadingOffsets.begin(), leadingOffsets.end()); indexing.append(majorIvsPlusOffsets.begin(), majorIvsPlusOffsets.end()); indexing.append(minorOffsets.begin(), minorOffsets.end()); - // Lower to 1-D vector_transfer_read and let recursion handle it. 
+ Value memref = xferOp.memref(); auto map = TransferReadOp::getTransferMinorIdentityMap( xferOp.getMemRefType(), minorVectorType); - auto loaded1D = - vector_transfer_read(minorVectorType, memref, indexing, - AffineMapAttr::get(map), xferOp.padding()); + ArrayAttr masked; + if (xferOp.isMaskedDim(xferOp.getVectorType().getRank() - 1)) { + OpBuilder &b = ScopedContext::getBuilderRef(); + masked = b.getBoolArrayAttr({true}); + } + auto loaded1D = vector_transfer_read(minorVectorType, memref, indexing, + AffineMapAttr::get(map), + xferOp.padding(), masked); // Store the 1-D vector. std_store(loaded1D, alloc, majorIvs); }; @@ -229,7 +243,6 @@ LogicalResult NDTransferOpHelper::doReplace() { ValueRange majorOffsets, ValueRange minorOffsets, MemRefBoundsCapture &memrefBounds) { auto thenBlockBuilder = [&](ValueRange majorIvsPlusOffsets) { - // Lower to 1-D vector_transfer_write and let recursion handle it. SmallVector indexing; indexing.reserve(leadingRank + majorRank + minorRank); indexing.append(leadingOffsets.begin(), leadingOffsets.end()); @@ -239,8 +252,13 @@ LogicalResult NDTransferOpHelper::doReplace() { Value loaded1D = std_load(alloc, majorIvs); auto map = TransferWriteOp::getTransferMinorIdentityMap( xferOp.getMemRefType(), minorVectorType); + ArrayAttr masked; + if (xferOp.isMaskedDim(xferOp.getVectorType().getRank() - 1)) { + OpBuilder &b = ScopedContext::getBuilderRef(); + masked = b.getBoolArrayAttr({true}); + } vector_transfer_write(loaded1D, xferOp.memref(), indexing, - AffineMapAttr::get(map)); + AffineMapAttr::get(map), masked); }; // Don't write anything when out of bounds. auto elseBlockBuilder = [&](ValueRange majorIvsPlusOffsets) {}; diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp index c72b835fc51ace..f5b98f9bf0653d 100644 --- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp @@ -1017,8 +1017,7 @@ static Operation *vectorizeOneOperation(Operation *opInst, LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ permutationMap: "); LLVM_DEBUG(permutationMap.print(dbgs())); auto transfer = b.create( - opInst->getLoc(), vectorValue, memRef, indices, - AffineMapAttr::get(permutationMap)); + opInst->getLoc(), vectorValue, memRef, indices, permutationMap); auto *res = transfer.getOperation(); LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ vectorized store: " << *res); // "Terminals" (i.e. AffineStoreOps) are erased on the spot. diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 94695b6473ded0..f347a564f446b8 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -1202,6 +1202,23 @@ void ExtractStridedSliceOp::getCanonicalizationPatterns( //===----------------------------------------------------------------------===// // TransferReadOp //===----------------------------------------------------------------------===// + +/// Build the default minor identity map suitable for a vector transfer. This +/// also handles the case memref<... x vector<...>> -> vector<...> in which the +/// rank of the identity map must take the vector element type into account. 
+AffineMap +mlir::vector::impl::getTransferMinorIdentityMap(MemRefType memRefType, + VectorType vectorType) { + int64_t elementVectorRank = 0; + VectorType elementVectorType = + memRefType.getElementType().dyn_cast(); + if (elementVectorType) + elementVectorRank += elementVectorType.getRank(); + return AffineMap::getMinorIdentityMap( + memRefType.getRank(), vectorType.getRank() - elementVectorRank, + memRefType.getContext()); +} + template static LogicalResult verifyPermutationMap(AffineMap permutationMap, EmitFun emitOpError) { @@ -1233,7 +1250,8 @@ static LogicalResult verifyPermutationMap(AffineMap permutationMap, static LogicalResult verifyTransferOp(Operation *op, MemRefType memrefType, VectorType vectorType, - AffineMap permutationMap) { + AffineMap permutationMap, + ArrayAttr optionalMasked) { auto memrefElementType = memrefType.getElementType(); if (auto memrefVectorElementType = memrefElementType.dyn_cast()) { // Memref has vector element type. @@ -1282,52 +1300,60 @@ static LogicalResult verifyTransferOp(Operation *op, MemRefType memrefType, return op->emitOpError("requires a permutation_map with input dims of the " "same rank as the memref type"); - return success(); -} + if (optionalMasked) { + if (permutationMap.getNumResults() != + static_cast(optionalMasked.size())) + return op->emitOpError("expects the optional masked attr of same rank as " + "permutation_map results: ") + << AffineMapAttr::get(permutationMap); + } -/// Build the default minor identity map suitable for a vector transfer. This -/// also handles the case memref<... x vector<...>> -> vector<...> in which the -/// rank of the identity map must take the vector element type into account. -AffineMap -mlir::vector::impl::getTransferMinorIdentityMap(MemRefType memRefType, - VectorType vectorType) { - int64_t elementVectorRank = 0; - VectorType elementVectorType = - memRefType.getElementType().dyn_cast(); - if (elementVectorType) - elementVectorRank += elementVectorType.getRank(); - return AffineMap::getMinorIdentityMap( - memRefType.getRank(), vectorType.getRank() - elementVectorRank, - memRefType.getContext()); + return success(); } -/// Builder that sets permutation map and padding to 'getMinorIdentityMap' and -/// zero, respectively, by default. +/// Builder that sets padding to zero. void TransferReadOp::build(OpBuilder &builder, OperationState &result, VectorType vector, Value memref, ValueRange indices, - AffineMap permutationMap) { + AffineMap permutationMap, + ArrayRef maybeMasked) { Type elemType = vector.cast().getElementType(); Value padding = builder.create(result.location, elemType, builder.getZeroAttr(elemType)); - build(builder, result, vector, memref, indices, permutationMap, padding); + if (maybeMasked.empty()) + return build(builder, result, vector, memref, indices, permutationMap, + padding, ArrayAttr()); + ArrayAttr maskedArrayAttr = builder.getBoolArrayAttr(maybeMasked); + build(builder, result, vector, memref, indices, permutationMap, padding, + maskedArrayAttr); } /// Builder that sets permutation map (resp. padding) to 'getMinorIdentityMap' /// (resp. zero). 
void TransferReadOp::build(OpBuilder &builder, OperationState &result, VectorType vectorType, Value memref, - ValueRange indices) { - build(builder, result, vectorType, memref, indices, - getTransferMinorIdentityMap(memref.getType().cast(), - vectorType)); + ValueRange indices, ArrayRef maybeMasked) { + auto permMap = getTransferMinorIdentityMap( + memref.getType().cast(), vectorType); + build(builder, result, vectorType, memref, indices, permMap, maybeMasked); } template void printTransferAttrs(OpAsmPrinter &p, TransferOp op) { - SmallVector elidedAttrs; + SmallVector elidedAttrs; if (op.permutation_map() == TransferOp::getTransferMinorIdentityMap( op.getMemRefType(), op.getVectorType())) elidedAttrs.push_back(op.getPermutationMapAttrName()); + bool elideMasked = true; + if (auto maybeMasked = op.masked()) { + for (auto attr : *maybeMasked) { + if (!attr.template cast().getValue()) { + elideMasked = false; + break; + } + } + } + if (elideMasked) + elidedAttrs.push_back(op.getMaskedAttrName()); p.printOptionalAttrDict(op.getAttrs(), elidedAttrs); } @@ -1388,7 +1414,8 @@ static LogicalResult verify(TransferReadOp op) { return op.emitOpError("requires ") << memrefType.getRank() << " indices"; if (failed(verifyTransferOp(op.getOperation(), memrefType, vectorType, - permutationMap))) + permutationMap, + op.masked() ? *op.masked() : ArrayAttr()))) return failure(); if (auto memrefVectorElementType = memrefElementType.dyn_cast()) { @@ -1419,11 +1446,24 @@ static LogicalResult verify(TransferReadOp op) { /// Builder that sets permutation map to 'getMinorIdentityMap'. void TransferWriteOp::build(OpBuilder &builder, OperationState &result, - Value vector, Value memref, ValueRange indices) { + Value vector, Value memref, ValueRange indices, + ArrayRef maybeMasked) { auto vectorType = vector.getType().cast(); auto permMap = getTransferMinorIdentityMap( memref.getType().cast(), vectorType); - build(builder, result, vector, memref, indices, permMap); + if (maybeMasked.empty()) + return build(builder, result, vector, memref, indices, permMap, + ArrayAttr()); + ArrayAttr maskedArrayAttr = builder.getBoolArrayAttr(maybeMasked); + build(builder, result, vector, memref, indices, permMap, maskedArrayAttr); +} + +/// Builder that sets permutation map to 'getMinorIdentityMap'. +void TransferWriteOp::build(OpBuilder &builder, OperationState &result, + Value vector, Value memref, ValueRange indices, + AffineMap permutationMap) { + build(builder, result, vector, memref, indices, + /*maybeMasked=*/ArrayRef{}); } static ParseResult parseTransferWriteOp(OpAsmParser &parser, @@ -1477,7 +1517,8 @@ static LogicalResult verify(TransferWriteOp op) { return op.emitOpError("requires ") << memrefType.getRank() << " indices"; if (failed(verifyTransferOp(op.getOperation(), memrefType, vectorType, - permutationMap))) + permutationMap, + op.masked() ? *op.masked() : ArrayAttr()))) return failure(); return verifyPermutationMap(permutationMap, diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index af7e5ad86af854..cf1bdede90271e 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -564,9 +564,12 @@ struct SplitTransferReadOp : public OpRewritePattern { // Get VectorType for slice 'i'. auto sliceVectorType = resultTupleType.getType(index); // Create split TransferReadOp for 'sliceUser'. + // `masked` attribute propagates conservatively: if the coarse op didn't + // need masking, the fine op doesn't either. 
vectorTupleValues[index] = rewriter.create( loc, sliceVectorType, xferReadOp.memref(), sliceIndices, - xferReadOp.permutation_map(), xferReadOp.padding()); + xferReadOp.permutation_map(), xferReadOp.padding(), + xferReadOp.masked() ? *xferReadOp.masked() : ArrayAttr()); }; generateTransferOpSlices(memrefElementType, sourceVectorType, resultTupleType, sizes, strides, indices, rewriter, @@ -620,9 +623,12 @@ struct SplitTransferWriteOp : public OpRewritePattern { xferWriteOp.indices().end()); auto createSlice = [&](unsigned index, ArrayRef sliceIndices) { // Create split TransferWriteOp for source vector 'tupleOp.operand[i]'. + // `masked` attribute propagates conservatively: if the coarse op didn't + // need masking, the fine op doesn't either. rewriter.create( loc, tupleOp.getOperand(index), xferWriteOp.memref(), sliceIndices, - xferWriteOp.permutation_map()); + xferWriteOp.permutation_map(), + xferWriteOp.masked() ? *xferWriteOp.masked() : ArrayAttr()); }; generateTransferOpSlices(memrefElementType, resultVectorType, sourceTupleType, sizes, strides, indices, rewriter, diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index 1c23072b61092a..26e3e9dbe2b1e5 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -918,6 +918,24 @@ func @transfer_read_1d_non_zero_addrspace(%A : memref, %base: index) - // CHECK: %[[vecPtr_b:.*]] = llvm.addrspacecast %[[gep_b]] : // CHECK-SAME: !llvm<"float addrspace(3)*"> to !llvm<"<17 x float>*"> +func @transfer_read_1d_not_masked(%A : memref, %base: index) -> vector<17xf32> { + %f7 = constant 7.0: f32 + %f = vector.transfer_read %A[%base], %f7 {masked = [false]} : + memref, vector<17xf32> + return %f: vector<17xf32> +} +// CHECK-LABEL: func @transfer_read_1d_not_masked +// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: !llvm.i64) -> !llvm<"<17 x float>"> +// +// 1. Bitcast to vector form. +// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} : +// CHECK-SAME: (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> +// CHECK: %[[vecPtr:.*]] = llvm.bitcast %[[gep]] : +// CHECK-SAME: !llvm<"float*"> to !llvm<"<17 x float>*"> +// +// 2. Rewrite as a load. 
+// CHECK: %[[loaded:.*]] = llvm.load %[[vecPtr]] : !llvm<"<17 x float>*"> + func @genbool_1d() -> vector<8xi1> { %0 = vector.constant_mask [4] : vector<8xi1> return %0 : vector<8xi1> diff --git a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir index 5c1e6361adb976..c0bc5542e21d2a 100644 --- a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir @@ -220,14 +220,12 @@ func @transfer_read_progressive(%A : memref, %base: index) -> vector<17 // CHECK: %[[cst:.*]] = constant 7.000000e+00 : f32 %f7 = constant 7.0: f32 - // CHECK-DAG: %[[cond0:.*]] = constant 1 : i1 // CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32> // CHECK-DAG: %[[alloc:.*]] = alloc() : memref<17xvector<15xf32>> // CHECK-DAG: %[[dim:.*]] = dim %[[A]], 0 : memref // CHECK: affine.for %[[I:.*]] = 0 to 17 { // CHECK: %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]] - // CHECK: %[[cmp:.*]] = cmpi "slt", %[[add]], %[[dim]] : index - // CHECK: %[[cond1:.*]] = and %[[cmp]], %[[cond0]] : i1 + // CHECK: %[[cond1:.*]] = cmpi "slt", %[[add]], %[[dim]] : index // CHECK: scf.if %[[cond1]] { // CHECK: %[[vec_1d:.*]] = vector.transfer_read %[[A]][%[[add]], %[[base]]], %[[cst]] : memref, vector<15xf32> // CHECK: store %[[vec_1d]], %[[alloc]][%[[I]]] : memref<17xvector<15xf32>> @@ -253,7 +251,6 @@ func @transfer_read_progressive(%A : memref, %base: index) -> vector<17 // CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index, // CHECK-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<17x15xf32> func @transfer_write_progressive(%A : memref, %base: index, %vec: vector<17x15xf32>) { - // CHECK: %[[cond0:.*]] = constant 1 : i1 // CHECK: %[[alloc:.*]] = alloc() : memref<17xvector<15xf32>> // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<17xvector<15xf32>> to memref> // CHECK: store %[[vec]], %[[vmemref]][] : memref> @@ -261,8 +258,7 @@ func @transfer_write_progressive(%A : memref, %base: index, %vec: vecto // CHECK: affine.for %[[I:.*]] = 0 to 17 { // CHECK: %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]] // CHECK: %[[cmp:.*]] = cmpi "slt", %[[add]], %[[dim]] : index - // CHECK: %[[cond1:.*]] = and %[[cmp]], %[[cond0]] : i1 - // CHECK: scf.if %[[cond1]] { + // CHECK: scf.if %[[cmp]] { // CHECK: %[[vec_1d:.*]] = load %0[%[[I]]] : memref<17xvector<15xf32>> // CHECK: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] : vector<15xf32>, memref // CHECK: } @@ -271,3 +267,26 @@ func @transfer_write_progressive(%A : memref, %base: index, %vec: vecto vector<17x15xf32>, memref return } + +// ----- + +// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (d1)> + +// CHECK-LABEL: transfer_write_progressive_not_masked( +// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref, +// CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<17x15xf32> +func @transfer_write_progressive_not_masked(%A : memref, %base: index, %vec: vector<17x15xf32>) { + // CHECK-NOT: scf.if + // CHECK-NEXT: %[[alloc:.*]] = alloc() : memref<17xvector<15xf32>> + // CHECK-NEXT: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<17xvector<15xf32>> to memref> + // CHECK-NEXT: store %[[vec]], %[[vmemref]][] : memref> + // CHECK-NEXT: affine.for %[[I:.*]] = 0 to 17 { + // CHECK-NEXT: %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]] + // CHECK-NEXT: %[[vec_1d:.*]] = load %0[%[[I]]] : memref<17xvector<15xf32>> + // CHECK-NEXT: 
vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] : vector<15xf32>, memref + vector.transfer_write %vec, %A[%base, %base] {masked = [false, false]} : + vector<17x15xf32>, memref + return +} diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index 1b0b0e38c4d5a7..c18cf38edfc90f 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -348,6 +348,16 @@ func @test_vector.transfer_read(%arg0: memref>) { // ----- +func @test_vector.transfer_read(%arg0: memref>) { + %c3 = constant 3 : index + %f0 = constant 0.0 : f32 + %vf0 = splat %f0 : vector<2x3xf32> + // expected-error@+1 {{ expects the optional masked attr of same rank as permutation_map results: affine_map<(d0, d1) -> (d0, d1)>}} + %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 {masked = [false], permutation_map = affine_map<(d0, d1)->(d0, d1)>} : memref>, vector<1x1x2x3xf32> +} + +// ----- + func @test_vector.transfer_write(%arg0: memref) { %c3 = constant 3 : index %cst = constant 3.0 : f32 diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir index aacfdf75d028fe..c194cbe2381172 100644 --- a/mlir/test/Dialect/Vector/ops.mlir +++ b/mlir/test/Dialect/Vector/ops.mlir @@ -22,6 +22,8 @@ func @vector_transfer_ops(%arg0: memref, %3 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = affine_map<(d0, d1)->(d1)>} : memref, vector<128xf32> // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : memref>, vector<1x1x4x3xf32> %4 = vector.transfer_read %arg1[%c3, %c3], %vf0 {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : memref>, vector<1x1x4x3xf32> + // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} {masked = [true, false]} : memref>, vector<1x1x4x3xf32> + %5 = vector.transfer_read %arg1[%c3, %c3], %vf0 {masked = [true, false]} : memref>, vector<1x1x4x3xf32> // CHECK: vector.transfer_write vector.transfer_write %0, %arg0[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d0)>} : vector<128xf32>, memref @@ -29,6 +31,8 @@ func @vector_transfer_ops(%arg0: memref, vector.transfer_write %1, %arg0[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d1, d0)>} : vector<3x7xf32>, memref // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, memref> vector.transfer_write %4, %arg1[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : vector<1x1x4x3xf32>, memref> + // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, memref> + vector.transfer_write %5, %arg1[%c3, %c3] {masked = [true, true]} : vector<1x1x4x3xf32>, memref> return } From 681a161ff5419404ac1b3221e9a01ade25802998 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 17 May 2020 12:37:27 -0400 Subject: [PATCH 06/14] AMDGPU: Remove outdated comment --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 803b188b948836..2eb58841749bc3 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4711,13 +4711,6 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, Target = BR->getOperand(1); } - // FIXME: This changes the types of the intrinsics instead of introducing new - // nodes with the correct types. - // e.g. 
llvm.amdgcn.loop - - // eg: i1,ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3 - // => t9: ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3, BasicBlock:ch - unsigned CFNode = isCFIntrinsic(Intr); if (CFNode == 0) { // This is a uniform branch so we don't need to legalize. From 4c70074e54348d8fb77f14583c6172e4377dc95e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 17 May 2020 19:06:10 -0400 Subject: [PATCH 07/14] AMDGPU/GlobalISel: Fix splitting wide VALU, non-vector loads --- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 15 +- .../AMDGPU/GlobalISel/regbankselect-load.mir | 320 ++++++++++++------ 2 files changed, 221 insertions(+), 114 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 0a920c1f72b370..18ec745cb9a02d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1135,15 +1135,20 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI, MachineIRBuilder B(MI); - unsigned SplitElts = - MaxNonSmrdLoadSize / LoadTy.getScalarType().getSizeInBits(); - const LLT LoadSplitTy = LLT::vector(SplitElts, LoadTy.getScalarType()); + unsigned NumSplitParts = LoadTy.getSizeInBits() / MaxNonSmrdLoadSize; + const LLT LoadSplitTy = LoadTy.divide(NumSplitParts); ApplyRegBankMapping O(*this, MRI, &AMDGPU::VGPRRegBank); GISelObserverWrapper Observer(&O); B.setChangeObserver(Observer); LegalizerHelper Helper(B.getMF(), Observer, B); - if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized) - return false; + + if (LoadTy.isVector()) { + if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized) + return false; + } else { + if (Helper.narrowScalar(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized) + return false; + } MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank); return true; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir index 998094d622dac0..53302f9554e392 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s @@ -8,6 +9,7 @@ %tmp2 = load <8 x i32>, <8 x i32> addrspace(1)* %global.not.uniform.v8i32 ret void } + define amdgpu_kernel void @load_global_v4i64_non_uniform(<4 x i64> addrspace(1)* %in) { %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0 %global.not.uniform.v4i64 = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tmp0 @@ -36,6 +38,21 @@ %tmp2 = load <8 x i32>, <8 x i32> addrspace(4)* %constant.not.uniform.v8i32 ret void } + + define amdgpu_kernel void @load_constant_i256_non_uniform(i256 addrspace(4)* %in) { + %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0 + %constant.not.uniform = getelementptr i256, i256 addrspace(4)* %in, i32 %tmp0 + %tmp2 = load i256, i256 addrspace(4)* %constant.not.uniform + ret void + } + + define amdgpu_kernel void @load_constant_v16i16_non_uniform(<16 x i16> addrspace(4)* %in) { + %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0 + %constant.not.uniform = getelementptr <16 x i16>, <16 x i16> addrspace(4)* %in, i32 %tmp0 + 
%tmp2 = load <16 x i16>, <16 x i16> addrspace(4)* %constant.not.uniform + ret void + } + define amdgpu_kernel void @load_constant_v4i64_non_uniform(<4 x i64> addrspace(4)* %in) { %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0 %constant.not.uniform.v4i64 = getelementptr <4 x i64>, <4 x i64> addrspace(4)* %in, i32 %tmp0 @@ -56,6 +73,7 @@ } define amdgpu_kernel void @load_constant_v8i32_uniform() {ret void} + define amdgpu_kernel void @load_constant_v16i16_uniform() {ret void} define amdgpu_kernel void @load_constant_v4i64_uniform() {ret void} define amdgpu_kernel void @load_constant_v16i32_uniform() {ret void} define amdgpu_kernel void @load_constant_v8i64_uniform() {ret void} @@ -84,12 +102,13 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i32_non_uniform - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR]](p1) :: (load 16 from %ir.global.not.uniform.v8i32, align 32, addrspace 1) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v8i32 + 16, align 32, addrspace 1) - ; CHECK: %1:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16 from %ir.global.not.uniform.v8i32, align 32, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 16 from %ir.global.not.uniform.v8i32 + 16, align 32, addrspace 1) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load 32 from %ir.global.not.uniform.v8i32) ... @@ -101,13 +120,15 @@ legalized: true body: | bb.0: liveins: $sgpr0_sgpr1 - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR]](p1) :: (load 16 from %ir.global.not.uniform.v4i64, align 32, addrspace 1) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v4i64 + 16, align 32, addrspace 1) - ; CHECK: %1:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>) + ; CHECK-LABEL: name: load_global_v4i64_non_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16 from %ir.global.not.uniform.v4i64, align 32, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load 16 from %ir.global.not.uniform.v4i64 + 16, align 32, addrspace 1) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load 32 from %ir.global.not.uniform.v4i64) ... 
@@ -120,18 +141,19 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v16i32_non_uniform - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR]](p1) :: (load 16 from %ir.global.not.uniform.v16i32, align 64, addrspace 1) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 16, align 64, addrspace 1) - ; CHECK: [[OFFSET32:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP32:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET32]](s64) - ; CHECK: [[LOAD32:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP32]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 32, align 64, addrspace 1) - ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64) - ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP48]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 48, align 64, addrspace 1) - ; CHECK: %1:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>), [[LOAD32]](<4 x s32>), [[LOAD48]](<4 x s32>) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16 from %ir.global.not.uniform.v16i32, align 64, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 16, align 64, addrspace 1) + ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 32, align 64, addrspace 1) + ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 48, align 64, addrspace 1) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load 64 from %ir.global.not.uniform.v16i32) ... @@ -167,7 +189,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i32_uniform - ; CHECK: (<8 x s32>) = G_LOAD %0(p1) :: (invariant load 32, addrspace 1) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load 32, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (invariant load 32, addrspace 1) ... @@ -180,7 +203,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v4i64_uniform - ; CHECK: (<4 x s64>) = G_LOAD %0(p1) :: (invariant load 32, addrspace 1) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load 32, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (invariant load 32, addrspace 1) ... 
@@ -193,7 +217,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v16i32_uniform - ; CHECK: (<16 x s32>) = G_LOAD %0(p1) :: (invariant load 64, addrspace 1) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load 64, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (invariant load 64, addrspace 1) ... @@ -206,7 +231,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i64_uniform - ; CHECK: (<8 x s64>) = G_LOAD %0(p1) :: (invariant load 64, addrspace 1) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load 64, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (invariant load 64, addrspace 1) ... @@ -219,16 +245,56 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i32_non_uniform - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR]](p4) :: (load 16 from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v8i32 + 16, align 32, addrspace 4) - ; CHECK: %1:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform.v8i32 + 16, align 32, addrspace 4) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform.v8i32) ... +--- +name: load_constant_i256_non_uniform +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: load_constant_i256_non_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform, align 32, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(s128) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform + 16, align 32, addrspace 4) + ; CHECK: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[LOAD]](s128), [[LOAD1]](s128) + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(s256) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform) +... 
+ +--- +name: load_constant_v16i16_non_uniform +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: load_constant_v16i16_non_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform, align 32, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform + 16, align 32, addrspace 4) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[LOAD1]](<8 x s16>) + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(<16 x s16>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform) +... + --- name: load_constant_v4i64_non_uniform legalized: true @@ -237,12 +303,13 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v4i64_non_uniform - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR]](p4) :: (load 16 from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v4i64 + 16, align 32, addrspace 4) - ; CHECK: %1:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform.v4i64 + 16, align 32, addrspace 4) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform.v4i64) ... 
@@ -255,18 +322,19 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v16i32_non_uniform - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32, align 64, addrspace 4) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 16, align 64, addrspace 4) - ; CHECK: [[OFFSET32:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP32:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET32]](s64) - ; CHECK: [[LOAD32:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP32]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 32, align 64, addrspace 4) - ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64) - ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP48]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 48, align 64, addrspace 4) - ; CHECK: %1:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>), [[LOAD32]](<4 x s32>), [[LOAD48]](<4 x s32>) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32, align 64, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 16, align 64, addrspace 4) + ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 32, align 64, addrspace 4) + ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 48, align 64, addrspace 4) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load 64 from %ir.constant.not.uniform.v16i32) ... 
@@ -279,18 +347,19 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i64_non_uniform - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64, align 64, addrspace 4) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 16, align 64, addrspace 4) - ; CHECK: [[OFFSET32:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP32:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET32]](s64) - ; CHECK: [[LOAD32:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP32]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 32, align 64, addrspace 4) - ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64) - ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP48]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 48, align 64, addrspace 4) - ; CHECK: %1:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>), [[LOAD32]](<2 x s64>), [[LOAD48]](<2 x s64>) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64, align 64, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 16, align 64, addrspace 4) + ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 32, align 64, addrspace 4) + ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 48, align 64, addrspace 4) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (load 64 from %ir.constant.not.uniform.v8i64) ... @@ -303,11 +372,26 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i32_uniform - ; CHECK: (<8 x s32>) = G_LOAD %0(p4) :: (load 32, addrspace 4) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4) ... +--- +name: load_constant_v16i16_uniform +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: load_constant_v16i16_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s16>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(<16 x s16>) = G_LOAD %0 :: (load 32, addrspace 4) +... 
+ --- name: load_constant_v4i64_uniform legalized: true @@ -316,7 +400,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v4i64_uniform - ; CHECK: (<4 x s64>) = G_LOAD %0(p4) :: (load 32, addrspace 4) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, addrspace 4) ... @@ -329,7 +414,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v16i32_uniform - ; CHECK: (<16 x s32>) = G_LOAD %0(p4) :: (load 64, addrspace 4) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load 64, addrspace 4) ... @@ -342,7 +428,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i64_uniform - ; CHECK: (<8 x s64>) = G_LOAD %0(p4) :: (load 64, addrspace 4) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p4) :: (load 64, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (load 64, addrspace 4) ... @@ -353,11 +440,11 @@ legalized: true body: | bb.0: liveins: $sgpr0 - ; CHECK-LABEL: load_local_uniform - ; CHECK: %0:sgpr(p3) = COPY $sgpr0 - ; CHECK: %2:vgpr(p3) = COPY %0(p3) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p3) :: (load 4, addrspace 3) + ; CHECK-LABEL: name: load_local_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load 4, addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 3) @@ -368,11 +455,11 @@ legalized: true body: | bb.0: liveins: $sgpr0 - ; CHECK-LABEL: load_region_uniform - ; CHECK: %0:sgpr(p3) = COPY $sgpr0 - ; CHECK: %2:vgpr(p3) = COPY %0(p3) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p3) :: (load 4, addrspace 5) + ; CHECK-LABEL: name: load_region_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load 4, addrspace 5) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5) @@ -386,9 +473,9 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: extload_constant_i8_to_i32_uniform - ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: %2:vgpr(p4) = COPY %0(p4) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 1, addrspace 4) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 1, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 1, addrspace 4, align 1) ... @@ -401,10 +488,10 @@ body: | bb.0: liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extload_global_i8_to_i32_uniform{{$}} - ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: %2:vgpr(p4) = COPY %0(p4) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 1, addrspace 1) + ; CHECK-LABEL: name: extload_global_i8_to_i32_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 1, addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 1, addrspace 1, align 1) ... 
@@ -416,11 +503,11 @@ legalized: true body: | bb.0: liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extload_constant_i16_to_i32_uniform - ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: %2:vgpr(p4) = COPY %0(p4) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 2, addrspace 4) + ; CHECK-LABEL: name: extload_constant_i16_to_i32_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 2, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 2, addrspace 4, align 2) ... @@ -432,11 +519,11 @@ legalized: true body: | bb.0: liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extload_global_i16_to_i32_uniform - ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: %2:vgpr(p4) = COPY %0(p4) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 2, addrspace 1) + ; CHECK-LABEL: name: extload_global_i16_to_i32_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 2, addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 2, addrspace 1, align 2) ... @@ -449,8 +536,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i32_uniform_align4 - ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: %1:sgpr(s32) = G_LOAD %0(p4) :: (load 4, addrspace 4) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 4) ... @@ -462,11 +549,11 @@ legalized: true body: | bb.0: liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: load_constant_i32_uniform_align2 - ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: %2:vgpr(p4) = COPY %0(p4) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 4, align 2, addrspace 4) + ; CHECK-LABEL: name: load_constant_i32_uniform_align2 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 4, align 2, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 2) ... @@ -480,9 +567,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i32_uniform_align1 - ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: %2:vgpr(p4) = COPY %0(p4) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 4, align 1, addrspace 4) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 4, align 1, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 1) ... 
@@ -513,10 +600,13 @@ body: | liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash - ; CHECK: %0:vgpr(p4) = COPY $vgpr0_vgpr1 - ; CHECK: vgpr(<4 x s32>) = G_LOAD %0(p4) - ; CHECK: vgpr(<4 x s32>) = G_LOAD - ; CHECK: G_CONCAT_VECTORS + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p4) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 32, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 + 16, addrspace 4) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4) ... @@ -527,14 +617,26 @@ legalized: true tracksRegLiveness: true body: | + ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash_loop_phi + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(p4) = COPY $sgpr2_sgpr3 + ; CHECK: G_BR %bb.1 + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[PHI:%[0-9]+]]:vgpr(p4) = G_PHI [[COPY]](p4), %bb.0, %3(p4), %bb.1 + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PHI]](p4) :: (load 16, align 32, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PHI]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 + 16, addrspace 4) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(p4) = COPY [[COPY1]](p4) + ; CHECK: G_BR %bb.1 bb.0: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 - ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash_loop_phi - ; CHECK: G_PHI - ; CHECK: vgpr(<4 x s32>) = G_LOAD - ; CHECK: vgpr(<4 x s32>) = G_LOAD - ; CHECK: G_CONCAT_VECTORS %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(p4) = COPY $sgpr2_sgpr3 From d1866f89472787dfac92a689700c4c4335a6add3 Mon Sep 17 00:00:00 2001 From: Pierre Oechsel Date: Mon, 18 May 2020 18:25:23 +0200 Subject: [PATCH 08/14] [MLIR] [Linalg] Add option to use the partial view after promotion. For now the promoted buffer is indexed using the `full view`. The full view might be slightly bigger than the partial view (which is accounting for boundaries). Unfortunately this does not compose easily with other transformations when multiple buffers with shapes related to each other are involved. Take `linalg.matmul A B C` (with A of size MxK, B of size KxN and C of size MxN) and suppose we are: - Tiling over M by 100 - Promoting A only This is producing a `linalg.matmul promoted_A B subview_C` where `promoted_A` is a promoted buffer of `A` of size (100xK) and `subview_C` is a subview of size mxK where m could be smaller than 100 due to boundaries thus leading to a possible incorrect behavior. We propose to: - Add a new parameter to the tiling promotion allowing to enable the use of the full tile buffer. - By default all promoted buffers will be indexed by the partial view. Note that this could be considered as a breaking change in comparison to the way the tiling promotion was working. 
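For illustration only (this sketch is not part of the patch), a client of the new options might configure promotion as follows. The option names match the API added below; the helper functions and the operand index are hypothetical:

  #include "mlir/Dialect/Linalg/Transforms/Transforms.h"

  using namespace mlir::linalg;

  // Promote operand 0 (A in the matmul example above) and index it through the
  // full 100xK tile buffer; any other promoted operand keeps the partial view.
  LinalgPromotionOptions promoteAWithFullView() {
    return LinalgPromotionOptions()
        .setOperandsToPromote({0})
        .setUseFullTileBuffers({true});
  }

  // Recover the pre-patch behavior: every promoted buffer is indexed through
  // its full view unless overridden per operand.
  LinalgPromotionOptions keepFullViewsEverywhere() {
    return LinalgPromotionOptions().useFullTileBuffersByDefault();
  }

The second form mirrors the updates to TestLinalgTransforms.cpp in this patch, where existing test patterns opt back into full-view indexing to preserve their previous output.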
Differential Revision: https://reviews.llvm.org/D79927 --- .../Dialect/Linalg/Transforms/Transforms.h | 23 ++++++++++++++ .../mlir/Dialect/Vector/EDSC/Intrinsics.h | 3 ++ .../Dialect/Linalg/Transforms/Promotion.cpp | 30 ++++++++++++++----- mlir/test/Dialect/Linalg/promote.mlir | 15 ++-------- .../lib/Transforms/TestLinalgTransforms.cpp | 16 +++++++--- 5 files changed, 64 insertions(+), 23 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 70c3f00f52161b..e93977185fb356 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -11,6 +11,7 @@ #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/IR/PatternMatch.h" +#include "llvm/ADT/SmallBitVector.h" namespace mlir { namespace linalg { @@ -97,6 +98,28 @@ struct LinalgPromotionOptions { operandsToPromote->insert(operands.begin(), operands.end()); return *this; } + /// If ith element of `useFullTiles` is true the full view should be used for + /// the promoted buffer of the ith operand in `operandsToPromote`. Otherwise + /// the partial view will be used. + /// The decision is defaulted to `useFullTileBuffersDefault` when + /// `useFullTileBuffers` is None and for operands missing from + /// `useFullTileBuffers`. + Optional useFullTileBuffers = None; + LinalgPromotionOptions &setUseFullTileBuffers(ArrayRef useFullTiles) { + unsigned size = useFullTiles.size(); + llvm::SmallBitVector tmp(size, false); + for (unsigned i = 0; i < size; ++i) + tmp[i] = useFullTiles[i]; + useFullTileBuffers = tmp; + return *this; + } + /// If true all operands unspecified by `useFullTileBuffers` will use the full + /// view, otherwise the partial view. + bool useFullTileBuffersDefault = false; + LinalgPromotionOptions &useFullTileBuffersByDefault() { + useFullTileBuffersDefault = true; + return *this; + } /// Allow the use of dynamicaly-sized buffers. bool dynamicBuffers = false; LinalgPromotionOptions &setDynamicBuffers(unsigned dynamic) { diff --git a/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h index 7fa9099a6a90ff..6b5c4be7b2f409 100644 --- a/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h +++ b/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h @@ -16,6 +16,9 @@ namespace intrinsics { using vector_broadcast = ValueBuilder; using vector_contract = ValueBuilder; +using vector_insert = ValueBuilder; +using vector_fma = ValueBuilder; +using vector_extract = ValueBuilder; using vector_matmul = ValueBuilder; using vector_print = OperationBuilder; using vector_transfer_read = ValueBuilder; diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp index 5cbaa2f426dbcf..44de2a1021c274 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -56,6 +56,8 @@ struct LinalgOpInstancePromotionOptions { const LinalgPromotionOptions &options); /// SubViews to promote. SetVector subViews; + /// True if the full view should be used for the promoted buffer. + DenseMap useFullTileBuffers; /// Allow the use of dynamicaly-sized buffers. bool dynamicBuffers; /// Alignment of promoted buffer. 
@@ -65,20 +67,28 @@ struct LinalgOpInstancePromotionOptions { LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions( LinalgOp linalgOp, const LinalgPromotionOptions &options) - : subViews(), dynamicBuffers(options.dynamicBuffers), + : subViews(), useFullTileBuffers(), dynamicBuffers(options.dynamicBuffers), alignment(options.alignment) { + unsigned nBuffers = linalgOp.getNumInputsAndOutputBuffers(); + auto vUseFullTileBuffers = + options.useFullTileBuffers.getValueOr(llvm::SmallBitVector()); + vUseFullTileBuffers.resize(nBuffers, options.useFullTileBuffersDefault); + if (options.operandsToPromote.hasValue()) { - for (unsigned idx : options.operandsToPromote.getValue()) { - auto *op = linalgOp.getBuffer(idx).getDefiningOp(); - if (auto sv = dyn_cast_or_null(op)) + for (auto it : llvm::enumerate(options.operandsToPromote.getValue())) { + auto *op = linalgOp.getBuffer(it.value()).getDefiningOp(); + if (auto sv = dyn_cast_or_null(op)) { subViews.insert(sv); + useFullTileBuffers[sv] = vUseFullTileBuffers[it.index()]; + } } } else { - unsigned nBuffers = linalgOp.getNumInputsAndOutputBuffers(); for (unsigned idx = 0; idx < nBuffers; ++idx) { auto *op = linalgOp.getBuffer(idx).getDefiningOp(); - if (auto sv = dyn_cast_or_null(op)) + if (auto sv = dyn_cast_or_null(op)) { subViews.insert(sv); + useFullTileBuffers[sv] = vUseFullTileBuffers[idx]; + } } } } @@ -201,6 +211,9 @@ promoteSubViews(OpBuilder &b, Location loc, auto info = promotionInfoMap.find(v); if (info == promotionInfoMap.end()) continue; + // Only fill the buffer if the full local view is used + if (!options.useFullTileBuffers[v]) + continue; Value fillVal; if (auto t = subView.getType().getElementType().dyn_cast()) fillVal = folded_std_constant(folder, FloatAttr::get(t, 0.0)); @@ -244,7 +257,10 @@ static void promoteSubViews(OpBuilder &b, LinalgOp op, unsigned promotedIdx = 0; for (auto view : op.getInputsAndOutputBuffers()) { if (options.subViews.count(view) != 0) { - opViews.push_back(promotedBufferAndViews[promotedIdx].fullLocalView); + if (options.useFullTileBuffers[view]) + opViews.push_back(promotedBufferAndViews[promotedIdx].fullLocalView); + else + opViews.push_back(promotedBufferAndViews[promotedIdx].partialLocalView); writebackViews.emplace_back(std::make_pair( view, promotedBufferAndViews[promotedIdx].partialLocalView)); promotedIdx++; diff --git a/mlir/test/Dialect/Linalg/promote.mlir b/mlir/test/Dialect/Linalg/promote.mlir index 64534733846a13..27364b05f3bd90 100644 --- a/mlir/test/Dialect/Linalg/promote.mlir +++ b/mlir/test/Dialect/Linalg/promote.mlir @@ -56,14 +56,11 @@ func @matmul_f32(%A: memref, %M: index, %N: index, %K: index) { // DYNAMIC: std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECK: %[[partialC:.*]] = subview %[[fullC]]{{.*}} : memref to memref -// CHECK: linalg.fill(%[[fullA]], {{.*}}) : memref, f32 -// CHECK: linalg.fill(%[[fullB]], {{.*}}) : memref, f32 -// CHECK: linalg.fill(%[[fullC]], {{.*}}) : memref, f32 // CHECK: linalg.copy(%[[vA]], %[[partialA]]) : memref, memref // CHECK: linalg.copy(%[[vB]], %[[partialB]]) : memref, memref // CHECK: linalg.copy(%[[vC]], %[[partialC]]) : memref, memref // -// CHECK: linalg.matmul(%[[fullA]], %[[fullB]], %[[fullC]]) : memref, memref, memref +// CHECK: linalg.matmul(%[[partialA]], %[[partialB]], %[[partialC]]) : memref, memref, memref // // CHECK: linalg.copy(%[[partialC]], %[[vC]]) : memref, memref // @@ -121,14 +118,11 @@ func @matmul_f64(%A: memref, %M: index, %N: index, %K: index) { // DYNAMIC: std.view %{{.*}}[{{.*}}][{{.*}}] : memref to 
memref // CHECK: %[[partialC_f64:.*]] = subview %[[fullC_f64]][%{{.*}}, %{{.*}}] : memref to memref -// CHECK: linalg.fill(%[[fullA_f64]], {{.*}}) : memref, f64 -// CHECK: linalg.fill(%[[fullB_f64]], {{.*}}) : memref, f64 -// CHECK: linalg.fill(%[[fullC_f64]], {{.*}}) : memref, f64 // CHECK: linalg.copy(%[[vA_f64]], %[[partialA_f64]]) : memref, memref // CHECK: linalg.copy(%[[vB_f64]], %[[partialB_f64]]) : memref, memref // CHECK: linalg.copy(%[[vC_f64]], %[[partialC_f64]]) : memref, memref // -// CHECK: linalg.matmul(%[[fullA_f64]], %[[fullB_f64]], %[[fullC_f64]]) : memref, memref, memref +// CHECK: linalg.matmul(%[[partialA_f64]], %[[partialB_f64]], %[[partialC_f64]]) : memref, memref, memref // // CHECK: linalg.copy(%[[partialC_f64]], %[[vC_f64]]) : memref, memref // @@ -186,14 +180,11 @@ func @matmul_i32(%A: memref, %M: index, %N: index, %K: index) { // DYNAMIC: std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECK: %[[partialC_i32:.*]] = subview %[[fullC_i32]][%{{.*}}, %{{.*}}] : memref to memref -// CHECK: linalg.fill(%[[fullA_i32]], {{.*}}) : memref, i32 -// CHECK: linalg.fill(%[[fullB_i32]], {{.*}}) : memref, i32 -// CHECK: linalg.fill(%[[fullC_i32]], {{.*}}) : memref, i32 // CHECK: linalg.copy(%[[vA_i32]], %[[partialA_i32]]) : memref, memref // CHECK: linalg.copy(%[[vB_i32]], %[[partialB_i32]]) : memref, memref // CHECK: linalg.copy(%[[vC_i32]], %[[partialC_i32]]) : memref, memref // -// CHECK: linalg.matmul(%[[fullA_i32]], %[[fullB_i32]], %[[fullC_i32]]) : memref, memref, memref +// CHECK: linalg.matmul(%[[partialA_i32]], %[[partialB_i32]], %[[partialC_i32]]) : memref, memref, memref // // CHECK: linalg.copy(%[[partialC_i32]], %[[vC_i32]]) : memref, memref // diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp index 0390ac945d2f7d..87191d3e87d2b4 100644 --- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp @@ -132,13 +132,20 @@ static void applyPatterns(FuncOp funcOp) { // Linalg subview operands promotion. 
//===--------------------------------------------------------------------===// patterns.insert>( - ctx, LinalgPromotionOptions(), + ctx, LinalgPromotionOptions().useFullTileBuffersByDefault(), LinalgMarker({"_promote_views_"}, "_views_promoted_")); patterns.insert>( - ctx, LinalgPromotionOptions().setOperandsToPromote({0}), + ctx, + LinalgPromotionOptions() + .setOperandsToPromote({0}) + .useFullTileBuffersByDefault(), LinalgMarker({"_promote_first_view_"}, "_first_view_promoted_")); patterns.insert>( - ctx, LinalgPromotionOptions().setOperandsToPromote({0}).setAlignment(32), + ctx, + LinalgPromotionOptions() + .setOperandsToPromote({0}) + .setUseFullTileBuffers({true}) + .setAlignment(32), LinalgMarker({"_promote_views_aligned_"}, "_views_aligned_promoted_")); applyPatternsAndFoldGreedily(funcOp, patterns); @@ -171,7 +178,8 @@ void fillL1TilingAndMatmulToVectorPatterns( LinalgMarker({startMarker}, "L1"))); patternsVector.emplace_back(LinalgPromotionPattern( - context, LinalgPromotionOptions(), LinalgMarker({"L1"}, "VEC"))); + context, LinalgPromotionOptions().useFullTileBuffersByDefault(), + LinalgMarker({"L1"}, "VEC"))); patternsVector.emplace_back( LinalgVectorizationPattern(context, LinalgMarker({"VEC"}))); From a4cb9bec1ca32cc1cfc25b32c05494c200793264 Mon Sep 17 00:00:00 2001 From: Tobias Gysi Date: Mon, 18 May 2020 18:30:39 +0200 Subject: [PATCH 09/14] [mlir] Support optional attributes in assembly formats Summary: This revision adds support for assembly formats with optional attributes. It elides optional attributes that are part of the syntax from the attribute dictionary. Reviewers: ftynse, Kayjukh Reviewed By: ftynse, Kayjukh Subscribers: mehdi_amini, rriddle, jpienaar, shauheen, antiagainst, nicolasvasilache, arpith-jacob, mgester, lucyrfox, liufengdb, stephenneuendorffer, Joonsoo, grosul1, frgossen, jurahul, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80113 --- mlir/test/lib/Dialect/Test/TestOps.td | 8 +++++++- mlir/test/mlir-tblgen/op-format.mlir | 7 +++++++ mlir/tools/mlir-tblgen/OpFormatGen.cpp | 10 +++++++++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index f3bbf0e50dca70..eb2ff83fdddfb8 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -1214,9 +1214,15 @@ def FormatAttrOp : TEST_Op<"format_attr_op"> { let assemblyFormat = "$attr attr-dict"; } +// Test that we elide optional attributes that are within the syntax. +def FormatOptAttrOp : TEST_Op<"format_opt_attr_op"> { + let arguments = (ins OptionalAttr:$opt_attr); + let assemblyFormat = "(`(`$opt_attr^`)`)? attr-dict"; +} + // Test that we elide attributes that are within the syntax. def FormatAttrDictWithKeywordOp : TEST_Op<"format_attr_dict_w_keyword"> { - let arguments = (ins I64Attr:$attr); + let arguments = (ins I64Attr:$attr, OptionalAttr:$opt_attr); let assemblyFormat = "attr-dict-with-keyword"; } diff --git a/mlir/test/mlir-tblgen/op-format.mlir b/mlir/test/mlir-tblgen/op-format.mlir index 8d55768aced79b..066e548e17083e 100644 --- a/mlir/test/mlir-tblgen/op-format.mlir +++ b/mlir/test/mlir-tblgen/op-format.mlir @@ -12,9 +12,16 @@ test.format_literal_op keyword_$. 
-> :, = <> () [] {foo.some_attr} // CHECK-NOT: {attr test.format_attr_op 10 +// CHECK: test.format_opt_attr_op(10) +// CHECK-NOT: {opt_attr +test.format_opt_attr_op(10) + // CHECK: test.format_attr_dict_w_keyword attributes {attr = 10 : i64} test.format_attr_dict_w_keyword attributes {attr = 10 : i64} +// CHECK: test.format_attr_dict_w_keyword attributes {attr = 10 : i64, opt_attr = 10 : i64} +test.format_attr_dict_w_keyword attributes {attr = 10 : i64, opt_attr = 10 : i64} + // CHECK: test.format_buildable_type_op %[[I64]] %ignored = test.format_buildable_type_op %i64 diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 127b6b976cd53d..9fa87e3a842771 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -886,9 +886,17 @@ static void genAttrDictPrinter(OperationFormat &fmt, Operator &op, OpMethodBody &body, bool withKeyword) { // Collect all of the attributes used in the format, these will be elided. SmallVector usedAttributes; - for (auto &it : fmt.elements) + for (auto &it : fmt.elements) { if (auto *attr = dyn_cast(it.get())) usedAttributes.push_back(attr->getVar()); + // Collect the optional attributes. + if (auto *opt = dyn_cast(it.get())) { + for (auto &elem : opt->getElements()) { + if (auto *attr = dyn_cast(&elem)) + usedAttributes.push_back(attr->getVar()); + } + } + } body << " p.printOptionalAttrDict" << (withKeyword ? "WithKeyword" : "") << "(getAttrs(), /*elidedAttrs=*/{"; From 23dc948d362018a8257d8288fe9beb0d27fc9b35 Mon Sep 17 00:00:00 2001 From: Alexandre Rames Date: Mon, 18 May 2020 18:44:26 +0200 Subject: [PATCH 10/14] [MLIR] Use `MLIR_INCLUDE_TESTS` to conditionally compile tests. This is equivalent to what is done for other projects (e.g. clang). Differential Revision: https://reviews.llvm.org/D80022 --- mlir/CMakeLists.txt | 11 +++++++++-- mlir/tools/mlir-opt/CMakeLists.txt | 18 ++++++++++++------ mlir/tools/mlir-opt/mlir-opt.cpp | 4 ++++ 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt index 3671a97395d4ec..7c2c5978c44e62 100644 --- a/mlir/CMakeLists.txt +++ b/mlir/CMakeLists.txt @@ -34,6 +34,10 @@ add_definitions(-DMLIR_CUDA_CONVERSIONS_ENABLED=${MLIR_CUDA_CONVERSIONS_ENABLED} set(MLIR_CUDA_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir CUDA runner") set(MLIR_VULKAN_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir Vulkan runner") +option(MLIR_INCLUDE_TESTS + "Generate build targets for the MLIR unit tests." + ${LLVM_INCLUDE_TESTS}) + include_directories( "include") include_directories( ${MLIR_INCLUDE_DIR}) @@ -44,8 +48,11 @@ add_subdirectory(tools/mlir-tblgen) add_subdirectory(include/mlir) add_subdirectory(lib) -add_subdirectory(unittests) -add_subdirectory(test) +if (MLIR_INCLUDE_TESTS) + add_definitions(-DMLIR_INCLUDE_TESTS) + add_subdirectory(unittests) + add_subdirectory(test) +endif() # Tools needs to come late to ensure that MLIR_ALL_LIBS is populated. # Generally things after this point may depend on MLIR_ALL_LIBS or libMLIR.so. 
add_subdirectory(tools) diff --git a/mlir/tools/mlir-opt/CMakeLists.txt b/mlir/tools/mlir-opt/CMakeLists.txt index 3e8ed0ebee7b51..d509b23505d12b 100644 --- a/mlir/tools/mlir-opt/CMakeLists.txt +++ b/mlir/tools/mlir-opt/CMakeLists.txt @@ -10,24 +10,30 @@ set(LLVM_LINK_COMPONENTS AsmParser ) +if(MLIR_INCLUDE_TESTS) + set(test_libs + MLIRAffineTransformsTestPasses + MLIRSPIRVTestPasses + MLIRTestDialect + MLIRTestIR + MLIRTestPass + MLIRTestTransforms + ) +endif() + set(LIBS ${dialect_libs} ${conversion_libs} + ${test_libs} MLIRLoopAnalysis - MLIRAffineTransformsTestPasses MLIRAnalysis MLIRDialect MLIREDSC MLIROptLib MLIRParser MLIRPass - MLIRSPIRVTestPasses MLIRTransforms MLIRTransformUtils - MLIRTestDialect - MLIRTestIR - MLIRTestPass - MLIRTestTransforms MLIRSupport MLIRIR ) diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 218d6c03b4b80d..69b1d8d57bc56b 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -93,6 +93,7 @@ static cl::opt allowUnregisteredDialects( "allow-unregistered-dialect", cl::desc("Allow operation with no registered dialects"), cl::init(false)); +#ifdef MLIR_INCLUDE_TESTS void registerTestPasses() { registerConvertToTargetEnvPass(); registerInliner(); @@ -131,6 +132,7 @@ void registerTestPasses() { registerTestVectorToSCFPass(); registerVectorizerTestPass(); } +#endif static cl::opt showDialects("show-dialects", @@ -140,7 +142,9 @@ static cl::opt int main(int argc, char **argv) { registerAllDialects(); registerAllPasses(); +#ifdef MLIR_INCLUDE_TESTS registerTestPasses(); +#endif InitLLVM y(argc, argv); // Register any command line options. From 623b2542446a1873fb7ea3904c4fb50e2e77fe41 Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Fri, 15 May 2020 13:58:40 -0700 Subject: [PATCH 11/14] [Local] Do not ignore zexts in salvageDebugInfo, PR45923 Summary: When salvaging a dead zext instruction, append a convert operation to the DIExpressions of the debug uses of the instruction, to prevent the salvaged value from being sign-extended. I confirmed that lldb prints out the correct unsigned result for "f" in the example from PR45923 with this changed applied. rdar://63246143 Reviewers: aprantl, jmorse, chrisjackson, davide Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80034 --- llvm/lib/Transforms/Utils/Local.cpp | 9 +++++---- llvm/test/Transforms/InstCombine/cast-mul-select.ll | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index ae4ef97b2fd0f6..545413c1fe035c 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -1698,13 +1698,14 @@ DIExpression *llvm::salvageDebugInfoImpl(Instruction &I, }; if (auto *CI = dyn_cast(&I)) { - // No-op casts and zexts are irrelevant for debug info. - if (CI->isNoopCast(DL) || isa(&I)) + // No-op casts are irrelevant for debug info. + if (CI->isNoopCast(DL)) return SrcDIExpr; Type *Type = CI->getType(); - // Casts other than Trunc or SExt to scalar types cannot be salvaged. - if (Type->isVectorTy() || (!isa(&I) && !isa(&I))) + // Casts other than Trunc, SExt, or ZExt to scalar types cannot be salvaged. 
+ if (Type->isVectorTy() || + !(isa(&I) || isa(&I) || isa(&I))) return nullptr; Value *FromValue = CI->getOperand(0); diff --git a/llvm/test/Transforms/InstCombine/cast-mul-select.ll b/llvm/test/Transforms/InstCombine/cast-mul-select.ll index f82d2fd285fe8f..e68f3830b5a9d5 100644 --- a/llvm/test/Transforms/InstCombine/cast-mul-select.ll +++ b/llvm/test/Transforms/InstCombine/cast-mul-select.ll @@ -13,8 +13,8 @@ define i32 @mul(i32 %x, i32 %y) { ; we preserve the debug information in the resulting ; instruction. ; DBGINFO-LABEL: @mul( -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 %x -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 %y +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 %x, {{.*}} !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)) +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 %y, {{.*}} !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)) ; DBGINFO-NEXT: [[C:%.*]] = mul i32 {{.*}} ; DBGINFO-NEXT: [[D:%.*]] = and i32 {{.*}} ; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[C]] @@ -175,7 +175,7 @@ exit: ; Check that we don't drop debug info when a zext is removed. define i1 @foo(i1 zeroext %b) { ; DBGINFO-LABEL: @foo( -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i1 %b +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i1 %b, {{.*}} !DIExpression(DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)) ; DBGINFO-NEXT: ret i1 %b %frombool = zext i1 %b to i8 From 2084330e41d301cf9eaa3495d8968bff70846c7b Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 18 May 2020 09:55:33 -0700 Subject: [PATCH 12/14] [lldb/Reproducers] Add skipIfReproducer to more tests Mark more tests as unsupported with reproducers. 
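For context only (this sketch is not part of the patch), the decorator is applied like the other lldbsuite skip decorators; the test class, method, and reason below are hypothetical:

  from lldbsuite.test.decorators import *
  from lldbsuite.test.lldbtest import TestBase


  class ExampleTestCase(TestBase):

      mydir = TestBase.compute_mydir(__file__)

      # Hypothetical test that cannot run under reproducer capture/replay,
      # e.g. because it depends on state that is not recorded.
      @skipIfReproducer
      def test_example(self):
          self.build()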
--- .../step_over_breakpoint/TestStepOverBreakpoint.py | 1 + .../deleted-executable/TestDeletedExecutable.py | 1 + .../functionalities/load_unload/TestLoadUnload.py | 1 + .../postmortem/elf-core/TestLinuxCore.py | 12 ++++++++++++ .../postmortem/netbsd-core/TestNetBSDCore.py | 6 ++++++ 5 files changed, 21 insertions(+) diff --git a/lldb/test/API/functionalities/breakpoint/step_over_breakpoint/TestStepOverBreakpoint.py b/lldb/test/API/functionalities/breakpoint/step_over_breakpoint/TestStepOverBreakpoint.py index b20490f3cefdcc..931326b322911b 100644 --- a/lldb/test/API/functionalities/breakpoint/step_over_breakpoint/TestStepOverBreakpoint.py +++ b/lldb/test/API/functionalities/breakpoint/step_over_breakpoint/TestStepOverBreakpoint.py @@ -51,6 +51,7 @@ def setUp(self): self.thread = lldbutil.get_one_thread_stopped_at_breakpoint(self.process, self.breakpoint1) self.assertIsNotNone(self.thread, "Didn't stop at breakpoint 1.") + @skipIfReproducer def test_step_instruction(self): # Count instructions between breakpoint_1 and breakpoint_4 contextList = self.target.FindFunctions('main', lldb.eFunctionNameTypeAuto) diff --git a/lldb/test/API/functionalities/deleted-executable/TestDeletedExecutable.py b/lldb/test/API/functionalities/deleted-executable/TestDeletedExecutable.py index ed17d9b36b6b0f..78f3feae6ff637 100644 --- a/lldb/test/API/functionalities/deleted-executable/TestDeletedExecutable.py +++ b/lldb/test/API/functionalities/deleted-executable/TestDeletedExecutable.py @@ -20,6 +20,7 @@ class TestDeletedExecutable(TestBase): triple=no_match('aarch64-.*-android')) # determining the architecture of the process fails @expectedFailureNetBSD + @skipIfReproducer # File synchronization is not supported during replay. def test(self): self.build() exe = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/functionalities/load_unload/TestLoadUnload.py b/lldb/test/API/functionalities/load_unload/TestLoadUnload.py index 7188fa32a154e9..e0013ccd93fa60 100644 --- a/lldb/test/API/functionalities/load_unload/TestLoadUnload.py +++ b/lldb/test/API/functionalities/load_unload/TestLoadUnload.py @@ -95,6 +95,7 @@ def setSvr4Support(self, enabled): @not_remote_testsuite_ready @skipIfWindows # Windows doesn't have dlopen and friends, dynamic libraries work differently @expectedFailureNetBSD + @skipIfReproducer # VFS is a snapshot. def test_modules_search_paths(self): """Test target modules list after loading a different copy of the library libd.dylib, and verifies that it works with 'target modules search-paths add'.""" if self.platformIsDarwin(): diff --git a/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py b/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py index 63bb02e5eb60f3..e0046f7108898e 100644 --- a/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py +++ b/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py @@ -38,29 +38,34 @@ class LinuxCoreTestCase(TestBase): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("AArch64") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_aarch64(self): """Test that lldb can read the process information from an aarch64 linux core file.""" self.do_test("linux-aarch64", self._aarch64_pid, self._aarch64_regions, "a.out") @skipIf(triple='^mips') @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. 
def test_i386(self): """Test that lldb can read the process information from an i386 linux core file.""" self.do_test("linux-i386", self._i386_pid, self._i386_regions, "a.out") @skipIfLLVMTargetMissing("Mips") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_mips_o32(self): """Test that lldb can read the process information from an MIPS O32 linux core file.""" self.do_test("linux-mipsel-gnuabio32", self._mips_o32_pid, self._mips_regions, "linux-mipsel-gn") @skipIfLLVMTargetMissing("Mips") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_mips_n32(self): """Test that lldb can read the process information from an MIPS N32 linux core file """ self.do_test("linux-mips64el-gnuabin32", self._mips64_n32_pid, self._mips_regions, "linux-mips64el-") @skipIfLLVMTargetMissing("Mips") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_mips_n64(self): """Test that lldb can read the process information from an MIPS N64 linux core file """ self.do_test("linux-mips64el-gnuabi64", self._mips64_n64_pid, @@ -68,6 +73,7 @@ def test_mips_n64(self): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("PowerPC") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_ppc64le(self): """Test that lldb can read the process information from an ppc64le linux core file.""" self.do_test("linux-ppc64le", self._ppc64le_pid, self._ppc64le_regions, @@ -75,6 +81,7 @@ def test_ppc64le(self): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_x86_64(self): """Test that lldb can read the process information from an x86_64 linux core file.""" self.do_test("linux-x86_64", self._x86_64_pid, self._x86_64_regions, @@ -82,6 +89,7 @@ def test_x86_64(self): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("SystemZ") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_s390x(self): """Test that lldb can read the process information from an s390x linux core file.""" self.do_test("linux-s390x", self._s390x_pid, self._s390x_regions, @@ -89,6 +97,7 @@ def test_s390x(self): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_same_pid_running(self): """Test that we read the information from the core correctly even if we have a running process with the same PID around""" @@ -117,6 +126,7 @@ def test_same_pid_running(self): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_two_cores_same_pid(self): """Test that we handle the situation if we have two core files with the same PID around""" @@ -197,6 +207,7 @@ def test_FPR_SSE(self): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_i386_sysroot(self): """Test that lldb can find the exe for an i386 linux core file using the sysroot.""" @@ -221,6 +232,7 @@ def test_i386_sysroot(self): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("X86") @skipIfWindows + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. 
def test_x86_64_sysroot(self): """Test that sysroot has more priority then local filesystem.""" diff --git a/lldb/test/API/functionalities/postmortem/netbsd-core/TestNetBSDCore.py b/lldb/test/API/functionalities/postmortem/netbsd-core/TestNetBSDCore.py index f967a57e4ea719..6ecd2673534474 100644 --- a/lldb/test/API/functionalities/postmortem/netbsd-core/TestNetBSDCore.py +++ b/lldb/test/API/functionalities/postmortem/netbsd-core/TestNetBSDCore.py @@ -159,11 +159,13 @@ def check_stack(self, process, pid, filename): self.check_backtrace(thread, filename, backtrace) @skipIfLLVMTargetMissing("AArch64") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_aarch64(self): """Test single-threaded aarch64 core dump.""" self.do_test("1lwp_SIGSEGV.aarch64", pid=8339, region_count=32) @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_amd64(self): """Test single-threaded amd64 core dump.""" self.do_test("1lwp_SIGSEGV.amd64", pid=693, region_count=21) @@ -189,11 +191,13 @@ def check_stack(self, process, pid, filename): self.assertEqual(thread.GetStopReasonDataAtIndex(0), 0) @skipIfLLVMTargetMissing("AArch64") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_aarch64(self): """Test double-threaded aarch64 core dump where thread 2 is signalled.""" self.do_test("2lwp_t2_SIGSEGV.aarch64", pid=14142, region_count=31) @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_amd64(self): """Test double-threaded amd64 core dump where thread 2 is signalled.""" self.do_test("2lwp_t2_SIGSEGV.amd64", pid=622, region_count=24) @@ -219,11 +223,13 @@ def check_stack(self, process, pid, filename): self.assertEqual(thread.GetStopReasonDataAtIndex(0), signal.SIGSEGV) @skipIfLLVMTargetMissing("AArch64") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_aarch64(self): """Test double-threaded aarch64 core dump where process is signalled.""" self.do_test("2lwp_process_SIGSEGV.aarch64", pid=1403, region_count=30) @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_amd64(self): """Test double-threaded amd64 core dump where process is signalled.""" self.do_test("2lwp_process_SIGSEGV.amd64", pid=665, region_count=24) From 691980ebb47127c611be6e85f27e1778d5d213d8 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Mon, 18 May 2020 10:26:45 -0700 Subject: [PATCH 13/14] [llvm][NFC] Fixed non-compliant style in InlineAdvisor.h Changed OnPass{Entry|Exit} -> onPass{Entry|Exit} Also fixed a small typo in a comment. --- llvm/include/llvm/Analysis/InlineAdvisor.h | 6 +++--- llvm/lib/Transforms/IPO/Inliner.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index 115bca1d32192c..118fd236bee486 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -123,12 +123,12 @@ class InlineAdvisor { /// This must be called when the Inliner pass is entered, to allow the /// InlineAdvisor update internal state, as result of function passes run /// between Inliner pass runs (for the same module). - virtual void OnPassEntry() {} + virtual void onPassEntry() {} /// This must be called when the Inliner pass is exited, as function passes /// may be run subsequently. 
This allows an implementation of InlineAdvisor /// to prepare for a partial update. - virtual void OnPassExit() {} + virtual void onPassExit() {} protected: InlineAdvisor() = default; @@ -163,7 +163,7 @@ class DefaultInlineAdvisor : public InlineAdvisor { std::unique_ptr getAdvice(CallBase &CB, FunctionAnalysisManager &FAM) override; - void OnPassExit() override { freeDeletedFunctions(); } + void onPassExit() override { freeDeletedFunctions(); } InlineParams Params; }; diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 770ca2ea913094..862385d044815b 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -696,9 +696,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, ProfileSummaryInfo *PSI = MAMProxy.getCachedResult(M); InlineAdvisor &Advisor = getAdvisor(MAMProxy, M); - Advisor.OnPassEntry(); + Advisor.onPassEntry(); - auto AdvisorOnExit = make_scope_exit([&] { Advisor.OnPassExit(); }); + auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(); }); if (!ImportedFunctionsStats && InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { @@ -808,7 +808,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, return FAM.getResult(F); }; - // Now process as many calls as we have within this caller in the sequnece. + // Now process as many calls as we have within this caller in the sequence. // We bail out as soon as the caller has to change so we can update the // call graph and prepare the context of that new caller. bool DidInline = false; From 31ecef76275158c87d63772a70fbc282d025e7ab Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Mon, 18 May 2020 18:20:40 +0200 Subject: [PATCH 14/14] [SystemZ] Don't create PERMUTE nodes with an undef operand. It's better to reuse the first source value than to use an undef second operand, because that will make more resulting VPERMs have identical operands and therefore MachineCSE more successful. Review: Ulrich Weigand --- .../Target/SystemZ/SystemZISelLowering.cpp | 3 ++- llvm/test/CodeGen/SystemZ/vec-perm-14.ll | 27 +++++++++++++++++++ .../vector-constrained-fp-intrinsics.ll | 6 ++--- 3 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/vec-perm-14.ll diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 0ce6f317722438..7a8b5249255f9c 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -4474,7 +4474,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, else IndexNodes[I] = DAG.getUNDEF(MVT::i32); SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); - return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2); + return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], + (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2); } namespace { diff --git a/llvm/test/CodeGen/SystemZ/vec-perm-14.ll b/llvm/test/CodeGen/SystemZ/vec-perm-14.ll new file mode 100644 index 00000000000000..0cf3c6ef7a064c --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-perm-14.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; +; Test that only one vperm of the vector compare is needed for both extracts. 
+ +define void @fun() { +; CHECK-LABEL: fun +; CHECK: vperm +; CHECK-NOT: vperm +bb: + %tmp = load <4 x i8>, <4 x i8>* undef + %tmp1 = icmp eq <4 x i8> zeroinitializer, %tmp + %tmp2 = extractelement <4 x i1> %tmp1, i32 0 + br i1 %tmp2, label %bb1, label %bb2 + +bb1: + unreachable + +bb2: + %tmp3 = extractelement <4 x i1> %tmp1, i32 1 + br i1 %tmp3, label %bb3, label %bb4 + +bb3: + unreachable + +bb4: + unreachable +} diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll index 7cea2ff8eb9c06..b7cbac89db31e9 100644 --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -5377,12 +5377,12 @@ define void @constrained_vector_fptrunc_v3f64(<3 x double>* %src, <3 x float>* % ; SZ13-LABEL: constrained_vector_fptrunc_v3f64: ; SZ13: # %bb.0: # %entry ; SZ13-NEXT: vl %v1, 0(%r2), 4 +; SZ13-NEXT: ld %f0, 16(%r2) ; SZ13-NEXT: vledb %v1, %v1, 0, 0 ; SZ13-NEXT: larl %r1, .LCPI97_0 -; SZ13-NEXT: ld %f0, 16(%r2) -; SZ13-NEXT: vl %v2, 0(%r1), 3 -; SZ13-NEXT: vperm %v1, %v1, %v0, %v2 ; SZ13-NEXT: ledbra %f0, 0, %f0, 0 +; SZ13-NEXT: vl %v2, 0(%r1), 3 +; SZ13-NEXT: vperm %v1, %v1, %v1, %v2 ; SZ13-NEXT: ste %f0, 8(%r3) ; SZ13-NEXT: vsteg %v1, 0(%r3), 0 ; SZ13-NEXT: br %r14