From 6023d2b4f28b6bb4401f07d0e145f8c1a2623eee Mon Sep 17 00:00:00 2001
From: Stefan Djordjevic
Date: Mon, 15 Jul 2024 15:18:17 +0000
Subject: [PATCH] Adding softmax op in MLIR e2e

---
 include/ttmlir/Dialect/TTIR/IR/TTIROps.td     | 20 +++++++++++++++++
 include/ttmlir/Dialect/TTNN/IR/TTNNOps.td     | 18 +++++++++++++++
 include/ttmlir/Target/TTNN/program.fbs        |  9 +++++++-
 lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp    |  6 +++++
 lib/Dialect/TTIR/IR/TTIROps.cpp               | 20 +++++++++++++++++
 lib/Dialect/TTIR/Transforms/Passes.cpp        |  1 +
 lib/Dialect/TTNN/IR/TTNNOps.cpp               | 20 +++++++++++++++++
 lib/Dialect/TTNN/Transforms/Passes.cpp        | 15 ++++++++++++-
 .../Transforms/TTNNToSerializedBinary.cpp     | 16 ++++++++++++++
 runtime/include/tt/runtime/detail/ttnn.h      |  1 +
 runtime/lib/ttnn/program.cpp                  | 14 ++++++++++++
 test/ttmlir/Dialect/TTNN/softmax/softmax.mlir | 22 +++++++++++++++++++
 .../TTNN/softmax/softmax_negative_1.mlir      | 10 +++++++++
 .../TTNN/softmax/softmax_negative_2.mlir      | 10 +++++++++
 14 files changed, 180 insertions(+), 2 deletions(-)
 create mode 100644 test/ttmlir/Dialect/TTNN/softmax/softmax.mlir
 create mode 100644 test/ttmlir/Dialect/TTNN/softmax/softmax_negative_1.mlir
 create mode 100644 test/ttmlir/Dialect/TTNN/softmax/softmax_negative_2.mlir

diff --git a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
index 5558d4e435..a8efff6929 100644
--- a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
+++ b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
@@ -186,6 +186,26 @@ def TTIR_ReluOp : TTIR_ElementwiseOp<"relu"> {
     }];
 }
+def TTIR_SoftmaxOp : TTIR_DPSOp<"softmax"> {
+    let summary = "Softmax operation.";
+    let description = [{
+      Softmax operation.
+    }];
+
+    let arguments = (ins AnyRankedTensor:$input,
+                         AnyRankedTensor:$output,
+                         I32Attr:$dimension,
+                         TT_OperandConstraintArrayAttr:$operand_constraints);
+
+    let results = (outs AnyRankedTensor:$result);
+
+    let extraClassDeclaration = [{
+      MutableOperandRange getDpsInitsMutable() { return getOutputMutable(); }
+    }];
+
+    let hasVerifier = 1;
+}
+
 // ANCHOR: adding_an_op_matmul_ttir
 def TTIR_MatmulOp : TTIR_DPSOp<"matmul"> {
     let summary = "Matrix multiply operation.";
     let description = [{
diff --git a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
index 97c4a61aab..840f9b64f0 100644
--- a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
+++ b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
@@ -124,6 +124,24 @@ def TTNN_ReluOp : TTNN_ElementwiseOp<"relu"> {
     }];
 }
 
+def TTNN_SoftmaxOp : TTNN_NamedDPSOp<"softmax"> {
+    let summary = "Softmax op.";
+    let description = [{
+      Softmax operation.
+    }];
+
+    let arguments = (ins AnyRankedTensor:$input,
+                         AnyRankedTensor:$output,
+                         I32Attr:$dimension);
+
+    let results = (outs AnyRankedTensor:$result);
+
+    let extraClassDeclaration = [{
+      MutableOperandRange getDpsInitsMutable() { return getOutputMutable(); }
+    }];
+
+    let hasVerifier = 1;
+}
 // ANCHOR: adding_an_op_matmul_ttnn
 def TTNN_MatmulOp : TTNN_NamedDPSOp<"matmul"> {
     let arguments = (ins AnyRankedTensor:$a,
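
Both TableGen definitions above give softmax the same destination-passing-style (DPS) shape: an $input, a preallocated $output that getDpsInitsMutable() exposes as the DPS init, and an i32 dimension attribute; only the TTIR variant carries operand_constraints, which the layout pass consumes. Written out in generic IR form (this is the exact syntax the tests added at the end of this patch use), the TTIR op reads:

    %1 = "ttir.softmax"(%arg0, %0) <{dimension = 1 : i32, operand_constraints = [#any_device, #any_device]}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>
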
diff --git a/include/ttmlir/Target/TTNN/program.fbs b/include/ttmlir/Target/TTNN/program.fbs
index adc7f158a0..581fafc143 100644
--- a/include/ttmlir/Target/TTNN/program.fbs
+++ b/include/ttmlir/Target/TTNN/program.fbs
@@ -49,6 +49,12 @@ table ReductionOp {
   keep_dim: bool;
 }
 
+table SoftmaxOp {
+  in: tt.target.TensorRef;
+  out: tt.target.TensorRef;
+  dimension: int32;
+}
+
 // ANCHOR: adding_an_op_matmul_fbs
 table MatmulOp {
   in0: tt.target.TensorRef;
@@ -64,7 +70,8 @@ union OpType {
   FullOp,
   EltwiseOp,
   MatmulOp,
-  ReductionOp
+  ReductionOp,
+  SoftmaxOp
 }
 
 table Operation {
diff --git a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp
index daaf2611c8..6de2143258 100644
--- a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp
+++ b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp
@@ -53,6 +53,8 @@ void populateTTNNToEmitCPatterns(mlir::MLIRContext *ctx,
   patterns.add<DefaultOpConversionPattern<ttnn::MultiplyOp>>(typeConverter,
                                                              ctx);
   patterns.add<DefaultOpConversionPattern<ttnn::ReluOp>>(typeConverter, ctx);
+  patterns.add<DefaultOpConversionPattern<ttnn::SoftmaxOp>>(typeConverter,
+                                                            ctx);
   patterns.add<DefaultOpConversionPattern<ttnn::MatmulOp>>(typeConverter, ctx);
   patterns.add<DefaultOpConversionPattern<ttnn::SumOp>>(typeConverter,
                                                         ctx);
@@ -88,6 +90,10 @@ struct ConvertTTNNToEmitCPass
         module.getLoc(),
         "ttnn/operations/reduction/generic/generic_reductions.hpp",
         /*isStandard=*/false);
+    builder.create<emitc::IncludeOp>(
+        module.getLoc(),
+        "ttnn/operations/normalization.hpp",
+        /*isStandard=*/false);
   }
 
   // TTNN -> EmitC
diff --git a/lib/Dialect/TTIR/IR/TTIROps.cpp b/lib/Dialect/TTIR/IR/TTIROps.cpp
index 48e205867d..0e0d6883d4 100644
--- a/lib/Dialect/TTIR/IR/TTIROps.cpp
+++ b/lib/Dialect/TTIR/IR/TTIROps.cpp
@@ -26,6 +26,26 @@ ::mlir::LogicalResult mlir::tt::ttir::LayoutOp::verify() {
   return success();
 }
 
+::mlir::LogicalResult mlir::tt::ttir::SoftmaxOp::verify() {
+  ::mlir::RankedTensorType inputType = getInput().getType();
+  ::mlir::RankedTensorType outputType = getOutput().getType();
+
+  // Shapes of input and output of a softmax operation must be the same
+  if (inputType.getShape() != outputType.getShape()) {
+    return emitOpError("Input and output shapes must be the same");
+  }
+
+  int32_t dim = getDimension();
+
+  // Check that the dim is within the bounds of the input tensor
+  if (dim >= inputType.getRank() || dim < -inputType.getRank()) {
+    return emitOpError(
+        "Dimension attribute must be within the bounds of the input tensor");
+  }
+
+  return success();
+}
+
 // ANCHOR: adding_an_op_matmul_ttir_verify
 ::mlir::LogicalResult mlir::tt::ttir::MatmulOp::verify() {
   ::mlir::RankedTensorType inputAType = getA().getType();
diff --git a/lib/Dialect/TTIR/Transforms/Passes.cpp b/lib/Dialect/TTIR/Transforms/Passes.cpp
index 75471f6e4b..7fce43ab27 100644
--- a/lib/Dialect/TTIR/Transforms/Passes.cpp
+++ b/lib/Dialect/TTIR/Transforms/Passes.cpp
@@ -568,6 +568,7 @@ class TTIRLayout : public impl::TTIRLayoutBase<TTIRLayout> {
       TTIRLayoutOperandsRewriter<AddOp>,
       TTIRLayoutOperandsRewriter<MultiplyOp>, TTIRLayoutOperandsRewriter<ReluOp>,
       TTIRLayoutOperandsRewriter<SumOp>,
+      TTIRLayoutOperandsRewriter<SoftmaxOp>,
      TTIRLayoutOperandsRewriter<MatmulOp>, TTIRLayoutFuncReturnRewriter>(
          &getContext());
   FrozenRewritePatternSet patternSet(std::move(patterns));
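
The TTIR verifier above and the TTNN verifier in the next file enforce the same contract: matching input/output shapes, and a dimension in the half-open interval [-rank, rank), so negative values index from the last axis. A worked check for the tensors used in this patch's tests: tensor<512x1024xbf16> has rank 2, so dimension may be -2, -1, 0 or 1; the values 2 and -3 used by the two negative tests both fail the check `dim >= inputType.getRank() || dim < -inputType.getRank()` and trigger the "Dimension attribute must be within the bounds of the input tensor" diagnostic.
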
diff --git a/lib/Dialect/TTNN/IR/TTNNOps.cpp b/lib/Dialect/TTNN/IR/TTNNOps.cpp
index c8ed8ab192..89b0e68e61 100644
--- a/lib/Dialect/TTNN/IR/TTNNOps.cpp
+++ b/lib/Dialect/TTNN/IR/TTNNOps.cpp
@@ -30,6 +30,26 @@ ::mlir::LogicalResult mlir::tt::ttnn::ToMemoryConfigOp::verify() {
   return success();
 }
 
+::mlir::LogicalResult mlir::tt::ttnn::SoftmaxOp::verify() {
+  ::mlir::RankedTensorType inputType = getInput().getType();
+  ::mlir::RankedTensorType outputType = getOutput().getType();
+
+  // Shapes of input and output of a softmax operation must be the same
+  if (inputType.getShape() != outputType.getShape()) {
+    return emitOpError("Input and output shapes must be the same");
+  }
+
+  int32_t dim = getDimension();
+
+  // Check that the dim is within the bounds of the input tensor
+  if (dim >= inputType.getRank() || dim < -inputType.getRank()) {
+    return emitOpError(
+        "Dimension attribute must be within the bounds of the input tensor");
+  }
+
+  return success();
+}
+
 // ANCHOR: adding_an_op_matmul_ttnn_verify
 ::mlir::LogicalResult mlir::tt::ttnn::MatmulOp::verify() {
   ::mlir::RankedTensorType inputAType = getA().getType();
diff --git a/lib/Dialect/TTNN/Transforms/Passes.cpp b/lib/Dialect/TTNN/Transforms/Passes.cpp
index 718c957ff8..1102c8106e 100644
--- a/lib/Dialect/TTNN/Transforms/Passes.cpp
+++ b/lib/Dialect/TTNN/Transforms/Passes.cpp
@@ -107,6 +107,18 @@ class TTIRToTTNNReductionOpRewriter : public OpRewritePattern<ttir::SumOp> {
   }
 };
 
+class TTIRToTTNNSoftmaxOpRewriter : public OpRewritePattern<ttir::SoftmaxOp> {
+  using OpRewritePattern<ttir::SoftmaxOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(ttir::SoftmaxOp op,
+                                PatternRewriter &rewriter) const final {
+    rewriter.replaceOpWithNewOp<SoftmaxOp>(op, op.getResult().getType(),
+                                           op.getInput(), op.getOutput(),
+                                           op.getDimension());
+    return success();
+  }
+};
+
 // ANCHOR: adding_an_op_matmul_op_rewriter
 template <typename TTIROp, typename TTNNOp>
 class TTIRToTTNNBinaryOpRewriter : public OpRewritePattern<TTIROp> {
@@ -150,7 +162,8 @@ class ConvertTTIRToTTNN
                  TTIRToTTNNOpRewriter<ttir::ReluOp, ReluOp>,
                  TTIRToTTNNBinaryOpRewriter<ttir::MatmulOp, MatmulOp>,
                  TTIRToTTNNReductionOpRewriter,
-                 TensorEmptyToFullRewriter>(&getContext());
+                 TTIRToTTNNSoftmaxOpRewriter, TensorEmptyToFullRewriter>(
+                     &getContext());
   // ANCHOR_END: adding_an_op_matmul_rewrite_pattern_set
   FrozenRewritePatternSet patternSet(std::move(patterns));
   if (failed(applyPatternsAndFoldGreedily(getOperation(), patternSet))) {
diff --git a/lib/Dialect/TTNN/Transforms/TTNNToSerializedBinary.cpp b/lib/Dialect/TTNN/Transforms/TTNNToSerializedBinary.cpp
index 6b686a4150..bb96925d18 100644
--- a/lib/Dialect/TTNN/Transforms/TTNNToSerializedBinary.cpp
+++ b/lib/Dialect/TTNN/Transforms/TTNNToSerializedBinary.cpp
@@ -151,6 +151,18 @@ createReductionOp(FlatbufferObjectCache &cache, ReductionOp op) {
                                               dim_arg, op.getKeepDim());
 }
 
+template <typename SoftmaxOp>
+::flatbuffers::Offset<::tt::target::ttnn::SoftmaxOp>
+createSoftmaxOp(FlatbufferObjectCache &cache, SoftmaxOp op) {
+  auto in =
+      cache.at<::tt::target::TensorRef>(getOperandThroughDPSOps(op.getInput()));
+  auto out = cache.at<::tt::target::TensorRef>(
+      getOperandThroughDPSOps(op.getResult()));
+  int32_t dimension = op.getDimension();
+
+  return ::tt::target::ttnn::CreateSoftmaxOp(*cache.fbb, in, out, dimension);
+}
+
 ::flatbuffers::Offset<::tt::target::ttnn::Operation>
 emitTTNNOperation(FlatbufferObjectCache &cache, Operation *op,
                   std::string const &debugString) {
@@ -188,6 +200,10 @@ emitTTNNOperation(FlatbufferObjectCache &cache, Operation *op,
   if (auto sumOp = dyn_cast<SumOp>(op); sumOp) {
     return createOperation(cache, createReductionOp(cache, sumOp), debugString);
   }
+  if (auto softmaxOp = dyn_cast<SoftmaxOp>(op); softmaxOp) {
+    return createOperation(cache, createSoftmaxOp(cache, softmaxOp),
+                           debugString);
+  }
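
After --convert-ttir-to-ttnn runs, each ttir.softmax has been replaced by a ttnn.softmax that keeps the input, the DPS output, and the dimension attribute; the operand_constraints array is dropped, having already been consumed by the TTIR layout pass. A sketch of the lowered form (SSA names and the elided layout encodings on the tensor types are illustrative; the CHECK lines in the test below match only the op names):

    %2 = "ttnn.softmax"(%1, %0) <{dimension = 1 : i32}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>

The serialization hook above then walks these ops and writes one flatbuffer SoftmaxOp table (in, out, dimension) per instance.
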
 
   llvm_unreachable("unhandled op in emitTTNNOperation");
 }
diff --git a/runtime/include/tt/runtime/detail/ttnn.h b/runtime/include/tt/runtime/detail/ttnn.h
index c31df0a3c1..693f008fe0 100644
--- a/runtime/include/tt/runtime/detail/ttnn.h
+++ b/runtime/include/tt/runtime/detail/ttnn.h
@@ -38,6 +38,7 @@
 #include "ttnn/operations/core.hpp"
 #include "ttnn/operations/creation.hpp"
 #include "ttnn/operations/matmul.hpp"
+#include "ttnn/operations/normalization.hpp"
 #pragma clang diagnostic pop
 
 #include "tt/runtime/types.h"
diff --git a/runtime/lib/ttnn/program.cpp b/runtime/lib/ttnn/program.cpp
index 1e2f9c0ea1..9d5572ab3a 100644
--- a/runtime/lib/ttnn/program.cpp
+++ b/runtime/lib/ttnn/program.cpp
@@ -119,6 +119,17 @@ run(::tt::target::ttnn::ReductionOp const *op, ::ttnn::Device &device,
   }
 }
 
+static void
+run(::tt::target::ttnn::SoftmaxOp const *op, ::ttnn::device::Device &device,
+    std::unordered_map<std::uint32_t, ::ttnn::Tensor *> &liveTensors,
+    std::list<::ttnn::Tensor> &tensorPool) {
+  ::ttnn::Tensor &in = *liveTensors.at(op->in()->global_id());
+  int32_t dimension = op->dimension();
+
+  tensorPool.push_back(::ttnn::softmax(in, dimension));
+  liveTensors.try_emplace(op->out()->global_id(), &tensorPool.back());
+}
+
 // ANCHOR: adding_an_op_matmul_runtime
 static void
 run(::tt::target::ttnn::MatmulOp const *op, ::ttnn::Device &device,
@@ -161,6 +172,9 @@ run(::tt::target::ttnn::Operation const *op, ::ttnn::Device &device,
   case ::tt::target::ttnn::OpType::ReductionOp: {
     return run(op->type_as_ReductionOp(), device, liveTensors, tensorPool);
   }
+  case ::tt::target::ttnn::OpType::SoftmaxOp: {
+    return run(op->type_as_SoftmaxOp(), device, liveTensors, tensorPool);
+  }
   default:
     throw std::runtime_error("Unsupported operation type");
   }
diff --git a/test/ttmlir/Dialect/TTNN/softmax/softmax.mlir b/test/ttmlir/Dialect/TTNN/softmax/softmax.mlir
new file mode 100644
index 0000000000..f0e329b3e8
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/softmax/softmax.mlir
@@ -0,0 +1,22 @@
+// RUN: ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn %s | FileCheck %s
+#any_device = #tt.operand_constraint<any_device>
+module attributes {tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
+  func.func @forward(%arg0: tensor<512x1024xbf16>) -> tensor<512x1024xbf16> {
+    // CHECK: %[[C:.*]] = "ttnn.open_device"[[C:.*]]
+    // CHECK: %[[C:.*]] = "ttnn.full"[[C:.*]]
+    %0 = tensor.empty() : tensor<512x1024xbf16>
+    // CHECK: %[[C:.*]] = "ttnn.to_memory_config"[[C:.*]]
+    // CHECK: %[[C:.*]] = "ttnn.softmax"[[C:.*]]
+    // Check for positive dimension attribute
+    %1 = "ttir.softmax"(%arg0, %0) <{dimension = 1 : i32, operand_constraints = [#any_device, #any_device]}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>
+    // CHECK: %[[C:.*]] = "ttnn.full"[[C:.*]]
+    %2 = tensor.empty() : tensor<512x1024xbf16>
+    // CHECK: %[[C:.*]] = "ttnn.to_memory_config"[[C:.*]]
+    // CHECK: %[[C:.*]] = "ttnn.softmax"[[C:.*]]
+    // Check for negative dimension attribute
+    %3 = "ttir.softmax"(%1, %2) <{dimension = -1 : i32, operand_constraints = [#any_device, #any_device]}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>
+    // CHECK: %[[C:.*]] = "ttnn.to_memory_config"[[C:.*]]
+    // CHECK: "ttnn.close_device"[[C:.*]]
+    return %3 : tensor<512x1024xbf16>
+  }
+}
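
On the runtime side, dispatch mirrors the other ops in program.cpp: the SoftmaxOp case looks up the live input tensor by global_id, calls ::ttnn::softmax(in, dimension), keeps the result alive in tensorPool, and registers it under the output's global_id. The positive test above can also be run by hand by expanding the %s placeholders of its RUN line:

    ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn \
        test/ttmlir/Dialect/TTNN/softmax/softmax.mlir
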
diff --git a/test/ttmlir/Dialect/TTNN/softmax/softmax_negative_1.mlir b/test/ttmlir/Dialect/TTNN/softmax/softmax_negative_1.mlir
new file mode 100644
index 0000000000..eea94a5c50
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/softmax/softmax_negative_1.mlir
@@ -0,0 +1,10 @@
+// RUN: not ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn %s 2>&1 | FileCheck %s
+// CHECK: error: 'ttir.softmax' op Dimension attribute must be within the bounds of the input tensor
+#any_device = #tt.operand_constraint<any_device>
+module attributes {tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
+  func.func @forward(%arg0: tensor<512x1024xbf16>) -> tensor<512x1024xbf16> {
+    %0 = tensor.empty() : tensor<512x1024xbf16>
+    %1 = "ttir.softmax"(%arg0, %0) <{dimension = 2 : i32, operand_constraints = [#any_device, #any_device]}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>
+    return %1 : tensor<512x1024xbf16>
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/softmax/softmax_negative_2.mlir b/test/ttmlir/Dialect/TTNN/softmax/softmax_negative_2.mlir
new file mode 100644
index 0000000000..b7282d75c6
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/softmax/softmax_negative_2.mlir
@@ -0,0 +1,10 @@
+// RUN: not ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn %s 2>&1 | FileCheck %s
+// CHECK: error: 'ttir.softmax' op Dimension attribute must be within the bounds of the input tensor
+#any_device = #tt.operand_constraint<any_device>
+module attributes {tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
+  func.func @forward(%arg0: tensor<512x1024xbf16>) -> tensor<512x1024xbf16> {
+    %0 = tensor.empty() : tensor<512x1024xbf16>
+    %1 = "ttir.softmax"(%arg0, %0) <{dimension = -3 : i32, operand_constraints = [#any_device, #any_device]}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>
+    return %1 : tensor<512x1024xbf16>
+  }
+}
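
The two negative tests drive the out-of-bounds branch of the verifier from both sides (dimension = 2 and dimension = -3 on a rank-2 tensor) and FileCheck the exact diagnostic. Assuming the repository's usual lit-based test setup (the exact invocation depends on how the build tree configures lit), the new directory can be run with:

    llvm-lit -v test/ttmlir/Dialect/TTNN/softmax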