Skip to content

Commit

Permalink
Adding softmax op in MLIR e2e (#138)
Browse files Browse the repository at this point in the history
  • Loading branch information
sdjordjevicTT authored Jul 18, 2024
1 parent 24d6f8a commit cc7b56b
Show file tree
Hide file tree
Showing 14 changed files with 178 additions and 2 deletions.
20 changes: 20 additions & 0 deletions include/ttmlir/Dialect/TTIR/IR/TTIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,26 @@ def TTIR_ReluOp : TTIR_ElementwiseOp<"relu"> {
}];
}

// TTIR softmax: destination-passing-style (DPS) op that computes softmax of
// `input` along `dimension` and writes the result into `output`.
def TTIR_SoftmaxOp : TTIR_DPSOp<"softmax"> {
    let summary = "Softmax operation.";
    let description = [{
      Softmax operation.
    }];

    // `dimension` is the axis to normalize over; the verifier accepts
    // negative values counted from the back (see SoftmaxOp::verify).
    let arguments = (ins AnyRankedTensor:$input,
                         AnyRankedTensor:$output,
                         I32Attr:$dimension,
                         TT_OperandConstraintArrayAttr:$operand_constraints);

    let results = (outs AnyRankedTensor:$result);

    // DPS hook: the `output` operand is also the op's init/destination.
    let extraClassDeclaration = [{
      MutableOperandRange getDpsInitsMutable() { return getOutputMutable(); }
    }];

    // Shape-equality and dimension-bounds checks live in TTIROps.cpp.
    let hasVerifier = 1;
}

// ANCHOR: adding_an_op_matmul_ttir
def TTIR_MatmulOp : TTIR_DPSOp<"matmul"> {
let summary = "Matrix multiply operation.";
Expand Down
18 changes: 18 additions & 0 deletions include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,24 @@ def TTNN_ReluOp : TTNN_ElementwiseOp<"relu"> {
}];
}

// TTNN softmax op, mirroring TTIR_SoftmaxOp (minus operand constraints);
// produced by the TTIR->TTNN conversion pass.
def TTNN_SoftmaxOp : TTNN_NamedDPSOp<"softmax"> {
    let summary = "Softmax op.";
    let description = [{
      Softmax operation.
    }];

    // `I32Attr:$dimension` spelled without a space after the colon, matching
    // the TTIR op's declaration and the other ODS operand lists in this file.
    let arguments = (ins AnyRankedTensor:$input,
                         AnyRankedTensor:$output,
                         I32Attr:$dimension);

    let results = (outs AnyRankedTensor:$result);

    // DPS hook: the `output` operand is also the op's init/destination.
    let extraClassDeclaration = [{
      MutableOperandRange getDpsInitsMutable() { return getOutputMutable(); }
    }];

    // Shape-equality and dimension-bounds checks live in TTNNOps.cpp.
    let hasVerifier = 1;
}
// ANCHOR: adding_an_op_matmul_ttnn
def TTNN_MatmulOp : TTNN_NamedDPSOp<"matmul"> {
let arguments = (ins AnyRankedTensor:$a,
Expand Down
9 changes: 8 additions & 1 deletion include/ttmlir/Target/TTNN/program.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ table ReductionOp {
keep_dim: bool;
}

// Serialized form of the TTNN softmax op: input/output tensor references
// plus the axis to normalize over (may be negative, counted from the back).
table SoftmaxOp {
  in: tt.target.TensorRef;
  out: tt.target.TensorRef;
  dimension: int32;
}

// ANCHOR: adding_an_op_matmul_fbs
table MatmulOp {
in0: tt.target.TensorRef;
Expand All @@ -64,7 +70,8 @@ union OpType {
FullOp,
EltwiseOp,
MatmulOp,
ReductionOp
ReductionOp,
SoftmaxOp
}

table Operation {
Expand Down
4 changes: 4 additions & 0 deletions lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ void populateTTNNToEmitCPatterns(mlir::MLIRContext *ctx,
patterns.add<DefaultOpConversionPattern<ttnn::SubtractOp>>(typeConverter,
ctx);
patterns.add<DefaultOpConversionPattern<ttnn::SumOp>>(typeConverter, ctx);
patterns.add<DefaultOpConversionPattern<ttnn::SoftmaxOp>>(typeConverter, ctx);
patterns.add<DefaultOpConversionPattern<ttnn::MultiplyOp>>(typeConverter,
ctx);
patterns.add<DefaultOpConversionPattern<ttnn::MatmulOp>>(typeConverter, ctx);
Expand Down Expand Up @@ -88,6 +89,9 @@ struct ConvertTTNNToEmitCPass
module.getLoc(),
"ttnn/operations/reduction/generic/generic_reductions.hpp",
/*isStandard=*/false);
builder.create<emitc::IncludeOp>(module.getLoc(),
"ttnn/operations/normalization.hpp",
/*isStandard=*/false);
}

// TTNN -> EmitC
Expand Down
20 changes: 20 additions & 0 deletions lib/Dialect/TTIR/IR/TTIROps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,26 @@ ::mlir::LogicalResult mlir::tt::ttir::LayoutOp::verify() {
return success();
}

// Verifies a ttir.softmax: the output shape must equal the input shape, and
// the `dimension` attribute must name a valid (possibly negative) axis.
::mlir::LogicalResult mlir::tt::ttir::SoftmaxOp::verify() {
  ::mlir::RankedTensorType inputTy = getInput().getType();
  ::mlir::RankedTensorType outputTy = getOutput().getType();

  // Softmax is shape-preserving, so operand and result shapes must agree.
  if (inputTy.getShape() != outputTy.getShape()) {
    return emitOpError("Input and output shapes must be the same");
  }

  // The axis may be given from the front or the back: valid range is
  // [-rank, rank).
  const int64_t rank = inputTy.getRank();
  const int32_t dimension = getDimension();
  if (dimension < -rank || dimension >= rank) {
    return emitOpError(
        "Dimension attribute must be within the bounds of the input tensor");
  }

  return success();
}

// ANCHOR: adding_an_op_matmul_ttir_verify
::mlir::LogicalResult mlir::tt::ttir::MatmulOp::verify() {
::mlir::RankedTensorType inputAType = getA().getType();
Expand Down
1 change: 1 addition & 0 deletions lib/Dialect/TTIR/Transforms/Passes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,7 @@ class TTIRLayout : public impl::TTIRLayoutBase<TTIRLayout> {
TTIRLayoutOperandsRewriter<MultiplyOp>,
TTIRLayoutOperandsRewriter<SubtractOp>,
TTIRLayoutOperandsRewriter<ReluOp>, TTIRLayoutOperandsRewriter<SumOp>,
TTIRLayoutOperandsRewriter<SoftmaxOp>,
TTIRLayoutOperandsRewriter<MatmulOp>, TTIRLayoutFuncReturnRewriter>(
&getContext());
FrozenRewritePatternSet patternSet(std::move(patterns));
Expand Down
20 changes: 20 additions & 0 deletions lib/Dialect/TTNN/IR/TTNNOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,26 @@ ::mlir::LogicalResult mlir::tt::ttnn::ToMemoryConfigOp::verify() {
return success();
}

// Verifies a ttnn.softmax: shape-preserving, with a bounds-checked axis.
::mlir::LogicalResult mlir::tt::ttnn::SoftmaxOp::verify() {
  ::mlir::RankedTensorType inType = getInput().getType();
  ::mlir::RankedTensorType outType = getOutput().getType();

  // Softmax never changes the tensor shape.
  if (inType.getShape() != outType.getShape()) {
    return emitOpError("Input and output shapes must be the same");
  }

  // Accept positive and negative (from-the-back) axes: valid range is
  // [-rank, rank).
  int64_t rank = inType.getRank();
  if (int32_t axis = getDimension(); axis >= rank || axis < -rank) {
    return emitOpError(
        "Dimension attribute must be within the bounds of the input tensor");
  }

  return success();
}

// ANCHOR: adding_an_op_matmul_ttnn_verify
::mlir::LogicalResult mlir::tt::ttnn::MatmulOp::verify() {
::mlir::RankedTensorType inputAType = getA().getType();
Expand Down
15 changes: 14 additions & 1 deletion lib/Dialect/TTNN/Transforms/Passes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,18 @@ class TTIRToTTNNReductionOpRewriter : public OpRewritePattern<TTIROp> {
}
};

// Lowers ttir.softmax to ttnn.softmax 1:1 — result type, input/output
// operands, and the dimension attribute all carry over unchanged.
class TTIRToTTNNSoftmaxOpRewriter : public OpRewritePattern<ttir::SoftmaxOp> {
  using OpRewritePattern<ttir::SoftmaxOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(ttir::SoftmaxOp op,
                                PatternRewriter &rewriter) const final {
    auto resultType = op.getResult().getType();
    rewriter.replaceOpWithNewOp<SoftmaxOp>(op, resultType, op.getInput(),
                                           op.getOutput(), op.getDimension());
    return success();
  }
};

// ANCHOR: adding_an_op_matmul_op_rewriter
template <typename TTIROp, typename TTNNOp>
class TTIRToTTNNBinaryOpRewriter : public OpRewritePattern<TTIROp> {
Expand Down Expand Up @@ -150,7 +162,8 @@ class ConvertTTIRToTTNN
TTIRToTTNNOpRewriter<ttir::ReluOp, ReluOp>,
TTIRToTTNNBinaryOpRewriter<ttir::MatmulOp, MatmulOp>,
TTIRToTTNNReductionOpRewriter<ttir::SumOp, SumOp>,
TensorEmptyToFullRewriter>(&getContext());
TTIRToTTNNSoftmaxOpRewriter, TensorEmptyToFullRewriter>(
&getContext());
// ANCHOR_END: adding_an_op_matmul_rewrite_pattern_set
FrozenRewritePatternSet patternSet(std::move(patterns));
if (failed(applyPatternsAndFoldGreedily(getOperation(), patternSet))) {
Expand Down
16 changes: 16 additions & 0 deletions lib/Dialect/TTNN/Transforms/TTNNToSerializedBinary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,18 @@ createReductionOp(FlatbufferObjectCache &cache, ReductionOp op) {
dim_arg, op.getKeepDim());
}

// Serializes a TTNN softmax op into its flatbuffer representation.
//
// The template parameter is named SoftmaxOpT so it does not shadow the
// dialect's SoftmaxOp class (the caller dispatches via dyn_cast<SoftmaxOp>);
// the original spelling made `SoftmaxOp` mean two different things here.
template <typename SoftmaxOpT>
::flatbuffers::Offset<::tt::target::ttnn::SoftmaxOp>
createSoftmaxOp(FlatbufferObjectCache &cache, SoftmaxOpT op) {
  // Input tensor ref, traced through any intervening DPS ops.
  auto in =
      cache.at<::tt::target::TensorRef>(getOperandThroughDPSOps(op.getInput()));
  // Output tensor ref comes from the op's result.
  auto out = cache.at<::tt::target::TensorRef>(
      getOperandThroughDPSOps(op.getResult()));
  // Axis to normalize over (may be negative, counted from the back).
  int32_t dimension = op.getDimension();

  return ::tt::target::ttnn::CreateSoftmaxOp(*cache.fbb, in, out, dimension);
}

::flatbuffers::Offset<::tt::target::ttnn::Operation>
emitTTNNOperation(FlatbufferObjectCache &cache, Operation *op,
std::string const &debugString) {
Expand Down Expand Up @@ -188,6 +200,10 @@ emitTTNNOperation(FlatbufferObjectCache &cache, Operation *op,
if (auto sumOp = dyn_cast<SumOp>(op); sumOp) {
return createOperation(cache, createReductionOp(cache, sumOp), debugString);
}
if (auto softmaxOp = dyn_cast<SoftmaxOp>(op); softmaxOp) {
return createOperation(cache, createSoftmaxOp(cache, softmaxOp),
debugString);
}

llvm_unreachable("unhandled op in emitTTNNOperation");
}
Expand Down
1 change: 1 addition & 0 deletions runtime/include/tt/runtime/detail/ttnn.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "ttnn/operations/core.hpp"
#include "ttnn/operations/creation.hpp"
#include "ttnn/operations/matmul.hpp"
#include "ttnn/operations/normalization.hpp"
#pragma clang diagnostic pop

#include "tt/runtime/types.h"
Expand Down
14 changes: 14 additions & 0 deletions runtime/lib/ttnn/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,17 @@ run(::tt::target::ttnn::ReductionOp const *op, ::ttnn::Device &device,
}
}

// Executes a deserialized SoftmaxOp: looks up the live input tensor, applies
// ttnn::softmax along the requested dimension, and registers the result under
// the op's output id.
//
// The device parameter is spelled `::ttnn::Device` to match every sibling
// run() overload in this file (the original spelled it
// `::ttnn::device::Device`, inconsistently with the rest of the dispatch set).
static void
run(::tt::target::ttnn::SoftmaxOp const *op, ::ttnn::Device &device,
    std::unordered_map<std::uint32_t, ::ttnn::Tensor *> &liveTensors,
    std::list<::ttnn::Tensor> &tensorPool) {
  // The input must already be live (a program input or a prior op's output);
  // .at() throws if the flatbuffer references an unknown tensor id.
  ::ttnn::Tensor &in = *liveTensors.at(op->in()->global_id());
  int32_t dimension = op->dimension();

  // Keep the result alive in the pool and expose it under the output id.
  tensorPool.push_back(::ttnn::softmax(in, dimension));
  liveTensors.try_emplace(op->out()->global_id(), &tensorPool.back());
}

// ANCHOR: adding_an_op_matmul_runtime
static void
run(::tt::target::ttnn::MatmulOp const *op, ::ttnn::Device &device,
Expand Down Expand Up @@ -161,6 +172,9 @@ run(::tt::target::ttnn::Operation const *op, ::ttnn::Device &device,
case ::tt::target::ttnn::OpType::ReductionOp: {
return run(op->type_as_ReductionOp(), device, liveTensors, tensorPool);
}
case ::tt::target::ttnn::OpType::SoftmaxOp: {
return run(op->type_as_SoftmaxOp(), device, liveTensors, tensorPool);
}
default:
throw std::runtime_error("Unsupported operation type");
}
Expand Down
22 changes: 22 additions & 0 deletions test/ttmlir/Dialect/TTNN/softmax/simple_softmax.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// RUN: ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn %s | FileCheck %s
// Lowering test: ttir.softmax with both a positive and a negative `dimension`
// attribute must lower to ttnn.softmax inside an open/close-device region.
#any_device = #tt.operand_constraint<dram|l1|tile|any_device|any_device_tile>
module attributes {tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [<pcie|host_mmio>], [<0, 0, 0, 0>]>} {
  func.func @forward(%arg0: tensor<512x1024xbf16>) -> tensor<512x1024xbf16> {
    // CHECK: %[[C:.*]] = "ttnn.open_device"[[C:.*]]
    // CHECK: %[[C:.*]] = "ttnn.full"[[C:.*]]
    %0 = tensor.empty() : tensor<512x1024xbf16>
    // CHECK: %[[C:.*]] = "ttnn.to_memory_config"[[C:.*]]
    // CHECK: %[[C:.*]] = "ttnn.softmax"[[C:.*]]
    // Check for positive dimension attribute
    %1 = "ttir.softmax"(%arg0, %0) <{dimension = 1 : i32, operand_constraints = [#any_device, #any_device]}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>
    // CHECK: %[[C:.*]] = "ttnn.full"[[C:.*]]
    %2 = tensor.empty() : tensor<512x1024xbf16>
    // CHECK: %[[C:.*]] = "ttnn.to_memory_config"[[C:.*]]
    // CHECK: %[[C:.*]] = "ttnn.softmax"[[C:.*]]
    // Check for negative dimension attribute
    %3 = "ttir.softmax"(%1, %2) <{dimension = -1 : i32, operand_constraints = [#any_device, #any_device]}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>
    // CHECK: %[[C:.*]] = "ttnn.to_memory_config"[[C:.*]]
    // CHECK: "ttnn.close_device"[[C:.*]]
    return %3 : tensor<512x1024xbf16>
  }
}
10 changes: 10 additions & 0 deletions test/ttmlir/Dialect/TTNN/softmax/softmax_negative_1.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// RUN: not ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn %s 2>&1 | FileCheck %s
// CHECK: error: 'ttir.softmax' op Dimension attribute must be within the bounds of the input tensor
// Negative verifier test: dimension = 2 is out of range for a rank-2 tensor
// (valid range is [-2, 2)), so the softmax verifier must reject the op.
#any_device = #tt.operand_constraint<dram|l1|tile|any_device|any_device_tile>
module attributes {tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [<pcie|host_mmio>], [<0, 0, 0, 0>]>} {
  func.func @forward(%arg0: tensor<512x1024xbf16>) -> tensor<512x1024xbf16> {
    %0 = tensor.empty() : tensor<512x1024xbf16>
    %1 = "ttir.softmax"(%arg0, %0) <{dimension = 2 : i32, operand_constraints = [#any_device, #any_device]}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>
    return %1 : tensor<512x1024xbf16>
  }
}
10 changes: 10 additions & 0 deletions test/ttmlir/Dialect/TTNN/softmax/softmax_negative_2.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// RUN: not ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn %s 2>&1 | FileCheck %s
// CHECK: error: 'ttir.softmax' op Dimension attribute must be within the bounds of the input tensor
// Negative verifier test: dimension = -3 is below the valid range for a
// rank-2 tensor (valid range is [-2, 2)), so the verifier must reject the op.
#any_device = #tt.operand_constraint<dram|l1|tile|any_device|any_device_tile>
module attributes {tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [<pcie|host_mmio>], [<0, 0, 0, 0>]>} {
  func.func @forward(%arg0: tensor<512x1024xbf16>) -> tensor<512x1024xbf16> {
    %0 = tensor.empty() : tensor<512x1024xbf16>
    %1 = "ttir.softmax"(%arg0, %0) <{dimension = -3 : i32, operand_constraints = [#any_device, #any_device]}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>
    return %1 : tensor<512x1024xbf16>
  }
}

0 comments on commit cc7b56b

Please sign in to comment.