From 6023d2b4f28b6bb4401f07d0e145f8c1a2623eee Mon Sep 17 00:00:00 2001
From: Stefan Djordjevic
Date: Mon, 15 Jul 2024 15:18:17 +0000
Subject: [PATCH] Adding softmax op in MLIR e2e

---
 include/ttmlir/Dialect/TTIR/IR/TTIROps.td     | 20 +++++++++++++++++
 include/ttmlir/Dialect/TTNN/IR/TTNNOps.td     | 18 +++++++++++++++
 include/ttmlir/Target/TTNN/program.fbs        |  9 +++++++-
 lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp    |  6 +++++
 lib/Dialect/TTIR/IR/TTIROps.cpp               | 20 +++++++++++++++++
 lib/Dialect/TTIR/Transforms/Passes.cpp        |  1 +
 lib/Dialect/TTNN/IR/TTNNOps.cpp               | 20 +++++++++++++++++
 lib/Dialect/TTNN/Transforms/Passes.cpp        | 15 ++++++++++++-
 .../Transforms/TTNNToSerializedBinary.cpp     | 16 ++++++++++++++
 runtime/include/tt/runtime/detail/ttnn.h      |  1 +
 runtime/lib/ttnn/program.cpp                  | 14 ++++++++++++
 test/ttmlir/Dialect/TTNN/softmax/softmax.mlir | 22 +++++++++++++++++++
 .../TTNN/softmax/softmax_negative_1.mlir      | 10 +++++++++
 .../TTNN/softmax/softmax_negative_2.mlir      | 10 +++++++++
 14 files changed, 180 insertions(+), 2 deletions(-)
 create mode 100644 test/ttmlir/Dialect/TTNN/softmax/softmax.mlir
 create mode 100644 test/ttmlir/Dialect/TTNN/softmax/softmax_negative_1.mlir
 create mode 100644 test/ttmlir/Dialect/TTNN/softmax/softmax_negative_2.mlir

diff --git a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
index 5558d4e435..a8efff6929 100644
--- a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
+++ b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
@@ -186,6 +186,26 @@ def TTIR_ReluOp : TTIR_ElementwiseOp<"relu"> {
     }];
 }
+def TTIR_SoftmaxOp : TTIR_DPSOp<"softmax"> {
+    let summary = "Softmax operation.";
+    let description = [{
+      Softmax operation.
+    }];
+
+    let arguments = (ins AnyRankedTensor:$input,
+                         AnyRankedTensor:$output,
+                         I32Attr:$dimension,
+                         TT_OperandConstraintArrayAttr:$operand_constraints);
+
+    let results = (outs AnyRankedTensor:$result);
+
+    let extraClassDeclaration = [{
+      MutableOperandRange getDpsInitsMutable() { return getOutputMutable(); }
+    }];
+
+    let hasVerifier = 1;
+}
+
 // ANCHOR: adding_an_op_matmul_ttir
 def TTIR_MatmulOp : TTIR_DPSOp<"matmul"> {
     let summary = "Matrix multiply operation.";
     let description = [{
diff --git a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
index 97c4a61aab..840f9b64f0 100644
--- a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
+++ b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
@@ -124,6 +124,24 @@ def TTNN_ReluOp : TTNN_ElementwiseOp<"relu"> {
     }];
 }
 
+def TTNN_SoftmaxOp : TTNN_NamedDPSOp<"softmax"> {
+    let summary = "Softmax op.";
+    let description = [{
+      Softmax operation.
+    }];
+
+    let arguments = (ins AnyRankedTensor:$input,
+                         AnyRankedTensor:$output,
+                         I32Attr:$dimension);
+
+    let results = (outs AnyRankedTensor:$result);
+
+    let extraClassDeclaration = [{
+      MutableOperandRange getDpsInitsMutable() { return getOutputMutable(); }
+    }];
+
+    let hasVerifier = 1;
+}
 // ANCHOR: adding_an_op_matmul_ttnn
 def TTNN_MatmulOp : TTNN_NamedDPSOp<"matmul"> {
     let arguments = (ins AnyRankedTensor:$a,
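
Both TableGen definitions above give softmax the same destination-passing-style (DPS) shape: an $input, a preallocated $output that getDpsInitsMutable() exposes as the DPS init, and an i32 dimension attribute; only the TTIR variant carries operand_constraints, which the layout pass consumes. Written out in generic IR form (this is the exact syntax the tests added at the end of this patch use), the TTIR op reads:

    %1 = "ttir.softmax"(%arg0, %0) <{dimension = 1 : i32, operand_constraints = [#any_device, #any_device]}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>
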
diff --git a/include/ttmlir/Target/TTNN/program.fbs b/include/ttmlir/Target/TTNN/program.fbs
index adc7f158a0..581fafc143 100644
--- a/include/ttmlir/Target/TTNN/program.fbs
+++ b/include/ttmlir/Target/TTNN/program.fbs
@@ -49,6 +49,12 @@ table ReductionOp {
   keep_dim: bool;
 }
 
+table SoftmaxOp {
+  in: tt.target.TensorRef;
+  out: tt.target.TensorRef;
+  dimension: int32;
+}
+
 // ANCHOR: adding_an_op_matmul_fbs
 table MatmulOp {
   in0: tt.target.TensorRef;
@@ -64,7 +70,8 @@ union OpType {
   FullOp,
   EltwiseOp,
   MatmulOp,
-  ReductionOp
+  ReductionOp,
+  SoftmaxOp
 }
 
 table Operation {
diff --git a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp
index daaf2611c8..6de2143258 100644
--- a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp
+++ b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp
@@ -53,6 +53,8 @@ void populateTTNNToEmitCPatterns(mlir::MLIRContext *ctx,
   patterns.add<DefaultOpConversionPattern<ttnn::MultiplyOp>>(typeConverter,
                                                              ctx);
   patterns.add<DefaultOpConversionPattern<ttnn::ReluOp>>(typeConverter, ctx);
+  patterns.add<DefaultOpConversionPattern<ttnn::SoftmaxOp>>(typeConverter,
+                                                            ctx);
   patterns.add<DefaultOpConversionPattern<ttnn::MatmulOp>>(typeConverter, ctx);
   patterns.add<DefaultOpConversionPattern<ttnn::SumOp>>(typeConverter,
                                                         ctx);
@@ -88,6 +90,10 @@ struct ConvertTTNNToEmitCPass
         module.getLoc(),
         "ttnn/operations/reduction/generic/generic_reductions.hpp",
         /*isStandard=*/false);
+    builder.create<emitc::IncludeOp>(
+        module.getLoc(),
+        "ttnn/operations/normalization.hpp",
+        /*isStandard=*/false);
   }
 
   // TTNN -> EmitC
diff --git a/lib/Dialect/TTIR/IR/TTIROps.cpp b/lib/Dialect/TTIR/IR/TTIROps.cpp
index 48e205867d..0e0d6883d4 100644
--- a/lib/Dialect/TTIR/IR/TTIROps.cpp
+++ b/lib/Dialect/TTIR/IR/TTIROps.cpp
@@ -26,6 +26,26 @@ ::mlir::LogicalResult mlir::tt::ttir::LayoutOp::verify() {
   return success();
 }
 
+::mlir::LogicalResult mlir::tt::ttir::SoftmaxOp::verify() {
+  ::mlir::RankedTensorType inputType = getInput().getType();
+  ::mlir::RankedTensorType outputType = getOutput().getType();
+
+  // Shapes of input and output of a softmax operation must be the same
+  if (inputType.getShape() != outputType.getShape()) {
+    return emitOpError("Input and output shapes must be the same");
+  }
+
+  int32_t dim = getDimension();
+
+  // Check that the dim is within the bounds of the input tensor
+  if (dim >= inputType.getRank() || dim < -inputType.getRank()) {
+    return emitOpError(
+        "Dimension attribute must be within the bounds of the input tensor");
+  }
+
+  return success();
+}
+
 // ANCHOR: adding_an_op_matmul_ttir_verify
 ::mlir::LogicalResult mlir::tt::ttir::MatmulOp::verify() {
   ::mlir::RankedTensorType inputAType = getA().getType();
diff --git a/lib/Dialect/TTIR/Transforms/Passes.cpp b/lib/Dialect/TTIR/Transforms/Passes.cpp
index 75471f6e4b..7fce43ab27 100644
--- a/lib/Dialect/TTIR/Transforms/Passes.cpp
+++ b/lib/Dialect/TTIR/Transforms/Passes.cpp
@@ -568,6 +568,7 @@ class TTIRLayout : public impl::TTIRLayoutBase<TTIRLayout> {
       TTIRLayoutOperandsRewriter<AddOp>,
       TTIRLayoutOperandsRewriter<MultiplyOp>, TTIRLayoutOperandsRewriter<ReluOp>,
       TTIRLayoutOperandsRewriter<SumOp>,
+      TTIRLayoutOperandsRewriter<SoftmaxOp>,
      TTIRLayoutOperandsRewriter<MatmulOp>, TTIRLayoutFuncReturnRewriter>(
          &getContext());
   FrozenRewritePatternSet patternSet(std::move(patterns));
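
The TTIR verifier above and the TTNN verifier in the next file enforce the same contract: matching input/output shapes, and a dimension in the half-open interval [-rank, rank), so negative values index from the last axis. A worked check for the tensors used in this patch's tests: tensor<512x1024xbf16> has rank 2, so dimension may be -2, -1, 0 or 1; the values 2 and -3 used by the two negative tests both fail the check `dim >= inputType.getRank() || dim < -inputType.getRank()` and trigger the "Dimension attribute must be within the bounds of the input tensor" diagnostic.
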
diff --git a/lib/Dialect/TTNN/IR/TTNNOps.cpp b/lib/Dialect/TTNN/IR/TTNNOps.cpp
index c8ed8ab192..89b0e68e61 100644
--- a/lib/Dialect/TTNN/IR/TTNNOps.cpp
+++ b/lib/Dialect/TTNN/IR/TTNNOps.cpp
@@ -30,6 +30,26 @@ ::mlir::LogicalResult mlir::tt::ttnn::ToMemoryConfigOp::verify() {
   return success();
 }
 
+::mlir::LogicalResult mlir::tt::ttnn::SoftmaxOp::verify() {
+  ::mlir::RankedTensorType inputType = getInput().getType();
+  ::mlir::RankedTensorType outputType = getOutput().getType();
+
+  // Shapes of input and output of a softmax operation must be the same
+  if (inputType.getShape() != outputType.getShape()) {
+    return emitOpError("Input and output shapes must be the same");
+  }
+
+  int32_t dim = getDimension();
+
+  // Check that the dim is within the bounds of the input tensor
+  if (dim >= inputType.getRank() || dim < -inputType.getRank()) {
+    return emitOpError(
+        "Dimension attribute must be within the bounds of the input tensor");
+  }
+
+  return success();
+}
+
 // ANCHOR: adding_an_op_matmul_ttnn_verify
 ::mlir::LogicalResult mlir::tt::ttnn::MatmulOp::verify() {
   ::mlir::RankedTensorType inputAType = getA().getType();
diff --git a/lib/Dialect/TTNN/Transforms/Passes.cpp b/lib/Dialect/TTNN/Transforms/Passes.cpp
index 718c957ff8..1102c8106e 100644
--- a/lib/Dialect/TTNN/Transforms/Passes.cpp
+++ b/lib/Dialect/TTNN/Transforms/Passes.cpp
@@ -107,6 +107,18 @@ class TTIRToTTNNReductionOpRewriter : public OpRewritePattern<ttir::SumOp> {
   }
 };
 
+class TTIRToTTNNSoftmaxOpRewriter : public OpRewritePattern<ttir::SoftmaxOp> {
+  using OpRewritePattern<ttir::SoftmaxOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(ttir::SoftmaxOp op,
+                                PatternRewriter &rewriter) const final {
+    rewriter.replaceOpWithNewOp<SoftmaxOp>(op, op.getResult().getType(),
+                                           op.getInput(), op.getOutput(),
+                                           op.getDimension());
+    return success();
+  }
+};
+
 // ANCHOR: adding_an_op_matmul_op_rewriter
 template <typename TTIROp, typename TTNNOp>
 class TTIRToTTNNBinaryOpRewriter : public OpRewritePattern<TTIROp> {
@@ -150,7 +162,8 @@ class ConvertTTIRToTTNN
                  TTIRToTTNNOpRewriter<ttir::ReluOp, ReluOp>,
                  TTIRToTTNNBinaryOpRewriter<ttir::MatmulOp, MatmulOp>,
                  TTIRToTTNNReductionOpRewriter,
-                 TensorEmptyToFullRewriter>(&getContext());
+                 TTIRToTTNNSoftmaxOpRewriter, TensorEmptyToFullRewriter>(
+                     &getContext());
   // ANCHOR_END: adding_an_op_matmul_rewrite_pattern_set
   FrozenRewritePatternSet patternSet(std::move(patterns));
   if (failed(applyPatternsAndFoldGreedily(getOperation(), patternSet))) {
diff --git a/lib/Dialect/TTNN/Transforms/TTNNToSerializedBinary.cpp b/lib/Dialect/TTNN/Transforms/TTNNToSerializedBinary.cpp
index 6b686a4150..bb96925d18 100644
--- a/lib/Dialect/TTNN/Transforms/TTNNToSerializedBinary.cpp
+++ b/lib/Dialect/TTNN/Transforms/TTNNToSerializedBinary.cpp
@@ -151,6 +151,18 @@ createReductionOp(FlatbufferObjectCache &cache, ReductionOp op) {
                                               dim_arg, op.getKeepDim());
 }
 
+template <typename SoftmaxOp>
+::flatbuffers::Offset<::tt::target::ttnn::SoftmaxOp>
+createSoftmaxOp(FlatbufferObjectCache &cache, SoftmaxOp op) {
+  auto in =
+      cache.at<::tt::target::TensorRef>(getOperandThroughDPSOps(op.getInput()));
+  auto out = cache.at<::tt::target::TensorRef>(
+      getOperandThroughDPSOps(op.getResult()));
+  int32_t dimension = op.getDimension();
+
+  return ::tt::target::ttnn::CreateSoftmaxOp(*cache.fbb, in, out, dimension);
+}
+
 ::flatbuffers::Offset<::tt::target::ttnn::Operation>
 emitTTNNOperation(FlatbufferObjectCache &cache, Operation *op,
                   std::string const &debugString) {
@@ -188,6 +200,10 @@ emitTTNNOperation(FlatbufferObjectCache &cache, Operation *op,
   if (auto sumOp = dyn_cast<SumOp>(op); sumOp) {
     return createOperation(cache, createReductionOp(cache, sumOp), debugString);
   }
+  if (auto softmaxOp = dyn_cast<SoftmaxOp>(op); softmaxOp) {
+    return createOperation(cache, createSoftmaxOp(cache, softmaxOp),
+                           debugString);
+  }
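
After --convert-ttir-to-ttnn runs, each ttir.softmax has been replaced by a ttnn.softmax that keeps the input, the DPS output, and the dimension attribute; the operand_constraints array is dropped, having already been consumed by the TTIR layout pass. A sketch of the lowered form (SSA names and the elided layout encodings on the tensor types are illustrative; the CHECK lines in the test below match only the op names):

    %2 = "ttnn.softmax"(%1, %0) <{dimension = 1 : i32}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>

The serialization hook above then walks these ops and writes one flatbuffer SoftmaxOp table (in, out, dimension) per instance.
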
 
   llvm_unreachable("unhandled op in emitTTNNOperation");
 }
diff --git a/runtime/include/tt/runtime/detail/ttnn.h b/runtime/include/tt/runtime/detail/ttnn.h
index c31df0a3c1..693f008fe0 100644
--- a/runtime/include/tt/runtime/detail/ttnn.h
+++ b/runtime/include/tt/runtime/detail/ttnn.h
@@ -38,6 +38,7 @@
 #include "ttnn/operations/core.hpp"
 #include "ttnn/operations/creation.hpp"
 #include "ttnn/operations/matmul.hpp"
+#include "ttnn/operations/normalization.hpp"
 #pragma clang diagnostic pop
 
 #include "tt/runtime/types.h"
diff --git a/runtime/lib/ttnn/program.cpp b/runtime/lib/ttnn/program.cpp
index 1e2f9c0ea1..9d5572ab3a 100644
--- a/runtime/lib/ttnn/program.cpp
+++ b/runtime/lib/ttnn/program.cpp
@@ -119,6 +119,17 @@ run(::tt::target::ttnn::ReductionOp const *op, ::ttnn::Device &device,
   }
 }
 
+static void
+run(::tt::target::ttnn::SoftmaxOp const *op, ::ttnn::device::Device &device,
+    std::unordered_map<std::uint32_t, ::ttnn::Tensor *> &liveTensors,
+    std::list<::ttnn::Tensor> &tensorPool) {
+  ::ttnn::Tensor &in = *liveTensors.at(op->in()->global_id());
+  int32_t dimension = op->dimension();
+
+  tensorPool.push_back(::ttnn::softmax(in, dimension));
+  liveTensors.try_emplace(op->out()->global_id(), &tensorPool.back());
+}
+
 // ANCHOR: adding_an_op_matmul_runtime
 static void
 run(::tt::target::ttnn::MatmulOp const *op, ::ttnn::Device &device,
@@ -161,6 +172,9 @@ run(::tt::target::ttnn::Operation const *op, ::ttnn::Device &device,
   case ::tt::target::ttnn::OpType::ReductionOp: {
     return run(op->type_as_ReductionOp(), device, liveTensors, tensorPool);
   }
+  case ::tt::target::ttnn::OpType::SoftmaxOp: {
+    return run(op->type_as_SoftmaxOp(), device, liveTensors, tensorPool);
+  }
   default:
     throw std::runtime_error("Unsupported operation type");
   }
diff --git a/test/ttmlir/Dialect/TTNN/softmax/softmax.mlir b/test/ttmlir/Dialect/TTNN/softmax/softmax.mlir
new file mode 100644
index 0000000000..f0e329b3e8
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/softmax/softmax.mlir
@@ -0,0 +1,22 @@
+// RUN: ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn %s | FileCheck %s
+#any_device = #tt.operand_constraint<any_device>
+module attributes {tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
+  func.func @forward(%arg0: tensor<512x1024xbf16>) -> tensor<512x1024xbf16> {
+    // CHECK: %[[C:.*]] = "ttnn.open_device"[[C:.*]]
+    // CHECK: %[[C:.*]] = "ttnn.full"[[C:.*]]
+    %0 = tensor.empty() : tensor<512x1024xbf16>
+    // CHECK: %[[C:.*]] = "ttnn.to_memory_config"[[C:.*]]
+    // CHECK: %[[C:.*]] = "ttnn.softmax"[[C:.*]]
+    // Check for positive dimension attribute
+    %1 = "ttir.softmax"(%arg0, %0) <{dimension = 1 : i32, operand_constraints = [#any_device, #any_device]}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>
+    // CHECK: %[[C:.*]] = "ttnn.full"[[C:.*]]
+    %2 = tensor.empty() : tensor<512x1024xbf16>
+    // CHECK: %[[C:.*]] = "ttnn.to_memory_config"[[C:.*]]
+    // CHECK: %[[C:.*]] = "ttnn.softmax"[[C:.*]]
+    // Check for negative dimension attribute
+    %3 = "ttir.softmax"(%1, %2) <{dimension = -1 : i32, operand_constraints = [#any_device, #any_device]}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>
+    // CHECK: %[[C:.*]] = "ttnn.to_memory_config"[[C:.*]]
+    // CHECK: "ttnn.close_device"[[C:.*]]
+    return %3 : tensor<512x1024xbf16>
+  }
+}
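
On the runtime side, dispatch mirrors the other ops in program.cpp: the SoftmaxOp case looks up the live input tensor by global_id, calls ::ttnn::softmax(in, dimension), keeps the result alive in tensorPool, and registers it under the output's global_id. The positive test above can also be run by hand by expanding the %s placeholders of its RUN line:

    ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn \
        test/ttmlir/Dialect/TTNN/softmax/softmax.mlir
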
diff --git a/test/ttmlir/Dialect/TTNN/softmax/softmax_negative_1.mlir b/test/ttmlir/Dialect/TTNN/softmax/softmax_negative_1.mlir
new file mode 100644
index 0000000000..eea94a5c50
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/softmax/softmax_negative_1.mlir
@@ -0,0 +1,10 @@
+// RUN: not ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn %s 2>&1 | FileCheck %s
+// CHECK: error: 'ttir.softmax' op Dimension attribute must be within the bounds of the input tensor
+#any_device = #tt.operand_constraint<any_device>
+module attributes {tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
+  func.func @forward(%arg0: tensor<512x1024xbf16>) -> tensor<512x1024xbf16> {
+    %0 = tensor.empty() : tensor<512x1024xbf16>
+    %1 = "ttir.softmax"(%arg0, %0) <{dimension = 2 : i32, operand_constraints = [#any_device, #any_device]}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>
+    return %1 : tensor<512x1024xbf16>
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/softmax/softmax_negative_2.mlir b/test/ttmlir/Dialect/TTNN/softmax/softmax_negative_2.mlir
new file mode 100644
index 0000000000..b7282d75c6
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/softmax/softmax_negative_2.mlir
@@ -0,0 +1,10 @@
+// RUN: not ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn %s 2>&1 | FileCheck %s
+// CHECK: error: 'ttir.softmax' op Dimension attribute must be within the bounds of the input tensor
+#any_device = #tt.operand_constraint<any_device>
+module attributes {tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
+  func.func @forward(%arg0: tensor<512x1024xbf16>) -> tensor<512x1024xbf16> {
+    %0 = tensor.empty() : tensor<512x1024xbf16>
+    %1 = "ttir.softmax"(%arg0, %0) <{dimension = -3 : i32, operand_constraints = [#any_device, #any_device]}> : (tensor<512x1024xbf16>, tensor<512x1024xbf16>) -> tensor<512x1024xbf16>
+    return %1 : tensor<512x1024xbf16>
+  }
+}
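
The two negative tests drive the out-of-bounds branch of the verifier from both sides (dimension = 2 and dimension = -3 on a rank-2 tensor) and FileCheck the exact diagnostic. Assuming the repository's usual lit-based test setup (the exact invocation depends on how the build tree configures lit), the new directory can be run with:

    llvm-lit -v test/ttmlir/Dialect/TTNN/softmax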