From f2ccda0aedf5d83fb5c1c7b2ed603468ca9088b1 Mon Sep 17 00:00:00 2001
From: Andrej Jakovljevic
Date: Fri, 1 Nov 2024 16:40:12 +0100
Subject: [PATCH] Add support for expm1 op (#1045)

- Add end-to-end implementation of the expm1 op
- Add stablehlo to ttir to ttnn IR conversion of the op

---
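Notes (editorial sketch, not part of the applied commit): the two bullets
above correspond to two lowering steps, illustrated here with hypothetical
IR that mirrors the lit tests added by this patch. First, the new
StableHLO-to-TTIR pattern rewrites stablehlo.exponential_minus_one into a
destination-passing-style ttir.expm1 whose output buffer comes from
tensor.empty. Shapes and attribute names below are taken from the added
exponential_minus_one_op.mlir test; nothing here is from outside this patch.

    // Input (StableHLO), as in the added lit test:
    func.func public @test_expm1(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> {
      %0 = stablehlo.exponential_minus_one %arg0 : tensor<13x21x3xf32>
      return %0 : tensor<13x21x3xf32>
    }

    // After --stablehlo-to-ttir-pipeline (roughly, per the test's CHECK lines):
    %0 = tensor.empty() : tensor<13x21x3xf32>
    %1 = "ttir.expm1"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>,
          operand_constraints = [#any_device_tile, #any_device_tile]}>
          : (tensor<13x21x3xf32>, tensor<13x21x3xf32>) -> tensor<13x21x3xf32>

The second step (TTIR to TTNN) is sketched in the note at the end of this
patch.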
 include/ttmlir/Dialect/TTIR/IR/TTIROps.td          | 11 +++++++++++
 include/ttmlir/Dialect/TTNN/IR/TTNNOps.td          | 11 +++++++++++
 include/ttmlir/Target/TTNN/program.fbs             |  1 +
 .../StableHLOToTTIR/StableHLOToTTIRPatterns.cpp    |  2 ++
 lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp           |  1 +
 lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp         |  1 +
 lib/Target/TTNN/TTNNToFlatbuffer.cpp               |  5 +++++
 .../lib/ttnn/operations/eltwise/unary/unary.cpp    |  4 ++++
 .../StableHLOToTTIR/exponential_minus_one_op.mlir  | 12 ++++++++++++
 .../TTNN/eltwise/unary/expm1/simple_expm1.mlir     | 12 ++++++++++++
 .../Silicon/TTNN/perf_unit/test_perf_expm1.mlir    | 14 ++++++++++++++
 test/ttmlir/Silicon/TTNN/simple_eltwise.mlir       |  9 +++++++++
 12 files changed, 83 insertions(+)
 create mode 100644 test/ttmlir/Conversion/StableHLOToTTIR/exponential_minus_one_op.mlir
 create mode 100644 test/ttmlir/Dialect/TTNN/eltwise/unary/expm1/simple_expm1.mlir
 create mode 100644 test/ttmlir/Silicon/TTNN/perf_unit/test_perf_expm1.mlir

diff --git a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
index 6beaa7460..92f491b03 100644
--- a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
+++ b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
@@ -293,6 +293,17 @@ def TTIR_Log1pOp: TTIR_ElementwiseUnaryOp<"log1p"> {
   }];
 }
 
+def TTIR_Expm1Op: TTIR_ElementwiseUnaryOp<"expm1"> {
+  let description = [{
+    Performs element-wise exponential minus one operation on `operand` tensor
+    and stores the result in the output tensor.
+
+    Example:
+    %a: [[0, 1], [0, 0]]
+    "ttir.expm1"(%a, %out) -> %out: [[0, 1.71828], [0, 0]]
+  }];
+}
+
 class TTIR_ElementwiseBinaryOp<string mnemonic, list<Trait> traits = []> :
     TTIR_ElementwiseOp<mnemonic, traits> {
   let summary = "Eltwise binary op.";
diff --git a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
index 79b1f8570..4dcf05046 100644
--- a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
+++ b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
@@ -256,6 +256,17 @@ def TTNN_Log1pOp: TTNN_ElementwiseUnaryOp<"log1p"> {
   }];
 }
 
+def TTNN_Expm1Op: TTNN_ElementwiseUnaryOp<"expm1"> {
+  let description = [{
+    Performs element-wise exponential minus one operation on `operand` tensor
+    and stores the result in the output tensor.
+
+    Example:
+    %a: [[0, 1], [0, 0]]
+    "ttnn.expm1"(%a, %out) -> %out: [[0, 1.71828], [0, 0]]
+  }];
+}
+
 def TTNN_AddOp : TTNN_ElementwiseBinaryOp<"add"> {
   let summary = "Eltwise add.";
   let description = [{
diff --git a/include/ttmlir/Target/TTNN/program.fbs b/include/ttmlir/Target/TTNN/program.fbs
index 44ae9d8c2..11e781c62 100644
--- a/include/ttmlir/Target/TTNN/program.fbs
+++ b/include/ttmlir/Target/TTNN/program.fbs
@@ -88,6 +88,7 @@ enum EltwiseOpType: uint32 {
   Cos = 27,
   Log = 28,
   Log1p = 29,
+  Expm1 = 30
 }
 
 union EltwiseOpParams {
diff --git a/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp b/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp
index d94e66fe7..2986f486a 100644
--- a/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp
+++ b/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp
@@ -861,6 +861,8 @@ void addElementwiseUnaryOpsConversionPatterns(MLIRContext *ctx,
       mlir::stablehlo::SqrtOp, mlir::tt::ttir::SqrtOp>>(typeConverter, ctx);
   patterns.add<StableHLOToTTIROpDefaultConversionPattern<
       mlir::stablehlo::Log1pOp, mlir::tt::ttir::Log1pOp>>(typeConverter, ctx);
+  patterns.add<StableHLOToTTIROpDefaultConversionPattern<
+      mlir::stablehlo::Expm1Op, mlir::tt::ttir::Expm1Op>>(typeConverter, ctx);
 }
 
 void addElementwiseBinaryOpsConversionPatterns(MLIRContext *ctx,
diff --git a/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp b/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp
index 27543b721..b0cfc3634 100644
--- a/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp
+++ b/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp
@@ -885,6 +885,7 @@ void populateTTIRToTTNNPatterns(MLIRContext *ctx, RewritePatternSet &patterns,
            ElementwiseOpConversionPattern<ttir::CosOp, ttnn::CosOp>,
            ElementwiseOpConversionPattern<ttir::LogOp, ttnn::LogOp>,
            ElementwiseOpConversionPattern<ttir::Log1pOp, ttnn::Log1pOp>,
+           ElementwiseOpConversionPattern<ttir::Expm1Op, ttnn::Expm1Op>,
            ReductionOpConversionPattern<ttir::SumOp, ttnn::SumOp>,
            ReductionOpConversionPattern<ttir::MeanOp, ttnn::MeanOp>,
            ReductionOpConversionPattern<ttir::MaxOp, ttnn::MaxOp>,
diff --git a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp
index 561830099..f78ed65e1 100644
--- a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp
+++ b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp
@@ -628,6 +628,7 @@ void populateTTNNToEmitCPatterns(mlir::MLIRContext *ctx,
            DefaultOpConversionPattern<ttnn::CosOp>,
            DefaultOpConversionPattern<ttnn::LogOp>,
            DefaultOpConversionPattern<ttnn::Log1pOp>,
+           DefaultOpConversionPattern<ttnn::Expm1Op>,
            DefaultOpConversionPattern<ttnn::SinOp>>(typeConverter, ctx);
 
   // Eltwise binary ops
diff --git a/lib/Target/TTNN/TTNNToFlatbuffer.cpp b/lib/Target/TTNN/TTNNToFlatbuffer.cpp
index 0fbc52708..cc41042cb 100644
--- a/lib/Target/TTNN/TTNNToFlatbuffer.cpp
+++ b/lib/Target/TTNN/TTNNToFlatbuffer.cpp
@@ -355,6 +355,8 @@ createEltwiseOp(FlatbufferObjectCache &cache, EltwiseOp op) {
     type = ::tt::target::ttnn::EltwiseOpType::Sin;
   } else if constexpr (std::is_same_v<EltwiseOp, LogOp>) {
     type = ::tt::target::ttnn::EltwiseOpType::Log;
+  } else if constexpr (std::is_same_v<EltwiseOp, Expm1Op>) {
+    type = ::tt::target::ttnn::EltwiseOpType::Expm1;
   } else {
     llvm_unreachable("unhandled EltwiseOp");
   }
@@ -604,6 +606,9 @@ emitTTNNOperation(FlatbufferObjectCache &cache, Operation *op,
   if (auto logOp = dyn_cast<LogOp>(op); logOp) {
     return createOperation(cache, createEltwiseOp(cache, logOp), debugString);
   }
+  if (auto expm1Op = dyn_cast<Expm1Op>(op); expm1Op) {
+    return createOperation(cache, createEltwiseOp(cache, expm1Op), debugString);
+  }
   if (auto sigmoidOp = dyn_cast<SigmoidOp>(op); sigmoidOp) {
     return createOperation(cache, createEltwiseOp(cache, sigmoidOp),
                            debugString);
diff --git a/runtime/lib/ttnn/operations/eltwise/unary/unary.cpp b/runtime/lib/ttnn/operations/eltwise/unary/unary.cpp
index e5b3216bd..77a89cf07 100644
--- a/runtime/lib/ttnn/operations/eltwise/unary/unary.cpp
+++ b/runtime/lib/ttnn/operations/eltwise/unary/unary.cpp
@@ -104,6 +104,10 @@ void run(const ::tt::target::ttnn::EltwiseOp *op, ProgramContext &context) {
     runEltwiseUnaryOP(op, tensorPool, ::ttnn::log);
     break;
   }
+  case ::tt::target::ttnn::EltwiseOpType::Expm1: {
+    runEltwiseUnaryOP(op, tensorPool, ::ttnn::expm1);
+    break;
+  }
   default:
     throw std::invalid_argument("Unsupported unary operation");
   }
diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/exponential_minus_one_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/exponential_minus_one_op.mlir
new file mode 100644
index 000000000..179268b76
--- /dev/null
+++ b/test/ttmlir/Conversion/StableHLOToTTIR/exponential_minus_one_op.mlir
@@ -0,0 +1,12 @@
+// REQUIRES: stablehlo
+// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module @jit_eltwise_expm1 attributes {} {
+  func.func public @test_expm1(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> {
+    %0 = stablehlo.exponential_minus_one %arg0 : tensor<13x21x3xf32>
+    // CHECK: [[VAL0:%[0-9]+]] = tensor.empty() : [[TENSOR_SIZE:tensor<[0-9]+x[0-9]+x[0-9]+xf[0-9]+>]]
+    // CHECK: [[VAL1:%[0-9]+]] = "ttir.expm1"(%arg0, [[VAL0]]) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device_tile, #any_device_tile]}> : ([[TENSOR_SIZE]], [[TENSOR_SIZE]]) -> [[TENSOR_SIZE]]
+    return %0 : tensor<13x21x3xf32>
+    // CHECK: return [[VAL1]] : [[TENSOR_SIZE]]
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/eltwise/unary/expm1/simple_expm1.mlir b/test/ttmlir/Dialect/TTNN/eltwise/unary/expm1/simple_expm1.mlir
new file mode 100644
index 000000000..59a7b2a18
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/eltwise/unary/expm1/simple_expm1.mlir
@@ -0,0 +1,12 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline %s | FileCheck %s
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module attributes {} {
+  func.func @forward(%arg0: tensor<64x128xf32>) -> tensor<64x128xf32> {
+    %0 = tensor.empty() : tensor<64x128xf32>
+    // CHECK: [[VAL0:%[0-9]+]] = "ttnn.empty"(%{{[0-9]+}}) <{dtype = {{.*}}, layout = {{.*}}, memory_config = {{.*}}, <{{.*}}>>, shape = #ttnn.shape<[[TENSOR_SHAPE:[0-9]+x[0-9]+]]>}>
+    %1 = "ttir.expm1"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32>
+    // CHECK: %{{[0-9]+}} = "ttnn.expm1"(%{{[0-9]+}}, [[VAL0]]) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<[[TENSOR_SHAPE]]x{{.*}}, {{.*}}>, tensor<[[TENSOR_SHAPE]]x{{.*}}, {{.*}}) -> tensor<[[TENSOR_SHAPE]]x{{.*}}, {{.*}}>
+    return %1 : tensor<64x128xf32>
+    // CHECK: return %{{[0-9]+}} : tensor<[[TENSOR_SHAPE]]xf32, {{.*}}>
+  }
+}
diff --git a/test/ttmlir/Silicon/TTNN/perf_unit/test_perf_expm1.mlir b/test/ttmlir/Silicon/TTNN/perf_unit/test_perf_expm1.mlir
new file mode 100644
index 000000000..27cf6f80e
--- /dev/null
+++ b/test/ttmlir/Silicon/TTNN/perf_unit/test_perf_expm1.mlir
@@ -0,0 +1,14 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir
+// RUN: FileCheck %s --input-file=%t.mlir
+// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+#any_device_tile = #tt.operand_constraint<dram|l1|tile|any_device_tile>
+
+func.func @expm1(%arg0: tensor<64x128xf32>) -> tensor<64x128xf32> {
+  %0 = tensor.empty() : tensor<64x128xf32>
+  // CHECK: [[VAL0:%[0-9]+]] = "ttnn.empty"(%{{[0-9]+}}) <{dtype = {{.*}}, layout = {{.*}}, memory_config = {{.*}}, <{{.*}}>>, shape = #ttnn.shape<[[TENSOR_SHAPE:[0-9]+x[0-9]+]]>}>
+  %1 = "ttir.expm1"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32>
+  // CHECK: %{{[0-9]+}} = "ttnn.expm1"(%{{[0-9]+}}, [[VAL0]]) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<[[TENSOR_SHAPE]]x{{.*}}, {{.*}}>, tensor<[[TENSOR_SHAPE]]x{{.*}}, {{.*}}) -> tensor<[[TENSOR_SHAPE]]x{{.*}}, {{.*}}>
+  return %1 : tensor<64x128xf32>
+  // CHECK: return %{{[0-9]+}} : tensor<[[TENSOR_SHAPE]]xf32, {{.*}}>
+}
diff --git a/test/ttmlir/Silicon/TTNN/simple_eltwise.mlir b/test/ttmlir/Silicon/TTNN/simple_eltwise.mlir
index 71bafe10d..816a3c586 100644
--- a/test/ttmlir/Silicon/TTNN/simple_eltwise.mlir
+++ b/test/ttmlir/Silicon/TTNN/simple_eltwise.mlir
@@ -200,3 +200,12 @@ func.func @log1p(%arg0: tensor<64x128xf32>) -> tensor<64x128xf32> {
   return %1 : tensor<64x128xf32>
   // CHECK: return %{{[0-9]+}} : tensor<[[TENSOR_SHAPE]]xf32, {{.*}}>
 }
+
+func.func @expm1(%arg0: tensor<64x128xf32>) -> tensor<64x128xf32> {
+  %0 = tensor.empty() : tensor<64x128xf32>
+  // CHECK: [[VAL0:%[0-9]+]] = "ttnn.empty"(%{{[0-9]+}}) <{dtype = {{.*}}, layout = {{.*}}, memory_config = {{.*}}, <{{.*}}>>, shape = #ttnn.shape<[[TENSOR_SHAPE:[0-9]+x[0-9]+]]>}>
+  %1 = "ttir.expm1"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32>
+  // CHECK: %{{[0-9]+}} = "ttnn.expm1"(%{{[0-9]+}}, [[VAL0]]) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<[[TENSOR_SHAPE]]x{{.*}}, {{.*}}>, tensor<[[TENSOR_SHAPE]]x{{.*}}, {{.*}}) -> tensor<[[TENSOR_SHAPE]]x{{.*}}, {{.*}}>
+  return %1 : tensor<64x128xf32>
+  // CHECK: return %{{[0-9]+}} : tensor<[[TENSOR_SHAPE]]xf32, {{.*}}>
+}
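
Note (editorial sketch, not part of the applied commit): after the second
step, --ttir-to-ttnn-backend-pipeline, the ttir.expm1 above becomes a
ttnn.expm1 writing into a ttnn.empty-allocated tensor. The sketch below is
distilled from the CHECK lines in simple_expm1.mlir; %device and every "..."
stand in for layout/memory_config details that the tests intentionally
wildcard.

    // After --ttir-to-ttnn-backend-pipeline (roughly):
    %out = "ttnn.empty"(%device) <{...}> : (...) -> tensor<64x128xf32, ...>
    %res = "ttnn.expm1"(%arg0, %out) <{operandSegmentSizes = array<i32: 1, 1>}>
           : (tensor<64x128xf32, ...>, tensor<64x128xf32, ...>) -> tensor<64x128xf32, ...>

At runtime the flatbuffer Expm1 case dispatches to ::ttnn::expm1, i.e.
e^x - 1 computed elementwise; expm1(1) = 1.71828... as in the op
description's example, and unlike exp(x) - 1 it stays accurate for inputs
near zero, which is the usual reason this op exists alongside exp.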