From 3dbf08930b67a4efa97f4460fd0961a03ef1f8d0 Mon Sep 17 00:00:00 2001
From: Muhammad Asif Manzoor <mmanzoor@tenstorrent.com>
Date: Mon, 4 Nov 2024 14:23:49 -0500
Subject: [PATCH] Add support for isfinite and floor ops. (#956)

* Add end-to-end implementation of the ops
* Add stablehlo to ttir conversion
---
 include/ttmlir/Dialect/TTIR/IR/TTIROps.td     | 28 ++++++++++++++-----
 include/ttmlir/Dialect/TTNN/IR/TTNNOps.td     | 14 ++++++++++
 include/ttmlir/Target/TTNN/program.fbs        |  4 ++-
 .../StableHLOToTTIRPatterns.cpp               |  5 ++++
 lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp      |  2 ++
 lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp    |  2 ++
 lib/Target/TTNN/TTNNToFlatbuffer.cpp          | 11 ++++++++
 .../ttnn/operations/eltwise/unary/unary.cpp   |  8 ++++++
 .../Conversion/StableHLOToTTIR/floor_op.mlir  | 11 ++++++++
 .../StableHLOToTTIR/isfinite_op.mlir          | 13 +++++++++
 .../eltwise/unary/floor/simple_floor.mlir     | 15 ++++++++++
 .../unary/isfinite/simple_isfinite.mlir       | 15 ++++++++++
 .../TTNN/perf_unit/test_perf_floor.mlir       | 17 +++++++++++
 .../TTNN/perf_unit/test_perf_isfinite.mlir    | 17 +++++++++++
 test/ttmlir/Silicon/TTNN/simple_eltwise.mlir  | 24 ++++++++++++++++
 15 files changed, 178 insertions(+), 8 deletions(-)
 create mode 100644 test/ttmlir/Conversion/StableHLOToTTIR/floor_op.mlir
 create mode 100644 test/ttmlir/Conversion/StableHLOToTTIR/isfinite_op.mlir
 create mode 100644 test/ttmlir/Dialect/TTNN/eltwise/unary/floor/simple_floor.mlir
 create mode 100644 test/ttmlir/Dialect/TTNN/eltwise/unary/isfinite/simple_isfinite.mlir
 create mode 100644 test/ttmlir/Silicon/TTNN/perf_unit/test_perf_floor.mlir
 create mode 100644 test/ttmlir/Silicon/TTNN/perf_unit/test_perf_isfinite.mlir

diff --git a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
index 3e0d27598..381e3750f 100644
--- a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
+++ b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
@@ -211,15 +211,17 @@ def TTIR_CosOp: TTIR_ElementwiseUnaryOp<"cos"> {
     }];
 }
 
-def TTIR_SignOp: TTIR_ElementwiseUnaryOp<"sign"> {
-    let summary = "Eltwise sign operation.";
+def TTIR_FloorOp: TTIR_ElementwiseUnaryOp<"floor"> {
+    let summary = "Eltwise floor op.";
     let description = [{
-      Returns the sign of the `operand` element-wise and produces a `result`
-      tensor.
+      Rounds each element of the operand down to the nearest integer (toward -inf).
+    }];
+}
 
-      Example:
-        %a: [[3, -2, 0], [1, -4, 4]]
-        "ttir.sign"(%a, %out) -> %out: [[1, -1, 0], [1, -1, 1]]
+def TTIR_IsFiniteOp: TTIR_ElementwiseUnaryOp<"isfinite"> {
+    let summary = "Eltwise isfinite op.";
+    let description = [{
+      Checks element-wise whether each value of the operand is finite (neither inf nor NaN).
     }];
 }
 
@@ -265,6 +267,18 @@ def TTIR_SigmoidOp: TTIR_ElementwiseUnaryOp<"sigmoid"> {
     }];
 }
 
+def TTIR_SignOp: TTIR_ElementwiseUnaryOp<"sign"> {
+    let summary = "Eltwise sign operation.";
+    let description = [{
+      Returns the sign of the `operand` element-wise and produces a `result`
+      tensor.
+
+      Example:
+        %a: [[3, -2, 0], [1, -4, 4]]
+        "ttir.sign"(%a, %out) -> %out: [[1, -1, 0], [1, -1, 1]]
+    }];
+}
+
 def TTIR_SinOp: TTIR_ElementwiseUnaryOp<"sin"> {
     let summary = "Eltwise sine.";
     let description = [{
diff --git a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
index b65f58a4d..91cb51cca 100644
--- a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
+++ b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
@@ -193,6 +193,20 @@ def TTNN_ExpOp : TTNN_ElementwiseUnaryOp<"exp"> {
     }];
 }
 
+def TTNN_FloorOp: TTNN_ElementwiseUnaryOp<"floor"> {
+    let summary = "Eltwise floor op.";
+    let description = [{
+      Rounds each element of the operand down to the nearest integer (toward -inf).
+    }];
+}
+
+def TTNN_IsFiniteOp: TTNN_ElementwiseUnaryOp<"isfinite"> {
+    let summary = "Eltwise isfinite op.";
+    let description = [{
+      Checks element-wise whether each value of the operand is finite (neither inf nor NaN).
+    }];
+}
+
 def TTNN_LogicalNotOp: TTNN_ElementwiseUnaryOp<"logical_not"> {
     let summary = "Eltwise logical not op.";
     let description = [{
diff --git a/include/ttmlir/Target/TTNN/program.fbs b/include/ttmlir/Target/TTNN/program.fbs
index 217371c58..1918fa035 100644
--- a/include/ttmlir/Target/TTNN/program.fbs
+++ b/include/ttmlir/Target/TTNN/program.fbs
@@ -90,7 +90,9 @@ enum EltwiseOpType: uint32 {
   Log1p = 29,
   Expm1 = 30,
   Sign = 31,
-  Remainder = 32
+  Remainder = 32,
+  IsFinite = 33,
+  Floor = 34,
 }
 
 union EltwiseOpParams {
diff --git a/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp b/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp
index 0d750aed8..4672e144b 100644
--- a/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp
+++ b/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp
@@ -911,6 +911,11 @@ void addElementwiseUnaryOpsConversionPatterns(MLIRContext *ctx,
       mlir::stablehlo::CosineOp, mlir::tt::ttir::CosOp>>(typeConverter, ctx);
   patterns.add<StableHLOToTTIROpDefaultConversionPattern<
       mlir::stablehlo::ExpOp, mlir::tt::ttir::ExpOp>>(typeConverter, ctx);
+  patterns.add<StableHLOToTTIROpDefaultConversionPattern<
+      mlir::stablehlo::FloorOp, mlir::tt::ttir::FloorOp>>(typeConverter, ctx);
+  patterns.add<StableHLOToTTIROpDefaultConversionPattern<
+      mlir::stablehlo::IsFiniteOp, mlir::tt::ttir::IsFiniteOp>>(typeConverter,
+                                                                ctx);
   patterns.add<StableHLOToTTIROpDefaultConversionPattern<
       mlir::stablehlo::NegOp, mlir::tt::ttir::NegOp>>(typeConverter, ctx);
   patterns.add<StableHLOToTTIROpDefaultConversionPattern<
diff --git a/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp b/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp
index c0864c8d8..f18d4034b 100644
--- a/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp
+++ b/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp
@@ -860,6 +860,8 @@ void populateTTIRToTTNNPatterns(MLIRContext *ctx, RewritePatternSet &patterns,
            ElementwiseOpConversionPattern<ttir::AbsOp, ttnn::AbsOp>,
            ElementwiseOpConversionPattern<ttir::AddOp, ttnn::AddOp>,
            ElementwiseOpConversionPattern<ttir::CbrtOp, ttnn::CbrtOp>,
+           ElementwiseOpConversionPattern<ttir::FloorOp, ttnn::FloorOp>,
+           ElementwiseOpConversionPattern<ttir::IsFiniteOp, ttnn::IsFiniteOp>,
            ElementwiseOpConversionPattern<ttir::LogicalAndOp, ttnn::LogicalAndOp>,
            ElementwiseOpConversionPattern<ttir::LogicalOrOp, ttnn::LogicalOrOp>,
            ElementwiseOpConversionPattern<ttir::LogicalNotOp, ttnn::LogicalNotOp>,
diff --git a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp
index c29d00757..0a04c53a8 100644
--- a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp
+++ b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp
@@ -616,6 +616,8 @@ void populateTTNNToEmitCPatterns(mlir::MLIRContext *ctx,
   //
   patterns.add<DefaultOpConversionPattern<ttnn::AbsOp>,
                DefaultOpConversionPattern<ttnn::CbrtOp>,
+               DefaultOpConversionPattern<ttnn::FloorOp>,
+               DefaultOpConversionPattern<ttnn::IsFiniteOp>,
                DefaultOpConversionPattern<ttnn::LogicalNotOp>,
                DefaultOpConversionPattern<ttnn::NegOp>,
                DefaultOpConversionPattern<ttnn::ReluOp>,
diff --git a/lib/Target/TTNN/TTNNToFlatbuffer.cpp b/lib/Target/TTNN/TTNNToFlatbuffer.cpp
index d999570d2..f5f178f41 100644
--- a/lib/Target/TTNN/TTNNToFlatbuffer.cpp
+++ b/lib/Target/TTNN/TTNNToFlatbuffer.cpp
@@ -303,6 +303,10 @@ createEltwiseOp(FlatbufferObjectCache &cache, EltwiseOp op) {
     type = ::tt::target::ttnn::EltwiseOpType::Add;
   } else if constexpr (std::is_same_v<EltwiseOp, CbrtOp>) {
     type = ::tt::target::ttnn::EltwiseOpType::Cbrt;
+  } else if constexpr (std::is_same_v<EltwiseOp, FloorOp>) {
+    type = ::tt::target::ttnn::EltwiseOpType::Floor;
+  } else if constexpr (std::is_same_v<EltwiseOp, IsFiniteOp>) {
+    type = ::tt::target::ttnn::EltwiseOpType::IsFinite;
   } else if constexpr (std::is_same_v<EltwiseOp, LogicalAndOp>) {
     type = ::tt::target::ttnn::EltwiseOpType::LogicalAnd;
   } else if constexpr (std::is_same_v<EltwiseOp, LogicalNotOp>) {
@@ -546,6 +550,13 @@ emitTTNNOperation(FlatbufferObjectCache &cache, Operation *op,
   if (auto addOp = dyn_cast<AddOp>(op); addOp) {
     return createOperation(cache, createEltwiseOp(cache, addOp), debugString);
   }
+  if (auto floorOp = dyn_cast<FloorOp>(op); floorOp) {
+    return createOperation(cache, createEltwiseOp(cache, floorOp), debugString);
+  }
+  if (auto isFiniteOp = dyn_cast<IsFiniteOp>(op); isFiniteOp) {
+    return createOperation(cache, createEltwiseOp(cache, isFiniteOp),
+                           debugString);
+  }
   if (auto andOp = dyn_cast<LogicalAndOp>(op); andOp) {
     return createOperation(cache, createEltwiseOp(cache, andOp), debugString);
   }
diff --git a/runtime/lib/ttnn/operations/eltwise/unary/unary.cpp b/runtime/lib/ttnn/operations/eltwise/unary/unary.cpp
index c7dba6e95..404349404 100644
--- a/runtime/lib/ttnn/operations/eltwise/unary/unary.cpp
+++ b/runtime/lib/ttnn/operations/eltwise/unary/unary.cpp
@@ -62,6 +62,14 @@ void run(const ::tt::target::ttnn::EltwiseOp *op, ProgramContext &context) {
     runEltwiseUnaryOP(op, tensorPool, ::ttnn::cos);
     break;
   }
+  case ::tt::target::ttnn::EltwiseOpType::Floor: {
+    runEltwiseUnaryOP(op, tensorPool, ::ttnn::floor);
+    break;
+  }
+  case ::tt::target::ttnn::EltwiseOpType::IsFinite: {
+    runEltwiseUnaryOP(op, tensorPool, ::ttnn::isfinite);
+    break;
+  }
   case ::tt::target::ttnn::EltwiseOpType::LogicalNot: {
     runEltwiseUnaryOP(op, tensorPool, ::ttnn::logical_not);
     break;
diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/floor_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/floor_op.mlir
new file mode 100644
index 000000000..2df91d6c5
--- /dev/null
+++ b/test/ttmlir/Conversion/StableHLOToTTIR/floor_op.mlir
@@ -0,0 +1,11 @@
+// REQUIRES: stablehlo
+// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module @jit_eltwise_floor attributes {} {
+  func.func public @test_floor(%arg0: tensor<32x32x3xf32>) -> tensor<32x32x3xf32> {
+    %0 = stablehlo.floor %arg0 : tensor<32x32x3xf32>
+    // CHECK: %[[E:.*]] = tensor.empty() : tensor<32x32x3xf32>
+    // CHECK: %[[C:.*]] = "ttir.floor"(%arg0, %[[E]])
+    return %0 : tensor<32x32x3xf32>
+  }
+}
diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/isfinite_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/isfinite_op.mlir
new file mode 100644
index 000000000..bdcef7a95
--- /dev/null
+++ b/test/ttmlir/Conversion/StableHLOToTTIR/isfinite_op.mlir
@@ -0,0 +1,13 @@
+// REQUIRES: stablehlo
+// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module @jit_eltwise_isfinite attributes {} {
+  func.func public @test_isfinite(%arg0: tensor<32x32x3xf32>) -> tensor<32x32x3xi1> {
+    // CHECK: %[[E:.*]] = tensor.empty() : tensor<32x32x3xbf16>
+    // CHECK: %[[C:.*]] = "ttir.isfinite"(%arg0, %[[E]])
+    // CHECK-SAME: (tensor<32x32x3xf32>, tensor<32x32x3xbf16>) -> tensor<32x32x3xbf16>
+    %0 = stablehlo.is_finite %arg0 : (tensor<32x32x3xf32>) -> tensor<32x32x3xi1>
+    // CHECK: return %[[C]] : tensor<32x32x3xbf16>
+    return %0 : tensor<32x32x3xi1>
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/eltwise/unary/floor/simple_floor.mlir b/test/ttmlir/Dialect/TTNN/eltwise/unary/floor/simple_floor.mlir
new file mode 100644
index 000000000..820e429ec
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/eltwise/unary/floor/simple_floor.mlir
@@ -0,0 +1,15 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline %s | FileCheck %s
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module attributes {} {
+  func.func @floor(%arg0: tensor<64x128xf32>) -> tensor<64x128xf32> {
+    // CHECK: %{{[0-9]+}} = "ttnn.empty"
+    // CHECK-SAME: [[TENSOR:tensor<64x128xf32,]]
+    %0 = tensor.empty() : tensor<64x128xf32>
+    // CHECK: %{{[0-9]+}} = "ttnn.floor"
+    // CHECK-SAME: [[TENSOR]]
+    // CHECK-SAME: [[TENSOR]]
+    // CHECK-SAME: -> [[TENSOR]]
+    %1 = "ttir.floor"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32>
+    return %1 : tensor<64x128xf32>
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/eltwise/unary/isfinite/simple_isfinite.mlir b/test/ttmlir/Dialect/TTNN/eltwise/unary/isfinite/simple_isfinite.mlir
new file mode 100644
index 000000000..e819e68f4
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/eltwise/unary/isfinite/simple_isfinite.mlir
@@ -0,0 +1,15 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline %s | FileCheck %s
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module attributes {} {
+  func.func @is_finite(%arg0: tensor<64x128xf32>) -> tensor<64x128xbf16> {
+    // CHECK: %[[C:.*]] = "ttnn.empty"
+    // CHECK-SAME: [[TENSOR:tensor<64x128xbf16,]]
+    %0 = tensor.empty() : tensor<64x128xbf16>
+    // CHECK: %[[C:.*]] = "ttnn.isfinite"
+    // CHECK-SAME: tensor<64x128xf32,
+    // CHECK-SAME: [[TENSOR]]
+    // CHECK-SAME: -> [[TENSOR]]
+    %1 = "ttir.isfinite"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xbf16>) -> tensor<64x128xbf16>
+    return %1 : tensor<64x128xbf16>
+  }
+}
diff --git a/test/ttmlir/Silicon/TTNN/perf_unit/test_perf_floor.mlir b/test/ttmlir/Silicon/TTNN/perf_unit/test_perf_floor.mlir
new file mode 100644
index 000000000..fa77817a8
--- /dev/null
+++ b/test/ttmlir/Silicon/TTNN/perf_unit/test_perf_floor.mlir
@@ -0,0 +1,17 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir
+// RUN: FileCheck %s --input-file=%t.mlir
+// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+#any_device_tile = #tt.operand_constraint<dram|l1|tile|any_device_tile>
+
+func.func @floor(%arg0: tensor<64x128xf32>) -> tensor<64x128xf32> {
+  // CHECK: %{{[0-9]+}} = "ttnn.empty"
+  // CHECK-SAME: [[TENSOR:tensor<64x128xf32,]]
+  %0 = tensor.empty() : tensor<64x128xf32>
+  // CHECK: %{{[0-9]+}} = "ttnn.floor"
+  // CHECK-SAME: [[TENSOR]]
+  // CHECK-SAME: [[TENSOR]]
+  // CHECK-SAME: -> [[TENSOR]]
+  %1 = "ttir.floor"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32>
+  return %1 : tensor<64x128xf32>
+}
diff --git a/test/ttmlir/Silicon/TTNN/perf_unit/test_perf_isfinite.mlir b/test/ttmlir/Silicon/TTNN/perf_unit/test_perf_isfinite.mlir
new file mode 100644
index 000000000..ce0146be4
--- /dev/null
+++ b/test/ttmlir/Silicon/TTNN/perf_unit/test_perf_isfinite.mlir
@@ -0,0 +1,17 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir
+// RUN: FileCheck %s --input-file=%t.mlir
+// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+#any_device_tile = #tt.operand_constraint<dram|l1|tile|any_device_tile>
+
+func.func @is_finite(%arg0: tensor<64x128xf32>) -> tensor<64x128xbf16> {
+  // CHECK: %[[C:.*]] = "ttnn.empty"
+  // CHECK-SAME: [[TENSOR:tensor<64x128xbf16,]]
+  %0 = tensor.empty() : tensor<64x128xbf16>
+  // CHECK: %[[C:.*]] = "ttnn.isfinite"
+  // CHECK-SAME: tensor<64x128xf32,
+  // CHECK-SAME: [[TENSOR]]
+  // CHECK-SAME: -> [[TENSOR]]
+  %1 = "ttir.isfinite"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xbf16>) -> tensor<64x128xbf16>
+  return %1 : tensor<64x128xbf16>
+}
diff --git a/test/ttmlir/Silicon/TTNN/simple_eltwise.mlir b/test/ttmlir/Silicon/TTNN/simple_eltwise.mlir
index 7e6bcbcda..cdf0a0374 100644
--- a/test/ttmlir/Silicon/TTNN/simple_eltwise.mlir
+++ b/test/ttmlir/Silicon/TTNN/simple_eltwise.mlir
@@ -42,6 +42,30 @@ func.func @div(%arg0: tensor<64x128xf32>, %arg1: tensor<64x128xf32>) -> tensor<6
   return %1 : tensor<64x128xf32>
 }
 
+func.func @floor(%arg0: tensor<64x128xf32>) -> tensor<64x128xf32> {
+  // CHECK: %{{[0-9]+}} = "ttnn.empty"
+  // CHECK-SAME: [[TENSOR:tensor<64x128xf32,]]
+  %0 = tensor.empty() : tensor<64x128xf32>
+  // CHECK: %{{[0-9]+}} = "ttnn.floor"
+  // CHECK-SAME: [[TENSOR]]
+  // CHECK-SAME: [[TENSOR]]
+  // CHECK-SAME: -> [[TENSOR]]
+  %1 = "ttir.floor"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32>
+  return %1 : tensor<64x128xf32>
+}
+
+func.func @is_finite(%arg0: tensor<64x128xf32>) -> tensor<64x128xbf16> {
+  // CHECK: %[[C:.*]] = "ttnn.empty"
+  // CHECK-SAME: [[TENSOR:tensor<64x128xbf16,]]
+  %0 = tensor.empty() : tensor<64x128xbf16>
+  // CHECK: %[[C:.*]] = "ttnn.isfinite"
+  // CHECK-SAME: tensor<64x128xf32,
+  // CHECK-SAME: [[TENSOR]]
+  // CHECK-SAME: -> [[TENSOR]]
+  %1 = "ttir.isfinite"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xbf16>) -> tensor<64x128xbf16>
+  return %1 : tensor<64x128xbf16>
+}
+
 func.func @minimum(%arg0: tensor<64x128xf32>, %arg1: tensor<64x128xf32>) -> tensor<64x128xf32> {
   // CHECK: %[[C:.*]] = "ttnn.empty"
   // CHECK-SAME: [[TENSOR:tensor<64x128xf32,]]